PyPI - eckity-bert-gp - Versions diffs - 0.1.0__py3-none-any.whl - Mend

eckity-bert-gp 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

aux_func.py +95 -0
bert_mutation.py +243 -0
eckity_bert_gp/__init__.py +6 -0
eckity_bert_gp-0.1.0.dist-info/METADATA +137 -0
eckity_bert_gp-0.1.0.dist-info/RECORD +9 -0
eckity_bert_gp-0.1.0.dist-info/WHEEL +5 -0
eckity_bert_gp-0.1.0.dist-info/licenses/LICENSE +28 -0
eckity_bert_gp-0.1.0.dist-info/top_level.txt +4 -0
uniform_mutation.py +162 -0

aux_func.py ADDED Viewed

@@ -0,0 +1,95 @@
+from typing import List
+import numpy as np
+from eckity.genetic_encodings.gp import Tree, FunctionNode, TerminalNode
+def prefix_to_postfix(tokens, precedence):
+    """Return the prefix positions of `tokens` rearranged into postfix order.
+    `tokens` is a prefix-notation token sequence. A token equal to 'const' or starting
+    with 'x' is treated as a terminal; every other token is looked up in `precedence`,
+    whose values are pairs with the arity as second element.
+    The result is a list of original (prefix) indexes, children first, operator last.
+    """
+    pending = []
+    last_position = len(tokens) - 1
+    # walk the prefix expression right-to-left so operands are on the stack before their operator
+    for offset, token in enumerate(tokens[::-1]):
+        position = last_position - offset
+        if token == 'const' or token.startswith('x'):
+            n_children = 0
+        else:
+            _, n_children = precedence[token]
+        expression = ((position, token),)
+        if n_children > 0:
+            # the first popped entry is the left-most child
+            children = [pending.pop() for _ in range(n_children)]
+            expression = sum(children, tuple()) + expression
+        pending.append(expression)
+    assert len(pending) == 1
+    return [position for position, _ in pending[0]]
+def prefix_to_infix(tokens, precedence):
+    """Return the prefix positions of `tokens` rearranged into infix order.
+    `tokens` is a prefix-notation token sequence. A token equal to 'const' or starting
+    with 'x' is treated as a terminal; every other token is looked up in `precedence`,
+    whose values are pairs with the arity as second element.
+    Unary operators are placed before their operand and binary operators between their
+    operands; any other positive arity raises ValueError("Invalid arity").
+    """
+    pending = []
+    last_position = len(tokens) - 1
+    # walk the prefix expression right-to-left so operands are on the stack before their operator
+    for offset, token in enumerate(tokens[::-1]):
+        position = last_position - offset
+        if token == 'const' or token.startswith('x'):
+            n_children = 0
+        else:
+            _, n_children = precedence[token]
+        current = ((position, token),)
+        if not n_children > 0:
+            pending.append(current)
+            continue
+        # the first popped entry is the left-most child
+        children = [pending.pop() for _ in range(n_children)]
+        if len(children) == 1:
+            pending.append(current + children[0])
+        elif len(children) == 2:
+            left, right = children
+            pending.append(left + current + right)
+        else:
+            raise ValueError("Invalid arity")
+    assert len(pending) == 1
+    return [position for position, _ in pending[0]]
+def get_inverse_mapping(origin_to_target_mapping: np.ndarray) -> np.ndarray:
+    """Invert an index mapping: if entry i of the input holds j, entry j of the output holds i.
+    The output has the same shape and dtype as the input (np.zeros_like), so target
+    positions that never occur in the input keep the value 0.
+    """
+    target_to_origin = np.zeros_like(origin_to_target_mapping)
+    for origin_position, target_position in enumerate(origin_to_target_mapping):
+        target_to_origin[target_position] = origin_position
+    return target_to_origin
+def program_to_labels(program: Tree, mask_indexes) -> List[str]:
+    """Convert the nodes of `program.tree` into a list of string labels, one per node.
+    A node whose position is flagged in `mask_indexes` becomes '<mask>'; positions beyond
+    the end of `mask_indexes` are treated as unmasked. Function nodes are labelled with the
+    name of their function, terminal nodes with their value when it is a string and with
+    'const' otherwise. Any other node type raises ValueError.
+    """
+    labels = []
+    for position, node in enumerate(program.tree):
+        if position < len(mask_indexes) and mask_indexes[position]:
+            label = '<mask>'
+        elif type(node) is FunctionNode:
+            label = node.function.__name__
+        elif type(node) is TerminalNode:
+            # non-string terminal values are collapsed into the single 'const' token
+            label = node.value if type(node.value) is str else 'const'
+        else:
+            raise ValueError(f"Node type {type(node)} not supported")
+        labels.append(label)
+    return labels

bert_mutation.py ADDED Viewed

@@ -0,0 +1,243 @@
+import random
+import numpy as np
+import torch
+from eckity.genetic_encodings.gp import TerminalNode, FunctionNode
+from sklearn.preprocessing import LabelEncoder
+from transformers import BertConfig
+from transformers import BertForMaskedLM
+from torch.optim import Adam
+from aux_func import program_to_labels
+def convert_arity_to_tensors(allowed_operators, allowed_operators_arity, arity_of_masked_locations,
+                             mask_indices):
+    """Return (allowed_operators_arity, arity_of_masked_locations) as LongTensors.
+    A missing (None) arity is replaced by zeros: one per allowed operator, or one per masked
+    location, so that every operator matches every masked location.
+    """
+    def _to_long(values, sized_fallback):
+        # if no arity is provided, assume all operators have the same arity (set as 0)
+        if values is None:
+            values = torch.zeros(len(sized_fallback))
+        return torch.Tensor(values).type(torch.LongTensor)
+    masked_locations_arity = _to_long(arity_of_masked_locations, mask_indices)
+    operators_arity = _to_long(allowed_operators_arity, allowed_operators)
+    return operators_arity, masked_locations_arity
+def get_transformed_notation(arity_ndarray, masked_nodes, program_tokens, unmasked_tokens):
+    """Return the program in the notation fed to the model; the order is left unchanged.
+    Returns a 5-tuple: the mask arities and program tokens exactly as passed in, the masked
+    node positions as a numpy array, an identity index array with one entry per unmasked
+    token, and the argsort of `masked_nodes`.
+    """
+    # default order: every token keeps its own position
+    identity_indices = np.arange(len(unmasked_tokens))
+    mask_sort_order = np.argsort(masked_nodes)
+    masked_positions = np.array(masked_nodes)
+    return arity_ndarray, masked_positions, program_tokens, identity_indices, mask_sort_order
+class BertMutation:
+    """Masked-language-model policy that proposes replacement tokens for masked GP-tree nodes.
+    A BertForMaskedLM is built locally from a BertConfig. `mutate` rewrites the masked nodes
+    of a tree in place, scores the result with `get_fitness_func` and queues the trajectory
+    log-probability together with its reward; `run_epoch` performs one optimizer step once
+    `batch_size` rewards have been queued.
+    """
+    # todo: when a program is too long, take only the last 2048 tokens
+    def __init__(self, operators_list, constant_names, get_fitness_func, batch_size=64, learning_rate=1e-3,
+                 adam_decay=0,
+                 epsilon_greedy=0.01, word_embedding_dim=120, context_size=2048, n_layers=3, n_attention_heads=3,
+                 internal_size=128, clip_grad_norm=1.0, full_trajectory_query=True, diff_reward=True,
+                 function_mappings=None, terminals_mappings=None, higher_is_better=True, allow_constant_terminals=True):
+        """
+        Parameters
+        ----------
+        operators_list: names of the function tokens of the vocabulary
+        constant_names: list of terminal names (None is treated as an empty list)
+        get_fitness_func: callable that receives the tree and returns its fitness
+        batch_size: number of queued rewards required before `run_epoch` updates the model
+        learning_rate, adam_decay: `lr` and `weight_decay` of the Adam optimizer
+        epsilon_greedy: probability of picking a uniformly random allowed token instead of sampling
+        word_embedding_dim, n_layers, n_attention_heads, internal_size, context_size: passed to
+        BertConfig as hidden_size, num_hidden_layers, num_attention_heads, intermediate_size and
+        max_position_embeddings
+        clip_grad_norm: maximum gradient norm, or None to disable clipping
+        full_trajectory_query: if True, the model is queried again after every sampled token
+        diff_reward: if True, the reward is the fitness change, otherwise the new fitness
+        function_mappings: dict from function name to the callable stored in FunctionNode
+        terminals_mappings: dict keyed by terminal name; by default each constant name is mapped
+        to its index and 'const' (when allowed) to len(constant_names)
+        higher_is_better: whether a larger fitness value is an improvement
+        allow_constant_terminals: if True, the 'const' token is added to the terminals
+        """
+        if constant_names is None:
+            constant_names = []
+        # functions + constants + [<mask>] + [const]
+        self.vocab_size = len(operators_list) + len(constant_names) + 2
+        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+        print(f'Using device: {self.device}')
+        self.bert_config = {
+            'vocab_size': self.vocab_size,
+            'hidden_size': word_embedding_dim,
+            'num_hidden_layers': n_layers,
+            'num_attention_heads': n_attention_heads,
+            'intermediate_size': internal_size,
+            'max_position_embeddings': context_size
+        }
+        self.model = BertForMaskedLM(BertConfig(**self.bert_config)).to(self.device)
+        # not read by any method of this class; kept so the attribute stays available
+        self.action_probabilities = []
+        self.batch_size = batch_size
+        if allow_constant_terminals:
+            self.terminals = np.array(constant_names + ['const'])
+        else:
+            self.terminals = np.array(constant_names)
+        self.token_encoder = LabelEncoder().fit(
+            list(operators_list) + ['<mask>'] + list(self.terminals))
+        self.mask_id = self.token_encoder.transform(['<mask>'])[0]
+        # queues filled by mutate() and drained by run_epoch()
+        self.trajectory_probabilities = []
+        self.rewards = []
+        self.n_features = len(constant_names)
+        self.get_fitness_func = get_fitness_func
+        self.optimizer = Adam(self.model.parameters(), lr=learning_rate, weight_decay=adam_decay)
+        self.epsilon_greedy = epsilon_greedy
+        self.clip_grad_norm = clip_grad_norm
+        self.full_trajectory_query = full_trajectory_query
+        self.diff_reward = diff_reward
+        self.function_mappings = function_mappings
+        if terminals_mappings is None:
+            self.terminals_mappings = {var: i for i, var in enumerate(constant_names)}
+            if allow_constant_terminals:
+                self.terminals_mappings['const'] = self.n_features
+        else:
+            self.terminals_mappings = terminals_mappings
+        self.higher_is_better = higher_is_better
+    def mutate(self, program_tokens, allowed_operators, tree_program, masked_nodes,
+               arity_ndarray=None, allowed_operators_arity=None, terminal_traj=False):
+        """
+        Replace the masked nodes of `tree_program` with tokens sampled from the model, then queue
+        the resulting reward and trajectory log-probability and call `run_epoch`.
+        Parameters
+        ----------
+        program_tokens: list of string tokens (length == program length). Example : ['add', 'x', 'const']
+        allowed_operators: list of allowed operators to be used in the mutation. Example: ['add', 'sub']
+        tree_program: eckity object of the tree
+        masked_nodes: indexes of the masked nodes in the program
+        arity_ndarray: numpy array of the arity of the masked nodes (length == masked_nodes length)
+        allowed_operators_arity: numpy array of the arity of the allowed operators (length == allowed_operators length)
+        terminal_traj: boolean, if True, the mutation will be done on the terminal nodes, otherwise on
+        the function nodes
+        Returns
+        -------
+        None. `tree_program.tree` is modified in place.
+        """
+        unmasked_tokens = program_to_labels(tree_program, [])
+        mapped_mask_arity, mapped_masked_nodes, mapped_tokens, mapped_tokens_indices, sorted_mask_order = get_transformed_notation(
+            arity_ndarray, masked_nodes, program_tokens, unmasked_tokens)
+        initial_fitness = self.get_fitness_func(tree_program)
+        tokens_ids = torch.Tensor([self.token_encoder.transform(mapped_tokens)]).type(torch.LongTensor).to(self.device)
+        logits = self.model(tokens_ids, attention_mask=torch.ones_like(tokens_ids).to(self.device)).logits
+        mask_indices = torch.where(tokens_ids == self.mask_id)[1]
+        suggested_mutation, trajectory_action_probabilities = self.masked_trajectory_generation(allowed_operators,
+                                                                                                logits, mask_indices,
+                                                                                                mapped_mask_arity,
+                                                                                                allowed_operators_arity,
+                                                                                                torch.clone(tokens_ids))
+        # return the suggested mutation to the original order
+        # notice that the suggested_mutation is returned in sorted order, so we use the sorted_mask_order to realign it
+        realigned_order = mapped_tokens_indices[mapped_masked_nodes[sorted_mask_order]]
+        for node, current_mutation in zip(realigned_order, suggested_mutation):
+            if current_mutation == 'const':
+                # 'const' needs no mapping entry: a fresh value is drawn from the tree's erc_range
+                if type(tree_program.erc_range[0]) is float:
+                    rand_constant = random.uniform(*tree_program.erc_range)
+                else:
+                    rand_constant = random.randint(*tree_program.erc_range)
+                tree_program.tree[node] = TerminalNode(rand_constant)
+                continue
+            if not terminal_traj:
+                # a KeyError here means the sampled operator has no callable in function_mappings
+                tree_program.tree[node] = FunctionNode(self.function_mappings[current_mutation])
+                continue
+            terminal_mapping = self.terminals_mappings[current_mutation]
+            # function_mappings may be None (its default), in which case no terminal can collide with it
+            if self.function_mappings is not None and current_mutation in self.function_mappings:
+                tree_program.tree[node] = FunctionNode(terminal_mapping)
+            elif callable(terminal_mapping):
+                tree_program.tree[node] = TerminalNode(terminal_mapping)
+            else:
+                tree_program.tree[node] = TerminalNode(current_mutation)
+        new_fitness = self.get_fitness_func(tree_program)
+        if self.diff_reward:
+            reward = (new_fitness - initial_fitness)
+        else:
+            reward = new_fitness
+        # run_epoch minimizes log-probability * advantage, so a desirable outcome must be negative
+        if self.higher_is_better:
+            reward *= -1
+        trajectory_probability = torch.log(torch.cat(trajectory_action_probabilities)).sum().unsqueeze(
+            0).unsqueeze(0)
+        self.rewards.append(torch.full_like(trajectory_probability, reward))
+        self.trajectory_probabilities.append(trajectory_probability)
+        self.run_epoch()
+    def masked_trajectory_generation(self, allowed_operators, logits, mask_indices, arity_of_masked_locations,
+                                     allowed_operators_arity, tokens_ids):
+        """
+        Sample one replacement token per masked position, in the order given by `mask_indices`.
+        :param tokens_ids: encoded program tokens (a batch holding one program)
+        :param allowed_operators: list of allowed operators
+        :param logits: model logits
+        :param mask_indices: indices of the masked tokens
+        :param arity_of_masked_locations: arity of the masked tokens
+        :param allowed_operators_arity: arity of the allowed operators
+        :return: suggested mutation and trajectory action probabilities
+        """
+        allowed_operators_arity, arity_of_masked_locations = convert_arity_to_tensors(allowed_operators,
+                                                                                      allowed_operators_arity,
+                                                                                      arity_of_masked_locations,
+                                                                                      mask_indices)
+        masked_softmax_indexes = torch.Tensor(self.token_encoder.transform(allowed_operators)).type(torch.LongTensor)
+        suggested_mutation = []
+        trajectory_action_probabilities = []
+        # masked trajectory generation
+        for trajectory_index in range(len(mask_indices)):
+            # only operators whose arity equals the arity of the masked node are candidates
+            current_mask_arity = arity_of_masked_locations[trajectory_index]
+            current_allowed_operators = allowed_operators[current_mask_arity == allowed_operators_arity]
+            current_masked_softmax_indexes = masked_softmax_indexes[
+                current_mask_arity == allowed_operators_arity].to(self.device)
+            # get the probability of the allowed operators and normalize them
+            mask_index = torch.tensor([mask_indices[trajectory_index]]).type(torch.LongTensor)
+            operators_proba = torch.softmax(logits[0, mask_index], dim=-1)[:,
+                              current_masked_softmax_indexes].to(self.device)
+            operators_proba = operators_proba / operators_proba.sum(dim=-1).unsqueeze(-1)
+            # sample an operator with epsilon greedy
+            if torch.rand(1) < self.epsilon_greedy:
+                sampled_operators_dist = torch.randint(0, len(current_allowed_operators), (1,)).to(self.device)
+            else:
+                sampled_operators_dist = torch.distributions.Categorical(operators_proba).sample().to(self.device)
+            # the recorded probability is the model's probability of the chosen token in both branches
+            sampled_actions_probability = torch.gather(operators_proba, dim=1,
+                                                       index=sampled_operators_dist.unsqueeze(-1))
+            trajectory_action_probabilities.append(sampled_actions_probability)
+            suggested_mutation += [current_allowed_operators[sampled_operators_dist.detach().cpu().numpy()][0]]
+            if self.full_trajectory_query:
+                # write the chosen token into the sequence and query the model again, so the
+                # logits used for the remaining masks take this choice into account
+                tokens_ids = torch.clone(tokens_ids)
+                tokens_ids[0, mask_index] = current_masked_softmax_indexes[sampled_operators_dist]
+                logits = self.model(tokens_ids, attention_mask=torch.ones_like(tokens_ids).to(self.device)).logits
+        return suggested_mutation, trajectory_action_probabilities
+    def run_epoch(self, numerical_stability=1e-10):
+        """
+        Perform one optimizer step on the queued trajectories once at least `batch_size` rewards
+        are queued; otherwise do nothing. Both queues are emptied when a step is taken.
+        :param numerical_stability: value added to the reward standard deviation before dividing
+        """
+        current_batch_size = sum([len(reward) for reward in self.rewards])
+        if current_batch_size < self.batch_size:
+            return
+        all_traj_proba = torch.cat(self.trajectory_probabilities, dim=0).to(self.device)
+        all_rewards = torch.cat(self.rewards, dim=0).to(self.device)
+        self.trajectory_probabilities.clear()
+        self.rewards.clear()
+        self.optimizer.zero_grad()
+        # torch.std of a single value is NaN, which would turn the loss and then the weights
+        # into NaN; with one reward the deviation is taken as 0, giving a zero advantage
+        reward_std = torch.std(all_rewards) if all_rewards.numel() > 1 else 0.0
+        advantages = (all_rewards - torch.mean(all_rewards)) / (reward_std + numerical_stability)
+        advantages = advantages.to(self.device)
+        loss = torch.mean(all_traj_proba * advantages).to(self.device)
+        loss.backward()
+        if self.clip_grad_norm is not None:
+            torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.clip_grad_norm)
+        self.optimizer.step()
+        print(f'loss: {loss}, reward: {torch.mean(all_rewards)}')

eckity_bert_gp/__init__.py ADDED Viewed

@@ -0,0 +1,6 @@
+"""BERT mutation operator for EC-KitY genetic programming."""
+from bert_mutation import BertMutation
+from uniform_mutation import BERTUniformMutation
+__all__ = ["BertMutation", "BERTUniformMutation"]

eckity_bert_gp-0.1.0.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,137 @@
+Metadata-Version: 2.4
+Name: eckity-bert-gp
+Version: 0.1.0
+Summary: BERT mutation operator for EC-KitY genetic programming
+Author: EC-KitY
+License-Expression: BSD-3-Clause
+Project-URL: Homepage, https://github.com/EC-KitY/BERT-Mutation-for-GP
+Project-URL: Repository, https://github.com/EC-KitY/BERT-Mutation-for-GP
+Project-URL: Paper, https://doi.org/10.3390/math13050779
+Classifier: Development Status :: 3 - Alpha
+Classifier: Intended Audience :: Science/Research
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+Requires-Python: >=3.9
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: eckity~=0.4.1
+Requires-Dist: numpy>=2.0.2
+Requires-Dist: overrides>=7.7.0
+Requires-Dist: scikit-learn>=1.5.0
+Requires-Dist: scipy>=1.13.0
+Requires-Dist: torch>=2.7.1
+Requires-Dist: transformers>=4.50.0
+Provides-Extra: dev
+Requires-Dist: build>=1.2; extra == "dev"
+Requires-Dist: pandas>=2.2; extra == "dev"
+Requires-Dist: pytest>=8.0; extra == "dev"
+Requires-Dist: twine>=5.0; extra == "dev"
+Dynamic: license-file
+# BERT Mutation for EC-KitY Genetic Programming
+`eckity-bert-gp` provides the BERT mutation operator for tree-based genetic programming in [EC-KitY](https://github.com/EC-KitY/EC-KitY).
+The operator is described in **“BERT Mutation: Deep Transformer Model for Masked Uniform Mutation in Genetic Programming”**, Mathematics 2025, 13(5), 779 ([paper](https://doi.org/10.3390/math13050779)). It masks selected GP-tree nodes and uses a compact BERT masked-language model to sample replacements that preserve the required node arity.
+## Installation
+```bash
+pip install eckity-bert-gp
+```
+## Public API
+```python
+from eckity_bert_gp import BertMutation, BERTUniformMutation
+```
+`BertMutation` owns and trains the BERT policy. `BERTUniformMutation` adapts that policy to EC-KitY's genetic-operator interface.
+## Usage
+The BERT model needs the function names, terminal names, fitness callback, and mappings back to the EC-KitY functions:
+```python
+import numpy as np
+from eckity.base.untyped_functions import f_add, f_div, f_mul, f_sub
+from eckity_bert_gp import BertMutation, BERTUniformMutation
+function_set = [f_add, f_sub, f_mul, f_div]
+terminal_set = ["x", "y", "z"]
+function_mappings = {function.__name__: function for function in function_set}
+bert_model = BertMutation(
+    operators_list=np.array(list(function_mappings)),
+    constant_names=terminal_set,
+    get_fitness_func=evaluator.evaluate_individual,
+    context_size=256,
+    word_embedding_dim=20,
+    n_layers=1,
+    n_attention_heads=1,
+    function_mappings=function_mappings,
+    higher_is_better=False,
+)
+bert_mutation = BERTUniformMutation(
+    bert_model=bert_model,
+    probability=1.0,
+    node_probability=0.1,
+)
+```
+Add `bert_mutation` to the EC-KitY subpopulation's `operators_sequence`.
+- `get_fitness_func` accepts an EC-KitY GP tree and returns its fitness.
+- `function_mappings` maps each function name used by BERT back to the callable stored in GP trees.
+- Terminal mappings default to a name-to-index lookup built from the names supplied in `constant_names`, plus a `const` entry when constant terminals are allowed.
+- `probability` controls whether the EC-KitY mutation operator runs.
+- `node_probability` controls the probability of masking each tree node.
+- `context_size` must be large enough for the longest tree representation expected during evolution.
+The model is initialized locally from `BertConfig`; installing or constructing the operator does not download pretrained model weights.
+## Compatibility
+- Python 3.9 or newer
+- EC-KitY 0.4.x
+- NumPy 2.0.2 or newer
+- SciPy 1.13.0 or newer
+- PyTorch 2.7.1 or newer
+- Transformers 4.50.0 or newer
+- scikit-learn 1.5.0 or newer
+These bounds are compatible with `eckity-dnc` and `eckity-bert-ga`; none of the three packages directly depends on another operator package.
+## Repository experiment
+The repository includes the paper's experiment runner and datasets. They are development resources and are not included in the wheel.
+Install the development dependencies and run the symbolic-regression example:
+```bash
+python -m pip install -e ".[dev]"
+python runner.py
+```
+Additional benchmark data is stored under `data/`, and Artificial Ant maps are stored under `ant_opt/`.
+## Development
+With [uv](https://docs.astral.sh/uv/):
+```bash
+uv sync --extra dev --resolution lowest-direct
+uv run pytest
+uv build
+```
+Release preparation and manual PyPI upload commands are documented in [`RELEASING.md`](RELEASING.md).
+## License
+This project is licensed under the BSD 3-Clause License. See [`LICENSE`](LICENSE).

eckity_bert_gp-0.1.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,9 @@
+aux_func.py,sha256=Mu5eSalMuQyaQAbABo9J1Cd-naFn8FVEai5MdNG5Gyo,2821
+bert_mutation.py,sha256=nN_3tr6Ia1EnUuDF_A_xtukznRQ-Kh-cbpvlOSnMa6w,12523
+uniform_mutation.py,sha256=Z8KIEyd6pbBuhKtrf0ehm6hxHRISFIb4denizW4fZzA,6570
+eckity_bert_gp/__init__.py,sha256=szBUoo1sDCfoUmOSS_UX8DMZoG3xjebT6ZH4euKPSKM,202
+eckity_bert_gp-0.1.0.dist-info/licenses/LICENSE,sha256=ljMDMJVYrp0IntMunve15Q7PHVxid-NXY00rJis8WTs,1522
+eckity_bert_gp-0.1.0.dist-info/METADATA,sha256=vM0156WoUVAVrOwznXwKStmI3rOTX85jXL2UukYYd2c,4923
+eckity_bert_gp-0.1.0.dist-info/WHEEL,sha256=K260EYznzXsJYBQGqmI8VTxEdiZYNvDZwW9cBh9-_MA,91
+eckity_bert_gp-0.1.0.dist-info/top_level.txt,sha256=s6B-haXAtyxlPcyw9Yes5ISHzzZ_wpQM7r3ZPERQwhY,55
+eckity_bert_gp-0.1.0.dist-info/RECORD,,

eckity_bert_gp-0.1.0.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,5 @@
+Wheel-Version: 1.0
+Generator: setuptools (83.0.0)
+Root-Is-Purelib: true
+Tag: py3-none-any

eckity_bert_gp-0.1.0.dist-info/licenses/LICENSE ADDED Viewed

@@ -0,0 +1,28 @@
+BSD 3-Clause License
+Copyright (c) 2024, EC-KitY
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+3. Neither the name of the copyright holder nor the names of its
+   contributors may be used to endorse or promote products derived from
+   this software without specific prior written permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

eckity_bert_gp-0.1.0.dist-info/top_level.txt ADDED Viewed

@@ -0,0 +1,4 @@
+aux_func
+bert_mutation
+eckity_bert_gp
+uniform_mutation

uniform_mutation.py ADDED Viewed

@@ -0,0 +1,162 @@
+from typing import Any, List, Tuple
+from eckity.base.utils import arity
+from overrides import override
+import numpy as np
+from eckity.genetic_encodings.gp import Tree, TreeNode, FunctionNode, TerminalNode
+from eckity.genetic_operators import FailableOperator
+import random
+from bert_mutation import BertMutation
+from aux_func import program_to_labels
+class UniformNodeMutation(FailableOperator):
+    """Mutation operator that replaces individual tree nodes, each chosen with `node_probability`.
+    A function node is replaced by a random function of the same arity taken from the
+    individual's function set; a terminal node by a random terminal of the same node type.
+    """
+    def __init__(
+            self,
+            probability: float = 1.0,
+            node_probability: float = 0.1,
+            events=None,
+            attempts=1,
+    ):
+        super().__init__(
+            probability=probability, arity=1, events=events, attempts=attempts
+        )
+        # probability with which each single node is selected for replacement
+        self.node_probability = node_probability
+    @override
+    def attempt_operator(
+            self, payload: Any, attempt_num: int
+    ) -> Tuple[bool, Any]:
+        """
+        Perform uniform node mutation: every node flagged by a freshly sampled
+        mask is replaced in place by a compatible random node.
+        Returns
+        -------
+        Tuple[bool, Any]
+            A tuple containing a boolean indicating whether the operator was
+            successful (always True here) and a list of the individuals.
+        """
+        individuals: List[Tree] = payload
+        # all masks are sampled up front, before any tree is modified
+        for individual, node_mask in zip(individuals, self._sample_masks(individuals)):
+            for position, node in enumerate(individual.tree):
+                if not node_mask[position]:
+                    continue
+                individual.tree[position] = self._get_node_replacement(individual, node)
+        self.applied_individuals = individuals
+        return True, individuals
+    def _sample_masks(self, individuals: List[Tree]):
+        """Sample one boolean mask per individual, with one entry per tree node."""
+        keep_probability = 1 - self.node_probability
+        return [
+            np.random.choice([True, False], size=len(individual.tree),
+                             p=[self.node_probability, keep_probability])
+            for individual in individuals
+        ]
+    def _get_node_replacement(self, ind: Tree, node: TreeNode):
+        """Return a random node that can stand in for `node`; raise ValueError for other node types."""
+        node_kind = type(node)
+        if node_kind is FunctionNode:
+            # only functions with the same number of arguments keep the tree valid
+            same_arity_functions = [candidate for candidate in ind.function_set
+                                    if arity(candidate) == node.n_args]
+            return FunctionNode(random.choice(same_arity_functions))
+        if node_kind is TerminalNode:
+            return ind.random_terminal(node_type=node.node_type)
+        raise ValueError(f"Node type {type(node)} not supported")
+class BERTUniformMutation(FailableOperator):
+    """Node mutation operator whose replacement nodes are chosen by a `BertMutation` model.
+    Nodes are masked at random; masked function nodes and masked terminal nodes are then
+    handed to the model in two separate `mutate` calls.
+    """
+    def __init__(
+            self,
+            bert_model: BertMutation,
+            probability: float = 1.0,
+            node_probability: float = 0.1,
+            max_trajectory_length=100,
+            events=None,
+            attempts=1,
+    ):
+        super().__init__(
+            probability=probability, arity=1, events=events, attempts=attempts
+        )
+        self.bert_model = bert_model
+        # probability with which each single node is masked
+        self.node_probability = node_probability
+        # caps the expected number of masked nodes per tree (see _sample_masks)
+        self.max_trajectory_length = max_trajectory_length
+    @override
+    def attempt_operator(
+            self, payload: Any, attempt_num: int
+    ) -> Tuple[bool, Any]:
+        """
+        Perform BERT-guided node mutation on a single individual: sample a node
+        mask and let the model replace the masked function nodes, then the
+        masked terminal nodes, in place.
+        Returns
+        -------
+        Tuple[bool, Any]
+            A tuple containing a boolean indicating whether the operator was
+            successful (always True here) and a list of the individuals.
+        """
+        individuals: List[Tree] = payload
+        sampled_masks = self._sample_masks(individuals)
+        assert len(individuals) == 1
+        individual, mutation_mask = individuals[0], sampled_masks[0]
+        function_mappings = self.bert_model.function_mappings
+        allowed_functions = np.array(list(function_mappings.keys()))
+        allowed_functions_arity = np.array([arity(func) for func in list(function_mappings.values())])
+        is_function = np.array([type(node) is FunctionNode for node in individual.tree])
+        function_mask = is_function & mutation_mask
+        terminal_mask = ~is_function & mutation_mask
+        masked_functions = np.where(function_mask)[0]
+        masked_variables = np.where(terminal_mask)[0]
+        if len(masked_functions) > 0:
+            function_labels = program_to_labels(individual, mutation_mask & is_function)
+            masked_arities = self._get_arity_of_masked_nodes(individual, mutation_mask)
+            self.bert_model.mutate(function_labels, allowed_functions, individual,
+                                   masked_functions, masked_arities,
+                                   allowed_functions_arity)
+        if len(masked_variables) > 0:
+            # labels are taken after the function pass, so they reflect the updated tree
+            terminal_labels = program_to_labels(individual, mutation_mask & ~is_function)
+            self.bert_model.mutate(terminal_labels, self.bert_model.terminals, individual,
+                                   masked_variables, None, None, terminal_traj=True)
+        self.applied_individuals = individuals
+        return True, individuals
+    def _sample_masks(self, individuals: List[Tree]):
+        """Sample one boolean mask per individual, with one entry per tree node.
+        When the expected number of masked nodes would reach `max_trajectory_length`, the
+        per-node probability is lowered to max_trajectory_length / tree size.
+        """
+        masks = []
+        for individual in individuals:
+            n_nodes = len(individual.tree)
+            if n_nodes * self.node_probability < self.max_trajectory_length:
+                mask_probability = self.node_probability
+            else:
+                mask_probability = self.max_trajectory_length / n_nodes
+            masks.append(np.random.choice([True, False], size=n_nodes,
+                                          p=[mask_probability, 1 - mask_probability]))
+        return masks
+    def _get_arity_of_masked_nodes(self, ind: Tree, mask: np.ndarray):
+        """Return the argument counts of the masked function nodes, in tree order."""
+        return np.array([
+            node.n_args
+            for position, node in enumerate(ind.tree)
+            if mask[position] and type(node) is FunctionNode
+        ])
+    def _get_node_replacement(self, ind: Tree, node: TreeNode):
+        """Return a random node that can stand in for `node`; raise ValueError for other node types."""
+        node_kind = type(node)
+        if node_kind is FunctionNode:
+            # only functions with the same number of arguments keep the tree valid
+            same_arity_functions = [candidate for candidate in ind.function_set
+                                    if arity(candidate) == node.n_args]
+            return FunctionNode(random.choice(same_arity_functions))
+        if node_kind is TerminalNode:
+            return ind.random_terminal(node_type=node.node_type)
+        raise ValueError(f"Node type {type(node)} not supported")