eckity-bert-gp 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
aux_func.py ADDED
@@ -0,0 +1,95 @@
1
+ from typing import List
2
+
3
+ import numpy as np
4
+ from eckity.genetic_encodings.gp import Tree, FunctionNode, TerminalNode
5
+
6
+
7
def prefix_to_postfix(tokens, precedence):
    """Return the original token positions rearranged into postfix order.

    Parameters
    ----------
    tokens : sequence of str
        Expression in prefix (Polish) notation. A token equal to ``'const'``
        or beginning with ``'x'`` is treated as a terminal (arity 0); every
        other token is looked up in ``precedence``.
    precedence : mapping
        Maps each non-terminal token to a 2-item entry whose second item is
        the token's arity (the first item is ignored here).

    Returns
    -------
    list of int
        Indexes into ``tokens`` listed in postfix order (operands first,
        operator last).

    Raises
    ------
    AssertionError
        If the tokens do not reduce to exactly one expression.
    """
    pending = []  # stack of already-converted sub-expressions
    backwards = tokens[::-1]
    last_position = len(backwards) - 1

    # Scanning a prefix expression right-to-left guarantees that the operands
    # of an operator are already on the stack when the operator is reached.
    for offset, token in enumerate(backwards):
        position = last_position - offset

        if token == 'const' or token.startswith('x'):
            arity = 0
        else:
            _ignored, arity = precedence[token]

        entry = ((position, token),)
        if arity > 0:
            operands = [pending.pop() for _ in range(arity)]
            # operands (in pop order) followed by the operator itself
            pending.append(sum(operands, tuple()) + entry)
        else:
            pending.append(entry)

    assert len(pending) == 1
    return [item[0] for item in pending[0]]
34
+
35
+
36
def prefix_to_infix(tokens, precedence):
    """Return the original token positions rearranged into infix order.

    Parameters
    ----------
    tokens : sequence of str
        Expression in prefix (Polish) notation. A token equal to ``'const'``
        or beginning with ``'x'`` is treated as a terminal (arity 0); every
        other token is looked up in ``precedence``.
    precedence : mapping
        Maps each non-terminal token to a 2-item entry whose second item is
        the token's arity (the first item is ignored here).

    Returns
    -------
    list of int
        Indexes into ``tokens`` listed in infix order: a unary operator comes
        before its operand, a binary operator sits between its two operands.

    Raises
    ------
    ValueError
        If an operator has an arity other than 1 or 2.
    AssertionError
        If the tokens do not reduce to exactly one expression.
    """
    pending = []  # stack of already-converted sub-expressions
    backwards = tokens[::-1]
    last_position = len(backwards) - 1

    # Right-to-left scan: operands are on the stack before their operator.
    for offset, token in enumerate(backwards):
        position = last_position - offset

        if token == 'const' or token.startswith('x'):
            arity = 0
        else:
            _ignored, arity = precedence[token]

        entry = ((position, token),)
        if not arity > 0:
            pending.append(entry)
            continue

        operands = [pending.pop() for _ in range(arity)]
        operand_count = len(operands)
        if operand_count == 1:
            combined = entry + operands[0]
        elif operand_count == 2:
            combined = operands[0] + entry + operands[1]
        else:
            raise ValueError("Invalid arity")
        pending.append(combined)

    assert len(pending) == 1
    return [item[0] for item in pending[0]]
71
+
72
+
73
def get_inverse_mapping(origin_to_target_mapping: np.ndarray) -> np.ndarray:
    """Invert an index mapping.

    For every position ``i`` holding target ``t`` in the input, the result
    holds ``i`` at position ``t``. Slots that are never targeted keep the
    initial value 0; the output has the same shape and dtype as the input.
    """
    target_to_origin = np.zeros_like(origin_to_target_mapping)
    # Explicit loop (rather than one fancy-index assignment) so that, if a
    # target occurs more than once, the last origin position always wins.
    for origin_position, target_position in enumerate(origin_to_target_mapping):
        target_to_origin[target_position] = origin_position
    return target_to_origin
78
+
79
+
80
def program_to_labels(program: Tree, mask_indexes) -> List[str]:
    """Convert a GP tree into one string label per node.

    Parameters
    ----------
    program : Tree
        Object whose ``tree`` attribute is the sequence of nodes to label.
    mask_indexes : sequence
        Per-node flags; a truthy entry at a node's position makes that node's
        label ``'<mask>'``. It may be shorter than the tree (or empty), in
        which case the remaining nodes are never masked.

    Returns
    -------
    List[str]
        ``'<mask>'`` for masked nodes, the function's ``__name__`` for
        function nodes, the value itself for terminals holding a string, and
        ``'const'`` for every other terminal.

    Raises
    ------
    ValueError
        If an unmasked node is neither a ``FunctionNode`` nor a
        ``TerminalNode`` (exact type match, subclasses are rejected).
    """

    def _label_for(position, node):
        if position < len(mask_indexes) and mask_indexes[position]:
            return '<mask>'

        node_kind = type(node)
        if node_kind is FunctionNode:
            return node.function.__name__
        if node_kind is TerminalNode:
            # string terminals are named variables; anything else is a constant
            return node.value if type(node.value) is str else 'const'

        raise ValueError(f"Node type {type(node)} not supported")

    return [_label_for(position, node) for position, node in enumerate(program.tree)]
bert_mutation.py ADDED
@@ -0,0 +1,243 @@
1
+ import random
2
+
3
+ import numpy as np
4
+ import torch
5
+ from eckity.genetic_encodings.gp import TerminalNode, FunctionNode
6
+ from sklearn.preprocessing import LabelEncoder
7
+ from transformers import BertConfig
8
+ from transformers import BertForMaskedLM
9
+ from torch.optim import Adam
10
+ from aux_func import program_to_labels
11
+
12
+
13
def convert_arity_to_tensors(allowed_operators, allowed_operators_arity, arity_of_masked_locations,
                             mask_indices):
    """Return both arity collections as long tensors.

    A missing (``None``) arity collection is replaced by zeros, i.e. every
    operator / masked location is given arity 0 so that all of them match
    each other.

    Returns
    -------
    tuple
        ``(allowed_operators_arity, arity_of_masked_locations)`` - note the
        order is swapped relative to the parameter list.
    """
    masked_arity = arity_of_masked_locations
    operator_arity = allowed_operators_arity

    # fall back to "everything has arity 0" when no arity information is given
    if masked_arity is None:
        masked_arity = torch.zeros(len(mask_indices))
    if operator_arity is None:
        operator_arity = torch.zeros(len(allowed_operators))

    masked_arity, operator_arity = [
        torch.Tensor(values).type(torch.LongTensor) for values in (masked_arity, operator_arity)
    ]
    return operator_arity, masked_arity
23
+
24
+
25
def get_transformed_notation(arity_ndarray, masked_nodes, program_tokens, unmasked_tokens):
    """Build the (identity) notation mapping used by the mutation.

    The program is kept in its given order: tokens and mask arities are
    passed through unchanged, and the token index mapping is the identity.

    Returns
    -------
    tuple
        ``(mapped_mask_arity, mapped_masked_nodes, mapped_tokens,
        mapped_tokens_indices, sorted_mask_order)`` where
        ``mapped_masked_nodes`` is ``masked_nodes`` as an array,
        ``mapped_tokens_indices`` is ``0 .. len(unmasked_tokens) - 1`` and
        ``sorted_mask_order`` is the argsort of ``masked_nodes``.
    """
    identity_indices = np.arange(len(unmasked_tokens))
    ascending_mask_order = np.argsort(masked_nodes)
    masked_nodes_array = np.array(masked_nodes)

    return (
        arity_ndarray,
        masked_nodes_array,
        program_tokens,
        identity_indices,
        ascending_mask_order,
    )
34
+
35
+
36
class BertMutation:
    """Masked-token mutation policy backed by a small BERT masked-LM.

    The model proposes replacements for masked program tokens; every call to
    :meth:`mutate` stores the log-probability of the sampled replacements and
    the resulting reward, and the model is updated with a policy-gradient
    step once ``batch_size`` trajectories have been collected.

    Parameters
    ----------
    operators_list : sequence of str
        Names of the function tokens.
    constant_names : list of str or None
        Names of the named terminals; ``None`` is treated as an empty list.
    get_fitness_func : callable
        Called with the tree program, returns its fitness.
    batch_size : int
        Number of collected trajectories that triggers an optimisation step.
    learning_rate, adam_decay : float
        ``lr`` and ``weight_decay`` of the Adam optimiser.
    epsilon_greedy : float
        Probability of replacing a policy sample by a uniform random choice.
    word_embedding_dim, context_size, n_layers, n_attention_heads, internal_size : int
        Passed to ``BertConfig`` as ``hidden_size``,
        ``max_position_embeddings``, ``num_hidden_layers``,
        ``num_attention_heads`` and ``intermediate_size``.
    clip_grad_norm : float or None
        Maximum gradient norm; ``None`` disables clipping.
    full_trajectory_query : bool
        If True the model is re-queried after every filled-in mask.
    diff_reward : bool
        If True the reward is the fitness change, otherwise the new fitness.
    function_mappings : dict or None
        Maps function token names to the callables stored in the tree;
        ``None`` is treated as an empty mapping.
    terminals_mappings : dict or None
        Maps terminal token names to values; by default each name in
        ``constant_names`` maps to its index (plus ``'const'`` when constant
        terminals are allowed).
    higher_is_better : bool
        Whether a larger fitness is an improvement.
    allow_constant_terminals : bool
        Whether the ``'const'`` token is part of the terminal set.
    """
    # todo: when a program is too long, take only the last 2048 tokens

    def __init__(self, operators_list, constant_names, get_fitness_func, batch_size=64, learning_rate=1e-3,
                 adam_decay=0,
                 epsilon_greedy=0.01, word_embedding_dim=120, context_size=2048, n_layers=3, n_attention_heads=3,
                 internal_size=128, clip_grad_norm=1.0, full_trajectory_query=True, diff_reward=True,
                 function_mappings=None, terminals_mappings=None, higher_is_better=True, allow_constant_terminals=True):

        if constant_names is None:
            constant_names = []

        # functions + constants + [<mask>] + [const]
        self.vocab_size = len(operators_list) + len(constant_names) + 2
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        print(f'Using device: {self.device}')
        self.bert_config = {
            'vocab_size': self.vocab_size,
            'hidden_size': word_embedding_dim,
            'num_hidden_layers': n_layers,
            'num_attention_heads': n_attention_heads,
            'intermediate_size': internal_size,
            'max_position_embeddings': context_size
        }

        self.model = BertForMaskedLM(BertConfig(**self.bert_config)).to(self.device)
        self.action_probabilities = []
        self.rewards = []
        self.batch_size = batch_size

        if allow_constant_terminals:
            self.terminals = np.array(constant_names + ['const'])
        else:
            self.terminals = np.array(constant_names)

        self.token_encoder = LabelEncoder().fit(
            list(operators_list) + ['<mask>'] + list(self.terminals))
        self.mask_id = self.token_encoder.transform(['<mask>'])[0]
        self.trajectory_probabilities = []
        self.n_features = len(constant_names)
        self.get_fitness_func = get_fitness_func
        self.optimizer = Adam(self.model.parameters(), lr=learning_rate, weight_decay=adam_decay)
        self.epsilon_greedy = epsilon_greedy
        self.clip_grad_norm = clip_grad_norm
        self.full_trajectory_query = full_trajectory_query
        self.diff_reward = diff_reward
        # Always keep a mapping: `mutate` performs membership tests and
        # lookups on it, which would raise TypeError on None.
        self.function_mappings = {} if function_mappings is None else function_mappings

        if terminals_mappings is None:
            self.terminals_mappings = {var: i for i, var in enumerate(constant_names)}
            if allow_constant_terminals:
                self.terminals_mappings['const'] = self.n_features
        else:
            self.terminals_mappings = terminals_mappings

        self.higher_is_better = higher_is_better

    def mutate(self, program_tokens, allowed_operators, tree_program, masked_nodes,
               arity_ndarray=None, allowed_operators_arity=None, terminal_traj=False):
        """Fill the masked nodes of ``tree_program`` with sampled replacements.

        Parameters
        ----------
        program_tokens: list of string tokens (length == program length), with '<mask>' at the
            masked positions. Example : ['add', '<mask>', 'const']
        allowed_operators: NumPy array of the token names that may be used in the mutation (it is
            indexed with a boolean mask). Example: np.array(['add', 'sub'])
        tree_program: eckity object of the tree
        masked_nodes: indexes of the masked nodes in the program
        arity_ndarray: numpy array of the arity of the masked nodes (length == masked_nodes length)
        allowed_operators_arity: numpy array of the arity of the allowed operators (length == allowed_operators length)
        terminal_traj: boolean, if True, the mutation will be done on the terminal nodes, otherwise on
            the function nodes

        Returns
        -------
        None. ``tree_program.tree`` is modified in place, the trajectory's log-probability and
        reward are stored, and ``run_epoch`` is called (it trains only once a full batch exists).
        """
        unmasked_tokens = program_to_labels(tree_program, [])

        mapped_mask_arity, mapped_masked_nodes, mapped_tokens, mapped_tokens_indices, sorted_mask_order = get_transformed_notation(
            arity_ndarray, masked_nodes, program_tokens, unmasked_tokens)

        initial_fitness = self.get_fitness_func(tree_program)
        tokens_ids = torch.Tensor([self.token_encoder.transform(mapped_tokens)]).type(torch.LongTensor).to(self.device)
        logits = self.model(tokens_ids, attention_mask=torch.ones_like(tokens_ids).to(self.device)).logits
        mask_indices = torch.where(tokens_ids == self.mask_id)[1]

        suggested_mutation, trajectory_action_probabilities = self.masked_trajectory_generation(allowed_operators,
                                                                                                logits, mask_indices,
                                                                                                mapped_mask_arity,
                                                                                                allowed_operators_arity,
                                                                                                torch.clone(tokens_ids))

        # return the suggested mutation to the original order
        # notice that the suggested_mutation is returned in sorted order, so we use the sorted_mask_order to realign it
        realigned_order = mapped_tokens_indices[mapped_masked_nodes[sorted_mask_order]]

        for node, current_mutation in zip(realigned_order, suggested_mutation):

            if terminal_traj:
                current_mapping = self.terminals_mappings[current_mutation]
            else:
                current_mapping = self.function_mappings[current_mutation]

            if current_mutation == 'const':
                # draw a fresh constant from the tree's range, keeping its numeric kind
                if type(tree_program.erc_range[0]) is float:
                    rand_constant = random.uniform(*tree_program.erc_range)
                else:
                    rand_constant = random.randint(*tree_program.erc_range)

                tree_program.tree[node] = TerminalNode(rand_constant)
            elif current_mutation in self.function_mappings:
                tree_program.tree[node] = FunctionNode(current_mapping)
            else:
                if callable(self.terminals_mappings[current_mutation]):
                    tree_program.tree[node] = TerminalNode(self.terminals_mappings[current_mutation])
                else:
                    tree_program.tree[node] = TerminalNode(current_mutation)

        new_fitness = self.get_fitness_func(tree_program)

        if self.diff_reward:
            reward = (new_fitness - initial_fitness)
        else:
            reward = new_fitness

        # run_epoch *minimises* log-probability * advantage, so an outcome that
        # should be reinforced must be stored with a negative sign.
        if self.higher_is_better:
            reward *= -1

        trajectory_probability = torch.log(torch.cat(trajectory_action_probabilities)).sum().unsqueeze(
            0).unsqueeze(0)
        self.rewards.append(torch.full_like(trajectory_probability, reward))
        self.trajectory_probabilities.append(trajectory_probability)
        self.run_epoch()

    def masked_trajectory_generation(self, allowed_operators, logits, mask_indices, arity_of_masked_locations,
                                     allowed_operators_arity, tokens_ids):
        """Sample one replacement token for every masked position.

        :param tokens_ids: encoded program tokens (first dimension is the batch, only row 0 is used)
        :param allowed_operators: array of allowed operators (indexed with a boolean mask)
        :param logits: model logits
        :param mask_indices: indices of the masked tokens
        :param arity_of_masked_locations: arity of the masked tokens (None: treated as all zeros)
        :param allowed_operators_arity: arity of the allowed operators (None: treated as all zeros)
        :return: suggested mutation and trajectory action probabilities
        """
        allowed_operators_arity, arity_of_masked_locations = convert_arity_to_tensors(allowed_operators,
                                                                                      allowed_operators_arity,
                                                                                      arity_of_masked_locations,
                                                                                      mask_indices)

        masked_softmax_indexes = torch.Tensor(self.token_encoder.transform(allowed_operators)).type(torch.LongTensor)
        suggested_mutation = []
        trajectory_action_probabilities = []

        # masked trajectory generation
        for trajectory_index in range(len(mask_indices)):
            # only operators with the same arity as the masked node are candidates
            current_mask_arity = arity_of_masked_locations[trajectory_index]
            current_allowed_operators = allowed_operators[current_mask_arity == allowed_operators_arity]
            current_masked_softmax_indexes = masked_softmax_indexes[
                current_mask_arity == allowed_operators_arity].to(self.device)

            # get the probability of the allowed operators and normalize them
            mask_index = torch.tensor([mask_indices[trajectory_index]]).type(torch.LongTensor)
            operators_proba = torch.softmax(logits[0, mask_index], dim=-1)[:,
                              current_masked_softmax_indexes].to(self.device)
            operators_proba = operators_proba / operators_proba.sum(dim=-1).unsqueeze(-1)

            # sample an operator with epsilon greedy
            if torch.rand(1) < self.epsilon_greedy:
                sampled_operators_dist = torch.randint(0, len(current_allowed_operators), (1,)).to(self.device)
            else:
                sampled_operators_dist = torch.distributions.Categorical(operators_proba).sample().to(self.device)

            sampled_actions_probability = torch.gather(operators_proba, dim=1,
                                                       index=sampled_operators_dist.unsqueeze(-1))
            trajectory_action_probabilities.append(sampled_actions_probability)
            suggested_mutation += [current_allowed_operators[sampled_operators_dist.detach().cpu().numpy()][0]]

            if self.full_trajectory_query:
                # write the sampled token into the sequence and re-query the
                # model so later masks are conditioned on earlier choices
                tokens_ids = torch.clone(tokens_ids)
                tokens_ids[0, mask_index] = current_masked_softmax_indexes[sampled_operators_dist]
                logits = self.model(tokens_ids, attention_mask=torch.ones_like(tokens_ids).to(self.device)).logits

        return suggested_mutation, trajectory_action_probabilities

    def run_epoch(self, numerical_stability=1e-10):
        """Run one optimisation step if a full batch of trajectories is stored.

        Does nothing while fewer than ``batch_size`` rewards are collected.
        Otherwise the stored trajectories are consumed (the buffers are
        cleared), rewards are standardised into advantages and the mean of
        ``log-probability * advantage`` is minimised.

        :param numerical_stability: added to the reward spread before dividing
        """
        current_batch_size = sum(len(reward) for reward in self.rewards)
        if current_batch_size < self.batch_size:
            return

        all_traj_proba = torch.cat(self.trajectory_probabilities, dim=0).to(self.device)
        all_rewards = torch.cat(self.rewards, dim=0).to(self.device)

        self.trajectory_probabilities.clear()
        self.rewards.clear()

        self.optimizer.zero_grad()
        # The sample standard deviation of a single value is NaN, which would
        # poison the loss, the gradients and finally the model weights. With
        # one reward there is no spread, so treat it as zero (advantage 0).
        if all_rewards.numel() > 1:
            reward_spread = torch.std(all_rewards)
        else:
            reward_spread = 0.0
        advantages = (all_rewards - torch.mean(all_rewards)) / (reward_spread + numerical_stability)
        advantages = advantages.to(self.device)
        loss = torch.mean(all_traj_proba * advantages).to(self.device)
        loss.backward()

        if self.clip_grad_norm is not None:
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.clip_grad_norm)

        self.optimizer.step()
        print(f'loss: {loss}, reward: {torch.mean(all_rewards)}')
@@ -0,0 +1,6 @@
1
+ """BERT mutation operator for EC-KitY genetic programming."""
2
+
3
+ from bert_mutation import BertMutation
4
+ from uniform_mutation import BERTUniformMutation
5
+
6
+ __all__ = ["BertMutation", "BERTUniformMutation"]
@@ -0,0 +1,137 @@
1
+ Metadata-Version: 2.4
2
+ Name: eckity-bert-gp
3
+ Version: 0.1.0
4
+ Summary: BERT mutation operator for EC-KitY genetic programming
5
+ Author: EC-KitY
6
+ License-Expression: BSD-3-Clause
7
+ Project-URL: Homepage, https://github.com/EC-KitY/BERT-Mutation-for-GP
8
+ Project-URL: Repository, https://github.com/EC-KitY/BERT-Mutation-for-GP
9
+ Project-URL: Paper, https://doi.org/10.3390/math13050779
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: Intended Audience :: Science/Research
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.9
14
+ Classifier: Programming Language :: Python :: 3.10
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
18
+ Requires-Python: >=3.9
19
+ Description-Content-Type: text/markdown
20
+ License-File: LICENSE
21
+ Requires-Dist: eckity~=0.4.1
22
+ Requires-Dist: numpy>=2.0.2
23
+ Requires-Dist: overrides>=7.7.0
24
+ Requires-Dist: scikit-learn>=1.5.0
25
+ Requires-Dist: scipy>=1.13.0
26
+ Requires-Dist: torch>=2.7.1
27
+ Requires-Dist: transformers>=4.50.0
28
+ Provides-Extra: dev
29
+ Requires-Dist: build>=1.2; extra == "dev"
30
+ Requires-Dist: pandas>=2.2; extra == "dev"
31
+ Requires-Dist: pytest>=8.0; extra == "dev"
32
+ Requires-Dist: twine>=5.0; extra == "dev"
33
+ Dynamic: license-file
34
+
35
+ # BERT Mutation for EC-KitY Genetic Programming
36
+
37
+ `eckity-bert-gp` provides the BERT mutation operator for tree-based genetic programming in [EC-KitY](https://github.com/EC-KitY/EC-KitY).
38
+
39
+ The operator is described in **“BERT Mutation: Deep Transformer Model for Masked Uniform Mutation in Genetic Programming”**, Mathematics 2025, 13(5), 779 ([paper](https://doi.org/10.3390/math13050779)). It masks selected GP-tree nodes and uses a compact BERT masked-language model to sample replacements that preserve the required node arity.
40
+
41
+ ## Installation
42
+
43
+ ```bash
44
+ pip install eckity-bert-gp
45
+ ```
46
+
47
+ ## Public API
48
+
49
+ ```python
50
+ from eckity_bert_gp import BertMutation, BERTUniformMutation
51
+ ```
52
+
53
+ `BertMutation` owns and trains the BERT policy. `BERTUniformMutation` adapts that policy to EC-KitY's genetic-operator interface.
54
+
55
+ ## Usage
56
+
57
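+ The BERT model needs the function names, terminal names, fitness callback, and mappings back to the EC-KitY functions (in the example below, `evaluator` stands for your own EC-KitY individual evaluator, which must be created beforehand):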
58
+
59
+ ```python
60
+ import numpy as np
61
+ from eckity.base.untyped_functions import f_add, f_div, f_mul, f_sub
62
+ from eckity_bert_gp import BertMutation, BERTUniformMutation
63
+
64
+ function_set = [f_add, f_sub, f_mul, f_div]
65
+ terminal_set = ["x", "y", "z"]
66
+ function_mappings = {function.__name__: function for function in function_set}
67
+
68
+ bert_model = BertMutation(
69
+ operators_list=np.array(list(function_mappings)),
70
+ constant_names=terminal_set,
71
+ get_fitness_func=evaluator.evaluate_individual,
72
+ context_size=256,
73
+ word_embedding_dim=20,
74
+ n_layers=1,
75
+ n_attention_heads=1,
76
+ function_mappings=function_mappings,
77
+ higher_is_better=False,
78
+ )
79
+
80
+ bert_mutation = BERTUniformMutation(
81
+ bert_model=bert_model,
82
+ probability=1.0,
83
+ node_probability=0.1,
84
+ )
85
+ ```
86
+
87
+ Add `bert_mutation` to the EC-KitY subpopulation's `operators_sequence`.
88
+
89
+ - `get_fitness_func` accepts an EC-KitY GP tree and returns its fitness.
90
+ - `function_mappings` maps each function name used by BERT back to the callable stored in GP trees.
91
+ - Terminal mappings default to the names supplied in `constant_names`, each mapped to its position in that list, plus `const` when constant terminals are allowed.
92
+ - `probability` controls whether the EC-KitY mutation operator runs.
93
+ - `node_probability` controls the probability of masking each tree node.
94
+ - `context_size` must be large enough for the longest tree representation expected during evolution.
95
+
96
+ The model is initialized locally from `BertConfig`; installing or constructing the operator does not download pretrained model weights.
97
+
98
+ ## Compatibility
99
+
100
+ - Python 3.9 or newer
101
+ - EC-KitY 0.4.x
102
+ - NumPy 2.0.2 or newer
103
+ - SciPy 1.13.0 or newer
104
+ - PyTorch 2.7.1 or newer
105
+ - Transformers 4.50.0 or newer
106
+ - scikit-learn 1.5.0 or newer
107
+
108
+ These bounds are compatible with `eckity-dnc` and `eckity-bert-ga`; none of the three packages directly depends on another operator package.
109
+
110
+ ## Repository experiment
111
+
112
+ The repository includes the paper's experiment runner and datasets. They are development resources and are not included in the wheel.
113
+
114
+ Install the development dependencies and run the symbolic-regression example:
115
+
116
+ ```bash
117
+ python -m pip install -e ".[dev]"
118
+ python runner.py
119
+ ```
120
+
121
+ Additional benchmark data is stored under `data/`, and Artificial Ant maps are stored under `ant_opt/`.
122
+
123
+ ## Development
124
+
125
+ With [uv](https://docs.astral.sh/uv/):
126
+
127
+ ```bash
128
+ uv sync --extra dev --resolution lowest-direct
129
+ uv run pytest
130
+ uv build
131
+ ```
132
+
133
+ Release preparation and manual PyPI upload commands are documented in [`RELEASING.md`](RELEASING.md).
134
+
135
+ ## License
136
+
137
+ This project is licensed under the BSD 3-Clause License. See [`LICENSE`](LICENSE).
@@ -0,0 +1,9 @@
1
+ aux_func.py,sha256=Mu5eSalMuQyaQAbABo9J1Cd-naFn8FVEai5MdNG5Gyo,2821
2
+ bert_mutation.py,sha256=nN_3tr6Ia1EnUuDF_A_xtukznRQ-Kh-cbpvlOSnMa6w,12523
3
+ uniform_mutation.py,sha256=Z8KIEyd6pbBuhKtrf0ehm6hxHRISFIb4denizW4fZzA,6570
4
+ eckity_bert_gp/__init__.py,sha256=szBUoo1sDCfoUmOSS_UX8DMZoG3xjebT6ZH4euKPSKM,202
5
+ eckity_bert_gp-0.1.0.dist-info/licenses/LICENSE,sha256=ljMDMJVYrp0IntMunve15Q7PHVxid-NXY00rJis8WTs,1522
6
+ eckity_bert_gp-0.1.0.dist-info/METADATA,sha256=vM0156WoUVAVrOwznXwKStmI3rOTX85jXL2UukYYd2c,4923
7
+ eckity_bert_gp-0.1.0.dist-info/WHEEL,sha256=K260EYznzXsJYBQGqmI8VTxEdiZYNvDZwW9cBh9-_MA,91
8
+ eckity_bert_gp-0.1.0.dist-info/top_level.txt,sha256=s6B-haXAtyxlPcyw9Yes5ISHzzZ_wpQM7r3ZPERQwhY,55
9
+ eckity_bert_gp-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (83.0.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,28 @@
1
+ BSD 3-Clause License
2
+
3
+ Copyright (c) 2024, EC-KitY
4
+
5
+ Redistribution and use in source and binary forms, with or without
6
+ modification, are permitted provided that the following conditions are met:
7
+
8
+ 1. Redistributions of source code must retain the above copyright notice, this
9
+ list of conditions and the following disclaimer.
10
+
11
+ 2. Redistributions in binary form must reproduce the above copyright notice,
12
+ this list of conditions and the following disclaimer in the documentation
13
+ and/or other materials provided with the distribution.
14
+
15
+ 3. Neither the name of the copyright holder nor the names of its
16
+ contributors may be used to endorse or promote products derived from
17
+ this software without specific prior written permission.
18
+
19
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@@ -0,0 +1,4 @@
1
+ aux_func
2
+ bert_mutation
3
+ eckity_bert_gp
4
+ uniform_mutation
uniform_mutation.py ADDED
@@ -0,0 +1,162 @@
1
+ from typing import Any, List, Tuple
2
+
3
+ from eckity.base.utils import arity
4
+ from overrides import override
5
+ import numpy as np
6
+ from eckity.genetic_encodings.gp import Tree, TreeNode, FunctionNode, TerminalNode
7
+ from eckity.genetic_operators import FailableOperator
8
+ import random
9
+
10
+ from bert_mutation import BertMutation
11
+ from aux_func import program_to_labels
12
+
13
+
14
class UniformNodeMutation(FailableOperator):
    """Node-wise mutation: every tree node is independently selected with
    probability ``node_probability`` and replaced by a random node of the
    same kind (same-arity function, or a random terminal of the same type).
    """

    def __init__(
        self,
        probability: float = 1.0,
        node_probability: float = 0.1,
        events=None,
        attempts=1,
    ):
        super().__init__(
            probability=probability, arity=1, events=events, attempts=attempts
        )
        self.node_probability = node_probability

    @override
    def attempt_operator(
        self, payload: Any, attempt_num: int
    ) -> Tuple[bool, Any]:
        """
        Perform node mutation: sample a mask over the nodes of every
        individual and replace each selected node in place.

        Returns
        -------
        Tuple[bool, Any]
            A tuple containing a boolean indicating whether the operator was
            successful (always True here) and a list of the individuals.
        """
        individuals: List[Tree] = payload
        sampled_masks = self._sample_masks(individuals)

        for individual, node_mask in zip(individuals, sampled_masks):
            for position, node in enumerate(individual.tree):
                if not node_mask[position]:
                    continue
                individual.tree[position] = self._get_node_replacement(individual, node)

        self.applied_individuals = individuals
        return True, individuals

    def _sample_masks(self, individuals: List[Tree]):
        """Return one boolean array per individual, True marking a selected node."""
        return [
            np.random.choice([True, False], size=len(individual.tree),
                             p=[self.node_probability, 1 - self.node_probability])
            for individual in individuals
        ]

    def _get_node_replacement(self, ind: Tree, node: TreeNode):
        """Return a random node that can take the place of ``node`` in ``ind``.

        Raises ValueError when ``node`` is neither exactly a ``FunctionNode``
        nor exactly a ``TerminalNode``.
        """
        node_kind = type(node)

        if node_kind is FunctionNode:
            # only functions taking the same number of arguments keep the tree valid
            wanted_arity = node.n_args
            candidates = [
                candidate for candidate in ind.function_set
                if arity(candidate) == wanted_arity
            ]
            return FunctionNode(random.choice(candidates))

        if node_kind is TerminalNode:
            return ind.random_terminal(node_type=node.node_type)

        raise ValueError(f"Node type {type(node)} not supported")
72
+
73
+
74
class BERTUniformMutation(FailableOperator):
    """Node-wise mutation whose replacements are proposed by a ``BertMutation``
    model: nodes are masked at random, then the masked function nodes and the
    masked terminal nodes are each handed to ``bert_model.mutate``.
    """

    def __init__(
        self,
        bert_model: BertMutation,
        probability: float = 1.0,
        node_probability: float = 0.1,
        max_trajectory_length=100,
        events=None,
        attempts=1,
    ):
        super().__init__(
            probability=probability, arity=1, events=events, attempts=attempts
        )
        self.node_probability = node_probability
        self.bert_model = bert_model
        self.max_trajectory_length = max_trajectory_length

    @override
    def attempt_operator(
        self, payload: Any, attempt_num: int
    ) -> Tuple[bool, Any]:
        """
        Perform BERT-guided node mutation on a single individual: sample a
        mask over its nodes and let the BERT model fill in the masked
        function nodes first and the masked terminal nodes second.

        Returns
        -------
        Tuple[bool, Any]
            A tuple containing a boolean indicating whether the operator was
            successful (always True here) and a list of the individuals.
        """
        individuals: List[Tree] = payload
        sampled_masks = self._sample_masks(individuals)
        assert len(individuals) == 1
        individual, mutation_mask = individuals[0], sampled_masks[0]

        model = self.bert_model
        allowed_functions = np.array(list(model.function_mappings.keys()))
        allowed_functions_arity = np.array(
            [arity(func) for func in list(model.function_mappings.values())])

        # split the sampled mask into its function-node and terminal-node parts
        is_function = np.array([type(node) is FunctionNode for node in individual.tree])
        masked_functions = np.where(is_function & mutation_mask)[0]
        masked_variables = np.where(~is_function & mutation_mask)[0]

        if len(masked_functions) > 0:
            function_labels = program_to_labels(individual, mutation_mask & is_function)
            model.mutate(function_labels, allowed_functions, individual,
                         masked_functions, self._get_arity_of_masked_nodes(individual, mutation_mask),
                         allowed_functions_arity)

        if len(masked_variables) > 0:
            # labels are rebuilt here, so they reflect the functions replaced above
            terminal_labels = program_to_labels(individual, mutation_mask & ~is_function)
            model.mutate(terminal_labels, model.terminals, individual,
                         masked_variables, None, None, terminal_traj=True)

        self.applied_individuals = individuals
        return True, individuals

    def _sample_masks(self, individuals: List[Tree]):
        """Return one boolean array per individual, True marking a masked node.

        The per-node probability is ``node_probability`` unless the expected
        number of masked nodes would reach ``max_trajectory_length``; then it
        is lowered to ``max_trajectory_length / tree size``.
        """
        masks = []
        for individual in individuals:
            tree_size = len(individual.tree)
            if tree_size * self.node_probability < self.max_trajectory_length:
                mask_probability = self.node_probability
            else:
                mask_probability = self.max_trajectory_length / tree_size
            masks.append(np.random.choice([True, False], size=tree_size,
                                          p=[mask_probability, 1 - mask_probability]))
        return masks

    def _get_arity_of_masked_nodes(self, ind: Tree, mask: np.ndarray):
        """Return the ``n_args`` of every masked function node, in tree order."""
        return np.array([
            node.n_args
            for position, node in enumerate(ind.tree)
            if mask[position] and type(node) is FunctionNode
        ])

    def _get_node_replacement(self, ind: Tree, node: TreeNode):
        """Return a random node that can take the place of ``node`` in ``ind``.

        Raises ValueError when ``node`` is neither exactly a ``FunctionNode``
        nor exactly a ``TerminalNode``.
        """
        node_kind = type(node)

        if node_kind is FunctionNode:
            wanted_arity = node.n_args
            candidates = [
                candidate for candidate in ind.function_set
                if arity(candidate) == wanted_arity
            ]
            return FunctionNode(random.choice(candidates))

        if node_kind is TerminalNode:
            return ind.random_terminal(node_type=node.node_type)

        raise ValueError(f"Node type {type(node)} not supported")