pysips 0.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pysips/__init__.py +53 -0
- pysips/crossover_proposal.py +138 -0
- pysips/laplace_nmll.py +104 -0
- pysips/metropolis.py +126 -0
- pysips/mutation_proposal.py +220 -0
- pysips/prior.py +106 -0
- pysips/random_choice_proposal.py +177 -0
- pysips/regressor.py +451 -0
- pysips/sampler.py +159 -0
- pysips-0.0.0.dist-info/METADATA +156 -0
- pysips-0.0.0.dist-info/RECORD +26 -0
- pysips-0.0.0.dist-info/WHEEL +5 -0
- pysips-0.0.0.dist-info/licenses/LICENSE +94 -0
- pysips-0.0.0.dist-info/top_level.txt +2 -0
- tests/integration/test_log_likelihood.py +18 -0
- tests/integration/test_prior_with_bingo.py +45 -0
- tests/regression/test_basic_end_to_end.py +131 -0
- tests/regression/test_regressor_end_to_end.py +95 -0
- tests/unit/test_crossover_proposal.py +156 -0
- tests/unit/test_laplace_nmll.py +111 -0
- tests/unit/test_metropolis.py +111 -0
- tests/unit/test_mutation_proposal.py +196 -0
- tests/unit/test_prior.py +135 -0
- tests/unit/test_random_choice_proposal.py +136 -0
- tests/unit/test_regressor.py +227 -0
- tests/unit/test_sampler.py +133 -0
pysips/__init__.py
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
"""
|
2
|
+
Notices:
|
3
|
+
Copyright 2025 United States Government as represented by the Administrator of the National
|
4
|
+
Aeronautics and Space Administration. No copyright is claimed in the United States under Title 17,
|
5
|
+
U.S. Code. All Other Rights Reserved.
|
6
|
+
|
7
|
+
The NASA Software “PySIPS” (LAR-20644-1) calls the following third-party software, which is
|
8
|
+
subject to the terms and conditions of its licensor, as applicable at the time of licensing. The
|
9
|
+
third-party software is not bundled or included with this software but may be available from the
|
10
|
+
licensor. License hyperlinks are provided here for information purposes only.
|
11
|
+
|
12
|
+
NumPy
|
13
|
+
https://numpy.org/devdocs/license.html
|
14
|
+
Copyright (c) 2005-2025, NumPy Developers.
|
15
|
+
All rights reserved.
|
16
|
+
|
17
|
+
h5py
|
18
|
+
https://github.com/h5py/h5py/blob/master/LICENSE
|
19
|
+
Copyright (c) 2008 Andrew Collette and contributors
|
20
|
+
All rights reserved.
|
21
|
+
|
22
|
+
tqdm
|
23
|
+
https://github.com/tqdm/tqdm/blob/master/LICENCE
|
24
|
+
Copyright (c) 2013 noamraph
|
25
|
+
|
26
|
+
SciPy
|
27
|
+
https://github.com/scipy/scipy/blob/main/LICENSE.txt
|
28
|
+
Copyright (c) 2001-2002 Enthought, Inc. 2003, SciPy Developers.
|
29
|
+
All rights reserved.
|
30
|
+
|
31
|
+
Disclaimers
|
32
|
+
No Warranty: THE SUBJECT SOFTWARE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY OF ANY KIND, EITHER
|
33
|
+
EXPRESSED, IMPLIED, OR STATUTORY, INCLUDING, BUT NOT LIMITED TO, ANY WARRANTY THAT THE SUBJECT
|
34
|
+
SOFTWARE WILL CONFORM TO SPECIFICATIONS, ANY IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
|
35
|
+
PARTICULAR PURPOSE, OR FREEDOM FROM INFRINGEMENT, ANY WARRANTY THAT THE SUBJECT SOFTWARE WILL BE
|
36
|
+
ERROR FREE, OR ANY WARRANTY THAT DOCUMENTATION, IF PROVIDED, WILL CONFORM TO THE SUBJECT SOFTWARE.
|
37
|
+
THIS AGREEMENT DOES NOT, IN ANY MANNER, CONSTITUTE AN ENDORSEMENT BY GOVERNMENT AGENCY OR ANY
|
38
|
+
PRIOR RECIPIENT OF ANY RESULTS, RESULTING DESIGNS, HARDWARE, SOFTWARE PRODUCTS OR ANY OTHER
|
39
|
+
APPLICATIONS RESULTING FROM USE OF THE SUBJECT SOFTWARE. FURTHER, GOVERNMENT AGENCY DISCLAIMS
|
40
|
+
ALL WARRANTIES AND LIABILITIES REGARDING THIRD-PARTY SOFTWARE, IF PRESENT IN THE ORIGINAL
|
41
|
+
SOFTWARE, AND DISTRIBUTES IT "AS IS."
|
42
|
+
|
43
|
+
Waiver and Indemnity: RECIPIENT AGREES TO WAIVE ANY AND ALL CLAIMS AGAINST THE UNITED STATES
|
44
|
+
GOVERNMENT, ITS CONTRACTORS AND SUBCONTRACTORS, AS WELL AS ANY PRIOR RECIPIENT. IF RECIPIENT'S
|
45
|
+
USE OF THE SUBJECT SOFTWARE RESULTS IN ANY LIABILITIES, DEMANDS, DAMAGES, EXPENSES OR LOSSES
|
46
|
+
ARISING FROM SUCH USE, INCLUDING ANY DAMAGES FROM PRODUCTS BASED ON, OR RESULTING FROM,
|
47
|
+
RECIPIENT'S USE OF THE SUBJECT SOFTWARE, RECIPIENT SHALL INDEMNIFY AND HOLD HARMLESS THE UNITED
|
48
|
+
STATES GOVERNMENT, ITS CONTRACTORS, AND SUBCONTRACTORS, AS WELL AS ANY PRIOR RECIPIENT, TO THE
|
49
|
+
EXTENT PERMITTED BY LAW. RECIPIENT'S SOLE REMEDY FOR ANY SUCH MATTER SHALL BE THE IMMEDIATE,
|
50
|
+
UNILATERAL TERMINATION OF THIS AGREEMENT.
|
51
|
+
"""
|
52
|
+
|
53
|
+
from .regressor import PysipsRegressor
|
@@ -0,0 +1,138 @@
|
|
1
|
+
"""
|
2
|
+
Crossover-Based Proposal Generator for Symbolic Regression Models.
|
3
|
+
|
4
|
+
This module provides a crossover-based proposal mechanism for symbolic regression
|
5
|
+
that creates new candidate models by combining genetic material from existing
|
6
|
+
models. It implements genetic programming crossover operations using bingo's
|
7
|
+
AGraphCrossover functionality within an MCMC or evolutionary algorithm framework.
|
8
|
+
|
9
|
+
The crossover operation mimics biological reproduction by exchanging subtrees
|
10
|
+
between two parent expressions to create offspring that inherit characteristics
|
11
|
+
from both parents. This approach can effectively explore the space of symbolic
|
12
|
+
expressions by combining successful components from different models.
|
13
|
+
|
14
|
+
Key Features
|
15
|
+
------------
|
16
|
+
- Random partner selection from a configurable gene pool
|
17
|
+
- Stochastic child selection (50/50 probability between two crossover offspring)
|
18
|
+
- Avoids self-crossover by ensuring different parent selection
|
19
|
+
- Updateable gene pool for adaptive sampling strategies
|
20
|
+
- Seeded random number generation for reproducible results
|
21
|
+
|
22
|
+
Crossover Mechanism
|
23
|
+
-------------------
|
24
|
+
The crossover operation works by:
|
25
|
+
1. Selecting a random crossover point in each parent expression tree
|
26
|
+
2. Swapping the subtrees at those points between the two parents
|
27
|
+
3. Producing two offspring that combine features from both parents
|
28
|
+
4. Randomly selecting one of the two offspring as the proposal
|
29
|
+
|
30
|
+
This process allows successful expression fragments to be preserved and
|
31
|
+
recombined in novel ways, potentially discovering better solutions through
|
32
|
+
the exploration of hybrid models.
|
33
|
+
|
34
|
+
Usage Example
|
35
|
+
-------------
|
36
|
+
>>> # Assume you have a collection of symbolic models
|
37
|
+
>>> gene_pool = [model1, model2, model3, model4] # List of AGraph models
|
38
|
+
>>>
|
39
|
+
>>> # Create crossover proposal generator
|
40
|
+
>>> crossover = CrossoverProposal(gene_pool, seed=42)
|
41
|
+
>>>
|
42
|
+
>>> # Use in MCMC or evolutionary sampling
|
43
|
+
>>> current_model = model1
|
44
|
+
>>> new_proposal = crossover(current_model)
|
45
|
+
>>>
|
46
|
+
>>> # Update gene pool as better models are discovered
|
47
|
+
>>> updated_pool = [best_model1, best_model2, new_good_model]
|
48
|
+
>>> crossover.update(updated_pool)
|
49
|
+
|
50
|
+
Integration Notes
|
51
|
+
-----------------
|
52
|
+
The update() method allows for dynamic gene pool management, enabling
|
53
|
+
adaptive strategies where successful models from the sampling process
|
54
|
+
can be added to influence future proposals.
|
55
|
+
"""
|
56
|
+
|
57
|
+
import numpy as np
|
58
|
+
from bingo.symbolic_regression import (
|
59
|
+
AGraphCrossover,
|
60
|
+
)
|
61
|
+
|
62
|
+
|
63
|
+
class CrossoverProposal:
    """A proposal operator that performs crossover between AGraph models.

    Instances are callable: given a model, a partner that compares unequal to
    it is drawn at random from the gene pool, bingo's ``AGraphCrossover`` is
    applied to the pair, and one of the two resulting children is returned
    with equal probability.

    Parameters
    ----------
    gene_pool : iterable of AGraph
        A collection of AGraph models that will be used as potential partners
        during crossover operations. It is copied into a list, so any
        iterable is accepted.
    seed : int, optional
        Random seed for the internal random number generator, used to control
        repeatability of operations
    """

    def __init__(self, gene_pool, seed=None):
        self._crossover = AGraphCrossover()
        # Copy into a list (exactly as update() does) so that the pool always
        # supports len() and integer indexing, whatever iterable was passed.
        self._gene_pool = list(gene_pool)
        self._rng = np.random.default_rng(seed)

    def _select_other_parent(self, model):
        """Draw a gene pool member that compares unequal to ``model``.

        Selection is by rejection sampling on a uniformly drawn index, so the
        sequence of random draws is the same as a plain retry loop whenever a
        valid partner exists.

        Raises
        ------
        ValueError
            If the gene pool is empty, or if every member of the gene pool
            compares equal to ``model`` (no valid partner exists).
        """
        pool_size = len(self._gene_pool)
        if pool_size == 0:
            raise ValueError(
                "Cannot select a crossover partner from an empty gene pool"
            )

        rejected = 0
        ind = self._rng.integers(0, pool_size)
        while self._gene_pool[ind] == model:
            rejected += 1
            # Only pay for a full scan of the pool once rejection sampling
            # looks stuck; without this check a pool made up solely of copies
            # of `model` would spin here forever.
            if rejected == pool_size and all(
                candidate == model for candidate in self._gene_pool
            ):
                raise ValueError(
                    "Cannot select a crossover partner: every model in the "
                    "gene pool is equal to the input model"
                )
            ind = self._rng.integers(0, pool_size)
        return self._gene_pool[ind]

    def _do_crossover(self, model, other_parent):
        """Cross the two parents and return one of the two children at random."""
        child_1, child_2 = self._crossover(model, other_parent)
        if self._rng.random() < 0.5:
            return child_1
        return child_2

    def __call__(self, model):
        """Perform crossover between the input model and a randomly selected one
        from the gene pool.

        Parameters
        ----------
        model : AGraph
            The model to be used as the first parent in the crossover operation

        Returns
        -------
        AGraph
            One of the two children produced by crossing ``model`` with the
            selected partner, chosen with equal probability

        Raises
        ------
        ValueError
            If the gene pool holds no model that differs from ``model``
        """
        other_parent = self._select_other_parent(model)
        return self._do_crossover(model, other_parent)

    def update(self, gene_pool, *_, **__):
        """Update the gene pool used for selecting crossover partners.

        Parameters
        ----------
        gene_pool : iterable of AGraph
            The new collection of AGraph models to use as the gene pool
        *_ : tuple
            Additional positional arguments (ignored)
        **__ : dict
            Additional keyword arguments (ignored)

        Notes
        -----
        Only the gene pool is replaced; the crossover operator and the random
        number generator are left untouched. The extra parameters are accepted
        for compatibility with other proposal update interfaces but are not
        used.
        """
        self._gene_pool = list(gene_pool)
|
pysips/laplace_nmll.py
ADDED
@@ -0,0 +1,104 @@
|
|
1
|
+
"""
|
2
|
+
Laplace Approximation for Normalized Marginal Log-Likelihood Estimation.
|
3
|
+
|
4
|
+
This module provides functionality for computing the Normalized Marginal Log-Likelihood
|
5
|
+
(NMLL) using the Laplace approximation method. It integrates with the bingo symbolic
|
6
|
+
regression library to evaluate the likelihood of symbolic mathematical models given
|
7
|
+
observed data.
|
8
|
+
|
9
|
+
The Laplace approximation is a method for approximating integrals that appear in
|
10
|
+
Bayesian model selection, particularly useful for comparing different symbolic
|
11
|
+
regression models. It approximates the marginal likelihood by making a Gaussian
|
12
|
+
approximation around the maximum a posteriori (MAP) estimate of the parameters.
|
13
|
+
|
14
|
+
Key Features
|
15
|
+
------------
|
16
|
+
- Integration with bingo's symbolic regression framework
|
17
|
+
- Multiple optimization restarts to avoid local minima
|
18
|
+
- Configurable scipy-based optimization backend
|
19
|
+
- Automatic parameter bound initialization for robust optimization
|
20
|
+
|
21
|
+
Usage Example
|
22
|
+
-------------
|
23
|
+
>>> import numpy as np
|
24
|
+
>>> from bingo.symbolic_regression import AGraph
|
25
|
+
>>>
|
26
|
+
>>> # Generate sample data
|
27
|
+
>>> X = np.random.randn(100, 2)
|
28
|
+
>>> y = X[:, 0]**2 + X[:, 1] + np.random.normal(0, 0.1, 100)
|
29
|
+
>>>
|
30
|
+
>>> # Create NMLL evaluator
|
31
|
+
>>> nmll_evaluator = LaplaceNmll(X, y, opt_restarts=3)
|
32
|
+
>>>
|
33
|
+
>>> # Evaluate a symbolic model (assuming you have an AGraph model)
|
34
|
+
>>> # nmll_score = nmll_evaluator(model)
|
35
|
+
|
36
|
+
Notes
|
37
|
+
-----
|
38
|
+
The multiple restart strategy helps ensure robust optimization by avoiding
|
39
|
+
local minima in the parameter space, which is especially important for
|
40
|
+
complex symbolic expressions.
|
41
|
+
"""
|
42
|
+
|
43
|
+
from bingo.symbolic_regression.explicit_regression import (
|
44
|
+
ExplicitTrainingData,
|
45
|
+
ExplicitRegression,
|
46
|
+
)
|
47
|
+
from bingo.local_optimizers.scipy_optimizer import ScipyOptimizer
|
48
|
+
|
49
|
+
|
50
|
+
# pylint: disable=R0903
|
51
|
+
class LaplaceNmll:
    """Normalized Marginal Likelihood using Laplace approximation

    Calling an instance with a model optimizes the model's constants one or
    more times and returns the best (largest) NMLL found, leaving the
    corresponding constants set on the model.

    Parameters
    ----------
    X : 2d Numpy Array
        Array of shape [num_datapoints, num_features] representing the input features
    y : 1d Numpy Array
        Array of labels of shape [num_datapoints]
    opt_restarts : int, optional
        number of times to perform gradient based optimization, each with different
        random initialization, by default 1
    **optimizer_kwargs :
        any keyword arguments to be passed to bingo's scipy optimizer
    """

    def __init__(self, X, y, opt_restarts=1, **optimizer_kwargs):
        objective = self._init_neg_nmll(X, y)
        self._neg_nmll = objective
        self._deterministic_optimizer = self._init_deterministic_optimizer(
            objective, **optimizer_kwargs
        )
        self._opt_restarts = opt_restarts

    def _init_neg_nmll(self, X, y):
        """Build the bingo fitness function that evaluates the negative NMLL."""
        return ExplicitRegression(
            training_data=ExplicitTrainingData(X, y),
            metric="negative nmll laplace",
        )

    def _init_deterministic_optimizer(self, objective, **optimizer_kwargs):
        """Build the scipy-backed optimizer used to fit model constants.

        ``param_init_bounds`` falls back to ``[-5, 5]`` unless the caller
        supplied a value of their own.
        """
        optimizer_kwargs.setdefault("param_init_bounds", [-5, 5])
        return ScipyOptimizer(objective, method="lm", **optimizer_kwargs)

    def _optimize_and_score(self, model):
        """Run one optimization of ``model`` and return the resulting NMLL."""
        self._deterministic_optimizer(model)
        # The objective is the *negative* NMLL, hence the sign flip.
        return -self._neg_nmll(model)

    def __call__(self, model):
        """Calculates NMLL using the Laplace approximation

        The model's constants are optimized ``opt_restarts`` times (always at
        least once); the constants from the best-scoring run are written back
        to the model before returning.

        Parameters
        ----------
        model : AGraph
            a bingo equation using the AGraph representation

        Returns
        -------
        the largest NMLL value found across the optimization runs
        """
        best_nmll = self._optimize_and_score(model)
        best_consts = model.get_local_optimization_params()

        remaining_restarts = self._opt_restarts - 1
        for _ in range(remaining_restarts):
            candidate_nmll = self._optimize_and_score(model)
            if candidate_nmll > best_nmll:
                best_nmll = candidate_nmll
                best_consts = model.get_local_optimization_params()

        # The last run is not necessarily the best one, so restore the winner.
        model.set_local_optimization_params(best_consts)
        return best_nmll
|
pysips/metropolis.py
ADDED
@@ -0,0 +1,126 @@
|
|
1
|
+
"""
|
2
|
+
Metropolis-Hastings MCMC Implementation for Symbolic Regression.
|
3
|
+
|
4
|
+
This module provides a specialized Metropolis-Hastings Markov Chain Monte Carlo
|
5
|
+
(MCMC) sampler designed for symbolic regression models. It extends the smcpy
|
6
|
+
VectorMCMC class to handle symbolic expressions (bingo AGraph objects) as parameters,
|
7
|
+
with custom proposal mechanisms and likelihood evaluation for equation discovery.
|
8
|
+
|
9
|
+
The implementation supports both single-process and multiprocess likelihood
|
10
|
+
evaluation, making it suitable for computationally intensive symbolic regression
|
11
|
+
tasks where model evaluation is the computational bottleneck.
|
12
|
+
|
13
|
+
Algorithm Overview
|
14
|
+
------------------
|
15
|
+
The Metropolis algorithm follows the standard accept/reject framework:
|
16
|
+
|
17
|
+
1. **Proposal Generation**: Uses a provided proposal function to generate
|
18
|
+
new symbolic expressions from current ones
|
19
|
+
|
20
|
+
2. **Likelihood Evaluation**: Computes log-likelihood for proposed expressions
|
21
|
+
using the provided likelihood function
|
22
|
+
|
23
|
+
3. **Accept/Reject Decision**: Accepts or rejects proposals based on the
|
24
|
+
Metropolis criterion comparing likelihoods
|
25
|
+
|
26
|
+
4. **Chain Evolution**: Iteratively builds a Markov chain of symbolic
|
27
|
+
expressions that converges to the target distribution
|
28
|
+
|
29
|
+
The key adaptation for symbolic regression is handling discrete, structured
|
30
|
+
parameter spaces (symbolic expressions) rather than continuous parameters.
|
31
|
+
|
32
|
+
Example Integration
|
33
|
+
-------------------
|
34
|
+
>>> from bingo.symbolic_regression import AGraph
|
35
|
+
>>>
|
36
|
+
>>> def likelihood_func(model):
|
37
|
+
... # Evaluate model on data and return log-likelihood
|
38
|
+
... return model.evaluate_fitness_vector(X, y)
|
39
|
+
>>>
|
40
|
+
>>> def proposal_func(model):
|
41
|
+
... # Generate new model via mutation
|
42
|
+
... return mutate(model)
|
43
|
+
>>>
|
44
|
+
>>> mcmc = Metropolis(
|
45
|
+
... likelihood=likelihood_func,
|
46
|
+
... proposal=proposal_func,
|
47
|
+
... prior=uniform_prior,
|
48
|
+
... multiprocess=True
|
49
|
+
... )
|
50
|
+
|
51
|
+
Implementation Notes
|
52
|
+
--------------------
|
53
|
+
- Uniform priors are assumed (evaluate_log_priors returns ones)
|
54
|
+
- Proposal updates are called after each sampling round to maintain
|
55
|
+
an adaptive gene pool for crossover operations
|
56
|
+
- Fitness values are cached on AGraph objects to avoid redundant computation
|
57
|
+
- The implementation handles vectorized operations for efficiency
|
58
|
+
"""
|
59
|
+
|
60
|
+
from multiprocessing import Pool
|
61
|
+
import numpy as np
|
62
|
+
from smcpy import VectorMCMC
|
63
|
+
|
64
|
+
|
65
|
+
class Metropolis(VectorMCMC):
    """Class for running basic MCMC w/ the Metropolis algorithm

    Parameters
    ----------
    likelihood : callable
        Computes marginal log likelihood given a bingo AGraph
    proposal : callable
        Proposes a new AGraph conditioned on an existing AGraph; must be
        symmetric. It must also provide an ``update(gene_pool=...)`` method,
        which is called at the end of every ``smc_metropolis`` run.
    prior : object
        Forwarded to the smcpy base class as its single prior
    multiprocess : bool, optional
        If True, likelihoods are evaluated in a ``multiprocessing.Pool``,
        by default False
    """

    def __init__(self, likelihood, proposal, prior, multiprocess=False):
        super().__init__(
            model=None,
            data=None,
            priors=[prior],
            # Factory that ignores its arguments and hands back `likelihood`.
            # NOTE(review): presumably the base class calls this to obtain
            # self._log_like_func -- confirm against smcpy.
            log_like_func=lambda *_: likelihood,
            log_like_args=None,
        )
        self._equ_proposal = proposal
        self.proposal = self._propose_equations
        self._is_multiprocess = multiprocess

    def _propose_equations(self, x, z):
        """Apply the equation proposal to every entry of ``x``.

        ``z`` is accepted but not used. The result is a column array with one
        proposed model per row.
        """
        return np.array([[self._equ_proposal(current)] for current in x.flatten()])

    def smc_metropolis(self, inputs, num_samples, cov=None):
        """Run ``num_samples`` MCMC steps starting from ``inputs``.

        Parameters
        ----------
        inputs : array of AGraph
            models at which the Markov chains initiate
        num_samples : int
            number of MCMC steps to perform
        cov : optional
            not used by this implementation

        Returns
        -------
        tuple
            the models after the final step and their log likelihoods
        """
        log_priors, log_like = self._initialize_probabilities(inputs)

        for _ in range(num_samples):
            inputs, log_like, _third, _fourth = self._perform_mcmc_step(
                inputs, None, log_like, log_priors
            )

        # Hand the final population to the proposal as its new gene pool.
        self._equ_proposal.update(gene_pool=inputs.flatten())

        return inputs, log_like

    def evaluate_model(self, _=None):
        """No forward model is evaluated here; always returns None."""
        return None

    def evaluate_log_priors(self, inputs):
        """Return a column of ones, one row per input."""
        num_rows = inputs.shape[0]
        return np.ones((num_rows, 1))

    def evaluate_log_likelihood(self, inputs):
        """Evaluate the log likelihood of every model in ``inputs``.

        With multiprocessing enabled the evaluations are mapped over a worker
        pool and each result is also stored on the corresponding model as
        ``fitness``. The result is returned as a column array.
        """
        models = inputs.flatten()
        if not self._is_multiprocess:
            log_like = [self._log_like_func(equation) for equation in models]
        else:
            with Pool() as pool:
                log_like = pool.map(self._log_like_func, models)
            for value, equation in zip(log_like, models):
                equation.fitness = value
        return np.c_[log_like]
|
@@ -0,0 +1,220 @@
|
|
1
|
+
"""
|
2
|
+
Mutation-Based Proposal Generator for Symbolic Regression Models.
|
3
|
+
|
4
|
+
This module provides a proposal mechanism for symbolic regression that uses
|
5
|
+
bingo's AGraph mutation operations to generate new candidate models from existing
|
6
|
+
ones. It is designed to work within Markov Chain Monte Carlo (MCMC) sampling
|
7
|
+
frameworks where new model proposals are needed at each step.
|
8
|
+
|
9
|
+
The module implements a configurable mutation strategy that can perform various
|
10
|
+
types of structural changes to symbolic mathematical expressions, including
|
11
|
+
adding/removing nodes, changing operations, modifying parameters, and pruning
|
12
|
+
or expanding expression trees.
|
13
|
+
|
14
|
+
Key Features
|
15
|
+
------------
|
16
|
+
- Multiple mutation types: command, node, parameter, prune, and fork mutations
|
17
|
+
- Configurable probabilities for each mutation type
|
18
|
+
- Repeat mutation capability for more dramatic changes
|
19
|
+
- Ensures non-identical proposals (prevents proposing the same model)
|
20
|
+
- Seeded random number generation for reproducible results
|
21
|
+
- Integration with bingo's ComponentGenerator for operator management
|
22
|
+
|
23
|
+
Mutation Types
|
24
|
+
--------------
|
25
|
+
Command Mutation
|
26
|
+
Changes the operation at a node (e.g., '+' to '*')
|
27
|
+
Node Mutation
|
28
|
+
Replaces a node with a new randomly generated subtree
|
29
|
+
Parameter Mutation
|
30
|
+
Modifies the numeric constants in the expression
|
31
|
+
Prune Mutation
|
32
|
+
Removes a portion of the expression tree
|
33
|
+
Fork Mutation
|
34
|
+
Adds a new branch to the expression tree
|
35
|
+
Repeat Mutation
|
36
|
+
Recursively applies additional mutations with specified probability
|
37
|
+
|
38
|
+
Usage Example
|
39
|
+
-------------
|
40
|
+
>>> # Create a mutation proposal generator
|
41
|
+
>>> proposal = MutationProposal(
|
42
|
+
... x_dim=3, # 3 input features
|
43
|
+
... operators=["+", "subtract", "multiply", "divide"],
|
44
|
+
... terminal_probability=0.2,
|
45
|
+
... command_probability=0.3,
|
46
|
+
... node_probability=0.2,
|
47
|
+
... seed=42
|
48
|
+
... )
|
49
|
+
>>>
|
50
|
+
>>> # Use in MCMC sampling (assuming you have a model)
|
51
|
+
>>> # new_model = proposal(current_model)
|
52
|
+
|
53
|
+
Notes
|
54
|
+
-----
|
55
|
+
The proposal generator ensures that new proposals are always different from
|
56
|
+
the input model by repeatedly applying mutations until a change occurs. This
|
57
|
+
prevents MCMC chains from getting stuck with identical consecutive states.
|
58
|
+
|
59
|
+
The update() method is provided for compatibility with adaptive MCMC frameworks
|
60
|
+
but currently performs no operations, as the mutation probabilities are fixed
|
61
|
+
at initialization.
|
62
|
+
"""
|
63
|
+
|
64
|
+
import numpy as np
|
65
|
+
from bingo.symbolic_regression import (
|
66
|
+
ComponentGenerator,
|
67
|
+
AGraphMutation,
|
68
|
+
)
|
69
|
+
|
70
|
+
|
71
|
+
class MutationProposal:
    """Proposal functor that performs bingo's AGraph mutation

    Parameters
    ----------
    x_dim : int
        dimension of input data (number of features in dataset)
    operators : list of str
        list of equation primitives to allow, e.g. ["+", "subtraction", "pow"]
    terminal_probability : float, optional
        [0.0-1.0] probability that a new node will be a terminal, by default 0.1
    constant_probability : float, optional
        [0.0-1.0] probability that a new terminal will be a constant; the
        default of None is passed through to bingo's ComponentGenerator
    command_probability : float, optional
        probability of command mutation, by default 0.2
    node_probability : float, optional
        probability of node mutation, by default 0.2
    parameter_probability : float, optional
        probability of parameter mutation, by default 0.2
    prune_probability : float, optional
        probability of pruning (removing a portion of the equation), by default 0.2
    fork_probability : float, optional
        probability of forking (adding an additional branch to the equation),
        by default 0.2
    repeat_mutation_probability : float, optional
        probability of a repeated mutation (applied repeatedly), by default 0.0
    seed : int, optional
        random seed used to control repeatability
    """

    # pylint: disable=R0913,R0917
    def __init__(
        self,
        x_dim,
        operators,
        terminal_probability=0.1,
        constant_probability=None,
        command_probability=0.2,
        node_probability=0.2,
        parameter_probability=0.2,
        prune_probability=0.2,
        fork_probability=0.2,
        repeat_mutation_probability=0.0,
        seed=None,
    ):
        self._rng = np.random.default_rng(seed)
        self._repeat_mutation_prob = repeat_mutation_probability

        generator = ComponentGenerator(
            input_x_dimension=x_dim,
            terminal_probability=terminal_probability,
            constant_probability=constant_probability,
        )
        for operator in operators:
            generator.add_operator(operator)

        self._mutation = AGraphMutation(
            generator,
            command_probability,
            node_probability,
            parameter_probability,
            prune_probability,
            fork_probability,
        )

    def _do_mutation(self, model):
        """Mutate ``model`` once, then keep mutating the result for as long as
        a fresh random draw falls below the repeat-mutation probability."""
        mutated = self._mutation(model)
        while True:
            if not self._rng.random() < self._repeat_mutation_prob:
                return mutated
            mutated = self._mutation(mutated)

    def __call__(self, model):
        """
        Apply mutation to generate a new symbolic expression model.

        The input model is mutated (possibly several times in a row, governed
        by the repeat-mutation probability). If the outcome compares equal to
        the input, it is discarded and the mutation is attempted again from
        the original input until a differing model is obtained.

        Parameters
        ----------
        model : AGraph
            The input symbolic regression model to be mutated.

        Returns
        -------
        AGraph
            A model produced by mutating the input that does not compare
            equal to the input.

        See Also
        --------
        AGraphMutation : Bingo's mutation implementation used internally
        """
        while True:
            candidate = self._do_mutation(model)
            if candidate == model:
                # Mutation left the model unchanged; retry from the input.
                continue
            return candidate

    def update(self, *args, **kwargs):
        """
        Update method for compatibility with adaptive MCMC frameworks.

        This is a no-op: the mutation proposal does not adapt its behavior
        based on sampling history. It exists so that this proposal can be
        used wherever a proposal with an ``update`` method is expected.

        Parameters
        ----------
        *args : tuple
            Positional arguments (not used in the current implementation).
        **kwargs : dict
            Keyword arguments (not used in the current implementation).

        Returns
        -------
        None
            This method does not return any value.
        """
|