pysips 0.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pysips/__init__.py +53 -0
- pysips/crossover_proposal.py +138 -0
- pysips/laplace_nmll.py +104 -0
- pysips/metropolis.py +126 -0
- pysips/mutation_proposal.py +220 -0
- pysips/prior.py +106 -0
- pysips/random_choice_proposal.py +177 -0
- pysips/regressor.py +451 -0
- pysips/sampler.py +159 -0
- pysips-0.0.0.dist-info/METADATA +156 -0
- pysips-0.0.0.dist-info/RECORD +26 -0
- pysips-0.0.0.dist-info/WHEEL +5 -0
- pysips-0.0.0.dist-info/licenses/LICENSE +94 -0
- pysips-0.0.0.dist-info/top_level.txt +2 -0
- tests/integration/test_log_likelihood.py +18 -0
- tests/integration/test_prior_with_bingo.py +45 -0
- tests/regression/test_basic_end_to_end.py +131 -0
- tests/regression/test_regressor_end_to_end.py +95 -0
- tests/unit/test_crossover_proposal.py +156 -0
- tests/unit/test_laplace_nmll.py +111 -0
- tests/unit/test_metropolis.py +111 -0
- tests/unit/test_mutation_proposal.py +196 -0
- tests/unit/test_prior.py +135 -0
- tests/unit/test_random_choice_proposal.py +136 -0
- tests/unit/test_regressor.py +227 -0
- tests/unit/test_sampler.py +133 -0
@@ -0,0 +1,156 @@
|
|
1
|
+
Metadata-Version: 2.4
|
2
|
+
Name: pysips
|
3
|
+
Version: 0.0.0
|
4
|
+
Summary: A python package for symbolic inference via posterior sampling.
|
5
|
+
Author-email: Geoffrey Bomarito <geoffrey.f.bomarito@nasa.gov>, Patrick Leser <patrick.e.leser@nasa.gov>
|
6
|
+
License-Expression: NASA-1.3
|
7
|
+
Project-URL: Documentation, https://nasa.github.io/pysips/
|
8
|
+
Project-URL: Repository, https://github.com/nasa/pysips
|
9
|
+
Keywords: symbolic regression
|
10
|
+
Classifier: Development Status :: 3 - Alpha
|
11
|
+
Classifier: Programming Language :: Python :: 3.12
|
12
|
+
Classifier: Programming Language :: Python :: 3.13
|
13
|
+
Requires-Python: >=3.12
|
14
|
+
Description-Content-Type: text/markdown
|
15
|
+
License-File: LICENSE
|
16
|
+
Requires-Dist: numpy
|
17
|
+
Requires-Dist: bingo-nasa
|
18
|
+
Requires-Dist: smcpy>=0.1.6
|
19
|
+
Requires-Dist: scikit-learn
|
20
|
+
Dynamic: license-file
|
21
|
+
|
22
|
+
# PySIPS: Python package for Symbolic Inference via Posterior Sampling
|
23
|
+
|
24
|
+
PySIPS is an open-source implementation of Bayesian symbolic regression via posterior sampling as described in the paper "Bayesian Symbolic Regression via Posterior Sampling" by G. F. Bomarito and P. E. Leser from NASA Langley Research Center.
|
25
|
+
|
26
|
+
## Purpose
|
27
|
+
|
28
|
+
PySIPS provides a robust framework for discovering interpretable symbolic expressions from data, with a particular focus on handling noisy datasets. Unlike traditional symbolic regression approaches, PySIPS uses a Bayesian framework with Sequential Monte Carlo (SMC) sampling to:
|
29
|
+
|
30
|
+
1. Enhance robustness to noise
|
31
|
+
2. Provide built-in uncertainty quantification
|
32
|
+
3. Discover parsimonious expressions with improved generalization
|
33
|
+
4. Reduce overfitting in symbolic regression tasks
|
34
|
+
|
35
|
+
## Algorithm Overview
|
36
|
+
|
37
|
+
PySIPS implements a Sequential Monte Carlo (SMC) framework for Bayesian symbolic regression that:
|
38
|
+
|
39
|
+
- Approximates the posterior distribution over symbolic expressions
|
40
|
+
- Uses probabilistic selection and adaptive annealing to explore the search space efficiently
|
41
|
+
- Employs normalized marginal likelihood for model evaluation
|
42
|
+
- Combines mutation and crossover operations as proposal mechanisms
|
43
|
+
- Provides model selection criteria based on maximum normalized marginal likelihood or posterior mode
|
44
|
+
|
45
|
+
## Installation
|
46
|
+
|
47
|
+
(Coming Soon!)
|
48
|
+
|
49
|
+
```bash
|
50
|
+
pip install pysips
|
51
|
+
```
|
52
|
+
|
53
|
+
## Example Usage
|
54
|
+
|
55
|
+
```python
|
56
|
+
import numpy as np
|
57
|
+
from pysips import PysipsRegressor
|
58
|
+
from sklearn.model_selection import train_test_split
|
59
|
+
from sklearn.metrics import r2_score
|
60
|
+
|
61
|
+
# Generate synthetic data (y = x^2 + noise)
|
62
|
+
np.random.seed(42)
|
63
|
+
X = np.linspace(-3, 3, 100).reshape(-1, 1)
|
64
|
+
y = X[:, 0]**2 + np.random.normal(0, 0.1, size=X.shape[0])
|
65
|
+
|
66
|
+
# Split data
|
67
|
+
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
|
68
|
+
|
69
|
+
# Initialize and fit the regressor
|
70
|
+
regressor = PysipsRegressor(
|
71
|
+
operators=['+', '-', '*', '^2'],
|
72
|
+
max_complexity=12,
|
73
|
+
num_particles=100,
|
74
|
+
num_mcmc_samples=10,
|
75
|
+
random_state=42
|
76
|
+
)
|
77
|
+
|
78
|
+
regressor.fit(X_train, y_train)
|
79
|
+
|
80
|
+
# Make predictions
|
81
|
+
y_pred = regressor.predict(X_test)
|
82
|
+
|
83
|
+
# Get the discovered expression
|
84
|
+
expression = regressor.get_expression()
|
85
|
+
print(f"Discovered expression: {expression}")
|
86
|
+
print(f"R² score: {r2_score(y_test, y_pred):.4f}")
|
87
|
+
|
88
|
+
# Get the models sampled from the posterior and their likelihoods
|
89
|
+
models, likelihoods = regressor.get_models()
|
90
|
+
```
|
91
|
+
|
92
|
+
### Example Output
|
93
|
+
|
94
|
+
```
|
95
|
+
Discovered expression: x_0^2
|
96
|
+
R² score: 0.9987
|
97
|
+
Number of unique models sampled: 32
|
98
|
+
```
|
99
|
+
|
100
|
+
## Advanced Features
|
101
|
+
|
102
|
+
- Control over operators and expression complexity
|
103
|
+
- Multiple model selection strategies
|
104
|
+
- Access to the full posterior distribution over expressions
|
105
|
+
- Compatible with scikit-learn's API for easy integration into ML pipelines
|
106
|
+
- Uncertainty quantification for symbolic regression results
|
107
|
+
|
108
|
+
## Citation
|
109
|
+
|
110
|
+
If you use PySIPS, please cite the following paper:
|
111
|
+
|
112
|
+
```bibtex
|
113
|
+
@article{bomarito2024bayesian,
|
114
|
+
title={Bayesian Symbolic Regression via Posterior Sampling},
|
115
|
+
author={Bomarito, Geoffrey F. and Leser, Patrick E.},
|
116
|
+
journal={Philosophical Transactions of the Royal Society A},
|
117
|
+
year={2025},
|
118
|
+
publisher={Royal Society}
|
119
|
+
}
|
120
|
+
```
|
121
|
+
|
122
|
+
## License
|
123
|
+
|
124
|
+
Notices:
|
125
|
+
Copyright 2025 United States Government as represented by the Administrator of the National Aeronautics and Space Administration. No copyright is claimed in the United States under Title 17, U.S. Code. All Other Rights Reserved.
|
126
|
+
|
127
|
+
The NASA Software “PySIPS” (LAR-20644-1) calls the following third-party software, which is subject to the terms and conditions of its licensor, as applicable at the time of licensing. The third-party software is not bundled or included with this software but may be available from the licensor. License hyperlinks are provided here for information purposes only.
|
128
|
+
|
129
|
+
NumPy
|
130
|
+
https://numpy.org/devdocs/license.html
|
131
|
+
Copyright (c) 2005-2025, NumPy Developers.
|
132
|
+
All rights reserved.
|
133
|
+
|
134
|
+
h5py
|
135
|
+
https://github.com/h5py/h5py/blob/master/LICENSE
|
136
|
+
Copyright (c) 2008 Andrew Collette and contributors
|
137
|
+
All rights reserved.
|
138
|
+
|
139
|
+
tqdm
|
140
|
+
https://github.com/tqdm/tqdm/blob/master/LICENCE
|
141
|
+
Copyright (c) 2013 noamraph
|
142
|
+
|
143
|
+
SciPy
|
144
|
+
https://github.com/scipy/scipy/blob/main/LICENSE.txt
|
145
|
+
Copyright (c) 2001-2002 Enthought, Inc. 2003, SciPy Developers.
|
146
|
+
All rights reserved.
|
147
|
+
|
148
|
+
Disclaimers
|
149
|
+
No Warranty: THE SUBJECT SOFTWARE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY OF ANY KIND, EITHER EXPRESSED, IMPLIED, OR STATUTORY, INCLUDING, BUT NOT LIMITED TO, ANY WARRANTY THAT THE SUBJECT SOFTWARE WILL CONFORM TO SPECIFICATIONS, ANY IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR FREEDOM FROM INFRINGEMENT, ANY WARRANTY THAT THE SUBJECT SOFTWARE WILL BE ERROR FREE, OR ANY WARRANTY THAT DOCUMENTATION, IF PROVIDED, WILL CONFORM TO THE SUBJECT SOFTWARE. THIS AGREEMENT DOES NOT, IN ANY MANNER, CONSTITUTE AN ENDORSEMENT BY GOVERNMENT AGENCY OR ANY PRIOR RECIPIENT OF ANY RESULTS, RESULTING DESIGNS, HARDWARE, SOFTWARE PRODUCTS OR ANY OTHER APPLICATIONS RESULTING FROM USE OF THE SUBJECT SOFTWARE. FURTHER, GOVERNMENT AGENCY DISCLAIMS ALL WARRANTIES AND LIABILITIES REGARDING THIRD-PARTY SOFTWARE, IF PRESENT IN THE ORIGINAL SOFTWARE, AND DISTRIBUTES IT "AS IS."
|
150
|
+
|
151
|
+
Waiver and Indemnity: RECIPIENT AGREES TO WAIVE ANY AND ALL CLAIMS AGAINST THE UNITED STATES GOVERNMENT, ITS CONTRACTORS AND SUBCONTRACTORS, AS WELL AS ANY PRIOR RECIPIENT. IF RECIPIENT'S USE OF THE SUBJECT SOFTWARE RESULTS IN ANY LIABILITIES, DEMANDS, DAMAGES, EXPENSES OR LOSSES ARISING FROM SUCH USE, INCLUDING ANY DAMAGES FROM PRODUCTS BASED ON, OR RESULTING FROM, RECIPIENT'S USE OF THE SUBJECT SOFTWARE, RECIPIENT SHALL INDEMNIFY AND HOLD HARMLESS THE UNITED STATES GOVERNMENT, ITS CONTRACTORS, AND SUBCONTRACTORS, AS WELL AS ANY PRIOR RECIPIENT, TO THE EXTENT PERMITTED BY LAW. RECIPIENT'S SOLE REMEDY FOR ANY SUCH MATTER SHALL BE THE IMMEDIATE, UNILATERAL TERMINATION OF THIS AGREEMENT.
|
152
|
+
|
153
|
+
|
154
|
+
## Acknowledgements
|
155
|
+
|
156
|
+
This work was developed at NASA Langley Research Center.
|
@@ -0,0 +1,26 @@
|
|
1
|
+
pysips/__init__.py,sha256=0a2_4KO9Xfhj19oOHwbTdwVwbcL6ii04QGGRaURHRs8,2729
|
2
|
+
pysips/crossover_proposal.py,sha256=E9bORMwqrTzqII3awevu07LTqvfsG2Rj8hSI58e6oew,5318
|
3
|
+
pysips/laplace_nmll.py,sha256=s20RqhG6f85a1mR3VeyeYwspsitGcUvS5yzhBXk16Hc,3789
|
4
|
+
pysips/metropolis.py,sha256=_yHGzSobmdle1clKBKItSS2hP66bRGUZMcr4SwfwFLQ,4281
|
5
|
+
pysips/mutation_proposal.py,sha256=Mdnxdm-PzNg_PymO-YYlfv7yuP4Fvdvj6FUMEd4tQcI,8314
|
6
|
+
pysips/prior.py,sha256=S2fyfB2noTTMlZb_L985TYsM87DLQOh3B_DRXOulwdQ,3076
|
7
|
+
pysips/random_choice_proposal.py,sha256=to_2Ij_hJ_r-KgpYmpfS6oQQ8I7z8UcGJzvyw3hJjP8,6672
|
8
|
+
pysips/regressor.py,sha256=0i0UicaY389JcaPtG4uttgdmYL399fyrN561jmSF_Ac,15263
|
9
|
+
pysips/sampler.py,sha256=yQJzsC6b0GDHiXgDfo1LYg9Xc31y3EE86-qNtSgFptE,5801
|
10
|
+
pysips-0.0.0.dist-info/licenses/LICENSE,sha256=J1HrtwG3szOqedwTaNPiLrFsg6y0l5elQClIbaRyVXI,14176
|
11
|
+
tests/integration/test_log_likelihood.py,sha256=bILhs6A1473emzMTNZRvugT-CJxBC0ASep0q01-eqCA,634
|
12
|
+
tests/integration/test_prior_with_bingo.py,sha256=xy0LDlJbSFuDbCJ9R9Qc5z-qVQ5UNGK_819velQkfyU,1139
|
13
|
+
tests/regression/test_basic_end_to_end.py,sha256=8bpxqo9cAk1iAggv8zwTXYzm8Ul-RbJhOsJaX5do3r4,3658
|
14
|
+
tests/regression/test_regressor_end_to_end.py,sha256=ul_LBgzLK92dUBBw9s41qUmwziYIlrETPgVX3NRPhz4,2871
|
15
|
+
tests/unit/test_crossover_proposal.py,sha256=fElKgiSO-rp9KUEhFYD06cYx6RLGYy1-ZOG-6t2Cq-M,5742
|
16
|
+
tests/unit/test_laplace_nmll.py,sha256=8-BjnP0Mpr61XYey_Kvlk00ZvXCHL4t3jMoLKzTWsTM,4005
|
17
|
+
tests/unit/test_metropolis.py,sha256=rtvhHBw249WmAXwcvsqPItyyPBEfnIUoK5eE76ZVu8A,3367
|
18
|
+
tests/unit/test_mutation_proposal.py,sha256=H1amdVdeV45J3Ww72clSxzo6rFkZDw1qnsoGMTf9ORY,7188
|
19
|
+
tests/unit/test_prior.py,sha256=n0jiUTy80tW7exYJ8fbttZLwdRRHxIOzWMi1PVDhRp8,4580
|
20
|
+
tests/unit/test_random_choice_proposal.py,sha256=0NekKWkLFN2BNfMXvVxU8trEBktbWsipKLwy14ImpF4,5211
|
21
|
+
tests/unit/test_regressor.py,sha256=omr2Z1nPT_OkLqh_v3YMn0Zw6H0Cebj4yIHcnYEuULA,7630
|
22
|
+
tests/unit/test_sampler.py,sha256=5QV_PIALIZsLVzfNkRBQ9md4ypBA-uhBS0iSqZL8qGs,4318
|
23
|
+
pysips-0.0.0.dist-info/METADATA,sha256=DW85gQs8e6OzPGshLceCn-ZCFB5RzoUbIs1jpdLtrAg,6701
|
24
|
+
pysips-0.0.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
25
|
+
pysips-0.0.0.dist-info/top_level.txt,sha256=-JZT9y5QJV6SZnWdfFctDC_Qc_ulByTB0-qRuXZyGYY,13
|
26
|
+
pysips-0.0.0.dist-info/RECORD,,
|
@@ -0,0 +1,94 @@
|
|
1
|
+
NASA OPEN SOURCE AGREEMENT VERSION 1.3
|
2
|
+
|
3
|
+
THIS OPEN SOURCE AGREEMENT (“AGREEMENT”) DEFINES THE RIGHTS OF USE, REPRODUCTION, DISTRIBUTION, MODIFICATION AND REDISTRIBUTION OF CERTAIN COMPUTER SOFTWARE ORIGINALLY RELEASED BY THE UNITED STATES GOVERNMENT AS REPRESENTED BY THE GOVERNMENT AGENCY LISTED BELOW ("GOVERNMENT AGENCY"). THE UNITED STATES GOVERNMENT, AS REPRESENTED BY GOVERNMENT AGENCY, IS AN INTENDED THIRD-PARTY BENEFICIARY OF ALL SUBSEQUENT DISTRIBUTIONS OR REDISTRIBUTIONS OF THE SUBJECT SOFTWARE. ANYONE WHO USES, REPRODUCES, DISTRIBUTES, MODIFIES OR REDISTRIBUTES THE SUBJECT SOFTWARE, AS DEFINED HEREIN, OR ANY PART THEREOF, IS, BY THAT ACTION, ACCEPTING IN FULL THE RESPONSIBILITIES AND OBLIGATIONS CONTAINED IN THIS AGREEMENT.
|
4
|
+
|
5
|
+
Government Agency: National Aeronautics and Space Administration
|
6
|
+
Government Agency Original Software Designation: LAR-20644-1
|
7
|
+
Government Agency Original Software Title: Sequential Monte Carlo-based Symbolic Regression (SMC-SR)
|
8
|
+
Government Agency Point of Contact for Original Software: patrick.e.leser@nasa.gov
|
9
|
+
|
10
|
+
1. DEFINITIONS
|
11
|
+
A. “Contributor” means Government Agency, as the developer of the Original Software, and any entity that makes a Modification.
|
12
|
+
B. “Covered Patents” mean patent claims licensable by a Contributor that are necessarily infringed by the use or sale of its Modification alone or when combined with the Subject Software.
|
13
|
+
C. “Display” means the showing of a copy of the Subject Software, either directly or by means of an image, or any other device.
|
14
|
+
D. “Distribution” means conveyance or transfer of the Subject Software, regardless of means, to another.
|
15
|
+
E. “Larger Work” means computer software that combines Subject Software, or portions thereof, with software separate from the Subject Software that is not governed by the terms of this Agreement.
|
16
|
+
F. “Modification” means any alteration of, including addition to or deletion from, the substance or structure of either the Original Software or Subject Software, and includes derivative works, as that term is defined in the Copyright Statute, 17 USC 101. However, the act of including Subject Software as part of a Larger Work does not in and of itself constitute a Modification.
|
17
|
+
G. “Original Software” means the computer software first released under this Agreement by Government Agency with Government Agency designation LAR-20644-1 and entitled Sequential Monte Carlo-based Symbolic Regression (SMC-SR), including source code, object code and accompanying documentation, if any.
|
18
|
+
H. “Recipient” means anyone who acquires the Subject Software under this Agreement, including all Contributors.
|
19
|
+
I. “Redistribution” means Distribution of the Subject Software after a Modification has been made.
|
20
|
+
J. “Reproduction” means the making of a counterpart, image or copy of the Subject Software.
|
21
|
+
K. “Sale” means the exchange of the Subject Software for money or equivalent value.
|
22
|
+
L. “Subject Software” means the Original Software, Modifications, or any respective parts thereof.
|
23
|
+
M. “Use” means the application or employment of the Subject Software for any purpose.
|
24
|
+
|
25
|
+
2. GRANT OF RIGHTS
|
26
|
+
A. Under Non-Patent Rights: Subject to the terms and conditions of this Agreement, each Contributor, with respect to its own contribution to the Subject Software, hereby grants to each Recipient a non-exclusive, world-wide, royalty-free license to engage in the following activities pertaining to the Subject Software:
|
27
|
+
1. Use
|
28
|
+
2. Distribution
|
29
|
+
3. Reproduction
|
30
|
+
4. Modification
|
31
|
+
5. Redistribution
|
32
|
+
6. Display
|
33
|
+
B. Under Patent Rights: Subject to the terms and conditions of this Agreement, each Contributor, with respect to its own contribution to the Subject Software, hereby grants to each Recipient under Covered Patents a non-exclusive, world-wide, royalty-free license to engage in the following activities pertaining to the Subject Software:
|
34
|
+
1. Use
|
35
|
+
2. Distribution
|
36
|
+
3. Reproduction
|
37
|
+
4. Sale
|
38
|
+
5. Offer for Sale
|
39
|
+
C. The rights granted under Paragraph B. also apply to the combination of a Contributor’s Modification and the Subject Software if, at the time the Modification is added by the Contributor, the addition of such Modification causes the combination to be covered by the Covered Patents. It does not apply to any other combinations that include a Modification.
|
40
|
+
D. The rights granted in Paragraphs A. and B. allow the Recipient to sublicense those same rights. Such sublicense must be under the same terms and conditions of this Agreement.
|
41
|
+
|
42
|
+
3. OBLIGATIONS OF RECIPIENT
|
43
|
+
A. Distribution or Redistribution of the Subject Software must be made under this Agreement except for additions covered under paragraph 3H.
|
44
|
+
1. Whenever a Recipient distributes or redistributes the Subject Software, a copy of this Agreement must be included with each copy of the Subject Software; and
|
45
|
+
2. If Recipient distributes or redistributes the Subject Software in any form other than source code, Recipient must also make the source code freely available, and must provide with each copy of the Subject Software information on how to obtain the source code in a reasonable manner on or through a medium customarily used for software exchange.
|
46
|
+
B. Each Recipient must ensure that the following copyright notice appears prominently in the Subject Software:
|
47
|
+
This software may be used, reproduced, and provided to others only as permitted under the terms of the agreement under which it was acquired from the U.S. Government. Neither title to, nor ownership of, the software is hereby transferred. This notice shall remain on all copies of the software.
|
48
|
+
Copyright 2025 United States Government as represented by the Administrator of the National Aeronautics and Space Administration. No copyright is claimed in the United States under Title 17, U.S. Code. All Other Rights Reserved.
|
49
|
+
The NASA Software “Sequential Monte Carlo-based Symbolic Regression (SMC-SR)” (LAR-20644-1) calls the following third-party software, which is subject to the terms and conditions of its licensor, as applicable at the time of licensing. The third-party software is not bundled or included with this software but may be available from the licensor. License hyperlinks are provided here for information purposes only.
|
50
|
+
|
51
|
+
NumPy
|
52
|
+
https://numpy.org/devdocs/license.html
|
53
|
+
Copyright (c) 2005-2025, NumPy Developers.
|
54
|
+
All rights reserved.
|
55
|
+
h5py
|
56
|
+
https://github.com/h5py/h5py/blob/master/LICENSE
|
57
|
+
Copyright (c) 2008 Andrew Collette and contributors
|
58
|
+
All rights reserved.
|
59
|
+
|
60
|
+
tqdm
|
61
|
+
https://github.com/tqdm/tqdm/blob/master/LICENCE
|
62
|
+
Copyright (c) 2013 noamraph
|
63
|
+
|
64
|
+
SciPy
|
65
|
+
https://github.com/scipy/scipy/blob/main/LICENSE.txt
|
66
|
+
Copyright (c) 2001-2002 Enthought, Inc. 2003, SciPy Developers.
|
67
|
+
All rights reserved.
|
68
|
+
|
69
|
+
C. Each Contributor must characterize its alteration of the Subject Software as a Modification and must identify itself as the originator of its Modification in a manner that reasonably allows subsequent Recipients to identify the originator of the Modification. In fulfillment of these requirements, Contributor must include a file (e.g., a change log file) that describes the alterations made and the date of the alterations, identifies Contributor as originator of the alterations, and consents to characterization of the alterations as a Modification, for example, by including a statement that the Modification is derived, directly or indirectly, from Original Software provided by Government Agency. Once consent is granted, it may not thereafter be revoked.
|
70
|
+
D. A Contributor may add its own copyright notice to the Subject Software. Once a copyright notice has been added to the Subject Software, a Recipient may not remove it without the express permission of the Contributor who added the notice.
|
71
|
+
E. A Recipient may not make any representation in the Subject Software or in any promotional, advertising or other material that may be construed as an endorsement by Government Agency or by any prior Recipient of any product or service provided by Recipient, or that may seek to obtain commercial advantage by the fact of Government Agency's or a prior Recipient’s participation in this Agreement.
|
72
|
+
F. In an effort to track usage and maintain accurate records of the Subject Software, each Recipient, upon receipt of the Subject Software, is requested to provide Government Agency, by e-mail to the Government Agency Point of Contact listed in clause 5.F., the following information: First and Last Name; Email Address; and Affiliation. Recipient’s name and personal information shall be used for statistical purposes only. Once a Recipient makes a Modification available, it is requested that the Recipient inform Government Agency, by e-mail to the Government Agency Point of Contact listed in clause 5.F., how to access the Modification.
|
73
|
+
G. Each Contributor represents that that its Modification is believed to be Contributor’s original creation and does not violate any existing agreements, regulations, statutes or rules, and further that Contributor has sufficient rights to grant the rights conveyed by this Agreement.
|
74
|
+
H. A Recipient may choose to offer, and to charge a fee for, warranty, support, indemnity and/or liability obligations to one or more other Recipients of the Subject Software. A Recipient may do so, however, only on its own behalf and not on behalf of Government Agency or any other Recipient. Such a Recipient must make it absolutely clear that any such warranty, support, indemnity and/or liability obligation is offered by that Recipient alone. Further, such Recipient agrees to indemnify Government Agency and every other Recipient for any liability incurred by them as a result of warranty, support, indemnity and/or liability offered by such Recipient.
|
75
|
+
I. A Recipient may create a Larger Work by combining Subject Software with separate software not governed by the terms of this agreement and distribute the Larger Work as a single product. In such case, the Recipient must make sure Subject Software, or portions thereof, included in the Larger Work is subject to this Agreement.
|
76
|
+
J. Notwithstanding any provisions contained herein, Recipient is hereby put on notice that export of any goods or technical data from the United States may require some form of export license from the U.S. Government. Failure to obtain necessary export licenses may result in criminal liability under U.S. laws. Government Agency neither represents that a license shall not be required nor that, if required, it shall be issued. Nothing granted herein provides any such export license.
|
77
|
+
|
78
|
+
4. DISCLAIMER OF WARRANTIES AND LIABILITIES; WAIVER AND INDEMNIFICATION
|
79
|
+
A. No Warranty: THE SUBJECT SOFTWARE IS PROVIDED “AS IS” WITHOUT ANY WARRANTY OF ANY KIND, EITHER EXPRESSED, IMPLIED, OR STATUTORY, INCLUDING, BUT NOT LIMITED TO, ANY WARRANTY THAT THE SUBJECT SOFTWARE WILL CONFORM TO SPECIFICATIONS, ANY IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR FREEDOM FROM INFRINGEMENT, ANY WARRANTY THAT THE SUBJECT SOFTWARE WILL BE ERROR FREE, OR ANY WARRANTY THAT DOCUMENTATION, IF PROVIDED, WILL CONFORM TO THE SUBJECT SOFTWARE. THIS AGREEMENT DOES NOT, IN ANY MANNER, CONSTITUTE AN ENDORSEMENT BY GOVERNMENT AGENCY OR ANY PRIOR RECIPIENT OF ANY RESULTS, RESULTING DESIGNS, HARDWARE, SOFTWARE PRODUCTS OR ANY OTHER APPLICATIONS RESULTING FROM USE OF THE SUBJECT SOFTWARE. FURTHER, GOVERNMENT AGENCY DISCLAIMS ALL WARRANTIES AND LIABILITIES REGARDING THIRD-PARTY SOFTWARE, IF PRESENT IN THE ORIGINAL SOFTWARE, AND DISTRIBUTES IT “AS IS.”
|
80
|
+
B. Waiver and Indemnity: RECIPIENT AGREES TO WAIVE ANY AND ALL CLAIMS AGAINST THE UNITED STATES GOVERNMENT, ITS CONTRACTORS AND SUBCONTRACTORS, AS WELL AS ANY PRIOR RECIPIENT. IF RECIPIENT'S USE OF THE SUBJECT SOFTWARE RESULTS IN ANY LIABILITIES, DEMANDS, DAMAGES, EXPENSES OR LOSSES ARISING FROM SUCH USE, INCLUDING ANY DAMAGES FROM PRODUCTS BASED ON, OR RESULTING FROM, RECIPIENT'S USE OF THE SUBJECT SOFTWARE, RECIPIENT SHALL INDEMNIFY AND HOLD HARMLESS THE UNITED STATES GOVERNMENT, ITS CONTRACTORS AND SUBCONTRACTORS, AS WELL AS ANY PRIOR RECIPIENT, TO THE EXTENT PERMITTED BY LAW. RECIPIENT'S SOLE REMEDY FOR ANY SUCH MATTER SHALL BE THE IMMEDIATE, UNILATERAL TERMINATION OF THIS AGREEMENT.
|
81
|
+
|
82
|
+
5. GENERAL TERMS
|
83
|
+
A. Termination: This Agreement and the rights granted hereunder will terminate automatically if a Recipient fails to comply with these terms and conditions, and fails to cure such noncompliance within thirty (30) days of becoming aware of such noncompliance. Upon termination, a Recipient agrees to immediately cease use and distribution of the Subject Software. All sublicenses to the Subject Software properly granted by the breaching Recipient shall survive any such termination of this Agreement.
|
84
|
+
B. Severability: If any provision of this Agreement is invalid or unenforceable under applicable law, it shall not affect the validity or enforceability of the remainder of the terms of this Agreement.
|
85
|
+
C. Applicable Law: This Agreement shall be subject to United States federal law only for all purposes, including, but not limited to, determining the validity of this Agreement, the meaning of its provisions and the rights, obligations and remedies of the parties.
|
86
|
+
D. Entire Understanding: This Agreement constitutes the entire understanding and agreement of the parties relating to release of the Subject Software and may not be superseded, modified or amended except by further written agreement duly executed by the parties.
|
87
|
+
E. Binding Authority: By accepting and using the Subject Software under this Agreement, a Recipient affirms its authority to bind the Recipient to all terms and conditions of this Agreement and that that Recipient hereby agrees to all terms and conditions herein.
|
88
|
+
F. Point of Contact: Any Recipient contact with Government Agency is to be directed to the designated representative as follows:
|
89
|
+
Maxine Saunders
|
90
|
+
Software Release Authority
|
91
|
+
MS 151, NASA Langley Research Center
|
92
|
+
Hampton, VA 23681
|
93
|
+
Phone: 757-864-2025
|
94
|
+
Email: larc-sra@mail.nasa.gov
|
@@ -0,0 +1,18 @@
|
|
1
|
+
import numpy as np
|
2
|
+
|
3
|
+
from pysips.laplace_nmll import LaplaceNmll
|
4
|
+
from pysips.metropolis import Metropolis
|
5
|
+
from bingo.symbolic_regression.agraph.agraph import AGraph
|
6
|
+
|
7
|
+
|
8
|
+
def test_log_likelihood_relative():
    """Check that a linear candidate out-scores a constant one on linear data.

    The data are a noisy straight line (slope 5, intercept 5). The test asserts
    that the log-likelihood reported for ``1.0 + 2.0*X_0`` is strictly greater
    than the one reported for the constant expression ``1.0``.
    """
    x = np.arange(0, 100)
    noise = np.random.default_rng(34).normal(0, 0.1, 100)
    data = 5 * x + 5 + noise

    constant_model = AGraph(equation="1.0")
    linear_model = AGraph(equation="1.0 + 2.0*X_0")
    models = np.c_[[constant_model, linear_model]]

    likelihood = LaplaceNmll(np.c_[x], data)
    # Only the likelihood evaluation is exercised, so no proposal or prior
    # is supplied to the Metropolis kernel.
    mcmc = Metropolis(likelihood=likelihood, proposal=None, prior=None)

    log_likes = mcmc.evaluate_log_likelihood(models).flatten()
    constant_ll = log_likes[0]
    linear_ll = log_likes[1]
    assert constant_ll < linear_ll
|
@@ -0,0 +1,45 @@
|
|
1
|
+
import pytest
|
2
|
+
from bingo.symbolic_regression import ComponentGenerator, AGraphGenerator
|
3
|
+
|
4
|
+
|
5
|
+
from pysips.prior import Prior
|
6
|
+
|
7
|
+
|
8
|
+
def get_generator(
    X_dim,
    operators,
    terminal_probability=0.1,
    constant_probability=0.5,
    max_complexity=48,
    **kwargs,
):
    """Build a bingo ``AGraphGenerator`` restricted to the given operators.

    Parameters
    ----------
    X_dim
        Forwarded to ``ComponentGenerator`` as ``input_x_dimension``.
    operators
        Iterable of operators; each one is registered with ``add_operator``.
    terminal_probability, constant_probability
        Forwarded unchanged to ``ComponentGenerator``.
    max_complexity
        First positional argument of ``AGraphGenerator``.
    **kwargs
        Accepted but unused.

    Returns
    -------
    The configured ``AGraphGenerator`` (Python backend, simplification on).
    """
    components = ComponentGenerator(
        input_x_dimension=X_dim,
        terminal_probability=terminal_probability,
        constant_probability=constant_probability,
    )
    for operator in operators:
        components.add_operator(operator)

    return AGraphGenerator(
        max_complexity,
        components,
        use_python=True,
        use_simplification=True,
    )
|
33
|
+
|
34
|
+
|
35
|
+
@pytest.mark.parametrize("N", [10, 500])
def test_prior_makes_initial_set_of_unique_expressions(N):
    """Sampling N models from the prior gives an (N, 1) array with N distinct entries."""
    prior = Prior(get_generator(1, ["+", "-", "*"]))

    samples = prior.rvs(N)

    assert samples.shape == (N, 1)
    # A set collapses duplicates, so its size equals N only if every model is unique.
    distinct = set(samples.flatten())
    assert len(distinct) == N
|
@@ -0,0 +1,131 @@
|
|
1
|
+
import argparse
|
2
|
+
from pathlib import Path
|
3
|
+
import numpy as np
|
4
|
+
import h5py
|
5
|
+
|
6
|
+
from pysips.laplace_nmll import LaplaceNmll
|
7
|
+
from pysips.mutation_proposal import MutationProposal
|
8
|
+
from pysips.crossover_proposal import CrossoverProposal
|
9
|
+
from pysips.random_choice_proposal import RandomChoiceProposal
|
10
|
+
from pysips.sampler import sample
|
11
|
+
|
12
|
+
from bingo.symbolic_regression import ComponentGenerator, AGraphGenerator
|
13
|
+
|
14
|
+
|
15
|
+
def get_proposal(
    X_dim,
    operators,
    terminal_probability=0.1,
    constant_probability=None,
    command_probability=0.2,
    node_probability=0.2,
    parameter_probability=0.2,
    prune_probability=0.2,
    fork_probability=0.2,
    repeat_mutation_probability=0.0,
    crossover_pool_size=500,
    mutation_prob=0.5,
    crossover_prob=0.5,
    exclusuive=True,
    max_complexity=48,
    **kwargs,
):
    """Assemble a ``RandomChoiceProposal`` mixing mutation and crossover.

    A ``MutationProposal`` is built from the mutation-related probabilities,
    and a ``CrossoverProposal`` is built from a pool of
    ``crossover_pool_size`` distinct models drawn from ``get_generator``.
    The two are combined with weights ``mutation_prob`` and ``crossover_prob``.

    ``exclusuive`` (spelling kept because it is part of the call interface)
    is passed through as the third positional argument of
    ``RandomChoiceProposal``. Extra ``**kwargs`` are accepted but unused.
    """
    generator = get_generator(
        X_dim,
        operators,
        terminal_probability=terminal_probability,
        constant_probability=constant_probability,
        max_complexity=max_complexity,
    )

    mutation_settings = {
        "operators": operators,
        "terminal_probability": terminal_probability,
        "constant_probability": constant_probability,
        "command_probability": command_probability,
        "node_probability": node_probability,
        "parameter_probability": parameter_probability,
        "prune_probability": prune_probability,
        "fork_probability": fork_probability,
        "repeat_mutation_probability": repeat_mutation_probability,
    }
    mutation = MutationProposal(X_dim, **mutation_settings)

    # Keep drawing until the pool holds the requested number of distinct
    # models; the set silently drops repeats.
    gene_pool = set()
    while len(gene_pool) < crossover_pool_size:
        gene_pool.add(generator())
    crossover = CrossoverProposal(list(gene_pool))

    proposals = [mutation, crossover]
    weights = [mutation_prob, crossover_prob]
    return RandomChoiceProposal(proposals, weights, exclusuive)
|
60
|
+
|
61
|
+
|
62
|
+
def get_generator(
    X_dim,
    operators,
    terminal_probability=0.1,
    constant_probability=None,
    max_complexity=48,
    **kwargs,
):
    """Build a bingo ``AGraphGenerator`` restricted to the given operators.

    Parameters
    ----------
    X_dim
        Forwarded to ``ComponentGenerator`` as ``input_x_dimension``.
    operators
        Iterable of operators; each one is registered with ``add_operator``.
    terminal_probability, constant_probability
        Forwarded unchanged to ``ComponentGenerator``.
    max_complexity
        First positional argument of ``AGraphGenerator``.
    **kwargs
        Accepted but unused, which lets callers unpack a larger config dict.

    Returns
    -------
    The configured ``AGraphGenerator`` (Python backend, simplification on).
    """
    components = ComponentGenerator(
        input_x_dimension=X_dim,
        terminal_probability=terminal_probability,
        constant_probability=constant_probability,
    )
    for operator in operators:
        components.add_operator(operator)

    return AGraphGenerator(
        max_complexity,
        components,
        use_python=True,
        use_simplification=True,
    )
|
87
|
+
|
88
|
+
|
89
|
+
def test_basic_end_to_end():
    """Smoke-test the whole sampling pipeline on a noisy sine curve.

    The likelihood, generator and proposal are all built from one shared
    configuration dictionary and handed to ``sample`` with fixed seeds. The
    test makes no assertions on the result; it passes if nothing raises.
    """
    num_points = 21
    X = np.c_[np.linspace(0, 2 * np.pi, num_points)]
    noise = np.random.default_rng(34).normal(0, 0.5, num_points)
    y = (np.sin(X) * 2 + 4).flatten() + noise

    n_features = X.shape[1]
    config = {
        "X_dim": n_features,
        "constant_probability": 1 / (n_features + 1),
        "operators": ["+", "*"],
        "param_init_bounds": [-5, 5],
        "opt_restarts": 1,
        "terminal_probability": 0.1,
        "command_probability": 0.2,
        "node_probability": 0.2,
        "parameter_probability": 0.2,
        "prune_probability": 0.2,
        "fork_probability": 0.2,
        "repeat_mutation_probability": 0.05,
        "crossover_pool_size": 50,
        "mutation_prob": 0.75,
        "crossover_prob": 0.25,
        "exclusuive": True,
        "max_complexity": 24,
        "num_particles": 50,
        "num_mcmc_samples": 5,
        "target_ess": 0.8,
    }

    # Only these entries are forwarded to the sampler through ``kwargs``.
    sampler_keys = ("num_particles", "num_mcmc_samples", "target_ess")
    sampler_kwargs = {key: config[key] for key in sampler_keys}

    likelihood = LaplaceNmll(X, y)
    generator = get_generator(**config)
    proposal = get_proposal(**config)

    models, likelihoods = sample(
        likelihood,
        proposal,
        generator,
        seed=34,
        kwargs=sampler_kwargs,
    )
|
@@ -0,0 +1,95 @@
|
|
1
|
+
import numpy as np
|
2
|
+
import pytest
|
3
|
+
from sklearn.model_selection import train_test_split, GridSearchCV
|
4
|
+
from sklearn.metrics import mean_squared_error
|
5
|
+
from pysips.regressor import PysipsRegressor
|
6
|
+
|
7
|
+
|
8
|
+
@pytest.fixture
def synthetic_data():
    """Provide a noisy sine-wave regression dataset as ``(X, y)``."""
    num_points = 21
    grid = np.linspace(0, 2 * np.pi, num_points)
    X = np.c_[grid]

    clean_signal = (np.sin(X) * 2 + 4).flatten()
    # A fixed seed (34) keeps the noise, and so the data, the same on every run.
    rng = np.random.default_rng(34)
    noise = rng.normal(0, 0.5, num_points)

    return X, clean_signal + noise
|
16
|
+
|
17
|
+
|
18
|
+
@pytest.fixture
def train_test_data(synthetic_data):
    """Provide a seeded train/test split of the synthetic dataset.

    20% of the samples are held out for testing. The result of
    ``train_test_split`` is returned as-is; consumers unpack it as
    ``X_train, X_test, y_train, y_test``.
    """
    features, targets = synthetic_data
    split = train_test_split(features, targets, test_size=0.2, random_state=42)
    return split
|
23
|
+
|
24
|
+
|
25
|
+
@pytest.fixture
def base_regressor():
    """Provide a small, seeded ``PysipsRegressor``.

    Only the settings listed below are overridden; every other constructor
    argument is left at its default.
    """
    settings = {
        "operators": ["+", "*", "sin"],
        "max_complexity": 24,
        "num_particles": 20,
        "random_state": 42,
    }
    return PysipsRegressor(**settings)
|
34
|
+
|
35
|
+
|
36
|
+
def test_basic_end_to_end(train_test_data, base_regressor):
    """Test basic end-to-end workflow for PysipsRegressor.

    Fits the regressor on the training split, predicts on the held-out split
    and verifies that the fitted estimator and its predictions are usable.
    """
    X_train, X_test, y_train, y_test = train_test_data

    model = base_regressor
    model.fit(X_train, y_train)

    # A fitted estimator exposes the selected model, as checked for the
    # grid-search estimator elsewhere in this file.
    assert hasattr(model, "best_model_")

    y_pred = model.predict(X_test)
    # One prediction per held-out sample.
    assert y_pred.shape == y_test.shape

    # The error metric must be a real number; without these assertions the
    # test would pass for any output.
    mse = mean_squared_error(y_test, y_pred)
    assert np.isfinite(mse)
|
48
|
+
|
49
|
+
|
50
|
+
def test_hyperparameter_optimization(synthetic_data):
    """Check that ``PysipsRegressor`` can be tuned with ``GridSearchCV``."""
    X, y = synthetic_data

    # Deliberately small settings so the grid search stays quick.
    estimator = PysipsRegressor(
        operators=["+", "*", "sin"],
        max_complexity=20,
        num_particles=10,
        num_mcmc_samples=50,
        random_state=42,
    )

    # Two candidate values for each of three hyperparameters.
    search_space = {
        "max_complexity": [15, 20],
        "terminal_probability": [0.1, 0.2],
        "mutation_prob": [0.6, 0.7],
    }

    search = GridSearchCV(
        estimator=estimator,
        param_grid=search_space,
        cv=3,
        scoring="neg_mean_squared_error",
        verbose=1,
        n_jobs=1,  # single job for testing
    )
    search.fit(X, y)

    # The search finished and recorded the winning parameter combination.
    assert hasattr(search, "best_params_")

    # The best estimator is itself fitted.
    assert hasattr(search.best_estimator_, "best_model_")

    # Predictions come back with one value per input sample.
    predictions = search.predict(X)
    assert predictions.shape == y.shape
|