pysips 0.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31) hide show
  1. pysips-0.0.0/LICENSE +94 -0
  2. pysips-0.0.0/PKG-INFO +156 -0
  3. pysips-0.0.0/README.md +135 -0
  4. pysips-0.0.0/pyproject.toml +36 -0
  5. pysips-0.0.0/pysips/__init__.py +53 -0
  6. pysips-0.0.0/pysips/crossover_proposal.py +138 -0
  7. pysips-0.0.0/pysips/laplace_nmll.py +104 -0
  8. pysips-0.0.0/pysips/metropolis.py +126 -0
  9. pysips-0.0.0/pysips/mutation_proposal.py +220 -0
  10. pysips-0.0.0/pysips/prior.py +106 -0
  11. pysips-0.0.0/pysips/random_choice_proposal.py +177 -0
  12. pysips-0.0.0/pysips/regressor.py +451 -0
  13. pysips-0.0.0/pysips/sampler.py +159 -0
  14. pysips-0.0.0/pysips.egg-info/PKG-INFO +156 -0
  15. pysips-0.0.0/pysips.egg-info/SOURCES.txt +29 -0
  16. pysips-0.0.0/pysips.egg-info/dependency_links.txt +1 -0
  17. pysips-0.0.0/pysips.egg-info/requires.txt +4 -0
  18. pysips-0.0.0/pysips.egg-info/top_level.txt +3 -0
  19. pysips-0.0.0/setup.cfg +4 -0
  20. pysips-0.0.0/tests/integration/test_log_likelihood.py +18 -0
  21. pysips-0.0.0/tests/integration/test_prior_with_bingo.py +45 -0
  22. pysips-0.0.0/tests/regression/test_basic_end_to_end.py +131 -0
  23. pysips-0.0.0/tests/regression/test_regressor_end_to_end.py +95 -0
  24. pysips-0.0.0/tests/unit/test_crossover_proposal.py +156 -0
  25. pysips-0.0.0/tests/unit/test_laplace_nmll.py +111 -0
  26. pysips-0.0.0/tests/unit/test_metropolis.py +111 -0
  27. pysips-0.0.0/tests/unit/test_mutation_proposal.py +196 -0
  28. pysips-0.0.0/tests/unit/test_prior.py +135 -0
  29. pysips-0.0.0/tests/unit/test_random_choice_proposal.py +136 -0
  30. pysips-0.0.0/tests/unit/test_regressor.py +227 -0
  31. pysips-0.0.0/tests/unit/test_sampler.py +133 -0
pysips-0.0.0/LICENSE ADDED
@@ -0,0 +1,94 @@
1
+ NASA OPEN SOURCE AGREEMENT VERSION 1.3
2
+
3
+ THIS OPEN SOURCE AGREEMENT (“AGREEMENT”) DEFINES THE RIGHTS OF USE, REPRODUCTION, DISTRIBUTION, MODIFICATION AND REDISTRIBUTION OF CERTAIN COMPUTER SOFTWARE ORIGINALLY RELEASED BY THE UNITED STATES GOVERNMENT AS REPRESENTED BY THE GOVERNMENT AGENCY LISTED BELOW ("GOVERNMENT AGENCY"). THE UNITED STATES GOVERNMENT, AS REPRESENTED BY GOVERNMENT AGENCY, IS AN INTENDED THIRD-PARTY BENEFICIARY OF ALL SUBSEQUENT DISTRIBUTIONS OR REDISTRIBUTIONS OF THE SUBJECT SOFTWARE. ANYONE WHO USES, REPRODUCES, DISTRIBUTES, MODIFIES OR REDISTRIBUTES THE SUBJECT SOFTWARE, AS DEFINED HEREIN, OR ANY PART THEREOF, IS, BY THAT ACTION, ACCEPTING IN FULL THE RESPONSIBILITIES AND OBLIGATIONS CONTAINED IN THIS AGREEMENT.
4
+
5
+ Government Agency: National Aeronautics and Space Administration
6
+ Government Agency Original Software Designation: LAR-20644-1
7
+ Government Agency Original Software Title: Sequential Monte Carlo-based Symbolic Regression (SMC-SR)
8
+ Government Agency Point of Contact for Original Software: patrick.e.leser@nasa.gov
9
+
10
+ 1. DEFINITIONS
11
+ A. “Contributor” means Government Agency, as the developer of the Original Software, and any entity that makes a Modification.
12
+ B. “Covered Patents” mean patent claims licensable by a Contributor that are necessarily infringed by the use or sale of its Modification alone or when combined with the Subject Software.
13
+ C. “Display” means the showing of a copy of the Subject Software, either directly or by means of an image, or any other device.
14
+ D. “Distribution” means conveyance or transfer of the Subject Software, regardless of means, to another.
15
+ E. “Larger Work” means computer software that combines Subject Software, or portions thereof, with software separate from the Subject Software that is not governed by the terms of this Agreement.
16
+ F. “Modification” means any alteration of, including addition to or deletion from, the substance or structure of either the Original Software or Subject Software, and includes derivative works, as that term is defined in the Copyright Statute, 17 USC 101. However, the act of including Subject Software as part of a Larger Work does not in and of itself constitute a Modification.
17
+ G. “Original Software” means the computer software first released under this Agreement by Government Agency with Government Agency designation LAR-20644-1 and entitled Sequential Monte Carlo-based Symbolic Regression (SMC-SR), including source code, object code and accompanying documentation, if any.
18
+ H. “Recipient” means anyone who acquires the Subject Software under this Agreement, including all Contributors.
19
+ I. “Redistribution” means Distribution of the Subject Software after a Modification has been made.
20
+ J. “Reproduction” means the making of a counterpart, image or copy of the Subject Software.
21
+ K. “Sale” means the exchange of the Subject Software for money or equivalent value.
22
+ L. “Subject Software” means the Original Software, Modifications, or any respective parts thereof.
23
+ M. “Use” means the application or employment of the Subject Software for any purpose.
24
+
25
+ 2. GRANT OF RIGHTS
26
+ A. Under Non-Patent Rights: Subject to the terms and conditions of this Agreement, each Contributor, with respect to its own contribution to the Subject Software, hereby grants to each Recipient a non-exclusive, world-wide, royalty-free license to engage in the following activities pertaining to the Subject Software:
27
+ 1. Use
28
+ 2. Distribution
29
+ 3. Reproduction
30
+ 4. Modification
31
+ 5. Redistribution
32
+ 6. Display
33
+ B. Under Patent Rights: Subject to the terms and conditions of this Agreement, each Contributor, with respect to its own contribution to the Subject Software, hereby grants to each Recipient under Covered Patents a non-exclusive, world-wide, royalty-free license to engage in the following activities pertaining to the Subject Software:
34
+ 1. Use
35
+ 2. Distribution
36
+ 3. Reproduction
37
+ 4. Sale
38
+ 5. Offer for Sale
39
+ C. The rights granted under Paragraph B. also apply to the combination of a Contributor’s Modification and the Subject Software if, at the time the Modification is added by the Contributor, the addition of such Modification causes the combination to be covered by the Covered Patents. It does not apply to any other combinations that include a Modification.
40
+ D. The rights granted in Paragraphs A. and B. allow the Recipient to sublicense those same rights. Such sublicense must be under the same terms and conditions of this Agreement.
41
+
42
+ 3. OBLIGATIONS OF RECIPIENT
43
+ A. Distribution or Redistribution of the Subject Software must be made under this Agreement except for additions covered under paragraph 3H.
44
+ 1. Whenever a Recipient distributes or redistributes the Subject Software, a copy of this Agreement must be included with each copy of the Subject Software; and
45
+ 2. If Recipient distributes or redistributes the Subject Software in any form other than source code, Recipient must also make the source code freely available, and must provide with each copy of the Subject Software information on how to obtain the source code in a reasonable manner on or through a medium customarily used for software exchange.
46
+ B. Each Recipient must ensure that the following copyright notice appears prominently in the Subject Software:
47
+ This software may be used, reproduced, and provided to others only as permitted under the terms of the agreement under which it was acquired from the U.S. Government. Neither title to, nor ownership of, the software is hereby transferred. This notice shall remain on all copies of the software.
48
+ Copyright 2025 United States Government as represented by the Administrator of the National Aeronautics and Space Administration. No copyright is claimed in the United States under Title 17, U.S. Code. All Other Rights Reserved.
49
+ The NASA Software “Sequential Monte Carlo-based Symbolic Regression (SMC-SR)” (LAR-20644-1) calls the following third-party software, which is subject to the terms and conditions of its licensor, as applicable at the time of licensing. The third-party software is not bundled or included with this software but may be available from the licensor. License hyperlinks are provided here for information purposes only.
50
+
51
+ NumPy
52
+ https://numpy.org/devdocs/license.html
53
+ Copyright (c) 2005-2025, NumPy Developers.
54
+ All rights reserved.
55
+ h5py
56
+ https://github.com/h5py/h5py/blob/master/LICENSE
57
+ Copyright (c) 2008 Andrew Collette and contributors
58
+ All rights reserved.
59
+
60
+ tqdm
61
+ https://github.com/tqdm/tqdm/blob/master/LICENCE
62
+ Copyright (c) 2013 noamraph
63
+
64
+ SciPy
65
+ https://github.com/scipy/scipy/blob/main/LICENSE.txt
66
+ Copyright (c) 2001-2002 Enthought, Inc. 2003, SciPy Developers.
67
+ All rights reserved.
68
+
69
+ C. Each Contributor must characterize its alteration of the Subject Software as a Modification and must identify itself as the originator of its Modification in a manner that reasonably allows subsequent Recipients to identify the originator of the Modification. In fulfillment of these requirements, Contributor must include a file (e.g., a change log file) that describes the alterations made and the date of the alterations, identifies Contributor as originator of the alterations, and consents to characterization of the alterations as a Modification, for example, by including a statement that the Modification is derived, directly or indirectly, from Original Software provided by Government Agency. Once consent is granted, it may not thereafter be revoked.
70
+ D. A Contributor may add its own copyright notice to the Subject Software. Once a copyright notice has been added to the Subject Software, a Recipient may not remove it without the express permission of the Contributor who added the notice.
71
+ E. A Recipient may not make any representation in the Subject Software or in any promotional, advertising or other material that may be construed as an endorsement by Government Agency or by any prior Recipient of any product or service provided by Recipient, or that may seek to obtain commercial advantage by the fact of Government Agency's or a prior Recipient’s participation in this Agreement.
72
+ F. In an effort to track usage and maintain accurate records of the Subject Software, each Recipient, upon receipt of the Subject Software, is requested to provide Government Agency, by e-mail to the Government Agency Point of Contact listed in clause 5.F., the following information: First and Last Name; Email Address; and Affiliation. Recipient’s name and personal information shall be used for statistical purposes only. Once a Recipient makes a Modification available, it is requested that the Recipient inform Government Agency, by e-mail to the Government Agency Point of Contact listed in clause 5.F., how to access the Modification.
73
+ G. Each Contributor represents that that its Modification is believed to be Contributor’s original creation and does not violate any existing agreements, regulations, statutes or rules, and further that Contributor has sufficient rights to grant the rights conveyed by this Agreement.
74
+ H. A Recipient may choose to offer, and to charge a fee for, warranty, support, indemnity and/or liability obligations to one or more other Recipients of the Subject Software. A Recipient may do so, however, only on its own behalf and not on behalf of Government Agency or any other Recipient. Such a Recipient must make it absolutely clear that any such warranty, support, indemnity and/or liability obligation is offered by that Recipient alone. Further, such Recipient agrees to indemnify Government Agency and every other Recipient for any liability incurred by them as a result of warranty, support, indemnity and/or liability offered by such Recipient.
75
+ I. A Recipient may create a Larger Work by combining Subject Software with separate software not governed by the terms of this agreement and distribute the Larger Work as a single product. In such case, the Recipient must make sure Subject Software, or portions thereof, included in the Larger Work is subject to this Agreement.
76
+ J. Notwithstanding any provisions contained herein, Recipient is hereby put on notice that export of any goods or technical data from the United States may require some form of export license from the U.S. Government. Failure to obtain necessary export licenses may result in criminal liability under U.S. laws. Government Agency neither represents that a license shall not be required nor that, if required, it shall be issued. Nothing granted herein provides any such export license.
77
+
78
+ 4. DISCLAIMER OF WARRANTIES AND LIABILITIES; WAIVER AND INDEMNIFICATION
79
+ A. No Warranty: THE SUBJECT SOFTWARE IS PROVIDED “AS IS” WITHOUT ANY WARRANTY OF ANY KIND, EITHER EXPRESSED, IMPLIED, OR STATUTORY, INCLUDING, BUT NOT LIMITED TO, ANY WARRANTY THAT THE SUBJECT SOFTWARE WILL CONFORM TO SPECIFICATIONS, ANY IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR FREEDOM FROM INFRINGEMENT, ANY WARRANTY THAT THE SUBJECT SOFTWARE WILL BE ERROR FREE, OR ANY WARRANTY THAT DOCUMENTATION, IF PROVIDED, WILL CONFORM TO THE SUBJECT SOFTWARE. THIS AGREEMENT DOES NOT, IN ANY MANNER, CONSTITUTE AN ENDORSEMENT BY GOVERNMENT AGENCY OR ANY PRIOR RECIPIENT OF ANY RESULTS, RESULTING DESIGNS, HARDWARE, SOFTWARE PRODUCTS OR ANY OTHER APPLICATIONS RESULTING FROM USE OF THE SUBJECT SOFTWARE. FURTHER, GOVERNMENT AGENCY DISCLAIMS ALL WARRANTIES AND LIABILITIES REGARDING THIRD-PARTY SOFTWARE, IF PRESENT IN THE ORIGINAL SOFTWARE, AND DISTRIBUTES IT “AS IS.”
80
+ B. Waiver and Indemnity: RECIPIENT AGREES TO WAIVE ANY AND ALL CLAIMS AGAINST THE UNITED STATES GOVERNMENT, ITS CONTRACTORS AND SUBCONTRACTORS, AS WELL AS ANY PRIOR RECIPIENT. IF RECIPIENT'S USE OF THE SUBJECT SOFTWARE RESULTS IN ANY LIABILITIES, DEMANDS, DAMAGES, EXPENSES OR LOSSES ARISING FROM SUCH USE, INCLUDING ANY DAMAGES FROM PRODUCTS BASED ON, OR RESULTING FROM, RECIPIENT'S USE OF THE SUBJECT SOFTWARE, RECIPIENT SHALL INDEMNIFY AND HOLD HARMLESS THE UNITED STATES GOVERNMENT, ITS CONTRACTORS AND SUBCONTRACTORS, AS WELL AS ANY PRIOR RECIPIENT, TO THE EXTENT PERMITTED BY LAW. RECIPIENT'S SOLE REMEDY FOR ANY SUCH MATTER SHALL BE THE IMMEDIATE, UNILATERAL TERMINATION OF THIS AGREEMENT.
81
+
82
+ 5. GENERAL TERMS
83
+ A. Termination: This Agreement and the rights granted hereunder will terminate automatically if a Recipient fails to comply with these terms and conditions, and fails to cure such noncompliance within thirty (30) days of becoming aware of such noncompliance. Upon termination, a Recipient agrees to immediately cease use and distribution of the Subject Software. All sublicenses to the Subject Software properly granted by the breaching Recipient shall survive any such termination of this Agreement.
84
+ B. Severability: If any provision of this Agreement is invalid or unenforceable under applicable law, it shall not affect the validity or enforceability of the remainder of the terms of this Agreement.
85
+ C. Applicable Law: This Agreement shall be subject to United States federal law only for all purposes, including, but not limited to, determining the validity of this Agreement, the meaning of its provisions and the rights, obligations and remedies of the parties.
86
+ D. Entire Understanding: This Agreement constitutes the entire understanding and agreement of the parties relating to release of the Subject Software and may not be superseded, modified or amended except by further written agreement duly executed by the parties.
87
+ E. Binding Authority: By accepting and using the Subject Software under this Agreement, a Recipient affirms its authority to bind the Recipient to all terms and conditions of this Agreement and that that Recipient hereby agrees to all terms and conditions herein.
88
+ F. Point of Contact: Any Recipient contact with Government Agency is to be directed to the designated representative as follows:
89
+ Maxine Saunders
90
+ Software Release Authority
91
+ MS 151, NASA Langley Research Center
92
+ Hampton, VA 23681
93
+ Phone: 757-864-2025
94
+ Email: larc-sra@mail.nasa.gov
pysips-0.0.0/PKG-INFO ADDED
@@ -0,0 +1,156 @@
1
+ Metadata-Version: 2.4
2
+ Name: pysips
3
+ Version: 0.0.0
4
+ Summary: A Python package for symbolic inference via posterior sampling.
5
+ Author-email: Geoffrey Bomarito <geoffrey.f.bomarito@nasa.gov>, Patrick Leser <patrick.e.leser@nasa.gov>
6
+ License-Expression: NASA-1.3
7
+ Project-URL: Documentation, https://nasa.github.io/pysips/
8
+ Project-URL: Repository, https://github.com/nasa/pysips
9
+ Keywords: symbolic regression
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: Programming Language :: Python :: 3.12
12
+ Classifier: Programming Language :: Python :: 3.13
13
+ Requires-Python: >=3.12
14
+ Description-Content-Type: text/markdown
15
+ License-File: LICENSE
16
+ Requires-Dist: numpy
17
+ Requires-Dist: bingo-nasa
18
+ Requires-Dist: smcpy>=0.1.6
19
+ Requires-Dist: scikit-learn
20
+ Dynamic: license-file
21
+
22
+ # PySIPS: Python package for Symbolic Inference via Posterior Sampling
23
+
24
+ PySIPS is an open-source implementation of Bayesian symbolic regression via posterior sampling as described in the paper "Bayesian Symbolic Regression via Posterior Sampling" by G. F. Bomarito and P. E. Leser from NASA Langley Research Center.
25
+
26
+ ## Purpose
27
+
28
+ PySIPS provides a robust framework for discovering interpretable symbolic expressions from data, with a particular focus on handling noisy datasets. Unlike traditional symbolic regression approaches, PySIPS uses a Bayesian framework with Sequential Monte Carlo (SMC) sampling to:
29
+
30
+ 1. Enhance robustness to noise
31
+ 2. Provide built-in uncertainty quantification
32
+ 3. Discover parsimonious expressions with improved generalization
33
+ 4. Reduce overfitting in symbolic regression tasks
34
+
35
+ ## Algorithm Overview
36
+
37
+ PySIPS implements a Sequential Monte Carlo (SMC) framework for Bayesian symbolic regression that:
38
+
39
+ - Approximates the posterior distribution over symbolic expressions
40
+ - Uses probabilistic selection and adaptive annealing to explore the search space efficiently
41
+ - Employs normalized marginal likelihood for model evaluation
42
+ - Combines mutation and crossover operations as proposal mechanisms
43
+ - Provides model selection criteria based on maximum normalized marginal likelihood or posterior mode
44
+
45
+ ## Installation
46
+
47
+ (Coming Soon!)
48
+
49
+ ```bash
50
+ pip install pysips
51
+ ```
52
+
53
+ ## Example Usage
54
+
55
+ ```python
56
+ import numpy as np
57
+ from pysips import PysipsRegressor
58
+ from sklearn.model_selection import train_test_split
59
+ from sklearn.metrics import r2_score
60
+
61
+ # Generate synthetic data (y = x^2 + noise)
62
+ np.random.seed(42)
63
+ X = np.linspace(-3, 3, 100).reshape(-1, 1)
64
+ y = X[:, 0]**2 + np.random.normal(0, 0.1, size=X.shape[0])
65
+
66
+ # Split data
67
+ X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
68
+
69
+ # Initialize and fit the regressor
70
+ regressor = PysipsRegressor(
71
+ operators=['+', '-', '*', '^2'],
72
+ max_complexity=12,
73
+ num_particles=100,
74
+ num_mcmc_samples=10,
75
+ random_state=42
76
+ )
77
+
78
+ regressor.fit(X_train, y_train)
79
+
80
+ # Make predictions
81
+ y_pred = regressor.predict(X_test)
82
+
83
+ # Get the discovered expression
84
+ expression = regressor.get_expression()
85
+ print(f"Discovered expression: {expression}")
86
+ print(f"R² score: {r2_score(y_test, y_pred):.4f}")
87
+
88
+ # Get the models sampled from the posterior and their likelihoods
89
+ models, likelihoods = regressor.get_models()
90
+ ```
91
+
92
+ ### Example Output
93
+
94
+ ```
95
+ Discovered expression: x_0^2
96
+ R² score: 0.9987
97
+ Number of unique models sampled: 32
98
+ ```
99
+
100
+ ## Advanced Features
101
+
102
+ - Control over operators and expression complexity
103
+ - Multiple model selection strategies
104
+ - Access to the full posterior distribution over expressions
105
+ - Compatible with scikit-learn's API for easy integration into ML pipelines
106
+ - Uncertainty quantification for symbolic regression results
107
+
108
+ ## Citation
109
+
110
+ If you use PySIPS, please cite the following paper:
111
+
112
+ ```bibtex
113
+ @article{bomarito2024bayesian,
114
+ title={Bayesian Symbolic Regression via Posterior Sampling},
115
+ author={Bomarito, Geoffrey F. and Leser, Patrick E.},
116
+ journal={Philosophical Transactions of the Royal Society A},
117
+ year={2025},
118
+ publisher={Royal Society}
119
+ }
120
+ ```
121
+
122
+ ## License
123
+
124
+ Notices:
125
+ Copyright 2025 United States Government as represented by the Administrator of the National Aeronautics and Space Administration. No copyright is claimed in the United States under Title 17, U.S. Code. All Other Rights Reserved.
126
+
127
+ The NASA Software “PySIPS” (LAR-20644-1) calls the following third-party software, which is subject to the terms and conditions of its licensor, as applicable at the time of licensing. The third-party software is not bundled or included with this software but may be available from the licensor. License hyperlinks are provided here for information purposes only.
128
+
129
+ NumPy
130
+ https://numpy.org/devdocs/license.html
131
+ Copyright (c) 2005-2025, NumPy Developers.
132
+ All rights reserved.
133
+
134
+ h5py
135
+ https://github.com/h5py/h5py/blob/master/LICENSE
136
+ Copyright (c) 2008 Andrew Collette and contributors
137
+ All rights reserved.
138
+
139
+ tqdm
140
+ https://github.com/tqdm/tqdm/blob/master/LICENCE
141
+ Copyright (c) 2013 noamraph
142
+
143
+ SciPy
144
+ https://github.com/scipy/scipy/blob/main/LICENSE.txt
145
+ Copyright (c) 2001-2002 Enthought, Inc. 2003, SciPy Developers.
146
+ All rights reserved.
147
+
148
+ Disclaimers
149
+ No Warranty: THE SUBJECT SOFTWARE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY OF ANY KIND, EITHER EXPRESSED, IMPLIED, OR STATUTORY, INCLUDING, BUT NOT LIMITED TO, ANY WARRANTY THAT THE SUBJECT SOFTWARE WILL CONFORM TO SPECIFICATIONS, ANY IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR FREEDOM FROM INFRINGEMENT, ANY WARRANTY THAT THE SUBJECT SOFTWARE WILL BE ERROR FREE, OR ANY WARRANTY THAT DOCUMENTATION, IF PROVIDED, WILL CONFORM TO THE SUBJECT SOFTWARE. THIS AGREEMENT DOES NOT, IN ANY MANNER, CONSTITUTE AN ENDORSEMENT BY GOVERNMENT AGENCY OR ANY PRIOR RECIPIENT OF ANY RESULTS, RESULTING DESIGNS, HARDWARE, SOFTWARE PRODUCTS OR ANY OTHER APPLICATIONS RESULTING FROM USE OF THE SUBJECT SOFTWARE. FURTHER, GOVERNMENT AGENCY DISCLAIMS ALL WARRANTIES AND LIABILITIES REGARDING THIRD-PARTY SOFTWARE, IF PRESENT IN THE ORIGINAL SOFTWARE, AND DISTRIBUTES IT "AS IS."
150
+
151
+ Waiver and Indemnity: RECIPIENT AGREES TO WAIVE ANY AND ALL CLAIMS AGAINST THE UNITED STATES GOVERNMENT, ITS CONTRACTORS AND SUBCONTRACTORS, AS WELL AS ANY PRIOR RECIPIENT. IF RECIPIENT'S USE OF THE SUBJECT SOFTWARE RESULTS IN ANY LIABILITIES, DEMANDS, DAMAGES, EXPENSES OR LOSSES ARISING FROM SUCH USE, INCLUDING ANY DAMAGES FROM PRODUCTS BASED ON, OR RESULTING FROM, RECIPIENT'S USE OF THE SUBJECT SOFTWARE, RECIPIENT SHALL INDEMNIFY AND HOLD HARMLESS THE UNITED STATES GOVERNMENT, ITS CONTRACTORS, AND SUBCONTRACTORS, AS WELL AS ANY PRIOR RECIPIENT, TO THE EXTENT PERMITTED BY LAW. RECIPIENT'S SOLE REMEDY FOR ANY SUCH MATTER SHALL BE THE IMMEDIATE, UNILATERAL TERMINATION OF THIS AGREEMENT.
152
+
153
+
154
+ ## Acknowledgements
155
+
156
+ This work was developed at NASA Langley Research Center.
pysips-0.0.0/README.md ADDED
@@ -0,0 +1,135 @@
1
+ # PySIPS: Python package for Symbolic Inference via Posterior Sampling
2
+
3
+ PySIPS is an open-source implementation of Bayesian symbolic regression via posterior sampling as described in the paper "Bayesian Symbolic Regression via Posterior Sampling" by G. F. Bomarito and P. E. Leser from NASA Langley Research Center.
4
+
5
+ ## Purpose
6
+
7
+ PySIPS provides a robust framework for discovering interpretable symbolic expressions from data, with a particular focus on handling noisy datasets. Unlike traditional symbolic regression approaches, PySIPS uses a Bayesian framework with Sequential Monte Carlo (SMC) sampling to:
8
+
9
+ 1. Enhance robustness to noise
10
+ 2. Provide built-in uncertainty quantification
11
+ 3. Discover parsimonious expressions with improved generalization
12
+ 4. Reduce overfitting in symbolic regression tasks
13
+
14
+ ## Algorithm Overview
15
+
16
+ PySIPS implements a Sequential Monte Carlo (SMC) framework for Bayesian symbolic regression that:
17
+
18
+ - Approximates the posterior distribution over symbolic expressions
19
+ - Uses probabilistic selection and adaptive annealing to explore the search space efficiently
20
+ - Employs normalized marginal likelihood for model evaluation
21
+ - Combines mutation and crossover operations as proposal mechanisms
22
+ - Provides model selection criteria based on maximum normalized marginal likelihood or posterior mode
23
+
24
+ ## Installation
25
+
26
+ (Coming Soon!)
27
+
28
+ ```bash
29
+ pip install pysips
30
+ ```
31
+
32
+ ## Example Usage
33
+
34
+ ```python
35
+ import numpy as np
36
+ from pysips import PysipsRegressor
37
+ from sklearn.model_selection import train_test_split
38
+ from sklearn.metrics import r2_score
39
+
40
+ # Generate synthetic data (y = x^2 + noise)
41
+ np.random.seed(42)
42
+ X = np.linspace(-3, 3, 100).reshape(-1, 1)
43
+ y = X[:, 0]**2 + np.random.normal(0, 0.1, size=X.shape[0])
44
+
45
+ # Split data
46
+ X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
47
+
48
+ # Initialize and fit the regressor
49
+ regressor = PysipsRegressor(
50
+ operators=['+', '-', '*', '^2'],
51
+ max_complexity=12,
52
+ num_particles=100,
53
+ num_mcmc_samples=10,
54
+ random_state=42
55
+ )
56
+
57
+ regressor.fit(X_train, y_train)
58
+
59
+ # Make predictions
60
+ y_pred = regressor.predict(X_test)
61
+
62
+ # Get the discovered expression
63
+ expression = regressor.get_expression()
64
+ print(f"Discovered expression: {expression}")
65
+ print(f"R² score: {r2_score(y_test, y_pred):.4f}")
66
+
67
+ # Get the models sampled from the posterior and their likelihoods
68
+ models, likelihoods = regressor.get_models()
69
+ ```
70
+
71
+ ### Example Output
72
+
73
+ ```
74
+ Discovered expression: x_0^2
75
+ R² score: 0.9987
76
+ Number of unique models sampled: 32
77
+ ```
78
+
79
+ ## Advanced Features
80
+
81
+ - Control over operators and expression complexity
82
+ - Multiple model selection strategies
83
+ - Access to the full posterior distribution over expressions
84
+ - Compatible with scikit-learn's API for easy integration into ML pipelines
85
+ - Uncertainty quantification for symbolic regression results
86
+
87
+ ## Citation
88
+
89
+ If you use PySIPS, please cite the following paper:
90
+
91
+ ```bibtex
92
+ @article{bomarito2024bayesian,
93
+ title={Bayesian Symbolic Regression via Posterior Sampling},
94
+ author={Bomarito, Geoffrey F. and Leser, Patrick E.},
95
+ journal={Philosophical Transactions of the Royal Society A},
96
+ year={2025},
97
+ publisher={Royal Society}
98
+ }
99
+ ```
100
+
101
+ ## License
102
+
103
+ Notices:
104
+ Copyright 2025 United States Government as represented by the Administrator of the National Aeronautics and Space Administration. No copyright is claimed in the United States under Title 17, U.S. Code. All Other Rights Reserved.
105
+
106
+ The NASA Software “PySIPS” (LAR-20644-1) calls the following third-party software, which is subject to the terms and conditions of its licensor, as applicable at the time of licensing. The third-party software is not bundled or included with this software but may be available from the licensor. License hyperlinks are provided here for information purposes only.
107
+
108
+ NumPy
109
+ https://numpy.org/devdocs/license.html
110
+ Copyright (c) 2005-2025, NumPy Developers.
111
+ All rights reserved.
112
+
113
+ h5py
114
+ https://github.com/h5py/h5py/blob/master/LICENSE
115
+ Copyright (c) 2008 Andrew Collette and contributors
116
+ All rights reserved.
117
+
118
+ tqdm
119
+ https://github.com/tqdm/tqdm/blob/master/LICENCE
120
+ Copyright (c) 2013 noamraph
121
+
122
+ SciPy
123
+ https://github.com/scipy/scipy/blob/main/LICENSE.txt
124
+ Copyright (c) 2001-2002 Enthought, Inc. 2003, SciPy Developers.
125
+ All rights reserved.
126
+
127
+ Disclaimers
128
+ No Warranty: THE SUBJECT SOFTWARE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY OF ANY KIND, EITHER EXPRESSED, IMPLIED, OR STATUTORY, INCLUDING, BUT NOT LIMITED TO, ANY WARRANTY THAT THE SUBJECT SOFTWARE WILL CONFORM TO SPECIFICATIONS, ANY IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR FREEDOM FROM INFRINGEMENT, ANY WARRANTY THAT THE SUBJECT SOFTWARE WILL BE ERROR FREE, OR ANY WARRANTY THAT DOCUMENTATION, IF PROVIDED, WILL CONFORM TO THE SUBJECT SOFTWARE. THIS AGREEMENT DOES NOT, IN ANY MANNER, CONSTITUTE AN ENDORSEMENT BY GOVERNMENT AGENCY OR ANY PRIOR RECIPIENT OF ANY RESULTS, RESULTING DESIGNS, HARDWARE, SOFTWARE PRODUCTS OR ANY OTHER APPLICATIONS RESULTING FROM USE OF THE SUBJECT SOFTWARE. FURTHER, GOVERNMENT AGENCY DISCLAIMS ALL WARRANTIES AND LIABILITIES REGARDING THIRD-PARTY SOFTWARE, IF PRESENT IN THE ORIGINAL SOFTWARE, AND DISTRIBUTES IT "AS IS."
129
+
130
+ Waiver and Indemnity: RECIPIENT AGREES TO WAIVE ANY AND ALL CLAIMS AGAINST THE UNITED STATES GOVERNMENT, ITS CONTRACTORS AND SUBCONTRACTORS, AS WELL AS ANY PRIOR RECIPIENT. IF RECIPIENT'S USE OF THE SUBJECT SOFTWARE RESULTS IN ANY LIABILITIES, DEMANDS, DAMAGES, EXPENSES OR LOSSES ARISING FROM SUCH USE, INCLUDING ANY DAMAGES FROM PRODUCTS BASED ON, OR RESULTING FROM, RECIPIENT'S USE OF THE SUBJECT SOFTWARE, RECIPIENT SHALL INDEMNIFY AND HOLD HARMLESS THE UNITED STATES GOVERNMENT, ITS CONTRACTORS, AND SUBCONTRACTORS, AS WELL AS ANY PRIOR RECIPIENT, TO THE EXTENT PERMITTED BY LAW. RECIPIENT'S SOLE REMEDY FOR ANY SUCH MATTER SHALL BE THE IMMEDIATE, UNILATERAL TERMINATION OF THIS AGREEMENT.
131
+
132
+
133
+ ## Acknowledgements
134
+
135
+ This work was developed at NASA Langley Research Center.
pysips-0.0.0/pyproject.toml ADDED
@@ -0,0 +1,36 @@
1
+ [project]
2
+ name = "pysips"
3
+ keywords = ["symbolic regression"]
4
+ description = "A Python package for symbolic inference via posterior sampling."
5
+ authors = [
6
+ {name = "Geoffrey Bomarito", email = "geoffrey.f.bomarito@nasa.gov" },
7
+ {name = "Patrick Leser", email = "patrick.e.leser@nasa.gov" },
8
+ ]
9
+ requires-python = ">=3.12"
10
+ readme = "README.md"
11
+ license = "NASA-1.3"
12
+ dynamic = ["version"]
13
+ dependencies = [
14
+ "numpy",
15
+ "bingo-nasa",
16
+ "smcpy>=0.1.6",
17
+ "scikit-learn"
18
+ ]
19
+ classifiers = [
20
+ "Development Status :: 3 - Alpha",
21
+ "Programming Language :: Python :: 3.12",
22
+ "Programming Language :: Python :: 3.13",
23
+ ]
24
+
25
+ [project.urls]
26
+ Documentation = "https://nasa.github.io/pysips/"
27
+ Repository = "https://github.com/nasa/pysips"
28
+
29
+ [tool.cibuildwheel]
30
+ skip = "pp*"
31
+
32
+ [tool.setuptools_scm]
33
+ local_scheme = "no-local-version"
34
+
35
+ [tool.setuptools.packages.find]
36
+ exclude = ['demos']
@@ -0,0 +1,53 @@
1
+ """
2
+ Notices:
3
+ Copyright 2025 United States Government as represented by the Administrator of the National
4
+ Aeronautics and Space Administration. No copyright is claimed in the United States under Title 17,
5
+ U.S. Code. All Other Rights Reserved.
6
+
7
+ The NASA Software “PySIPS” (LAR-20644-1) calls the following third-party software, which is
8
+ subject to the terms and conditions of its licensor, as applicable at the time of licensing. The
9
+ third-party software is not bundled or included with this software but may be available from the
10
+ licensor. License hyperlinks are provided here for information purposes only.
11
+
12
+ NumPy
13
+ https://numpy.org/devdocs/license.html
14
+ Copyright (c) 2005-2025, NumPy Developers.
15
+ All rights reserved.
16
+
17
+ h5py
18
+ https://github.com/h5py/h5py/blob/master/LICENSE
19
+ Copyright (c) 2008 Andrew Collette and contributors
20
+ All rights reserved.
21
+
22
+ tqdm
23
+ https://github.com/tqdm/tqdm/blob/master/LICENCE
24
+ Copyright (c) 2013 noamraph
25
+
26
+ SciPy
27
+ https://github.com/scipy/scipy/blob/main/LICENSE.txt
28
+ Copyright (c) 2001-2002 Enthought, Inc. 2003, SciPy Developers.
29
+ All rights reserved.
30
+
31
+ Disclaimers
32
+ No Warranty: THE SUBJECT SOFTWARE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY OF ANY KIND, EITHER
33
+ EXPRESSED, IMPLIED, OR STATUTORY, INCLUDING, BUT NOT LIMITED TO, ANY WARRANTY THAT THE SUBJECT
34
+ SOFTWARE WILL CONFORM TO SPECIFICATIONS, ANY IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
35
+ PARTICULAR PURPOSE, OR FREEDOM FROM INFRINGEMENT, ANY WARRANTY THAT THE SUBJECT SOFTWARE WILL BE
36
+ ERROR FREE, OR ANY WARRANTY THAT DOCUMENTATION, IF PROVIDED, WILL CONFORM TO THE SUBJECT SOFTWARE.
37
+ THIS AGREEMENT DOES NOT, IN ANY MANNER, CONSTITUTE AN ENDORSEMENT BY GOVERNMENT AGENCY OR ANY
38
+ PRIOR RECIPIENT OF ANY RESULTS, RESULTING DESIGNS, HARDWARE, SOFTWARE PRODUCTS OR ANY OTHER
39
+ APPLICATIONS RESULTING FROM USE OF THE SUBJECT SOFTWARE. FURTHER, GOVERNMENT AGENCY DISCLAIMS
40
+ ALL WARRANTIES AND LIABILITIES REGARDING THIRD-PARTY SOFTWARE, IF PRESENT IN THE ORIGINAL
41
+ SOFTWARE, AND DISTRIBUTES IT "AS IS."
42
+
43
+ Waiver and Indemnity: RECIPIENT AGREES TO WAIVE ANY AND ALL CLAIMS AGAINST THE UNITED STATES
44
+ GOVERNMENT, ITS CONTRACTORS AND SUBCONTRACTORS, AS WELL AS ANY PRIOR RECIPIENT. IF RECIPIENT'S
45
+ USE OF THE SUBJECT SOFTWARE RESULTS IN ANY LIABILITIES, DEMANDS, DAMAGES, EXPENSES OR LOSSES
46
+ ARISING FROM SUCH USE, INCLUDING ANY DAMAGES FROM PRODUCTS BASED ON, OR RESULTING FROM,
47
+ RECIPIENT'S USE OF THE SUBJECT SOFTWARE, RECIPIENT SHALL INDEMNIFY AND HOLD HARMLESS THE UNITED
48
+ STATES GOVERNMENT, ITS CONTRACTORS, AND SUBCONTRACTORS, AS WELL AS ANY PRIOR RECIPIENT, TO THE
49
+ EXTENT PERMITTED BY LAW. RECIPIENT'S SOLE REMEDY FOR ANY SUCH MATTER SHALL BE THE IMMEDIATE,
50
+ UNILATERAL TERMINATION OF THIS AGREEMENT.
51
+ """
52
+
53
+ from .regressor import PysipsRegressor
@@ -0,0 +1,138 @@
1
+ """
2
+ Crossover-Based Proposal Generator for Symbolic Regression Models.
3
+
4
+ This module provides a crossover-based proposal mechanism for symbolic regression
5
+ that creates new candidate models by combining genetic material from existing
6
+ models. It implements genetic programming crossover operations using bingo's
7
+ AGraphCrossover functionality within an MCMC or evolutionary algorithm framework.
8
+
9
+ The crossover operation mimics biological reproduction by exchanging subtrees
10
+ between two parent expressions to create offspring that inherit characteristics
11
+ from both parents. This approach can effectively explore the space of symbolic
12
+ expressions by combining successful components from different models.
13
+
14
+ Key Features
15
+ ------------
16
+ - Random partner selection from a configurable gene pool
17
+ - Stochastic child selection (50/50 probability between two crossover offspring)
18
+ - Avoids self-crossover by ensuring different parent selection
19
+ - Updateable gene pool for adaptive sampling strategies
20
+ - Seeded random number generation for reproducible results
21
+
22
+ Crossover Mechanism
23
+ -------------------
24
+ The crossover operation works by:
25
+ 1. Selecting a random crossover point in each parent expression tree
26
+ 2. Swapping the subtrees at those points between the two parents
27
+ 3. Producing two offspring that combine features from both parents
28
+ 4. Randomly selecting one of the two offspring as the proposal
29
+
30
+ This process allows successful expression fragments to be preserved and
31
+ recombined in novel ways, potentially discovering better solutions through
32
+ the exploration of hybrid models.
33
+
34
+ Usage Example
35
+ -------------
36
+ >>> # Assume you have a collection of symbolic models
37
+ >>> gene_pool = [model1, model2, model3, model4] # List of AGraph models
38
+ >>>
39
+ >>> # Create crossover proposal generator
40
+ >>> crossover = CrossoverProposal(gene_pool, seed=42)
41
+ >>>
42
+ >>> # Use in MCMC or evolutionary sampling
43
+ >>> current_model = model1
44
+ >>> new_proposal = crossover(current_model)
45
+ >>>
46
+ >>> # Update gene pool as better models are discovered
47
+ >>> updated_pool = [best_model1, best_model2, new_good_model]
48
+ >>> crossover.update(updated_pool)
49
+
50
+ Integration Notes
51
+ -----------------
52
+ The update() method allows for dynamic gene pool management, enabling
53
+ adaptive strategies where successful models from the sampling process
54
+ can be added to influence future proposals.
55
+ """
56
+
57
+ import numpy as np
58
+ from bingo.symbolic_regression import (
59
+ AGraphCrossover,
60
+ )
61
+
62
+
63
class CrossoverProposal:
    """A proposal operator that performs crossover between AGraph models.

    This class implements a callable object that creates new models by performing
    crossover operations between an input model and randomly selected partners
    from a gene pool. It utilizes bingo's AGraphCrossover mechanism and randomly
    selects one of the two children produced by each crossover operation.

    Parameters
    ----------
    gene_pool : iterable of AGraph
        A collection of AGraph models that will be used as potential partners
        during crossover operations. The collection is copied into a list, so
        later changes to the caller's container do not affect this object; use
        ``update`` to replace the pool.
    seed : int, optional
        Random seed for the internal random number generator, used to control
        repeatability of operations
    """

    def __init__(self, gene_pool, seed=None):
        self._crossover = AGraphCrossover()
        # Copy into a list, exactly as update() does, so the pool is always an
        # indexable sequence with a length regardless of the iterable passed in.
        self._gene_pool = list(gene_pool)
        self._rng = np.random.default_rng(seed)

    def _select_other_parent(self, model):
        """Pick a random gene pool member that does not compare equal to `model`.

        Parameters
        ----------
        model : AGraph
            The model that the selected partner must differ from

        Returns
        -------
        AGraph
            A member of the gene pool for which ``member == model`` is false

        Raises
        ------
        ValueError
            If the gene pool is empty, or if every member of the gene pool
            compares equal to `model` (no valid partner exists)
        """
        pool = self._gene_pool
        if not pool:
            raise ValueError(
                "Cannot select a crossover partner from an empty gene pool"
            )

        ind = self._rng.integers(0, len(pool))
        if pool[ind] == model:
            # Rejection sampling only terminates if at least one member differs
            # from `model`; verify that once, on the first collision, instead of
            # spinning forever.
            if all(candidate == model for candidate in pool):
                raise ValueError(
                    "Gene pool contains no model different from the input "
                    "model; cannot select a crossover partner"
                )
            while pool[ind] == model:
                ind = self._rng.integers(0, len(pool))
        return pool[ind]

    def _do_crossover(self, model, other_parent):
        """Cross `model` with `other_parent` and return one child at random.

        The crossover operator yields two children; the first is returned when
        a uniform draw from the internal generator is below 0.5, otherwise the
        second is returned.
        """
        child_1, child_2 = self._crossover(model, other_parent)
        if self._rng.random() < 0.5:
            return child_1
        return child_2

    def __call__(self, model):
        """Perform crossover between the input model and a randomly selected one from the gene pool.

        This method randomly selects a parent from the gene pool, performs crossover between
        the input model and the selected parent, and returns one of the two resulting children
        with equal probability.

        Parameters
        ----------
        model : AGraph
            The model to be used as the first parent in the crossover operation

        Returns
        -------
        AGraph
            A new model resulting from crossover between the input model and a
            randomly selected model from the gene pool

        Raises
        ------
        ValueError
            If the gene pool holds no model that differs from `model`
        """
        other_parent = self._select_other_parent(model)
        return self._do_crossover(model, other_parent)

    def update(self, gene_pool, *_, **__):
        """Update the gene pool used for selecting crossover partners.

        Parameters
        ----------
        gene_pool : iterable of AGraph
            The new collection of AGraph models to use as the gene pool
        *_ : tuple
            Additional positional arguments (ignored)
        **__ : dict
            Additional keyword arguments (ignored)

        Notes
        -----
        This method allows for updating the gene pool while maintaining the same
        crossover behavior. The additional parameters are included for compatibility
        with other proposal update interfaces but are not used.
        """
        self._gene_pool = list(gene_pool)