jaxsr 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. jaxsr-0.1.0/.claude/skills/jaxsr/SKILL.md +311 -0
  2. jaxsr-0.1.0/.claude/skills/jaxsr/guides/active-learning.md +267 -0
  3. jaxsr-0.1.0/.claude/skills/jaxsr/guides/basis-library.md +289 -0
  4. jaxsr-0.1.0/.claude/skills/jaxsr/guides/cli.md +305 -0
  5. jaxsr-0.1.0/.claude/skills/jaxsr/guides/constraints.md +206 -0
  6. jaxsr-0.1.0/.claude/skills/jaxsr/guides/doe-workflow.md +329 -0
  7. jaxsr-0.1.0/.claude/skills/jaxsr/guides/known-model-fitting.md +417 -0
  8. jaxsr-0.1.0/.claude/skills/jaxsr/guides/model-fitting.md +268 -0
  9. jaxsr-0.1.0/.claude/skills/jaxsr/guides/rsm.md +226 -0
  10. jaxsr-0.1.0/.claude/skills/jaxsr/guides/uncertainty.md +257 -0
  11. jaxsr-0.1.0/.claude/skills/jaxsr/templates/active-learning-loop.py +112 -0
  12. jaxsr-0.1.0/.claude/skills/jaxsr/templates/basic-regression.py +92 -0
  13. jaxsr-0.1.0/.claude/skills/jaxsr/templates/constrained-model.py +94 -0
  14. jaxsr-0.1.0/.claude/skills/jaxsr/templates/doe-study.py +102 -0
  15. jaxsr-0.1.0/.claude/skills/jaxsr/templates/langmuir-isotherm.py +158 -0
  16. jaxsr-0.1.0/.claude/skills/jaxsr/templates/notebook-starter.py +184 -0
  17. jaxsr-0.1.0/.claude/skills/jaxsr/templates/uncertainty-analysis.py +158 -0
  18. jaxsr-0.1.0/.github/workflows/docs.yml +51 -0
  19. jaxsr-0.1.0/.github/workflows/tests.yml +61 -0
  20. jaxsr-0.1.0/.gitignore +16 -0
  21. jaxsr-0.1.0/CLAUDE.md +297 -0
  22. jaxsr-0.1.0/LICENSE +21 -0
  23. jaxsr-0.1.0/Makefile +15 -0
  24. jaxsr-0.1.0/PKG-INFO +424 -0
  25. jaxsr-0.1.0/README.md +358 -0
  26. jaxsr-0.1.0/docs/_config.yml +21 -0
  27. jaxsr-0.1.0/docs/_toc.yml +35 -0
  28. jaxsr-0.1.0/docs/api.md +282 -0
  29. jaxsr-0.1.0/docs/background/comparison-alamo-sisso-jaxsr.md +103 -0
  30. jaxsr-0.1.0/docs/background/literature_review.md +282 -0
  31. jaxsr-0.1.0/docs/examples/active_learning.ipynb +774 -0
  32. jaxsr-0.1.0/docs/examples/basic_usage.ipynb +1047 -0
  33. jaxsr-0.1.0/docs/examples/categorical_variables.ipynb +520 -0
  34. jaxsr-0.1.0/docs/examples/chemical_kinetics.ipynb +963 -0
  35. jaxsr-0.1.0/docs/examples/comprehensive_tutorial.ipynb +2555 -0
  36. jaxsr-0.1.0/docs/examples/doe_study_workflow.ipynb +574 -0
  37. jaxsr-0.1.0/docs/examples/heat_transfer.ipynb +947 -0
  38. jaxsr-0.1.0/docs/examples/ode_discovery.ipynb +829 -0
  39. jaxsr-0.1.0/docs/examples/symbolic_classification.ipynb +1397 -0
  40. jaxsr-0.1.0/docs/examples/uncertainty_quantification.ipynb +873 -0
  41. jaxsr-0.1.0/docs/examples/uq_bma_weights.png +0 -0
  42. jaxsr-0.1.0/docs/examples/uq_coefficient_intervals.png +0 -0
  43. jaxsr-0.1.0/docs/examples/uq_prediction_intervals.png +0 -0
  44. jaxsr-0.1.0/docs/guides/acquisition.md +338 -0
  45. jaxsr-0.1.0/docs/guides/claude_code_skills.md +154 -0
  46. jaxsr-0.1.0/docs/guides/cli_guide.md +598 -0
  47. jaxsr-0.1.0/docs/guides/doe_guide.md +447 -0
  48. jaxsr-0.1.0/docs/intro.md +104 -0
  49. jaxsr-0.1.0/docs/jaxsr.png +0 -0
  50. jaxsr-0.1.0/docs/quickstart.md +241 -0
  51. jaxsr-0.1.0/examples/active_learning.py +520 -0
  52. jaxsr-0.1.0/examples/basic_usage.py +376 -0
  53. jaxsr-0.1.0/examples/categorical_variables.py +186 -0
  54. jaxsr-0.1.0/examples/chemical_kinetics.py +348 -0
  55. jaxsr-0.1.0/examples/cli_workflow.md +330 -0
  56. jaxsr-0.1.0/examples/comprehensive_tutorial.ipynb +2555 -0
  57. jaxsr-0.1.0/examples/doe_study_workflow.py +180 -0
  58. jaxsr-0.1.0/examples/heat_transfer.py +331 -0
  59. jaxsr-0.1.0/examples/langmuir_doe_active_learning.ipynb +1345 -0
  60. jaxsr-0.1.0/examples/model_comparison_isotherms.ipynb +984 -0
  61. jaxsr-0.1.0/examples/rsm_formulation.ipynb +1093 -0
  62. jaxsr-0.1.0/examples/serialization_and_sharing.ipynb +1035 -0
  63. jaxsr-0.1.0/examples/uncertainty_quantification.py +464 -0
  64. jaxsr-0.1.0/jaxsr-image.png +0 -0
  65. jaxsr-0.1.0/pyproject.toml +119 -0
  66. jaxsr-0.1.0/src/jaxsr/__init__.py +260 -0
  67. jaxsr-0.1.0/src/jaxsr/acquisition.py +1257 -0
  68. jaxsr-0.1.0/src/jaxsr/app/__init__.py +57 -0
  69. jaxsr-0.1.0/src/jaxsr/app/components.py +166 -0
  70. jaxsr-0.1.0/src/jaxsr/app/main.py +64 -0
  71. jaxsr-0.1.0/src/jaxsr/app/pages/1_Setup.py +123 -0
  72. jaxsr-0.1.0/src/jaxsr/app/pages/2_Design.py +131 -0
  73. jaxsr-0.1.0/src/jaxsr/app/pages/3_Data.py +154 -0
  74. jaxsr-0.1.0/src/jaxsr/app/pages/4_Fit.py +188 -0
  75. jaxsr-0.1.0/src/jaxsr/app/pages/5_Diagnostics.py +150 -0
  76. jaxsr-0.1.0/src/jaxsr/app/pages/6_Surface.py +194 -0
  77. jaxsr-0.1.0/src/jaxsr/app/pages/7_Optimize.py +143 -0
  78. jaxsr-0.1.0/src/jaxsr/app/pages/8_Export.py +182 -0
  79. jaxsr-0.1.0/src/jaxsr/app/state.py +121 -0
  80. jaxsr-0.1.0/src/jaxsr/basis.py +1596 -0
  81. jaxsr-0.1.0/src/jaxsr/classifier.py +905 -0
  82. jaxsr-0.1.0/src/jaxsr/cli.py +563 -0
  83. jaxsr-0.1.0/src/jaxsr/constraints.py +1442 -0
  84. jaxsr-0.1.0/src/jaxsr/dynamics.py +290 -0
  85. jaxsr-0.1.0/src/jaxsr/excel.py +780 -0
  86. jaxsr-0.1.0/src/jaxsr/metrics.py +1256 -0
  87. jaxsr-0.1.0/src/jaxsr/plotting.py +919 -0
  88. jaxsr-0.1.0/src/jaxsr/regressor.py +1220 -0
  89. jaxsr-0.1.0/src/jaxsr/reporting.py +341 -0
  90. jaxsr-0.1.0/src/jaxsr/rsm.py +955 -0
  91. jaxsr-0.1.0/src/jaxsr/sampling.py +610 -0
  92. jaxsr-0.1.0/src/jaxsr/selection.py +2251 -0
  93. jaxsr-0.1.0/src/jaxsr/simplify.py +584 -0
  94. jaxsr-0.1.0/src/jaxsr/skill/SKILL.md +311 -0
  95. jaxsr-0.1.0/src/jaxsr/skill/guides/active-learning.md +267 -0
  96. jaxsr-0.1.0/src/jaxsr/skill/guides/basis-library.md +289 -0
  97. jaxsr-0.1.0/src/jaxsr/skill/guides/cli.md +305 -0
  98. jaxsr-0.1.0/src/jaxsr/skill/guides/constraints.md +206 -0
  99. jaxsr-0.1.0/src/jaxsr/skill/guides/doe-workflow.md +329 -0
  100. jaxsr-0.1.0/src/jaxsr/skill/guides/known-model-fitting.md +417 -0
  101. jaxsr-0.1.0/src/jaxsr/skill/guides/model-fitting.md +268 -0
  102. jaxsr-0.1.0/src/jaxsr/skill/guides/rsm.md +226 -0
  103. jaxsr-0.1.0/src/jaxsr/skill/guides/uncertainty.md +257 -0
  104. jaxsr-0.1.0/src/jaxsr/skill/templates/active-learning-loop.py +112 -0
  105. jaxsr-0.1.0/src/jaxsr/skill/templates/basic-regression.py +92 -0
  106. jaxsr-0.1.0/src/jaxsr/skill/templates/constrained-model.py +94 -0
  107. jaxsr-0.1.0/src/jaxsr/skill/templates/doe-study.py +102 -0
  108. jaxsr-0.1.0/src/jaxsr/skill/templates/langmuir-isotherm.py +158 -0
  109. jaxsr-0.1.0/src/jaxsr/skill/templates/notebook-starter.py +184 -0
  110. jaxsr-0.1.0/src/jaxsr/skill/templates/uncertainty-analysis.py +158 -0
  111. jaxsr-0.1.0/src/jaxsr/study.py +934 -0
  112. jaxsr-0.1.0/src/jaxsr/uncertainty.py +1405 -0
  113. jaxsr-0.1.0/src/jaxsr/utils.py +569 -0
  114. jaxsr-0.1.0/tasks.org +2 -0
  115. jaxsr-0.1.0/tests/__init__.py +1 -0
  116. jaxsr-0.1.0/tests/test_acquisition.py +799 -0
  117. jaxsr-0.1.0/tests/test_basis.py +250 -0
  118. jaxsr-0.1.0/tests/test_categorical.py +366 -0
  119. jaxsr-0.1.0/tests/test_classifier.py +855 -0
  120. jaxsr-0.1.0/tests/test_cli.py +333 -0
  121. jaxsr-0.1.0/tests/test_constraints.py +484 -0
  122. jaxsr-0.1.0/tests/test_dynamics.py +240 -0
  123. jaxsr-0.1.0/tests/test_excel.py +419 -0
  124. jaxsr-0.1.0/tests/test_integration.py +325 -0
  125. jaxsr-0.1.0/tests/test_parametric.py +236 -0
  126. jaxsr-0.1.0/tests/test_regressor.py +417 -0
  127. jaxsr-0.1.0/tests/test_reporting.py +158 -0
  128. jaxsr-0.1.0/tests/test_rsm.py +438 -0
  129. jaxsr-0.1.0/tests/test_selection.py +336 -0
  130. jaxsr-0.1.0/tests/test_study.py +508 -0
  131. jaxsr-0.1.0/tests/test_uncertainty.py +672 -0
@@ -0,0 +1,311 @@
1
+ # JAXSR Skill — Symbolic Regression Assistant
2
+
3
+ JAXSR is a JAX-based symbolic regression library that discovers interpretable algebraic
4
+ expressions from data using sparse optimization. It follows ALAMO-style methodology:
5
+ build a rich candidate basis, then select the simplest model that explains the data.
6
+
7
+ ## Skill Activation
8
+
9
+ Activate this skill when the user wants to:
10
+ - Discover algebraic expressions or equations from data
11
+ - Set up a Design of Experiments (DOE) study
12
+ - Fit, interpret, or export symbolic regression models
13
+ - Choose between basis functions, strategies, UQ methods, or design methods
14
+ - Generate reports from experimental data
15
+ - Use the `jaxsr` CLI tool
16
+ - Build notebooks or scripts for symbolic regression workflows
17
+
18
+ ## Assistant Mode
19
+
20
+ When the user asks for help deciding how to set up, analyze, or report on their problem,
21
+ enter **assistant mode**. In this mode, ask diagnostic questions to guide them to the right
22
+ configuration. Do not dump all options at once — walk through decisions sequentially.
23
+
24
+ ### Step 1: Characterize the Problem
25
+
26
+ Ask the user:
27
+ 1. **What are you modeling?** (physical system, chemical process, ML feature engineering, etc.)
28
+ 2. **How many input features?** (1-3 is small, 4-8 is medium, 9+ is large)
29
+ 3. **How many data points?** (< 20 is very small, 20-100 is typical, 100+ is large)
30
+ 4. **Do you have domain knowledge?** (known physics, monotonicity, bounds, symmetry)
31
+ 5. **What is the goal?** (interpretable equation, prediction, optimization, screening)
32
+
33
+ ### Step 2: Recommend Basis Library
34
+
35
+ Based on the answers, recommend a basis library configuration:
36
+
37
+ | Scenario | Recommended Basis |
38
+ |----------|-------------------|
39
+ | Unknown relationship, few features | `add_constant + add_linear + add_polynomials(3) + add_interactions(2) + add_transcendental()` |
40
+ | Known polynomial behavior | `add_constant + add_linear + add_polynomials(max_degree)` |
41
+ | Engineering correlation (Nusselt, friction) | `add_constant + add_linear + add_polynomials(2) + add_transcendental(funcs=["log","exp","sqrt","inv"])` |
42
+ | Chemical kinetics (rate laws) | `add_constant + add_linear + add_transcendental(funcs=["exp","inv","log"]) + add_ratios() + add_parametric(Arrhenius)` |
43
+ | Large feature space (screening) | `add_constant + add_linear + add_interactions(2)` then use `lasso_path` strategy |
44
+ | Response surface (DOE) | `add_constant + add_linear + add_polynomials(2) + add_interactions(2)` — or use `ResponseSurface` directly |
45
+ | Categorical factors present | Add `add_categorical_indicators() + add_categorical_interactions()` to any of the above |
46
+
47
+ **Key guidance:**
48
+ - Start simple. You can always add complexity.
49
+ - `add_transcendental(safe=True)` guards against log(0), 1/0, sqrt(<0). Always use `safe=True`.
50
+ - `add_ratios(safe=True)` adds x_i/x_j terms. Doubles the library size — only use when ratios are physically meaningful.
51
+ - `add_parametric()` enables nonlinear parameters (e.g., `exp(-a*x)`). Powerful but slower to fit.
52
+ - If n_features > 5, avoid `add_polynomials(degree>2)` — the library becomes enormous.
53
+
54
+ ### Step 3: Recommend Selection Strategy
55
+
56
+ | Data Size | Library Size | Recommended Strategy |
57
+ |-----------|-------------|---------------------|
58
+ | Any | < 20 basis functions | `exhaustive` (exact optimal) |
59
+ | Any | 20-200 basis functions | `greedy_forward` (default, fast, reliable) |
60
+ | Small n | Large library | `lasso_path` (regularized screening) |
61
+ | Many terms expected | Any | `greedy_backward` (start full, prune) |
62
+
63
+ **When to change from defaults:**
64
+ - `greedy_forward` is the right choice 80% of the time. It's the default.
65
+ - Use `exhaustive` only when the basis library is small enough (< 20 terms). It guarantees the global optimum but scales as O(2^n).
66
+ - Use `lasso_path` when you have a very large library and want fast screening. It may miss interaction effects.
67
+ - Use `greedy_backward` when you suspect many terms matter and want to start from the full model.
68
+
69
+ ### Step 4: Recommend Information Criterion
70
+
71
+ | Scenario | Recommended Criterion |
72
+ |----------|----------------------|
73
+ | Small sample (n < 40) | `aicc` (corrected AIC, penalizes overfitting more) |
74
+ | Medium sample (40 ≤ n ≤ 200) | `bic` (stronger complexity penalty, sparser models) |
75
+ | Large sample (n > 200) | `aic` or `bic` (both work well) |
76
+ | Want simplest model | `bic` (penalizes complexity more than `aic` whenever n ≥ 8, since ln(n) > 2) |
77
+ | Want best prediction | `aicc` (balances fit and complexity) |
78
+
79
+ **Default recommendation:** Use `"bic"` for interpretable models, `"aicc"` for predictive models.
80
+ Only `"aic"`, `"aicc"`, and `"bic"` are supported — not `"cv"`.
81
+
82
+ ### Step 5: Recommend Constraints (if applicable)
83
+
84
+ Ask: "Do you have any physical knowledge about the system?"
85
+
86
+ | Physical Knowledge | Constraint to Add |
87
+ |-------------------|-------------------|
88
+ | Output must be positive | `.add_bounds("y", lower=0)` |
89
+ | Output in known range | `.add_bounds("y", lower=lo, upper=hi)` |
90
+ | Increasing in temperature | `.add_monotonic("T", direction="increasing")` |
91
+ | Diminishing returns | `.add_concave(feature)` |
92
+ | Accelerating growth | `.add_convex(feature)` |
93
+ | Coefficient must be positive | `.add_sign_constraint(basis_name, sign="positive")` |
94
+ | Known intercept or slope | `.add_known_coefficient(name, value)` |
95
+
96
+ Use `hard=True` for strict enforcement; `hard=False` (default) for soft penalty.
97
+
98
+ ### Step 6: Recommend Uncertainty Quantification
99
+
100
+ | Need | Method | When to Use |
101
+ |------|--------|-------------|
102
+ | Quick confidence intervals | `model.predict_interval()` | Default. OLS-based. Assumes normality. |
103
+ | Coefficient significance | `model.coefficient_intervals()` | Check which terms are statistically significant |
104
+ | Robust to model uncertainty | `model.predict_bma()` | Averages over Pareto-front models weighted by criterion |
105
+ | No distributional assumptions | `model.predict_conformal()` | Distribution-free. Needs enough data (n > 30). |
106
+ | Assess model stability | `bootstrap_predict()` | Resamples data. Shows sensitivity to individual points. |
107
+ | Compare model structures | `model.predict_ensemble()` | Returns predictions from all Pareto-front models |
108
+ | Variable importance | `anova()` | Decomposes variance by term. Shows which factors matter. |
109
+
110
+ **Default recommendation:** Start with `predict_interval()` (built-in, fast). Add `predict_bma()` if you have multiple competing models. Use `predict_conformal()` for publication-quality intervals.
111
+
112
+ ### Step 7: Recommend Reporting Format
113
+
114
+ | Goal | Action |
115
+ |------|--------|
116
+ | Quick look at results | `model.summary()` or `jaxsr status study.jaxsr` |
117
+ | Share with collaborators | `jaxsr report study.jaxsr -o report.xlsx` (Excel) |
118
+ | Formal report | `jaxsr report study.jaxsr -o report.docx` (Word) |
119
+ | Paper/presentation | `model.to_latex()` for equation, `plot_pareto_front()` for figures |
120
+ | Deploy model | `model.to_callable()` (pure NumPy, no JAX dependency) |
121
+ | Archive/reproduce | `model.save("model.json")` and `study.save("study.jaxsr")` |
122
+
123
+ ---
124
+
125
+ ## Quick Reference: Installation
126
+
127
+ ```bash
128
+ # Core library
129
+ pip install jaxsr
130
+
131
+ # With CLI support
132
+ pip install "jaxsr[cli]"
133
+
134
+ # With Excel reporting
135
+ pip install "jaxsr[excel]"
136
+
137
+ # With Word reports
138
+ pip install "jaxsr[reports]"
139
+
140
+ # Everything for development
141
+ pip install -e ".[dev,cli,excel,reports]"
142
+ ```
143
+
144
+ ## Quick Reference: Python API
145
+
146
+ ### Minimal Example (5 lines)
147
+
148
+ ```python
149
+ from jaxsr import fit_symbolic
150
+ import numpy as np
151
+
152
+ X = np.column_stack([x1, x2]) # shape (n_samples, n_features)
153
+ model = fit_symbolic(X, y, feature_names=["x1", "x2"], max_terms=5)
154
+ print(model.expression_)
155
+ ```
156
+
157
+ ### Full Control Example
158
+
159
+ ```python
160
+ from jaxsr import BasisLibrary, SymbolicRegressor, Constraints
161
+
162
+ # 1. Build basis library
163
+ library = (BasisLibrary(n_features=2, feature_names=["T", "P"])
164
+ .add_constant()
165
+ .add_linear()
166
+ .add_polynomials(max_degree=3)
167
+ .add_interactions(max_order=2)
168
+ .add_transcendental(funcs=["log", "exp", "sqrt"])
169
+ )
170
+
171
+ # 2. Define constraints (optional)
172
+ constraints = (Constraints()
173
+ .add_monotonic("T", direction="increasing")
174
+ .add_bounds("y", lower=0)
175
+ )
176
+
177
+ # 3. Fit model
178
+ model = SymbolicRegressor(
179
+ basis_library=library,
180
+ max_terms=5,
181
+ strategy="greedy_forward",
182
+ information_criterion="bic",
183
+ constraints=constraints,
184
+ )
185
+ model.fit(X_train, y_train)
186
+
187
+ # 4. Inspect results
188
+ print(model.expression_)
189
+ print(model.metrics_)
190
+ print(model.summary())
191
+
192
+ # 5. Predict with uncertainty
193
+ y_pred, lower, upper = model.predict_interval(X_test, alpha=0.05)
194
+
195
+ # 6. Export
196
+ model.save("model.json")
197
+ latex_eq = model.to_latex()
198
+ predict_fn = model.to_callable() # pure NumPy function
199
+ ```
200
+
201
+ ### DOE Workflow
202
+
203
+ ```python
204
+ from jaxsr import DOEStudy
205
+
206
+ # Create study
207
+ study = DOEStudy("catalyst", ["T", "P", "flow"],
208
+ bounds=[(300, 500), (1, 10), (0.1, 2.0)])
209
+ X_design = study.create_design(method="latin_hypercube", n_points=20)
210
+ study.save("catalyst.jaxsr")
211
+
212
+ # After collecting data
213
+ study = DOEStudy.load("catalyst.jaxsr")
214
+ study.add_observations(X_measured, y_measured)
215
+ model = study.fit(max_terms=5)
216
+
217
+ # Get next experiments
218
+ next_pts = study.suggest_next(n_points=5, strategy="uncertainty")
219
+ study.save("catalyst.jaxsr")
220
+ ```
221
+
222
+ ## Quick Reference: CLI
223
+
224
+ ```bash
225
+ # Create study with factors
226
+ jaxsr init my_study -f "temp:300:500" -f "pressure:1:10" -f "catalyst:A,B,C"
227
+
228
+ # Generate experimental design → Excel template
229
+ jaxsr design my_study.jaxsr -m latin_hypercube -n 20 --format xlsx -o template.xlsx
230
+
231
+ # Import completed experiments
232
+ jaxsr add my_study.jaxsr completed.xlsx
233
+
234
+ # Fit model
235
+ jaxsr fit my_study.jaxsr --max-terms 5 --strategy greedy_forward --criterion bic
236
+
237
+ # Suggest next experiments
238
+ jaxsr suggest my_study.jaxsr -n 5 --strategy uncertainty
239
+
240
+ # Generate reports
241
+ jaxsr report my_study.jaxsr -o report.xlsx
242
+ jaxsr report my_study.jaxsr -o report.docx
243
+
244
+ # Check study status
245
+ jaxsr status my_study.jaxsr
246
+ ```
247
+
248
+ ## Decision Trees
249
+
250
+ ### "Which basis functions should I use?"
251
+
252
+ See `guides/basis-library.md` for the complete decision guide.
253
+
254
+ ### "Which selection strategy should I use?"
255
+
256
+ See `guides/model-fitting.md` for strategy comparison and benchmarks.
257
+
258
+ ### "Which UQ method should I use?"
259
+
260
+ See `guides/uncertainty.md` for method comparison and selection flowchart.
261
+
262
+ ### "How do I set up a DOE study?"
263
+
264
+ See `guides/doe-workflow.md` for the complete lifecycle guide.
265
+
266
+ ### "How do I add physical constraints?"
267
+
268
+ See `guides/constraints.md` for constraint types and examples.
269
+
270
+ ### "How do I use the CLI?"
271
+
272
+ See `guides/cli.md` for full CLI reference with examples.
273
+
274
+ ### "I already know the model form. How do I estimate parameters?"
275
+
276
+ See `guides/known-model-fitting.md` for a worked example using the Langmuir isotherm,
277
+ including parametric basis functions, experiment design, ANOVA, and uncertainty analysis.
278
+ Generalizes to Arrhenius, Michaelis-Menten, power laws, and other known models.
279
+
280
+ ### "How do I use Response Surface Methodology?"
281
+
282
+ See `guides/rsm.md` for RSM designs, canonical analysis, and optimization.
283
+
284
+ ### "How do I set up active learning?"
285
+
286
+ See `guides/active-learning.md` for acquisition functions and adaptive sampling.
287
+
288
+ ## Templates
289
+
290
+ Ready-to-use scripts and notebook starters are in `templates/`:
291
+
292
+ | Template | Use Case |
293
+ |----------|----------|
294
+ | `basic-regression.py` | Discover an equation from X, y data |
295
+ | `constrained-model.py` | Add physical constraints to model |
296
+ | `doe-study.py` | Full DOE workflow from design to report |
297
+ | `uncertainty-analysis.py` | Compare all UQ methods |
298
+ | `active-learning-loop.py` | Iterative experiment-model loop |
299
+ | `langmuir-isotherm.py` | Known-model parameter estimation (Langmuir) |
300
+ | `notebook-starter.py` | Jupyter notebook cell structure |
301
+
302
+ ## Common Pitfalls
303
+
304
+ 1. **Library too large for exhaustive search.** If you have 20 or more basis functions, use `greedy_forward` instead of `exhaustive`.
305
+ 2. **Using `information_criterion="cv"`.** Only `"aic"`, `"aicc"`, `"bic"` are supported.
306
+ 3. **Forgetting `safe=True` for transcendental.** Without it, `log(0)` and `1/0` produce `-inf`/`inf` and `sqrt` of a negative number produces NaN, any of which corrupts the fit.
307
+ 4. **Over-specifying the basis.** A library with 500+ terms is slow and prone to overfitting. Start simple.
308
+ 5. **Not checking collinearity.** Use `from jaxsr.utils import check_collinearity` before fitting if terms are nearly redundant.
309
+ 6. **Stale metrics after refit.** After applying constraints, metrics are automatically recalculated. Do not copy metrics from a previous result.
310
+ 7. **Python control flow in JIT.** If writing custom basis functions with `@jit`, use `jnp.where` instead of `if/else`.
311
+ 8. **Calling `float()` on JAX arrays inside JIT.** Use `.item()` outside JIT or keep values as JAX arrays.
@@ -0,0 +1,267 @@
1
+ # Active Learning & Adaptive Sampling Guide
2
+
3
+ ## What is Active Learning?
4
+
5
+ Instead of running all experiments upfront, active learning iteratively:
6
+ 1. Fit a model to current data
7
+ 2. Use the model to suggest the most informative next experiments
8
+ 3. Run those experiments
9
+ 4. Repeat until the model is good enough
10
+
11
+ This can dramatically reduce the number of experiments needed.
12
+
13
+ ## AdaptiveSampler: Simple Interface
14
+
15
+ ```python
16
+ from jaxsr import AdaptiveSampler
17
+
18
+ sampler = AdaptiveSampler(
19
+ model=model, # a fitted SymbolicRegressor
20
+ bounds=[(300, 500), (1, 10)], # Feature bounds
21
+ strategy="uncertainty",
22
+ batch_size=5,
23
+ n_candidates=1000,
24
+ random_state=42
25
+ )
26
+
27
+ # Get suggestions
28
+ result = sampler.suggest(n_points=5)
29
+ X_next = result.points # shape (5, n_features)
30
+ scores = result.scores # acquisition function values
31
+ ```
32
+
33
+ ## Suggestion Strategies
34
+
35
+ ### `uncertainty`
36
+
37
+ Suggests points where the model is most uncertain (largest prediction intervals).
38
+
39
+ **Best for:** General exploration, reducing model error everywhere.
40
+
41
+ ### `error`
42
+
43
+ Suggests points where the model prediction error is expected to be highest.
44
+ Uses ensemble disagreement or residual extrapolation.
45
+
46
+ **Best for:** Improving accuracy in poorly-modeled regions.
47
+
48
+ ### `leverage`
49
+
50
+ Suggests points with high statistical leverage — positions that would have the
51
+ most influence on the fitted coefficients.
52
+
53
+ **Best for:** Stabilizing coefficient estimates, optimal experimental design.
54
+
55
+ ### `gradient`
56
+
57
+ Suggests points where the model gradient is steepest — regions of rapid change.
58
+
59
+ **Best for:** Resolving sharp transitions, nonlinear behavior.
60
+
61
+ ### `space_filling`
62
+
63
+ Suggests points that fill gaps in the existing design.
64
+ Uses maximin distance criterion.
65
+
66
+ **Best for:** Initial exploration, uniform coverage, no model required.
67
+
68
+ ### `random`
69
+
70
+ Uniform random sampling within bounds.
71
+
72
+ **Best for:** Baseline comparison, sanity check.
73
+
74
+ ## Strategy Selection Guide
75
+
76
+ ```
77
+ Do you have a fitted model?
78
+ ├── NO → Use "space_filling" or "random"
79
+ └── YES
80
+ ├── Want to reduce overall uncertainty?
81
+ │ └── Use "uncertainty"
82
+ ├── Want to improve accuracy in worst regions?
83
+ │ └── Use "error"
84
+ ├── Want to stabilize coefficients?
85
+ │ └── Use "leverage"
86
+ └── Want to resolve sharp features?
87
+ └── Use "gradient"
88
+ ```
89
+
90
+ ## Acquisition Functions: Advanced Interface
91
+
92
+ An **acquisition function** scores candidate points by how useful they would be as
93
+ the next experiment. Higher scores mean more informative. For fine-grained control,
94
+ use acquisition functions directly:
95
+
96
+ ```python
97
+ from jaxsr.acquisition import (
98
+ ActiveLearner,
99
+ PredictionVariance,
100
+ UCB, LCB,
101
+ ExpectedImprovement,
102
+ EnsembleDisagreement,
103
+ BMAUncertainty,
104
+ AOptimal, DOptimal,
105
+ Composite,
106
+ suggest_points
107
+ )
108
+ ```
109
+
110
+ ### Prediction Variance
111
+
112
+ ```python
113
+ acq = PredictionVariance()
114
+ learner = ActiveLearner(model, bounds=[(0, 1), (0, 1)], acquisition=acq)
115
+ result = learner.suggest(n_points=5)
116
+ ```
117
+
118
+ ### Upper/Lower Confidence Bound
119
+
120
+ ```python
121
+ # Explore high-response regions (maximize)
122
+ acq = UCB(kappa=2.0) # mean + kappa * std
123
+
124
+ # Explore low-response regions (minimize)
125
+ acq = LCB(kappa=2.0) # mean - kappa * std
126
+ ```
127
+
128
+ `kappa` controls exploration vs. exploitation:
129
+ - High kappa (> 2) → more exploration (wider search)
130
+ - Low kappa (< 1) → more exploitation (stay near known good areas)
131
+
132
+ ### Expected Improvement
133
+
134
+ ```python
135
+ acq = ExpectedImprovement() # For finding the maximum of the response
136
+ ```
137
+
138
+ Classic Bayesian optimization acquisition function. Balances exploitation
139
+ (predicted value) and exploration (uncertainty).
140
+
141
+ ### Ensemble Disagreement
142
+
143
+ ```python
144
+ acq = EnsembleDisagreement() # Where Pareto-front models disagree most
145
+ ```
146
+
147
+ ### Optimal Design Criteria
148
+
149
+ ```python
150
+ acq = AOptimal() # Minimize average variance of coefficients
151
+ acq = DOptimal() # Minimize volume of coefficient confidence ellipsoid
152
+ ```
153
+
154
+ ### Composite Acquisition Functions
155
+
156
+ Combine multiple acquisition functions with weights:
157
+
158
+ ```python
159
+ # 70% prediction variance + 30% A-optimality (coefficient variance)
160
+ combined = 0.7 * PredictionVariance() + 0.3 * AOptimal()
161
+
162
+ learner = ActiveLearner(model, bounds=bounds, acquisition=combined)
163
+ result = learner.suggest(n_points=5)
164
+ ```
165
+
166
+ You can also use the `Composite` class:
167
+ ```python
168
+ combined = Composite(functions=[(0.7, PredictionVariance()), (0.3, AOptimal())])
169
+ ```
170
+
171
+ ## Complete Active Learning Loop
172
+
173
+ ```python
174
+ import numpy as np
175
+ from jaxsr import BasisLibrary, SymbolicRegressor, AdaptiveSampler
176
+
177
+ # Define your experiment (replace with real measurements)
178
+ def true_function(X):
179
+ """Placeholder — replace with your actual experiment or simulation."""
180
+ return 3.0 * X[:, 0] + 0.5 * X[:, 1] - 0.01 * X[:, 0]**2
181
+
182
+ # Initial data (small)
183
+ X_init = np.random.uniform([300, 1], [500, 10], size=(10, 2))
184
+ y_init = true_function(X_init)
185
+
186
+ # Build library
187
+ library = (BasisLibrary(n_features=2, feature_names=["T", "P"])
188
+ .add_constant().add_linear()
189
+ .add_polynomials(max_degree=2)
190
+ .add_interactions(max_order=2)
191
+ )
192
+
193
+ # Fit initial model
194
+ model = SymbolicRegressor(basis_library=library, max_terms=5)
195
+ model.fit(X_init, y_init)
196
+
197
+ X_all, y_all = X_init.copy(), y_init.copy()
198
+ bounds = [(300, 500), (1, 10)]
199
+
200
+ for iteration in range(5):
201
+ print(f"\n--- Iteration {iteration + 1} ---")
202
+ print(f"Model: {model.expression_}")
203
+ print(f"R²: {model.metrics_['r2']:.4f}")
204
+
205
+ # Suggest next experiments
206
+ sampler = AdaptiveSampler(model, bounds, strategy="uncertainty", batch_size=5)
207
+ result = sampler.suggest(n_points=5)
208
+ X_next = result.points
209
+
210
+ # Run experiments (replace with actual experiments)
211
+ y_next = true_function(X_next)
212
+
213
+ # Update model
214
+ X_all = np.vstack([X_all, X_next])
215
+ y_all = np.concatenate([y_all, y_next])
216
+ model.fit(X_all, y_all)
217
+
218
+ print(f"\nFinal model ({len(y_all)} points): {model.expression_}")
219
+ ```
220
+
221
+ ## DOE Integration
222
+
223
+ The `DOEStudy` class wraps active learning into the study workflow:
224
+
225
+ ```python
226
+ from jaxsr import DOEStudy
227
+
228
+ study = DOEStudy.load("my_study.jaxsr")
229
+
230
+ # Suggest next experiments using the fitted model
231
+ next_pts = study.suggest_next(n_points=5, strategy="uncertainty")
232
+
233
+ # Or via CLI
234
+ # jaxsr suggest my_study.jaxsr -n 5 --strategy uncertainty
235
+ ```
236
+
237
+ ## Discrete/Integer Variables
238
+
239
+ If some features take only discrete values:
240
+
241
+ ```python
242
+ sampler = AdaptiveSampler(
243
+ model, bounds,
244
+ strategy="uncertainty",
245
+ discrete_dims={1: [1, 2, 3, 4, 5]} # Feature index 1: valid discrete values
246
+ )
247
+ ```
248
+
249
+ ## Excluding Regions
250
+
251
+ Avoid suggesting points too close to existing data:
252
+
253
+ ```python
254
+ result = sampler.suggest(
255
+ n_points=5,
256
+ exclude_points=X_existing, # Don't suggest near these
257
+ min_distance=0.01 # Minimum distance threshold
258
+ )
259
+ ```
260
+
261
+ ## Batch vs. Sequential
262
+
263
+ - **Sequential** (n_points=1): Optimal but requires one experiment at a time.
264
+ - **Batch** (n_points > 1, e.g. 5): Less optimal per point but allows parallel experiments.
265
+
266
+ For batch suggestions, points are selected greedily to avoid clustering:
267
+ each new point considers previously selected points in the batch.