jaxsr 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- jaxsr-0.1.0/.claude/skills/jaxsr/SKILL.md +311 -0
- jaxsr-0.1.0/.claude/skills/jaxsr/guides/active-learning.md +267 -0
- jaxsr-0.1.0/.claude/skills/jaxsr/guides/basis-library.md +289 -0
- jaxsr-0.1.0/.claude/skills/jaxsr/guides/cli.md +305 -0
- jaxsr-0.1.0/.claude/skills/jaxsr/guides/constraints.md +206 -0
- jaxsr-0.1.0/.claude/skills/jaxsr/guides/doe-workflow.md +329 -0
- jaxsr-0.1.0/.claude/skills/jaxsr/guides/known-model-fitting.md +417 -0
- jaxsr-0.1.0/.claude/skills/jaxsr/guides/model-fitting.md +268 -0
- jaxsr-0.1.0/.claude/skills/jaxsr/guides/rsm.md +226 -0
- jaxsr-0.1.0/.claude/skills/jaxsr/guides/uncertainty.md +257 -0
- jaxsr-0.1.0/.claude/skills/jaxsr/templates/active-learning-loop.py +112 -0
- jaxsr-0.1.0/.claude/skills/jaxsr/templates/basic-regression.py +92 -0
- jaxsr-0.1.0/.claude/skills/jaxsr/templates/constrained-model.py +94 -0
- jaxsr-0.1.0/.claude/skills/jaxsr/templates/doe-study.py +102 -0
- jaxsr-0.1.0/.claude/skills/jaxsr/templates/langmuir-isotherm.py +158 -0
- jaxsr-0.1.0/.claude/skills/jaxsr/templates/notebook-starter.py +184 -0
- jaxsr-0.1.0/.claude/skills/jaxsr/templates/uncertainty-analysis.py +158 -0
- jaxsr-0.1.0/.github/workflows/docs.yml +51 -0
- jaxsr-0.1.0/.github/workflows/tests.yml +61 -0
- jaxsr-0.1.0/.gitignore +16 -0
- jaxsr-0.1.0/CLAUDE.md +297 -0
- jaxsr-0.1.0/LICENSE +21 -0
- jaxsr-0.1.0/Makefile +15 -0
- jaxsr-0.1.0/PKG-INFO +424 -0
- jaxsr-0.1.0/README.md +358 -0
- jaxsr-0.1.0/docs/_config.yml +21 -0
- jaxsr-0.1.0/docs/_toc.yml +35 -0
- jaxsr-0.1.0/docs/api.md +282 -0
- jaxsr-0.1.0/docs/background/comparison-alamo-sisso-jaxsr.md +103 -0
- jaxsr-0.1.0/docs/background/literature_review.md +282 -0
- jaxsr-0.1.0/docs/examples/active_learning.ipynb +774 -0
- jaxsr-0.1.0/docs/examples/basic_usage.ipynb +1047 -0
- jaxsr-0.1.0/docs/examples/categorical_variables.ipynb +520 -0
- jaxsr-0.1.0/docs/examples/chemical_kinetics.ipynb +963 -0
- jaxsr-0.1.0/docs/examples/comprehensive_tutorial.ipynb +2555 -0
- jaxsr-0.1.0/docs/examples/doe_study_workflow.ipynb +574 -0
- jaxsr-0.1.0/docs/examples/heat_transfer.ipynb +947 -0
- jaxsr-0.1.0/docs/examples/ode_discovery.ipynb +829 -0
- jaxsr-0.1.0/docs/examples/symbolic_classification.ipynb +1397 -0
- jaxsr-0.1.0/docs/examples/uncertainty_quantification.ipynb +873 -0
- jaxsr-0.1.0/docs/examples/uq_bma_weights.png +0 -0
- jaxsr-0.1.0/docs/examples/uq_coefficient_intervals.png +0 -0
- jaxsr-0.1.0/docs/examples/uq_prediction_intervals.png +0 -0
- jaxsr-0.1.0/docs/guides/acquisition.md +338 -0
- jaxsr-0.1.0/docs/guides/claude_code_skills.md +154 -0
- jaxsr-0.1.0/docs/guides/cli_guide.md +598 -0
- jaxsr-0.1.0/docs/guides/doe_guide.md +447 -0
- jaxsr-0.1.0/docs/intro.md +104 -0
- jaxsr-0.1.0/docs/jaxsr.png +0 -0
- jaxsr-0.1.0/docs/quickstart.md +241 -0
- jaxsr-0.1.0/examples/active_learning.py +520 -0
- jaxsr-0.1.0/examples/basic_usage.py +376 -0
- jaxsr-0.1.0/examples/categorical_variables.py +186 -0
- jaxsr-0.1.0/examples/chemical_kinetics.py +348 -0
- jaxsr-0.1.0/examples/cli_workflow.md +330 -0
- jaxsr-0.1.0/examples/comprehensive_tutorial.ipynb +2555 -0
- jaxsr-0.1.0/examples/doe_study_workflow.py +180 -0
- jaxsr-0.1.0/examples/heat_transfer.py +331 -0
- jaxsr-0.1.0/examples/langmuir_doe_active_learning.ipynb +1345 -0
- jaxsr-0.1.0/examples/model_comparison_isotherms.ipynb +984 -0
- jaxsr-0.1.0/examples/rsm_formulation.ipynb +1093 -0
- jaxsr-0.1.0/examples/serialization_and_sharing.ipynb +1035 -0
- jaxsr-0.1.0/examples/uncertainty_quantification.py +464 -0
- jaxsr-0.1.0/jaxsr-image.png +0 -0
- jaxsr-0.1.0/pyproject.toml +119 -0
- jaxsr-0.1.0/src/jaxsr/__init__.py +260 -0
- jaxsr-0.1.0/src/jaxsr/acquisition.py +1257 -0
- jaxsr-0.1.0/src/jaxsr/app/__init__.py +57 -0
- jaxsr-0.1.0/src/jaxsr/app/components.py +166 -0
- jaxsr-0.1.0/src/jaxsr/app/main.py +64 -0
- jaxsr-0.1.0/src/jaxsr/app/pages/1_Setup.py +123 -0
- jaxsr-0.1.0/src/jaxsr/app/pages/2_Design.py +131 -0
- jaxsr-0.1.0/src/jaxsr/app/pages/3_Data.py +154 -0
- jaxsr-0.1.0/src/jaxsr/app/pages/4_Fit.py +188 -0
- jaxsr-0.1.0/src/jaxsr/app/pages/5_Diagnostics.py +150 -0
- jaxsr-0.1.0/src/jaxsr/app/pages/6_Surface.py +194 -0
- jaxsr-0.1.0/src/jaxsr/app/pages/7_Optimize.py +143 -0
- jaxsr-0.1.0/src/jaxsr/app/pages/8_Export.py +182 -0
- jaxsr-0.1.0/src/jaxsr/app/state.py +121 -0
- jaxsr-0.1.0/src/jaxsr/basis.py +1596 -0
- jaxsr-0.1.0/src/jaxsr/classifier.py +905 -0
- jaxsr-0.1.0/src/jaxsr/cli.py +563 -0
- jaxsr-0.1.0/src/jaxsr/constraints.py +1442 -0
- jaxsr-0.1.0/src/jaxsr/dynamics.py +290 -0
- jaxsr-0.1.0/src/jaxsr/excel.py +780 -0
- jaxsr-0.1.0/src/jaxsr/metrics.py +1256 -0
- jaxsr-0.1.0/src/jaxsr/plotting.py +919 -0
- jaxsr-0.1.0/src/jaxsr/regressor.py +1220 -0
- jaxsr-0.1.0/src/jaxsr/reporting.py +341 -0
- jaxsr-0.1.0/src/jaxsr/rsm.py +955 -0
- jaxsr-0.1.0/src/jaxsr/sampling.py +610 -0
- jaxsr-0.1.0/src/jaxsr/selection.py +2251 -0
- jaxsr-0.1.0/src/jaxsr/simplify.py +584 -0
- jaxsr-0.1.0/src/jaxsr/skill/SKILL.md +311 -0
- jaxsr-0.1.0/src/jaxsr/skill/guides/active-learning.md +267 -0
- jaxsr-0.1.0/src/jaxsr/skill/guides/basis-library.md +289 -0
- jaxsr-0.1.0/src/jaxsr/skill/guides/cli.md +305 -0
- jaxsr-0.1.0/src/jaxsr/skill/guides/constraints.md +206 -0
- jaxsr-0.1.0/src/jaxsr/skill/guides/doe-workflow.md +329 -0
- jaxsr-0.1.0/src/jaxsr/skill/guides/known-model-fitting.md +417 -0
- jaxsr-0.1.0/src/jaxsr/skill/guides/model-fitting.md +268 -0
- jaxsr-0.1.0/src/jaxsr/skill/guides/rsm.md +226 -0
- jaxsr-0.1.0/src/jaxsr/skill/guides/uncertainty.md +257 -0
- jaxsr-0.1.0/src/jaxsr/skill/templates/active-learning-loop.py +112 -0
- jaxsr-0.1.0/src/jaxsr/skill/templates/basic-regression.py +92 -0
- jaxsr-0.1.0/src/jaxsr/skill/templates/constrained-model.py +94 -0
- jaxsr-0.1.0/src/jaxsr/skill/templates/doe-study.py +102 -0
- jaxsr-0.1.0/src/jaxsr/skill/templates/langmuir-isotherm.py +158 -0
- jaxsr-0.1.0/src/jaxsr/skill/templates/notebook-starter.py +184 -0
- jaxsr-0.1.0/src/jaxsr/skill/templates/uncertainty-analysis.py +158 -0
- jaxsr-0.1.0/src/jaxsr/study.py +934 -0
- jaxsr-0.1.0/src/jaxsr/uncertainty.py +1405 -0
- jaxsr-0.1.0/src/jaxsr/utils.py +569 -0
- jaxsr-0.1.0/tasks.org +2 -0
- jaxsr-0.1.0/tests/__init__.py +1 -0
- jaxsr-0.1.0/tests/test_acquisition.py +799 -0
- jaxsr-0.1.0/tests/test_basis.py +250 -0
- jaxsr-0.1.0/tests/test_categorical.py +366 -0
- jaxsr-0.1.0/tests/test_classifier.py +855 -0
- jaxsr-0.1.0/tests/test_cli.py +333 -0
- jaxsr-0.1.0/tests/test_constraints.py +484 -0
- jaxsr-0.1.0/tests/test_dynamics.py +240 -0
- jaxsr-0.1.0/tests/test_excel.py +419 -0
- jaxsr-0.1.0/tests/test_integration.py +325 -0
- jaxsr-0.1.0/tests/test_parametric.py +236 -0
- jaxsr-0.1.0/tests/test_regressor.py +417 -0
- jaxsr-0.1.0/tests/test_reporting.py +158 -0
- jaxsr-0.1.0/tests/test_rsm.py +438 -0
- jaxsr-0.1.0/tests/test_selection.py +336 -0
- jaxsr-0.1.0/tests/test_study.py +508 -0
- jaxsr-0.1.0/tests/test_uncertainty.py +672 -0
|
@@ -0,0 +1,311 @@
|
|
|
1
|
+
# JAXSR Skill — Symbolic Regression Assistant
|
|
2
|
+
|
|
3
|
+
JAXSR is a JAX-based symbolic regression library that discovers interpretable algebraic
|
|
4
|
+
expressions from data using sparse optimization. It follows ALAMO-style methodology:
|
|
5
|
+
build a rich candidate basis, then select the simplest model that explains the data.
|
|
6
|
+
|
|
7
|
+
## Skill Activation
|
|
8
|
+
|
|
9
|
+
Activate this skill when the user wants to:
|
|
10
|
+
- Discover algebraic expressions or equations from data
|
|
11
|
+
- Set up a Design of Experiments (DOE) study
|
|
12
|
+
- Fit, interpret, or export symbolic regression models
|
|
13
|
+
- Choose between basis functions, strategies, UQ methods, or design methods
|
|
14
|
+
- Generate reports from experimental data
|
|
15
|
+
- Use the `jaxsr` CLI tool
|
|
16
|
+
- Build notebooks or scripts for symbolic regression workflows
|
|
17
|
+
|
|
18
|
+
## Assistant Mode
|
|
19
|
+
|
|
20
|
+
When the user asks for help deciding how to set up, analyze, or report on their problem,
|
|
21
|
+
enter **assistant mode**. In this mode, ask diagnostic questions to guide them to the right
|
|
22
|
+
configuration. Do not dump all options at once — walk through decisions sequentially.
|
|
23
|
+
|
|
24
|
+
### Step 1: Characterize the Problem
|
|
25
|
+
|
|
26
|
+
Ask the user:
|
|
27
|
+
1. **What are you modeling?** (physical system, chemical process, ML feature engineering, etc.)
|
|
28
|
+
2. **How many input features?** (1-3 is small, 4-8 is medium, 9+ is large)
|
|
29
|
+
3. **How many data points?** (< 20 is very small, 20-100 is typical, > 100 is large)
|
|
30
|
+
4. **Do you have domain knowledge?** (known physics, monotonicity, bounds, symmetry)
|
|
31
|
+
5. **What is the goal?** (interpretable equation, prediction, optimization, screening)
|
|
32
|
+
|
|
33
|
+
### Step 2: Recommend Basis Library
|
|
34
|
+
|
|
35
|
+
Based on the answers, recommend a basis library configuration:
|
|
36
|
+
|
|
37
|
+
| Scenario | Recommended Basis |
|
|
38
|
+
|----------|-------------------|
|
|
39
|
+
| Unknown relationship, few features | `add_constant + add_linear + add_polynomials(3) + add_interactions(2) + add_transcendental()` |
|
|
40
|
+
| Known polynomial behavior | `add_constant + add_linear + add_polynomials(max_degree)` |
|
|
41
|
+
| Engineering correlation (Nusselt, friction) | `add_constant + add_linear + add_polynomials(2) + add_transcendental(funcs=["log","exp","sqrt","inv"])` |
|
|
42
|
+
| Chemical kinetics (rate laws) | `add_constant + add_linear + add_transcendental(funcs=["exp","inv","log"]) + add_ratios() + add_parametric(Arrhenius)` |
|
|
43
|
+
| Large feature space (screening) | `add_constant + add_linear + add_interactions(2)` then use `lasso_path` strategy |
|
|
44
|
+
| Response surface (DOE) | `add_constant + add_linear + add_polynomials(2) + add_interactions(2)` — or use `ResponseSurface` directly |
|
|
45
|
+
| Categorical factors present | Add `add_categorical_indicators() + add_categorical_interactions()` to any of the above |
|
|
46
|
+
|
|
47
|
+
**Key guidance:**
|
|
48
|
+
- Start simple. You can always add complexity.
|
|
49
|
+
- `add_transcendental(safe=True)` guards against log(0), 1/0, sqrt(<0). Always use `safe=True`.
|
|
50
|
+
- `add_ratios(safe=True)` adds x_i/x_j terms. Doubles the library size — only use when ratios are physically meaningful.
|
|
51
|
+
- `add_parametric()` enables nonlinear parameters (e.g., `exp(-a*x)`). Powerful but slower to fit.
|
|
52
|
+
- If n_features > 5, avoid `add_polynomials()` with `max_degree` > 2 — the library becomes enormous.
|
|
53
|
+
|
|
54
|
+
### Step 3: Recommend Selection Strategy
|
|
55
|
+
|
|
56
|
+
| Data Size | Library Size | Recommended Strategy |
|
|
57
|
+
|-----------|-------------|---------------------|
|
|
58
|
+
| Any | < 20 basis functions | `exhaustive` (exact optimal) |
|
|
59
|
+
| Any | 20-200 basis functions | `greedy_forward` (default, fast, reliable) |
|
|
60
|
+
| Small n | Large library | `lasso_path` (regularized screening) |
|
|
61
|
+
| Many terms expected | Any | `greedy_backward` (start full, prune) |
|
|
62
|
+
|
|
63
|
+
**When to change from defaults:**
|
|
64
|
+
- `greedy_forward` is the right choice 80% of the time. It's the default.
|
|
65
|
+
- Use `exhaustive` only when the basis library is small enough (< 20 terms). It guarantees the global optimum but scales as O(2^p), where p is the number of basis functions (not the number of data points).
|
|
66
|
+
- Use `lasso_path` when you have a very large library and want fast screening. It may miss interaction effects.
|
|
67
|
+
- Use `greedy_backward` when you suspect many terms matter and want to start from the full model.
|
|
68
|
+
|
|
69
|
+
### Step 4: Recommend Information Criterion
|
|
70
|
+
|
|
71
|
+
| Scenario | Recommended Criterion |
|
|
72
|
+
|----------|----------------------|
|
|
73
|
+
| Small sample (n < 40) | `aicc` (corrected AIC, penalizes overfitting more) |
|
|
74
|
+
| Medium sample (40 ≤ n ≤ 200) | `bic` (stronger complexity penalty, sparser models) |
|
|
75
|
+
| Large sample (n > 200) | `aic` or `bic` (both work well) |
|
|
76
|
+
| Want simplest model | `bic` (penalizes each extra term more than `aic` whenever n ≥ 8, since ln(n) > 2) |
|
|
77
|
+
| Want best prediction | `aicc` (balances fit and complexity) |
|
|
78
|
+
|
|
79
|
+
**Default recommendation:** Use `"bic"` for interpretable models, `"aicc"` for predictive models.
|
|
80
|
+
Only `"aic"`, `"aicc"`, and `"bic"` are supported — not `"cv"`.
|
|
81
|
+
|
|
82
|
+
### Step 5: Recommend Constraints (if applicable)
|
|
83
|
+
|
|
84
|
+
Ask: "Do you have any physical knowledge about the system?"
|
|
85
|
+
|
|
86
|
+
| Physical Knowledge | Constraint to Add |
|
|
87
|
+
|-------------------|-------------------|
|
|
88
|
+
| Output must be positive | `.add_bounds("y", lower=0)` |
|
|
89
|
+
| Output in known range | `.add_bounds("y", lower=lo, upper=hi)` |
|
|
90
|
+
| Increasing in temperature | `.add_monotonic("T", direction="increasing")` |
|
|
91
|
+
| Diminishing returns | `.add_concave(feature)` |
|
|
92
|
+
| Accelerating growth | `.add_convex(feature)` |
|
|
93
|
+
| Coefficient must be positive | `.add_sign_constraint(basis_name, sign="positive")` |
|
|
94
|
+
| Known intercept or slope | `.add_known_coefficient(name, value)` |
|
|
95
|
+
|
|
96
|
+
Use `hard=True` for strict enforcement; `hard=False` (default) for soft penalty.
|
|
97
|
+
|
|
98
|
+
### Step 6: Recommend Uncertainty Quantification
|
|
99
|
+
|
|
100
|
+
| Need | Method | When to Use |
|
|
101
|
+
|------|--------|-------------|
|
|
102
|
+
| Quick confidence intervals | `model.predict_interval()` | Default. OLS-based. Assumes normality. |
|
|
103
|
+
| Coefficient significance | `model.coefficient_intervals()` | Check which terms are statistically significant |
|
|
104
|
+
| Robust to model uncertainty | `model.predict_bma()` | Averages over Pareto-front models weighted by criterion |
|
|
105
|
+
| No distributional assumptions | `model.predict_conformal()` | Distribution-free. Needs enough data (n > 30). |
|
|
106
|
+
| Assess model stability | `bootstrap_predict()` | Resamples data. Shows sensitivity to individual points. |
|
|
107
|
+
| Compare model structures | `model.predict_ensemble()` | Returns predictions from all Pareto-front models |
|
|
108
|
+
| Variable importance | `anova()` | Decomposes variance by term. Shows which factors matter. |
|
|
109
|
+
|
|
110
|
+
**Default recommendation:** Start with `predict_interval()` (built-in, fast). Add `predict_bma()` if you have multiple competing models. Use `predict_conformal()` for publication-quality intervals.
|
|
111
|
+
|
|
112
|
+
### Step 7: Recommend Reporting Format
|
|
113
|
+
|
|
114
|
+
| Goal | Action |
|
|
115
|
+
|------|--------|
|
|
116
|
+
| Quick look at results | `model.summary()` or `jaxsr status study.jaxsr` |
|
|
117
|
+
| Share with collaborators | `jaxsr report study.jaxsr -o report.xlsx` (Excel) |
|
|
118
|
+
| Formal report | `jaxsr report study.jaxsr -o report.docx` (Word) |
|
|
119
|
+
| Paper/presentation | `model.to_latex()` for equation, `plot_pareto_front()` for figures |
|
|
120
|
+
| Deploy model | `model.to_callable()` (pure NumPy, no JAX dependency) |
|
|
121
|
+
| Archive/reproduce | `model.save("model.json")` and `study.save("study.jaxsr")` |
|
|
122
|
+
|
|
123
|
+
---
|
|
124
|
+
|
|
125
|
+
## Quick Reference: Installation
|
|
126
|
+
|
|
127
|
+
```bash
|
|
128
|
+
# Core library
|
|
129
|
+
pip install jaxsr
|
|
130
|
+
|
|
131
|
+
# With CLI support
|
|
132
|
+
pip install "jaxsr[cli]"
|
|
133
|
+
|
|
134
|
+
# With Excel reporting
|
|
135
|
+
pip install "jaxsr[excel]"
|
|
136
|
+
|
|
137
|
+
# With Word reports
|
|
138
|
+
pip install "jaxsr[reports]"
|
|
139
|
+
|
|
140
|
+
# Everything for development
|
|
141
|
+
pip install -e ".[dev,cli,excel,reports]"
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
## Quick Reference: Python API
|
|
145
|
+
|
|
146
|
+
### Minimal Example (5 lines)
|
|
147
|
+
|
|
148
|
+
```python
|
|
149
|
+
from jaxsr import fit_symbolic
|
|
150
|
+
import numpy as np
|
|
151
|
+
|
|
152
|
+
X = np.column_stack([x1, x2]) # shape (n_samples, n_features)
|
|
153
|
+
model = fit_symbolic(X, y, feature_names=["x1", "x2"], max_terms=5)
|
|
154
|
+
print(model.expression_)
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
### Full Control Example
|
|
158
|
+
|
|
159
|
+
```python
|
|
160
|
+
from jaxsr import BasisLibrary, SymbolicRegressor, Constraints
|
|
161
|
+
|
|
162
|
+
# 1. Build basis library
|
|
163
|
+
library = (BasisLibrary(n_features=2, feature_names=["T", "P"])
|
|
164
|
+
.add_constant()
|
|
165
|
+
.add_linear()
|
|
166
|
+
.add_polynomials(max_degree=3)
|
|
167
|
+
.add_interactions(max_order=2)
|
|
168
|
+
.add_transcendental(funcs=["log", "exp", "sqrt"])
|
|
169
|
+
)
|
|
170
|
+
|
|
171
|
+
# 2. Define constraints (optional)
|
|
172
|
+
constraints = (Constraints()
|
|
173
|
+
.add_monotonic("T", direction="increasing")
|
|
174
|
+
.add_bounds("y", lower=0)
|
|
175
|
+
)
|
|
176
|
+
|
|
177
|
+
# 3. Fit model
|
|
178
|
+
model = SymbolicRegressor(
|
|
179
|
+
basis_library=library,
|
|
180
|
+
max_terms=5,
|
|
181
|
+
strategy="greedy_forward",
|
|
182
|
+
information_criterion="bic",
|
|
183
|
+
constraints=constraints,
|
|
184
|
+
)
|
|
185
|
+
model.fit(X_train, y_train)
|
|
186
|
+
|
|
187
|
+
# 4. Inspect results
|
|
188
|
+
print(model.expression_)
|
|
189
|
+
print(model.metrics_)
|
|
190
|
+
print(model.summary())
|
|
191
|
+
|
|
192
|
+
# 5. Predict with uncertainty
|
|
193
|
+
y_pred, lower, upper = model.predict_interval(X_test, alpha=0.05)
|
|
194
|
+
|
|
195
|
+
# 6. Export
|
|
196
|
+
model.save("model.json")
|
|
197
|
+
latex_eq = model.to_latex()
|
|
198
|
+
predict_fn = model.to_callable() # pure NumPy function
|
|
199
|
+
```
|
|
200
|
+
|
|
201
|
+
### DOE Workflow
|
|
202
|
+
|
|
203
|
+
```python
|
|
204
|
+
from jaxsr import DOEStudy
|
|
205
|
+
|
|
206
|
+
# Create study
|
|
207
|
+
study = DOEStudy("catalyst", ["T", "P", "flow"],
|
|
208
|
+
bounds=[(300, 500), (1, 10), (0.1, 2.0)])
|
|
209
|
+
X_design = study.create_design(method="latin_hypercube", n_points=20)
|
|
210
|
+
study.save("catalyst.jaxsr")
|
|
211
|
+
|
|
212
|
+
# After collecting data
|
|
213
|
+
study = DOEStudy.load("catalyst.jaxsr")
|
|
214
|
+
study.add_observations(X_measured, y_measured)
|
|
215
|
+
model = study.fit(max_terms=5)
|
|
216
|
+
|
|
217
|
+
# Get next experiments
|
|
218
|
+
next_pts = study.suggest_next(n_points=5, strategy="uncertainty")
|
|
219
|
+
study.save("catalyst.jaxsr")
|
|
220
|
+
```
|
|
221
|
+
|
|
222
|
+
## Quick Reference: CLI
|
|
223
|
+
|
|
224
|
+
```bash
|
|
225
|
+
# Create study with factors
|
|
226
|
+
jaxsr init my_study -f "temp:300:500" -f "pressure:1:10" -f "catalyst:A,B,C"
|
|
227
|
+
|
|
228
|
+
# Generate experimental design → Excel template
|
|
229
|
+
jaxsr design my_study.jaxsr -m latin_hypercube -n 20 --format xlsx -o template.xlsx
|
|
230
|
+
|
|
231
|
+
# Import completed experiments
|
|
232
|
+
jaxsr add my_study.jaxsr completed.xlsx
|
|
233
|
+
|
|
234
|
+
# Fit model
|
|
235
|
+
jaxsr fit my_study.jaxsr --max-terms 5 --strategy greedy_forward --criterion bic
|
|
236
|
+
|
|
237
|
+
# Suggest next experiments
|
|
238
|
+
jaxsr suggest my_study.jaxsr -n 5 --strategy uncertainty
|
|
239
|
+
|
|
240
|
+
# Generate reports
|
|
241
|
+
jaxsr report my_study.jaxsr -o report.xlsx
|
|
242
|
+
jaxsr report my_study.jaxsr -o report.docx
|
|
243
|
+
|
|
244
|
+
# Check study status
|
|
245
|
+
jaxsr status my_study.jaxsr
|
|
246
|
+
```
|
|
247
|
+
|
|
248
|
+
## Decision Trees
|
|
249
|
+
|
|
250
|
+
### "Which basis functions should I use?"
|
|
251
|
+
|
|
252
|
+
See `guides/basis-library.md` for the complete decision guide.
|
|
253
|
+
|
|
254
|
+
### "Which selection strategy should I use?"
|
|
255
|
+
|
|
256
|
+
See `guides/model-fitting.md` for strategy comparison and benchmarks.
|
|
257
|
+
|
|
258
|
+
### "Which UQ method should I use?"
|
|
259
|
+
|
|
260
|
+
See `guides/uncertainty.md` for method comparison and selection flowchart.
|
|
261
|
+
|
|
262
|
+
### "How do I set up a DOE study?"
|
|
263
|
+
|
|
264
|
+
See `guides/doe-workflow.md` for the complete lifecycle guide.
|
|
265
|
+
|
|
266
|
+
### "How do I add physical constraints?"
|
|
267
|
+
|
|
268
|
+
See `guides/constraints.md` for constraint types and examples.
|
|
269
|
+
|
|
270
|
+
### "How do I use the CLI?"
|
|
271
|
+
|
|
272
|
+
See `guides/cli.md` for full CLI reference with examples.
|
|
273
|
+
|
|
274
|
+
### "I already know the model form. How do I estimate parameters?"
|
|
275
|
+
|
|
276
|
+
See `guides/known-model-fitting.md` for a worked example using the Langmuir isotherm,
|
|
277
|
+
including parametric basis functions, experiment design, ANOVA, and uncertainty analysis.
|
|
278
|
+
Generalizes to Arrhenius, Michaelis-Menten, power laws, and other known models.
|
|
279
|
+
|
|
280
|
+
### "How do I use Response Surface Methodology?"
|
|
281
|
+
|
|
282
|
+
See `guides/rsm.md` for RSM designs, canonical analysis, and optimization.
|
|
283
|
+
|
|
284
|
+
### "How do I set up active learning?"
|
|
285
|
+
|
|
286
|
+
See `guides/active-learning.md` for acquisition functions and adaptive sampling.
|
|
287
|
+
|
|
288
|
+
## Templates
|
|
289
|
+
|
|
290
|
+
Ready-to-use scripts and notebook starters are in `templates/`:
|
|
291
|
+
|
|
292
|
+
| Template | Use Case |
|
|
293
|
+
|----------|----------|
|
|
294
|
+
| `basic-regression.py` | Discover an equation from X, y data |
|
|
295
|
+
| `constrained-model.py` | Add physical constraints to model |
|
|
296
|
+
| `doe-study.py` | Full DOE workflow from design to report |
|
|
297
|
+
| `uncertainty-analysis.py` | Compare all UQ methods |
|
|
298
|
+
| `active-learning-loop.py` | Iterative experiment-model loop |
|
|
299
|
+
| `langmuir-isotherm.py` | Known-model parameter estimation (Langmuir) |
|
|
300
|
+
| `notebook-starter.py` | Jupyter notebook cell structure |
|
|
301
|
+
|
|
302
|
+
## Common Pitfalls
|
|
303
|
+
|
|
304
|
+
1. **Library too large for exhaustive search.** If you have 20 or more basis functions, use `greedy_forward` instead of `exhaustive`.
|
|
305
|
+
2. **Using `information_criterion="cv"`.** Only `"aic"`, `"aicc"`, `"bic"` are supported.
|
|
306
|
+
3. **Forgetting `safe=True` for transcendental.** Without it, `log(0)` and `1/0` produce NaN.
|
|
307
|
+
4. **Over-specifying the basis.** A library with 500+ terms is slow and prone to overfitting. Start simple.
|
|
308
|
+
5. **Not checking collinearity.** If terms are nearly redundant, run `check_collinearity` (imported with `from jaxsr.utils import check_collinearity`) before fitting.
|
|
309
|
+
6. **Stale metrics after refit.** After applying constraints, metrics are automatically recalculated. Do not copy metrics from a previous result.
|
|
310
|
+
7. **Python control flow in JIT.** If writing custom basis functions with `@jit`, use `jnp.where` instead of `if/else`.
|
|
311
|
+
8. **Calling `float()` on JAX arrays inside JIT.** Use `.item()` outside JIT or keep values as JAX arrays.
|
|
@@ -0,0 +1,267 @@
|
|
|
1
|
+
# Active Learning & Adaptive Sampling Guide
|
|
2
|
+
|
|
3
|
+
## What is Active Learning?
|
|
4
|
+
|
|
5
|
+
Instead of running all experiments upfront, active learning iteratively:
|
|
6
|
+
1. Fit a model to current data
|
|
7
|
+
2. Use the model to suggest the most informative next experiments
|
|
8
|
+
3. Run those experiments
|
|
9
|
+
4. Repeat until the model is good enough
|
|
10
|
+
|
|
11
|
+
This can dramatically reduce the number of experiments needed.
|
|
12
|
+
|
|
13
|
+
## AdaptiveSampler: Simple Interface
|
|
14
|
+
|
|
15
|
+
```python
|
|
16
|
+
from jaxsr import AdaptiveSampler
|
|
17
|
+
|
|
18
|
+
sampler = AdaptiveSampler(
|
|
19
|
+
model=model, # a fitted SymbolicRegressor
|
|
20
|
+
bounds=[(300, 500), (1, 10)], # Feature bounds
|
|
21
|
+
strategy="uncertainty",
|
|
22
|
+
batch_size=5,
|
|
23
|
+
n_candidates=1000,
|
|
24
|
+
random_state=42
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
# Get suggestions
|
|
28
|
+
result = sampler.suggest(n_points=5)
|
|
29
|
+
X_next = result.points # shape (5, n_features)
|
|
30
|
+
scores = result.scores # acquisition function values
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
## Suggestion Strategies
|
|
34
|
+
|
|
35
|
+
### `uncertainty`
|
|
36
|
+
|
|
37
|
+
Suggests points where the model is most uncertain (largest prediction intervals).
|
|
38
|
+
|
|
39
|
+
**Best for:** General exploration, reducing model error everywhere.
|
|
40
|
+
|
|
41
|
+
### `error`
|
|
42
|
+
|
|
43
|
+
Suggests points where the model prediction error is expected to be highest.
|
|
44
|
+
Uses ensemble disagreement or residual extrapolation.
|
|
45
|
+
|
|
46
|
+
**Best for:** Improving accuracy in poorly-modeled regions.
|
|
47
|
+
|
|
48
|
+
### `leverage`
|
|
49
|
+
|
|
50
|
+
Suggests points with high statistical leverage — positions that would have the
|
|
51
|
+
most influence on the fitted coefficients.
|
|
52
|
+
|
|
53
|
+
**Best for:** Stabilizing coefficient estimates, optimal experimental design.
|
|
54
|
+
|
|
55
|
+
### `gradient`
|
|
56
|
+
|
|
57
|
+
Suggests points where the model gradient is steepest — regions of rapid change.
|
|
58
|
+
|
|
59
|
+
**Best for:** Resolving sharp transitions, nonlinear behavior.
|
|
60
|
+
|
|
61
|
+
### `space_filling`
|
|
62
|
+
|
|
63
|
+
Suggests points that fill gaps in the existing design.
|
|
64
|
+
Uses maximin distance criterion.
|
|
65
|
+
|
|
66
|
+
**Best for:** Initial exploration, uniform coverage, no model required.
|
|
67
|
+
|
|
68
|
+
### `random`
|
|
69
|
+
|
|
70
|
+
Uniform random sampling within bounds.
|
|
71
|
+
|
|
72
|
+
**Best for:** Baseline comparison, sanity check.
|
|
73
|
+
|
|
74
|
+
## Strategy Selection Guide
|
|
75
|
+
|
|
76
|
+
```
|
|
77
|
+
Do you have a fitted model?
|
|
78
|
+
├── NO → Use "space_filling" or "random"
|
|
79
|
+
└── YES
|
|
80
|
+
├── Want to reduce overall uncertainty?
|
|
81
|
+
│ └── Use "uncertainty"
|
|
82
|
+
├── Want to improve accuracy in worst regions?
|
|
83
|
+
│ └── Use "error"
|
|
84
|
+
├── Want to stabilize coefficients?
|
|
85
|
+
│ └── Use "leverage"
|
|
86
|
+
└── Want to resolve sharp features?
|
|
87
|
+
└── Use "gradient"
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
## Acquisition Functions: Advanced Interface
|
|
91
|
+
|
|
92
|
+
An **acquisition function** scores candidate points by how useful they would be as
|
|
93
|
+
the next experiment. Higher scores mean more informative. For fine-grained control,
|
|
94
|
+
use acquisition functions directly:
|
|
95
|
+
|
|
96
|
+
```python
|
|
97
|
+
from jaxsr.acquisition import (
|
|
98
|
+
ActiveLearner,
|
|
99
|
+
PredictionVariance,
|
|
100
|
+
UCB, LCB,
|
|
101
|
+
ExpectedImprovement,
|
|
102
|
+
EnsembleDisagreement,
|
|
103
|
+
BMAUncertainty,
|
|
104
|
+
AOptimal, DOptimal,
|
|
105
|
+
Composite,
|
|
106
|
+
suggest_points
|
|
107
|
+
)
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
### Prediction Variance
|
|
111
|
+
|
|
112
|
+
```python
|
|
113
|
+
acq = PredictionVariance()
|
|
114
|
+
learner = ActiveLearner(model, bounds=[(0, 1), (0, 1)], acquisition=acq)
|
|
115
|
+
result = learner.suggest(n_points=5)
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
### Upper/Lower Confidence Bound
|
|
119
|
+
|
|
120
|
+
```python
|
|
121
|
+
# Explore high-response regions (maximize)
|
|
122
|
+
acq = UCB(kappa=2.0) # mean + kappa * std
|
|
123
|
+
|
|
124
|
+
# Explore low-response regions (minimize)
|
|
125
|
+
acq = LCB(kappa=2.0) # mean - kappa * std
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
`kappa` controls exploration vs. exploitation:
|
|
129
|
+
- High kappa (> 2) → more exploration (wider search)
|
|
130
|
+
- Low kappa (< 1) → more exploitation (stay near known good areas)
|
|
131
|
+
|
|
132
|
+
### Expected Improvement
|
|
133
|
+
|
|
134
|
+
```python
|
|
135
|
+
acq = ExpectedImprovement() # For finding the maximum of the response
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
Classic Bayesian optimization acquisition function. Balances exploitation
|
|
139
|
+
(predicted value) and exploration (uncertainty).
|
|
140
|
+
|
|
141
|
+
### Ensemble Disagreement
|
|
142
|
+
|
|
143
|
+
```python
|
|
144
|
+
acq = EnsembleDisagreement() # Where Pareto-front models disagree most
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
### Optimal Design Criteria
|
|
148
|
+
|
|
149
|
+
```python
|
|
150
|
+
acq = AOptimal() # Minimize average variance of coefficients
|
|
151
|
+
acq = DOptimal() # Minimize volume of coefficient confidence ellipsoid
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
### Composite Acquisition Functions
|
|
155
|
+
|
|
156
|
+
Combine multiple acquisition functions with weights:
|
|
157
|
+
|
|
158
|
+
```python
|
|
159
|
+
# 70% prediction variance + 30% A-optimality (average coefficient variance)
|
|
160
|
+
combined = 0.7 * PredictionVariance() + 0.3 * AOptimal()
|
|
161
|
+
|
|
162
|
+
learner = ActiveLearner(model, bounds=bounds, acquisition=combined)
|
|
163
|
+
result = learner.suggest(n_points=5)
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
You can also use the `Composite` class:
|
|
167
|
+
```python
|
|
168
|
+
combined = Composite(functions=[(0.7, PredictionVariance()), (0.3, AOptimal())])
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
## Complete Active Learning Loop
|
|
172
|
+
|
|
173
|
+
```python
|
|
174
|
+
import numpy as np
|
|
175
|
+
from jaxsr import BasisLibrary, SymbolicRegressor, AdaptiveSampler
|
|
176
|
+
|
|
177
|
+
# Define your experiment (replace with real measurements)
|
|
178
|
+
def true_function(X):
|
|
179
|
+
"""Placeholder — replace with your actual experiment or simulation."""
|
|
180
|
+
return 3.0 * X[:, 0] + 0.5 * X[:, 1] - 0.01 * X[:, 0]**2
|
|
181
|
+
|
|
182
|
+
# Initial data (small)
|
|
183
|
+
X_init = np.random.uniform([300, 1], [500, 10], size=(10, 2))
|
|
184
|
+
y_init = true_function(X_init)
|
|
185
|
+
|
|
186
|
+
# Build library
|
|
187
|
+
library = (BasisLibrary(n_features=2, feature_names=["T", "P"])
|
|
188
|
+
.add_constant().add_linear()
|
|
189
|
+
.add_polynomials(max_degree=2)
|
|
190
|
+
.add_interactions(max_order=2)
|
|
191
|
+
)
|
|
192
|
+
|
|
193
|
+
# Fit initial model
|
|
194
|
+
model = SymbolicRegressor(basis_library=library, max_terms=5)
|
|
195
|
+
model.fit(X_init, y_init)
|
|
196
|
+
|
|
197
|
+
X_all, y_all = X_init.copy(), y_init.copy()
|
|
198
|
+
bounds = [(300, 500), (1, 10)]
|
|
199
|
+
|
|
200
|
+
for iteration in range(5):
|
|
201
|
+
print(f"\n--- Iteration {iteration + 1} ---")
|
|
202
|
+
print(f"Model: {model.expression_}")
|
|
203
|
+
print(f"R²: {model.metrics_['r2']:.4f}")
|
|
204
|
+
|
|
205
|
+
# Suggest next experiments
|
|
206
|
+
sampler = AdaptiveSampler(model, bounds, strategy="uncertainty", batch_size=5)
|
|
207
|
+
result = sampler.suggest(n_points=5)
|
|
208
|
+
X_next = result.points
|
|
209
|
+
|
|
210
|
+
# Run experiments (replace with actual experiments)
|
|
211
|
+
y_next = true_function(X_next)
|
|
212
|
+
|
|
213
|
+
# Update model
|
|
214
|
+
X_all = np.vstack([X_all, X_next])
|
|
215
|
+
y_all = np.concatenate([y_all, y_next])
|
|
216
|
+
model.fit(X_all, y_all)
|
|
217
|
+
|
|
218
|
+
print(f"\nFinal model ({len(y_all)} points): {model.expression_}")
|
|
219
|
+
```
|
|
220
|
+
|
|
221
|
+
## DOE Integration
|
|
222
|
+
|
|
223
|
+
The `DOEStudy` class wraps active learning into the study workflow:
|
|
224
|
+
|
|
225
|
+
```python
|
|
226
|
+
from jaxsr import DOEStudy
|
|
227
|
+
|
|
228
|
+
study = DOEStudy.load("my_study.jaxsr")
|
|
229
|
+
|
|
230
|
+
# Suggest next experiments using the fitted model
|
|
231
|
+
next_pts = study.suggest_next(n_points=5, strategy="uncertainty")
|
|
232
|
+
|
|
233
|
+
# Or via CLI
|
|
234
|
+
# jaxsr suggest my_study.jaxsr -n 5 --strategy uncertainty
|
|
235
|
+
```
|
|
236
|
+
|
|
237
|
+
## Discrete/Integer Variables
|
|
238
|
+
|
|
239
|
+
If some features take only discrete values:
|
|
240
|
+
|
|
241
|
+
```python
|
|
242
|
+
sampler = AdaptiveSampler(
|
|
243
|
+
model, bounds,
|
|
244
|
+
strategy="uncertainty",
|
|
245
|
+
discrete_dims={1: [1, 2, 3, 4, 5]} # Feature index 1: valid discrete values
|
|
246
|
+
)
|
|
247
|
+
```
|
|
248
|
+
|
|
249
|
+
## Excluding Regions
|
|
250
|
+
|
|
251
|
+
Avoid suggesting points too close to existing data:
|
|
252
|
+
|
|
253
|
+
```python
|
|
254
|
+
result = sampler.suggest(
|
|
255
|
+
n_points=5,
|
|
256
|
+
exclude_points=X_existing, # Don't suggest near these
|
|
257
|
+
min_distance=0.01 # Minimum distance threshold
|
|
258
|
+
)
|
|
259
|
+
```
|
|
260
|
+
|
|
261
|
+
## Batch vs. Sequential
|
|
262
|
+
|
|
263
|
+
- **Sequential** (n_points=1): Optimal but requires one experiment at a time.
|
|
264
|
+
- **Batch** (n_points=5+): Less optimal but allows parallel experiments.
|
|
265
|
+
|
|
266
|
+
For batch suggestions, points are selected greedily to avoid clustering:
|
|
267
|
+
each new point considers previously selected points in the batch.
|