neuropt 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- neuropt-0.3.0/.claude/settings.local.json +13 -0
- neuropt-0.3.0/.github/workflows/ci.yml +48 -0
- neuropt-0.3.0/.gitignore +35 -0
- neuropt-0.3.0/.python-version +1 -0
- neuropt-0.3.0/CONTRIBUTING.md +37 -0
- neuropt-0.3.0/LICENSE +21 -0
- neuropt-0.3.0/PKG-INFO +145 -0
- neuropt-0.3.0/README.md +94 -0
- neuropt-0.3.0/assets/banner.png +0 -0
- neuropt-0.3.0/assets/benchmark.png +0 -0
- neuropt-0.3.0/assets/convergence.png +0 -0
- neuropt-0.3.0/assets/results.png +0 -0
- neuropt-0.3.0/docs/api.md +122 -0
- neuropt-0.3.0/docs/benchmarks.md +56 -0
- neuropt-0.3.0/docs/cli.md +61 -0
- neuropt-0.3.0/docs/examples.md +37 -0
- neuropt-0.3.0/docs/how-it-works.md +68 -0
- neuropt-0.3.0/docs/index.md +64 -0
- neuropt-0.3.0/examples/benchmark.py +359 -0
- neuropt-0.3.0/examples/train_fashion.py +163 -0
- neuropt-0.3.0/examples/train_resnet.py +77 -0
- neuropt-0.3.0/mkdocs.yml +32 -0
- neuropt-0.3.0/neuropt/__init__.py +7 -0
- neuropt-0.3.0/neuropt/arch_search.py +833 -0
- neuropt-0.3.0/neuropt/backends/__init__.py +59 -0
- neuropt-0.3.0/neuropt/backends/base.py +18 -0
- neuropt-0.3.0/neuropt/backends/claude_backend.py +29 -0
- neuropt-0.3.0/neuropt/backends/local_qwen.py +52 -0
- neuropt-0.3.0/neuropt/backends/openai_backend.py +29 -0
- neuropt-0.3.0/neuropt/cli.py +180 -0
- neuropt-0.3.0/neuropt/introspect.py +192 -0
- neuropt-0.3.0/neuropt/search_space.py +84 -0
- neuropt-0.3.0/pyproject.toml +65 -0
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
{
|
|
2
|
+
"permissions": {
|
|
3
|
+
"allow": [
|
|
4
|
+
"WebSearch",
|
|
5
|
+
"Bash(grep -E \"\\\\.\\(png|gif|ipynb|jsonl|lock\\)$\")",
|
|
6
|
+
"Bash(.venv/bin/python -c \"import torch; print\\('torch', torch.__version__\\); import torchvision; print\\('torchvision', torchvision.__version__\\)\")",
|
|
7
|
+
"Bash(.venv/bin/python -c \"import transformers; print\\('transformers', transformers.__version__\\)\" 2>&1)",
|
|
8
|
+
"Bash(.venv/bin/python -c \"import mlx; print\\(mlx.__version__\\)\")",
|
|
9
|
+
"Bash(.venv/bin/python -c \"import anthropic; print\\(anthropic.__version__\\)\")",
|
|
10
|
+
"Bash(git config:*)"
|
|
11
|
+
]
|
|
12
|
+
}
|
|
13
|
+
}
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
test:
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
strategy:
|
|
13
|
+
matrix:
|
|
14
|
+
python-version: ["3.10", "3.11", "3.12", "3.13"]
|
|
15
|
+
|
|
16
|
+
steps:
|
|
17
|
+
- uses: actions/checkout@v4
|
|
18
|
+
|
|
19
|
+
- name: Install uv
|
|
20
|
+
uses: astral-sh/setup-uv@v4
|
|
21
|
+
|
|
22
|
+
- name: Set up Python ${{ matrix.python-version }}
|
|
23
|
+
run: uv python install ${{ matrix.python-version }}
|
|
24
|
+
|
|
25
|
+
- name: Install dependencies
|
|
26
|
+
run: uv sync
|
|
27
|
+
|
|
28
|
+
- name: Import check
|
|
29
|
+
run: |
|
|
30
|
+
uv run python -c "from neuropt import ArchSearch, LogUniform, Uniform, IntUniform, Categorical"
|
|
31
|
+
uv run python -c "from neuropt.backends import get_default_backend, get_backend_by_name"
|
|
32
|
+
uv run python -c "from neuropt.introspect import introspect"
|
|
33
|
+
uv run python -c "from neuropt.cli import app"
|
|
34
|
+
|
|
35
|
+
- name: Smoke test (random search, no LLM)
|
|
36
|
+
run: |
|
|
37
|
+
uv run python -c "
|
|
38
|
+
from neuropt import ArchSearch
|
|
39
|
+
search = ArchSearch(
|
|
40
|
+
train_fn=lambda cfg: {'score': cfg['lr'] * 10},
|
|
41
|
+
search_space={'lr': (1e-4, 1e-1), 'x': [1, 2, 3]},
|
|
42
|
+
backend='none',
|
|
43
|
+
log_path='/tmp/ci_test.jsonl',
|
|
44
|
+
)
|
|
45
|
+
search.run(max_evals=3)
|
|
46
|
+
assert search.best_score < 1.0
|
|
47
|
+
print('OK')
|
|
48
|
+
"
|
neuropt-0.3.0/.gitignore
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[oc]
|
|
4
|
+
build/
|
|
5
|
+
dist/
|
|
6
|
+
wheels/
|
|
7
|
+
*.egg-info
|
|
8
|
+
|
|
9
|
+
# Virtual environments
|
|
10
|
+
.venv
|
|
11
|
+
|
|
12
|
+
# Data & results
|
|
13
|
+
data/
|
|
14
|
+
*.jsonl
|
|
15
|
+
benchmark_results.json
|
|
16
|
+
*.tsv
|
|
17
|
+
*.lock
|
|
18
|
+
results/
|
|
19
|
+
|
|
20
|
+
# IDE
|
|
21
|
+
.idea/
|
|
22
|
+
.vscode/
|
|
23
|
+
*.swp
|
|
24
|
+
|
|
25
|
+
# Build artifacts
|
|
26
|
+
site/
|
|
27
|
+
|
|
28
|
+
# OS
|
|
29
|
+
.DS_Store
|
|
30
|
+
|
|
31
|
+
# Root-level scratch files (old prototypes)
|
|
32
|
+
/log_vs_linear_sampling.png
|
|
33
|
+
/pso_results.png
|
|
34
|
+
/pso_lightning_chat.ipynb
|
|
35
|
+
/sa_lightning_chat.ipynb
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
3.13
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# Contributing to neuropt
|
|
2
|
+
|
|
3
|
+
Thanks for your interest! Here are some ways to help:
|
|
4
|
+
|
|
5
|
+
## Good first contributions
|
|
6
|
+
|
|
7
|
+
- **Try it on a new dataset/model** and share your results in an issue
|
|
8
|
+
- **Add a new LLM backend** — see `neuropt/backends/` for the pattern (just implement `generate` and `is_available`)
|
|
9
|
+
- **Improve the prompt** — the system prompt in `arch_search.py` can always be better. If you find phrasing that gets better results, open a PR
|
|
10
|
+
- **Add tests** — we need them, especially for config validation and dedup logic
|
|
11
|
+
|
|
12
|
+
## Bigger ideas
|
|
13
|
+
|
|
14
|
+
- **Multi-objective optimization** — optimize for accuracy AND inference speed simultaneously
|
|
15
|
+
- **Model introspection for more layer types** — currently we detect activations, dropout, and batch norm. Width multipliers, attention heads, and other structural changes would be great
|
|
16
|
+
- **Better local LLM support** — the Qwen backend works but has a high parse failure rate on complex search spaces. Prompt engineering or constrained decoding could help
|
|
17
|
+
|
|
18
|
+
## Setup
|
|
19
|
+
|
|
20
|
+
```bash
|
|
21
|
+
git clone https://github.com/loevlie/neuropt.git
|
|
22
|
+
cd neuropt
|
|
23
|
+
uv sync
|
|
24
|
+
uv run neuropt --help
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
## Running tests
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
uv run pytest
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
## PR guidelines
|
|
34
|
+
|
|
35
|
+
- Keep changes focused — one thing per PR
|
|
36
|
+
- Add a test if you're changing logic
|
|
37
|
+
- Run `uv run neuropt run examples/train_fashion.py --backend none -n 3` to verify nothing is broken
|
neuropt-0.3.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Dennis Loevlie
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
neuropt-0.3.0/PKG-INFO
ADDED
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: neuropt
|
|
3
|
+
Version: 0.3.0
|
|
4
|
+
Summary: LLM-guided ML optimization — point it at a training script, it reads the curves and designs better models
|
|
5
|
+
Project-URL: Homepage, https://github.com/loevlie/neuropt
|
|
6
|
+
Project-URL: Repository, https://github.com/loevlie/neuropt
|
|
7
|
+
Project-URL: Issues, https://github.com/loevlie/neuropt/issues
|
|
8
|
+
Author: Dennis Loevlie
|
|
9
|
+
License: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: architecture-search,automl,hyperparameter,llm,machine-learning,neural-architecture-search,optimization,pytorch,training-curves
|
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
|
13
|
+
Classifier: Intended Audience :: Science/Research
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
20
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
21
|
+
Requires-Python: >=3.10
|
|
22
|
+
Requires-Dist: cloudpickle>=3.0
|
|
23
|
+
Requires-Dist: numpy>=1.24
|
|
24
|
+
Requires-Dist: typer>=0.9
|
|
25
|
+
Provides-Extra: all
|
|
26
|
+
Requires-Dist: accelerate>=0.25; extra == 'all'
|
|
27
|
+
Requires-Dist: anthropic>=0.40; extra == 'all'
|
|
28
|
+
Requires-Dist: matplotlib>=3.7; extra == 'all'
|
|
29
|
+
Requires-Dist: openai>=1.0; extra == 'all'
|
|
30
|
+
Requires-Dist: pandas>=2.0; extra == 'all'
|
|
31
|
+
Requires-Dist: torch>=2.0; extra == 'all'
|
|
32
|
+
Requires-Dist: torchvision>=0.15; extra == 'all'
|
|
33
|
+
Requires-Dist: transformers>=4.37; extra == 'all'
|
|
34
|
+
Provides-Extra: dev
|
|
35
|
+
Requires-Dist: optuna; extra == 'dev'
|
|
36
|
+
Requires-Dist: pytest; extra == 'dev'
|
|
37
|
+
Provides-Extra: llm
|
|
38
|
+
Requires-Dist: anthropic>=0.40; extra == 'llm'
|
|
39
|
+
Provides-Extra: llm-local
|
|
40
|
+
Requires-Dist: accelerate>=0.25; extra == 'llm-local'
|
|
41
|
+
Requires-Dist: transformers>=4.37; extra == 'llm-local'
|
|
42
|
+
Provides-Extra: llm-openai
|
|
43
|
+
Requires-Dist: openai>=1.0; extra == 'llm-openai'
|
|
44
|
+
Provides-Extra: torch
|
|
45
|
+
Requires-Dist: torch>=2.0; extra == 'torch'
|
|
46
|
+
Requires-Dist: torchvision>=0.15; extra == 'torch'
|
|
47
|
+
Provides-Extra: viz
|
|
48
|
+
Requires-Dist: matplotlib>=3.7; extra == 'viz'
|
|
49
|
+
Requires-Dist: pandas>=2.0; extra == 'viz'
|
|
50
|
+
Description-Content-Type: text/markdown
|
|
51
|
+
|
|
52
|
+
# neuropt
|
|
53
|
+
|
|
54
|
+
<p align="center">
|
|
55
|
+
<img src="assets/banner.png" alt="Three robot researchers designing neural network architectures" width="700">
|
|
56
|
+
</p>
|
|
57
|
+
|
|
58
|
+
<p align="center">
|
|
59
|
+
<em>An LLM reads your training curves and designs your next experiment.</em>
|
|
60
|
+
</p>
|
|
61
|
+
|
|
62
|
+
---
|
|
63
|
+
|
|
64
|
+
Point it at a training script, let it run overnight. The LLM sees full per-epoch train/val curves, spots overfitting, and proposes what to try next — like a research assistant who never sleeps and actually reads the loss plots.
|
|
65
|
+
|
|
66
|
+
### vs Optuna and random search
|
|
67
|
+
|
|
68
|
+
<p align="center">
|
|
69
|
+
<img src="assets/benchmark.png" alt="Benchmark: neuropt vs Optuna vs Random" width="700">
|
|
70
|
+
</p>
|
|
71
|
+
|
|
72
|
+
Same 15-eval budget, 14-parameter CNN search space. These results use **Claude Haiku 4.5** (the smallest and cheapest of Anthropic's 4.5 models). We expect even stronger results with Sonnet or Opus. Optuna's TPE was configured with `n_startup_trials=3` for a fair comparison (default is 10, which would make it purely random for most of the budget).
|
|
73
|
+
|
|
74
|
+
## Quick start
|
|
75
|
+
|
|
76
|
+
```bash
|
|
77
|
+
pip install neuropt[llm]
|
|
78
|
+
export ANTHROPIC_API_KEY="sk-ant-..."
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
**Option 1** — define what to search over:
|
|
82
|
+
|
|
83
|
+
```python
|
|
84
|
+
# train.py
|
|
85
|
+
search_space = {
|
|
86
|
+
"lr": (1e-4, 1e-1), # auto-detects log-scale
|
|
87
|
+
"hidden_dim": (32, 512), # auto-detects integer
|
|
88
|
+
"activation": ["relu", "gelu", "silu"], # categorical
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
def train_fn(config):
|
|
92
|
+
model = build_my_model(config["hidden_dim"], config["activation"])
|
|
93
|
+
# ... train, return per-epoch losses for smarter LLM decisions ...
|
|
94
|
+
return {"score": val_loss, "train_losses": [...], "val_losses": [...]}
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
**Option 2** — just give it a model, we figure out the rest:
|
|
98
|
+
|
|
99
|
+
```python
|
|
100
|
+
# train.py
|
|
101
|
+
model = torchvision.models.resnet18(num_classes=10) # neuropt introspects this
|
|
102
|
+
|
|
103
|
+
def train_fn(config):
|
|
104
|
+
m = config["model"].to("cuda") # deep copy with modifications applied
|
|
105
|
+
# ... train ...
|
|
106
|
+
return {"score": val_loss, "train_losses": [...], "val_losses": [...]}
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
Then run:
|
|
110
|
+
|
|
111
|
+
```bash
|
|
112
|
+
neuropt run train.py
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
Runs until Ctrl+C. Crash-safe, resumable. Works in notebooks too:
|
|
116
|
+
|
|
117
|
+
```python
|
|
118
|
+
from neuropt import ArchSearch
|
|
119
|
+
|
|
120
|
+
search = ArchSearch(train_fn=train_fn, search_space=search_space, backend="claude")
|
|
121
|
+
search.run(max_evals=50)
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
## Documentation
|
|
125
|
+
|
|
126
|
+
See the [full documentation](https://loevlie.github.io/neuropt/) for:
|
|
127
|
+
|
|
128
|
+
- [How it works](https://loevlie.github.io/neuropt/how-it-works/) — what the LLM sees, training curve analysis
|
|
129
|
+
- [CLI reference](https://loevlie.github.io/neuropt/cli/) — `neuropt run`, `inspect`, `results`
|
|
130
|
+
- [Python API](https://loevlie.github.io/neuropt/api/) — `ArchSearch`, `from_model`, search space types
|
|
131
|
+
- [Examples](https://loevlie.github.io/neuropt/examples/) — CNN search, ResNet tuning
|
|
132
|
+
- [Benchmarks](https://loevlie.github.io/neuropt/benchmarks/) — vs Optuna, random search
|
|
133
|
+
|
|
134
|
+
## Installation
|
|
135
|
+
|
|
136
|
+
```bash
|
|
137
|
+
pip install neuropt # core
|
|
138
|
+
pip install neuropt[llm] # + Claude API (recommended)
|
|
139
|
+
pip install neuropt[llm-openai] # + OpenAI API
|
|
140
|
+
pip install neuropt[all] # everything
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
## License
|
|
144
|
+
|
|
145
|
+
MIT
|
neuropt-0.3.0/README.md
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
# neuropt
|
|
2
|
+
|
|
3
|
+
<p align="center">
|
|
4
|
+
<img src="assets/banner.png" alt="Three robot researchers designing neural network architectures" width="700">
|
|
5
|
+
</p>
|
|
6
|
+
|
|
7
|
+
<p align="center">
|
|
8
|
+
<em>An LLM reads your training curves and designs your next experiment.</em>
|
|
9
|
+
</p>
|
|
10
|
+
|
|
11
|
+
---
|
|
12
|
+
|
|
13
|
+
Point it at a training script, let it run overnight. The LLM sees full per-epoch train/val curves, spots overfitting, and proposes what to try next — like a research assistant who never sleeps and actually reads the loss plots.
|
|
14
|
+
|
|
15
|
+
### vs Optuna and random search
|
|
16
|
+
|
|
17
|
+
<p align="center">
|
|
18
|
+
<img src="assets/benchmark.png" alt="Benchmark: neuropt vs Optuna vs Random" width="700">
|
|
19
|
+
</p>
|
|
20
|
+
|
|
21
|
+
Same 15-eval budget, 14-parameter CNN search space. These results use **Claude Haiku 4.5** (the smallest and cheapest of Anthropic's 4.5 models). We expect even stronger results with Sonnet or Opus. Optuna's TPE was configured with `n_startup_trials=3` for a fair comparison (default is 10, which would make it purely random for most of the budget).
|
|
22
|
+
|
|
23
|
+
## Quick start
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
pip install neuropt[llm]
|
|
27
|
+
export ANTHROPIC_API_KEY="sk-ant-..."
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
**Option 1** — define what to search over:
|
|
31
|
+
|
|
32
|
+
```python
|
|
33
|
+
# train.py
|
|
34
|
+
search_space = {
|
|
35
|
+
"lr": (1e-4, 1e-1), # auto-detects log-scale
|
|
36
|
+
"hidden_dim": (32, 512), # auto-detects integer
|
|
37
|
+
"activation": ["relu", "gelu", "silu"], # categorical
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
def train_fn(config):
|
|
41
|
+
model = build_my_model(config["hidden_dim"], config["activation"])
|
|
42
|
+
# ... train, return per-epoch losses for smarter LLM decisions ...
|
|
43
|
+
return {"score": val_loss, "train_losses": [...], "val_losses": [...]}
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
**Option 2** — just give it a model, we figure out the rest:
|
|
47
|
+
|
|
48
|
+
```python
|
|
49
|
+
# train.py
|
|
50
|
+
model = torchvision.models.resnet18(num_classes=10) # neuropt introspects this
|
|
51
|
+
|
|
52
|
+
def train_fn(config):
|
|
53
|
+
m = config["model"].to("cuda") # deep copy with modifications applied
|
|
54
|
+
# ... train ...
|
|
55
|
+
return {"score": val_loss, "train_losses": [...], "val_losses": [...]}
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
Then run:
|
|
59
|
+
|
|
60
|
+
```bash
|
|
61
|
+
neuropt run train.py
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
Runs until Ctrl+C. Crash-safe, resumable. Works in notebooks too:
|
|
65
|
+
|
|
66
|
+
```python
|
|
67
|
+
from neuropt import ArchSearch
|
|
68
|
+
|
|
69
|
+
search = ArchSearch(train_fn=train_fn, search_space=search_space, backend="claude")
|
|
70
|
+
search.run(max_evals=50)
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
## Documentation
|
|
74
|
+
|
|
75
|
+
See the [full documentation](https://loevlie.github.io/neuropt/) for:
|
|
76
|
+
|
|
77
|
+
- [How it works](https://loevlie.github.io/neuropt/how-it-works/) — what the LLM sees, training curve analysis
|
|
78
|
+
- [CLI reference](https://loevlie.github.io/neuropt/cli/) — `neuropt run`, `inspect`, `results`
|
|
79
|
+
- [Python API](https://loevlie.github.io/neuropt/api/) — `ArchSearch`, `from_model`, search space types
|
|
80
|
+
- [Examples](https://loevlie.github.io/neuropt/examples/) — CNN search, ResNet tuning
|
|
81
|
+
- [Benchmarks](https://loevlie.github.io/neuropt/benchmarks/) — vs Optuna, random search
|
|
82
|
+
|
|
83
|
+
## Installation
|
|
84
|
+
|
|
85
|
+
```bash
|
|
86
|
+
pip install neuropt # core
|
|
87
|
+
pip install neuropt[llm] # + Claude API (recommended)
|
|
88
|
+
pip install neuropt[llm-openai] # + OpenAI API
|
|
89
|
+
pip install neuropt[all] # everything
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
## License
|
|
93
|
+
|
|
94
|
+
MIT
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
# Python API
|
|
2
|
+
|
|
3
|
+
## ArchSearch
|
|
4
|
+
|
|
5
|
+
The main class. Use it directly in notebooks or scripts.
|
|
6
|
+
|
|
7
|
+
### From a search space
|
|
8
|
+
|
|
9
|
+
```python
|
|
10
|
+
from neuropt import ArchSearch
|
|
11
|
+
|
|
12
|
+
search = ArchSearch(
|
|
13
|
+
train_fn=train_fn,
|
|
14
|
+
search_space={
|
|
15
|
+
"lr": (1e-4, 1e-1),
|
|
16
|
+
"n_layers": (2, 8),
|
|
17
|
+
"activation": ["relu", "gelu", "silu"],
|
|
18
|
+
"use_bn": [True, False],
|
|
19
|
+
},
|
|
20
|
+
backend="claude",
|
|
21
|
+
)
|
|
22
|
+
search.run(max_evals=50)
|
|
23
|
+
|
|
24
|
+
print(search.best_config)
|
|
25
|
+
print(search.best_score)
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
### From a model
|
|
29
|
+
|
|
30
|
+
```python
|
|
31
|
+
from neuropt import ArchSearch
|
|
32
|
+
|
|
33
|
+
search = ArchSearch.from_model(
|
|
34
|
+
model=my_model,
|
|
35
|
+
train_fn=train_fn,
|
|
36
|
+
backend="claude",
|
|
37
|
+
)
|
|
38
|
+
search.run(max_evals=50)
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
`from_model` introspects the module tree, finds activations/dropout/batch norm, generates a search space, and wraps your `train_fn` so `config["model"]` contains the modified deep copy.
|
|
42
|
+
|
|
43
|
+
### Parameters
|
|
44
|
+
|
|
45
|
+
| Parameter | Default | Description |
|
|
46
|
+
|-----------|---------|-------------|
|
|
47
|
+
| `train_fn` | required | `config dict → result dict` |
|
|
48
|
+
| `search_space` | required | Dict of param names to ranges/choices |
|
|
49
|
+
| `backend` | `"auto"` | `"auto"`, `"claude"`, `"openai"`, `"qwen"`, `"none"` |
|
|
50
|
+
| `log_path` | `"search.jsonl"` | JSONL log file |
|
|
51
|
+
| `batch_size` | `3` | Configs per LLM call |
|
|
52
|
+
| `device` | `None` | Injected as `config["device"]` |
|
|
53
|
+
| `timeout` | `600` | Max seconds per experiment |
|
|
54
|
+
| `ml_context` | generic | Domain knowledge for the LLM |
|
|
55
|
+
|
|
56
|
+
### `run(max_evals=None)`
|
|
57
|
+
|
|
58
|
+
Runs the search loop. If `max_evals` is set, stops after that many experiments. Otherwise runs until Ctrl+C.
|
|
59
|
+
|
|
60
|
+
### Result attributes
|
|
61
|
+
|
|
62
|
+
After `run()` completes:
|
|
63
|
+
|
|
64
|
+
- `search.best_score` — lowest score seen
|
|
65
|
+
- `search.best_config` — config dict that produced it
|
|
66
|
+
- `search.best_accuracy` — accuracy of best config (if returned)
|
|
67
|
+
- `search.total_experiments` — total experiments run
|
|
68
|
+
- `search.llm_success` — LLM calls that produced valid configs
|
|
69
|
+
- `search.llm_fallback` — LLM calls that fell back to random
|
|
70
|
+
|
|
71
|
+
## train_fn contract
|
|
72
|
+
|
|
73
|
+
Your function receives a config dict and returns a result dict.
|
|
74
|
+
|
|
75
|
+
**Required return key:**
|
|
76
|
+
|
|
77
|
+
- `"score"` — float, lower is better
|
|
78
|
+
|
|
79
|
+
**Optional return keys (recommended):**
|
|
80
|
+
|
|
81
|
+
- `"train_losses"` — list of per-epoch training losses
|
|
82
|
+
- `"val_losses"` — list of per-epoch validation losses
|
|
83
|
+
- `"val_accuracies"` — list of per-epoch validation accuracies
|
|
84
|
+
- `"accuracy"` — final accuracy
|
|
85
|
+
- `"n_params"` — model parameter count
|
|
86
|
+
|
|
87
|
+
The per-epoch lists are what give the LLM its advantage — it can spot overfitting, underfitting, and learning rate issues from the curve shapes.
|
|
88
|
+
|
|
89
|
+
## Search space types
|
|
90
|
+
|
|
91
|
+
You can use plain Python types (auto-inferred) or explicit dimension objects.
|
|
92
|
+
|
|
93
|
+
### Auto-inference from tuples and lists
|
|
94
|
+
|
|
95
|
+
```python
|
|
96
|
+
search_space = {
|
|
97
|
+
"lr": (1e-4, 1e-1), # → LogUniform (name-based)
|
|
98
|
+
"wd": (1e-6, 1e-2), # → LogUniform (name-based)
|
|
99
|
+
"dropout": (0.0, 0.5), # → Uniform
|
|
100
|
+
"n_layers": (2, 8), # → IntUniform (name + int values)
|
|
101
|
+
"hidden_dim": (32, 512), # → IntUniform (name + int values)
|
|
102
|
+
"activation": ["relu", "gelu"], # → Categorical
|
|
103
|
+
"use_bn": [True, False], # → Categorical
|
|
104
|
+
}
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
Names like `lr`, `learning_rate`, `wd`, `weight_decay` automatically get log-scale sampling. Names like `n_layers`, `hidden_dim`, `num_heads` get integer sampling. Integer tuple values also trigger IntUniform.
|
|
108
|
+
|
|
109
|
+
### Explicit dimension objects
|
|
110
|
+
|
|
111
|
+
For full control over ranges and sampling:
|
|
112
|
+
|
|
113
|
+
```python
|
|
114
|
+
from neuropt import LogUniform, Uniform, IntUniform, Categorical
|
|
115
|
+
|
|
116
|
+
search_space = {
|
|
117
|
+
"lr": LogUniform(1e-4, 1e-1),
|
|
118
|
+
"momentum": Uniform(0.8, 0.99),
|
|
119
|
+
"depth": IntUniform(2, 8),
|
|
120
|
+
"optimizer": Categorical(["sgd", "adam", "adamw"]),
|
|
121
|
+
}
|
|
122
|
+
```
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
# Benchmarks
|
|
2
|
+
|
|
3
|
+
## 15-eval benchmark (14-parameter CNN search)
|
|
4
|
+
|
|
5
|
+
All methods get exactly 15 evaluations on the same search space. FashionMNIST, 5 epochs per eval, M1 MacBook.
|
|
6
|
+
|
|
7
|
+

|
|
8
|
+
|
|
9
|
+
| Method | Best Loss | Best Acc | LLM Fallbacks |
|
|
10
|
+
|--------|-----------|----------|---------------|
|
|
11
|
+
| **LLM (Claude)** | **0.385** | **85.4%** | 0/5 |
|
|
12
|
+
| Optuna TPE | 0.454 | 82.7% | — |
|
|
13
|
+
| Random Search | 0.610 | 76.7% | — |
|
|
14
|
+
| LLM (Qwen local) | 0.637 | 75.0% | 2/5 |
|
|
15
|
+
|
|
16
|
+
### Convergence
|
|
17
|
+
|
|
18
|
+
| Eval | LLM (Claude) | Optuna TPE | Random | LLM (Qwen) |
|
|
19
|
+
|------|-------------|------------|--------|-------------|
|
|
20
|
+
| 5 | **0.401** | 0.612 | 0.655 | 0.748 |
|
|
21
|
+
| 10 | **0.399** | 0.454 | 0.655 | 0.637 |
|
|
22
|
+
| 15 | **0.385** | 0.454 | 0.610 | 0.637 |
|
|
23
|
+
|
|
24
|
+
Claude was ahead of Optuna from eval 5 onward — it started with good architectural priors (residual connections, AdamW, reasonable LR) instead of discovering them through trial and error.
|
|
25
|
+
|
|
26
|
+
All benchmark results used **Claude Haiku 4.5** (the smallest, cheapest Claude model, costing pennies per run). We expect stronger results with Sonnet or Opus, which have better reasoning capabilities for complex search spaces.
|
|
27
|
+
|
|
28
|
+
### What went wrong with the other methods
|
|
29
|
+
|
|
30
|
+
**Optuna TPE** — 7 out of 15 evals scored above 1.0 (worse than random chance). With 14 parameters, TPE's surrogate model needs many more samples before it becomes useful. It found one good config at eval 8 but couldn't build on it.
|
|
31
|
+
|
|
32
|
+
**Random search** — Actually outperformed Qwen, which says more about Qwen's parse failures than about random search's quality. Random got lucky with a few configs but has no mechanism to improve.
|
|
33
|
+
|
|
34
|
+
**LLM (Qwen local)** — Failed to produce valid JSON on 2 of 5 iterations (40% fallback rate). When it did generate configs, they were reasonable but not as focused as Claude's. The local Qwen backend is experimental — it works for simpler search spaces but struggles with 14-key JSON output.
|
|
35
|
+
|
|
36
|
+
### Why Claude wins on this search space
|
|
37
|
+
|
|
38
|
+
With 14 parameters including categorical choices (activation, optimizer, pool type, residual on/off), the search space has complex interactions:
|
|
39
|
+
|
|
40
|
+
- High LR + no batch norm = training instability
|
|
41
|
+
- Deep networks + no residual = vanishing gradients
|
|
42
|
+
- Dropout + small dataset subset = unnecessary regularization
|
|
43
|
+
|
|
44
|
+
Claude starts with knowledge of these interactions. Optuna has to discover each one empirically, burning evals on configurations that any ML practitioner would avoid.
|
|
45
|
+
|
|
46
|
+
### Run it yourself
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
pip install neuropt[llm]
|
|
50
|
+
export ANTHROPIC_API_KEY="sk-ant-..."
|
|
51
|
+
|
|
52
|
+
python examples/benchmark.py
|
|
53
|
+
python examples/benchmark.py --n-evals 30 # longer run
|
|
54
|
+
python examples/benchmark.py --skip-qwen # skip local model
|
|
55
|
+
python examples/benchmark.py --skip-qwen --n-evals 50 # thorough comparison
|
|
56
|
+
```
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
# CLI Reference
|
|
2
|
+
|
|
3
|
+
## `neuropt run`
|
|
4
|
+
|
|
5
|
+
Run LLM-guided search on a training script.
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
neuropt run train.py
|
|
9
|
+
neuropt run train.py --backend claude
|
|
10
|
+
neuropt run train.py --backend none -n 50 --log results.jsonl
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
Your script must define:
|
|
14
|
+
|
|
15
|
+
- `train_fn(config)` — returns dict with at least `{"score": float}`
|
|
16
|
+
- `search_space` dict **or** `model` (nn.Module) — one of the two
|
|
17
|
+
|
|
18
|
+
Optional: `ml_context` string with domain knowledge for the LLM.
|
|
19
|
+
|
|
20
|
+
| Option | Default | Description |
|
|
21
|
+
|--------|---------|-------------|
|
|
22
|
+
| `--backend` | auto | `auto`, `claude`, `openai`, `qwen`, `none` |
|
|
23
|
+
| `--log` | search.jsonl | Log file path (supports resume) |
|
|
24
|
+
| `-b` / `--batch-size` | 3 | Configs proposed per LLM call |
|
|
25
|
+
| `-n` / `--max-evals` | unlimited | Stop after N experiments |
|
|
26
|
+
| `--device` | auto | `cuda`, `mps`, `cpu` |
|
|
27
|
+
| `--timeout` | 600 | Max seconds per experiment |
|
|
28
|
+
|
|
29
|
+
## `neuropt inspect`
|
|
30
|
+
|
|
31
|
+
Show what neuropt would search over for a given model.
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
neuropt inspect train.py
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
```
|
|
38
|
+
Model: 11,689,512 parameters
|
|
39
|
+
Activations: ReLU (9 layers)
|
|
40
|
+
BatchNorm: 20 layers
|
|
41
|
+
|
|
42
|
+
Search space (5 params):
|
|
43
|
+
activation: Categorical(['relu', 'gelu', 'silu', 'leaky_relu'])
|
|
44
|
+
use_batchnorm: Categorical([True, False])
|
|
45
|
+
lr: LogUniform(0.0001, 0.1)
|
|
46
|
+
wd: LogUniform(1e-06, 0.01)
|
|
47
|
+
optimizer: Categorical(['sgd', 'adam', 'adamw'])
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
Only works with scripts that define a `model` variable.
|
|
51
|
+
|
|
52
|
+
## `neuropt results`
|
|
53
|
+
|
|
54
|
+
Analyze a search log.
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
neuropt results search.jsonl
|
|
58
|
+
neuropt results search.jsonl --top 20
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
Shows total experiments, top N results with configs, and convergence over time.
|