claude-turing 3.2.0 → 3.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +2 -2
- package/README.md +8 -2
- package/commands/curriculum.md +43 -0
- package/commands/feature.md +42 -0
- package/commands/merge.md +24 -0
- package/commands/prune.md +26 -0
- package/commands/quantize.md +24 -0
- package/commands/surgery.md +27 -0
- package/commands/turing.md +12 -0
- package/package.json +1 -1
- package/src/install.js +2 -0
- package/src/verify.js +6 -0
- package/templates/scripts/__pycache__/architecture_surgery.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/curriculum_optimizer.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/feature_intelligence.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/model_merger.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/model_pruning.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/model_quantization.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/scaffold.cpython-314.pyc +0 -0
- package/templates/scripts/architecture_surgery.py +238 -0
- package/templates/scripts/curriculum_optimizer.py +337 -0
- package/templates/scripts/feature_intelligence.py +369 -0
- package/templates/scripts/model_merger.py +277 -0
- package/templates/scripts/model_pruning.py +182 -0
- package/templates/scripts/model_quantization.py +177 -0
- package/templates/scripts/scaffold.py +12 -0
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "turing",
|
|
3
|
-
"version": "3.2.0",
|
|
4
|
-
"description": "Autonomous ML research harness — the autoresearch loop as a formal protocol.
|
|
3
|
+
"version": "3.4.0",
|
|
4
|
+
"description": "Autonomous ML research harness — the autoresearch loop as a formal protocol. 53 commands, 2 specialized agents, model surgery (pruning + quantization + merging + architecture modification), feature & training intelligence (feature selection + curriculum optimization), model debugging (xray + sensitivity + calibration), pre-training intelligence (sanity checks + baseline generation + leakage detection), meta-intelligence (cross-project knowledge transfer + methodology audit), scaling & efficiency (scaling laws + compute budget + model distillation), model composition (ensemble + pipeline stitch + warm-start), deep analysis (experiment diff + live training monitor + regression gate), experiment orchestration (batch queue + smart retry + branching), literature integration + paper drafting, production model export, performance profiling, smart checkpoints, experiment intelligence, statistical rigor, tree-search hypothesis exploration, cost-performance frontier, model cards, model registry, hypothesis database with novelty guard, anti-cheating guardrails, and the taste-leverage loop. Inspired by Karpathy's autoresearch and the scientific method itself.",
|
|
5
5
|
"author": {
|
|
6
6
|
"name": "pragnition"
|
|
7
7
|
},
|
package/README.md
CHANGED
|
@@ -358,6 +358,12 @@ The index (`hypotheses.yaml`) is the lightweight queue. The detail files (`hypot
|
|
|
358
358
|
| `/turing:xray [exp-id]` | Internal model diagnostics — gradient flow, dead neurons, weight distributions, tree analysis |
|
|
359
359
|
| `/turing:sensitivity [exp-id]` | Hyperparameter sensitivity — rank parameters by impact, detect non-monotonic responses |
|
|
360
360
|
| `/turing:calibrate [exp-id]` | Probability calibration — ECE/MCE, reliability diagrams, Platt/isotonic/temperature scaling |
|
|
361
|
+
| `/turing:feature [--method]` | Automated feature selection — multi-method consensus ranking, redundancy, interactions |
|
|
362
|
+
| `/turing:curriculum [exp-id]` | Training curriculum optimization — difficulty scoring, strategy comparison, mislabeled sample detection |
|
|
363
|
+
| `/turing:prune <exp-id>` | Weight pruning — magnitude/structured/lottery, sparsity sweep, knee point detection |
|
|
364
|
+
| `/turing:quantize <exp-id>` | Post-training quantization — FP16/INT8, accuracy-latency comparison |
|
|
365
|
+
| `/turing:merge <exp-ids...>` | Model merging — uniform/greedy soup, TIES, DARE, zero latency cost |
|
|
366
|
+
| `/turing:surgery <exp-id>` | Architecture modification — add/remove layer, widen/narrow, swap activation |
|
|
361
367
|
|
|
362
368
|
And for fully hands-off operation:
|
|
363
369
|
|
|
@@ -542,11 +548,11 @@ Each project gets independent config, data, experiments, models, and agent memor
|
|
|
542
548
|
|
|
543
549
|
## Architecture of Turing Itself
|
|
544
550
|
|
|
545
|
-
|
|
551
|
+
53 commands, 2 agents, 10 config files, 72 template scripts, model registry, artifact contract, cost-performance frontier, model cards, tree-search exploration, statistical rigor, experiment intelligence, performance profiling, smart checkpoints, production model export, literature integration, paper section drafting, experiment orchestration (queue + retry + fork), deep analysis (diff + watch + regress), model composition (ensemble + stitch + warm), scaling & efficiency (scale + budget + distill), meta-intelligence (transfer + audit), pre-training intelligence (sanity + baseline + leak), model debugging (xray + sensitivity + calibrate), feature & training intelligence (feature + curriculum), model surgery (prune + quantize + merge + surgery), 16 ADRs. See [docs/ARCHITECTURE.md](docs/ARCHITECTURE.md) for the full codemap.
|
|
546
552
|
|
|
547
553
|
```
|
|
548
554
|
turing/
|
|
549
|
-
├── commands/
|
|
555
|
+
├── commands/ 52 skill files (core + taste-leverage + reporting + exploration + statistical rigor + experiment intelligence + performance + deployment + research workflow + orchestration + deep analysis + model composition + scaling & efficiency + meta-intelligence + pre-training intelligence + model debugging + feature & training intelligence + model surgery)
|
|
550
556
|
├── agents/ 2 agents (researcher: read/write, evaluator: read-only)
|
|
551
557
|
├── config/ 8 files (lifecycle, taxonomy, archetypes, novelty aliases)
|
|
552
558
|
├── templates/ Scaffolded into user projects by /turing:init
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: curriculum
|
|
3
|
+
description: Training curriculum optimization — order data by difficulty, compare easy-to-hard vs hard-to-easy vs self-paced strategies.
|
|
4
|
+
disable-model-invocation: true
|
|
5
|
+
argument-hint: "[exp-id] [--strategies easy_to_hard,random]"
|
|
6
|
+
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
Does the order your model sees data matter? Find out systematically.
|
|
10
|
+
|
|
11
|
+
## Steps
|
|
12
|
+
|
|
13
|
+
1. **Activate environment:**
|
|
14
|
+
```bash
|
|
15
|
+
source .venv/bin/activate
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
2. **Parse arguments from `$ARGUMENTS`:**
|
|
19
|
+
- Optional experiment ID
|
|
20
|
+
- `--strategies "easy_to_hard,hard_to_easy,self_paced,random"` — strategies to test
|
|
21
|
+
- `--json` — raw JSON output
|
|
22
|
+
|
|
23
|
+
3. **Run curriculum analysis:**
|
|
24
|
+
```bash
|
|
25
|
+
python scripts/curriculum_optimizer.py $ARGUMENTS
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
4. **Strategies tested:**
|
|
29
|
+
- **Random:** standard shuffling (control)
|
|
30
|
+
- **Easy-to-hard:** classic curriculum learning
|
|
31
|
+
- **Hard-to-easy:** anti-curriculum
|
|
32
|
+
- **Self-paced:** start easy, gradually include harder samples
|
|
33
|
+
|
|
34
|
+
5. **Report includes:** strategy comparison table with metric, convergence epoch, and speedup vs random; impossible sample detection (likely mislabeled)
|
|
35
|
+
|
|
36
|
+
6. **Saved output:** report in `experiments/curriculum/<exp-id>-curriculum.yaml`
|
|
37
|
+
|
|
38
|
+
## Examples
|
|
39
|
+
|
|
40
|
+
```
|
|
41
|
+
/turing:curriculum exp-042 # All strategies
|
|
42
|
+
/turing:curriculum --strategies easy_to_hard,random # Specific strategies
|
|
43
|
+
```
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: feature
|
|
3
|
+
description: Automated feature selection — multi-method importance consensus, redundancy detection, and interaction feature generation.
|
|
4
|
+
disable-model-invocation: true
|
|
5
|
+
argument-hint: "[--method all|importance] [--top-k 20]"
|
|
6
|
+
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
Systematically evaluate which features matter and which are noise.
|
|
10
|
+
|
|
11
|
+
## Steps
|
|
12
|
+
|
|
13
|
+
1. **Activate environment:**
|
|
14
|
+
```bash
|
|
15
|
+
source .venv/bin/activate
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
2. **Parse arguments from `$ARGUMENTS`:**
|
|
19
|
+
- `--method all|importance|selection|generation` — analysis type (default: all)
|
|
20
|
+
- `--top-k 20` — number of top features to consider
|
|
21
|
+
- `--json` — raw JSON output
|
|
22
|
+
|
|
23
|
+
3. **Run feature analysis:**
|
|
24
|
+
```bash
|
|
25
|
+
python scripts/feature_intelligence.py $ARGUMENTS
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
4. **Report includes:**
|
|
29
|
+
- Consensus ranking: features ranked by number of methods placing them in top-K
|
|
30
|
+
- Per-method ranks: mutual information, L1, tree-based
|
|
31
|
+
- Redundant pairs: features with |r| > 0.95
|
|
32
|
+
- Candidate interaction features from top consensus set
|
|
33
|
+
- Drop recommendation for zero-consensus features
|
|
34
|
+
|
|
35
|
+
5. **Saved output:** report in `experiments/features/features-*.yaml`
|
|
36
|
+
|
|
37
|
+
## Examples
|
|
38
|
+
|
|
39
|
+
```
|
|
40
|
+
/turing:feature # Full analysis
|
|
41
|
+
/turing:feature --top-k 10 # Top-10 consensus
|
|
42
|
+
```
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: merge
|
|
3
|
+
description: Model merging — average weights from multiple checkpoints into a single model (soups, TIES, DARE). Free accuracy, zero latency cost.
|
|
4
|
+
disable-model-invocation: true
|
|
5
|
+
argument-hint: "<exp-ids...> [--method uniform|greedy|ties|dare]"
|
|
6
|
+
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
Combine model weights (not predictions) into a single, better model with no latency overhead.
|
|
10
|
+
|
|
11
|
+
## Steps
|
|
12
|
+
|
|
13
|
+
1. **Activate environment:** `source .venv/bin/activate`
|
|
14
|
+
2. **Run:** `python scripts/model_merger.py $ARGUMENTS`
|
|
15
|
+
3. **Methods:** uniform soup (simple average), greedy soup (include only if improves), TIES (trim+elect+merge), DARE (drop+rescale)
|
|
16
|
+
4. **Report:** compatibility check, per-model metrics, method comparison, improvement delta
|
|
17
|
+
5. **Saved output:** `experiments/merges/merge-*.yaml`
|
|
18
|
+
|
|
19
|
+
## Examples
|
|
20
|
+
|
|
21
|
+
```
|
|
22
|
+
/turing:merge exp-042 exp-053 exp-067 # All methods
|
|
23
|
+
/turing:merge exp-042 exp-053 --method greedy # Greedy soup only
|
|
24
|
+
```
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: prune
|
|
3
|
+
description: Weight pruning — measure accuracy at different sparsity levels, find the knee point, produce a smaller/faster model.
|
|
4
|
+
disable-model-invocation: true
|
|
5
|
+
argument-hint: "<exp-id> [--sparsity 0.5,0.75,0.9] [--method magnitude|structured|lottery]"
|
|
6
|
+
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
Remove redundant weights for faster inference and smaller models.
|
|
10
|
+
|
|
11
|
+
## Steps
|
|
12
|
+
|
|
13
|
+
1. **Activate environment:** `source .venv/bin/activate`
|
|
14
|
+
2. **Run:** `python scripts/model_pruning.py $ARGUMENTS`
|
|
15
|
+
3. **Methods:** magnitude (zero small weights), structured (remove neurons), lottery (iterative with rewind)
|
|
16
|
+
4. **For tree models:** progressively reduces n_estimators
|
|
17
|
+
5. **Report:** sparsity sweep table, knee point, recommended sparsity
|
|
18
|
+
6. **Saved output:** `experiments/pruning/<exp-id>-pruning.yaml`
|
|
19
|
+
|
|
20
|
+
## Examples
|
|
21
|
+
|
|
22
|
+
```
|
|
23
|
+
/turing:prune exp-042 # Default: magnitude, 5 levels
|
|
24
|
+
/turing:prune exp-042 --method structured # Remove entire neurons
|
|
25
|
+
/turing:prune exp-042 --sparsity 0.5,0.75,0.9 # Custom levels
|
|
26
|
+
```
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: quantize
|
|
3
|
+
description: Post-training quantization — FP32→INT8/FP16, measure accuracy loss, 2-4x speedup with <0.5% accuracy loss.
|
|
4
|
+
disable-model-invocation: true
|
|
5
|
+
argument-hint: "<exp-id> [--precision int8|fp16|dynamic]"
|
|
6
|
+
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
Quantize for production. Lowest-effort optimization: 2-4x speedup, 2-4x memory reduction.
|
|
10
|
+
|
|
11
|
+
## Steps
|
|
12
|
+
|
|
13
|
+
1. **Activate environment:** `source .venv/bin/activate`
|
|
14
|
+
2. **Run:** `python scripts/model_quantization.py $ARGUMENTS`
|
|
15
|
+
3. **Precision levels:** FP32 (baseline), FP16 (GPU), INT8 dynamic (simplest), INT8 static (best accuracy)
|
|
16
|
+
4. **Report:** precision comparison table, recommended level, QAT suggestion if needed
|
|
17
|
+
5. **Saved output:** `experiments/quantization/<exp-id>-quantization.yaml`
|
|
18
|
+
|
|
19
|
+
## Examples
|
|
20
|
+
|
|
21
|
+
```
|
|
22
|
+
/turing:quantize exp-042 # Compare all precision levels
|
|
23
|
+
/turing:quantize exp-042 --precision int8 # INT8 specifically
|
|
24
|
+
```
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: surgery
|
|
3
|
+
description: Architecture modification — add/remove layers, widen/narrow, swap activations, inject skip connections. Specify what to change, system handles how.
|
|
4
|
+
disable-model-invocation: true
|
|
5
|
+
argument-hint: "<exp-id> --op <operation> [args...]"
|
|
6
|
+
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
Programmatic architecture changes with auto warm-start from existing weights.
|
|
10
|
+
|
|
11
|
+
## Steps
|
|
12
|
+
|
|
13
|
+
1. **Activate environment:** `source .venv/bin/activate`
|
|
14
|
+
2. **Run:** `python scripts/architecture_surgery.py $ARGUMENTS`
|
|
15
|
+
3. **Operations:** add-layer, remove-layer, widen, narrow, swap-activation, add-skip, add-norm, deepen, swap-objective
|
|
16
|
+
4. **For tree models:** deepen (increase max_depth), widen (more estimators), swap-objective
|
|
17
|
+
5. **Report:** operation details, config changes, parameter count delta, warm-start source
|
|
18
|
+
6. **Saved output:** `experiments/surgery/<exp-id>-<op>.yaml`
|
|
19
|
+
|
|
20
|
+
## Examples
|
|
21
|
+
|
|
22
|
+
```
|
|
23
|
+
/turing:surgery exp-042 --op widen 2 # 2x wider hidden layers
|
|
24
|
+
/turing:surgery exp-042 --op add-layer # Insert a layer
|
|
25
|
+
/turing:surgery exp-042 --op swap-activation relu gelu # ReLU → GELU
|
|
26
|
+
/turing:surgery exp-042 --op deepen # Deeper trees
|
|
27
|
+
```
|
package/commands/turing.md
CHANGED
|
@@ -56,6 +56,12 @@ You are the Turing ML research router. Detect the user's intent and route to the
|
|
|
56
56
|
| "xray", "model internals", "dead neurons", "gradient flow", "weight distribution", "inside the model" | `/turing:xray` | Analyze |
|
|
57
57
|
| "sensitivity", "which params matter", "hyperparameter importance", "parameter ranking" | `/turing:sensitivity` | Analyze |
|
|
58
58
|
| "calibrate", "calibration", "ECE", "reliability diagram", "overconfident", "probability calibration" | `/turing:calibrate` | Analyze |
|
|
59
|
+
| "feature", "features", "feature selection", "feature importance", "which features matter", "redundant features" | `/turing:feature` | Analyze |
|
|
60
|
+
| "curriculum", "training order", "easy to hard", "data ordering", "curriculum learning" | `/turing:curriculum` | Optimize |
|
|
61
|
+
| "prune", "pruning", "sparsity", "remove weights", "smaller model", "weight pruning" | `/turing:prune` | Optimize |
|
|
62
|
+
| "quantize", "quantization", "int8", "fp16", "reduce precision", "faster inference" | `/turing:quantize` | Optimize |
|
|
63
|
+
| "merge", "model soup", "merge weights", "average models", "TIES", "DARE" | `/turing:merge` | Compose |
|
|
64
|
+
| "surgery", "architecture", "add layer", "widen", "modify model", "swap activation" | `/turing:surgery` | Modify |
|
|
59
65
|
|
|
60
66
|
## Sub-commands
|
|
61
67
|
|
|
@@ -108,6 +114,12 @@ You are the Turing ML research router. Detect the user's intent and route to the
|
|
|
108
114
|
| `/turing:xray [exp-id]` | Internal model diagnostics: gradient flow, dead neurons, weight distributions, tree analysis | (inline) |
|
|
109
115
|
| `/turing:sensitivity [exp-id]` | Hyperparameter sensitivity analysis: rank parameters by impact, detect non-monotonic responses | (inline) |
|
|
110
116
|
| `/turing:calibrate [exp-id]` | Probability calibration: ECE/MCE, reliability diagrams, Platt/isotonic/temperature scaling | (inline) |
|
|
117
|
+
| `/turing:feature [--method]` | Automated feature selection: multi-method consensus ranking, redundancy, interaction generation | (inline) |
|
|
118
|
+
| `/turing:curriculum [exp-id]` | Training curriculum optimization: difficulty scoring, strategy comparison, impossible sample detection | (inline) |
|
|
119
|
+
| `/turing:prune <exp-id>` | Weight pruning: magnitude/structured/lottery, sparsity sweep, knee point detection | (inline) |
|
|
120
|
+
| `/turing:quantize <exp-id>` | Post-training quantization: FP16/INT8, accuracy-latency comparison, QAT suggestion | (inline) |
|
|
121
|
+
| `/turing:merge <exp-ids...>` | Model merging: uniform/greedy soup, TIES, DARE — free accuracy, zero latency cost | (inline) |
|
|
122
|
+
| `/turing:surgery <exp-id>` | Architecture modification: add/remove layer, widen/narrow, swap activation, skip connections | (inline) |
|
|
111
123
|
|
|
112
124
|
## Proactive Detection
|
|
113
125
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "claude-turing",
|
|
3
|
-
"version": "3.2.0",
|
|
3
|
+
"version": "3.4.0",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "Autonomous ML research harness for Claude Code. The autoresearch loop as a formal protocol — iteratively trains, evaluates, and improves ML models with structured experiment tracking, convergence detection, immutable evaluation infrastructure, and safety guardrails.",
|
|
6
6
|
"bin": {
|
package/src/install.js
CHANGED
package/src/verify.js
CHANGED
|
@@ -61,6 +61,12 @@ const EXPECTED_COMMANDS = [
|
|
|
61
61
|
"xray/SKILL.md",
|
|
62
62
|
"sensitivity/SKILL.md",
|
|
63
63
|
"calibrate/SKILL.md",
|
|
64
|
+
"feature/SKILL.md",
|
|
65
|
+
"curriculum/SKILL.md",
|
|
66
|
+
"prune/SKILL.md",
|
|
67
|
+
"quantize/SKILL.md",
|
|
68
|
+
"merge/SKILL.md",
|
|
69
|
+
"surgery/SKILL.md",
|
|
64
70
|
];
|
|
65
71
|
|
|
66
72
|
const EXPECTED_AGENTS = ["ml-researcher.md", "ml-evaluator.md"];
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Architecture modification for the autoresearch pipeline.
|
|
3
|
+
|
|
4
|
+
Programmatic architecture changes: add/remove layers, widen/narrow,
|
|
5
|
+
swap activation functions, inject skip connections, change normalization.
|
|
6
|
+
Produces a modified config and instructions for the modified experiment.
|
|
7
|
+
|
|
8
|
+
Usage:
|
|
9
|
+
python scripts/architecture_surgery.py exp-042 --op widen 2
|
|
10
|
+
python scripts/architecture_surgery.py exp-042 --op add-layer
|
|
11
|
+
python scripts/architecture_surgery.py exp-042 --op swap-activation relu gelu
|
|
12
|
+
    python scripts/architecture_surgery.py exp-042 --op deepen --json
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import argparse
|
|
18
|
+
import json
|
|
19
|
+
import math
|
|
20
|
+
import sys
|
|
21
|
+
from datetime import datetime, timezone
|
|
22
|
+
from pathlib import Path
|
|
23
|
+
|
|
24
|
+
import yaml
|
|
25
|
+
|
|
26
|
+
from scripts.turing_io import load_config, load_experiments
|
|
27
|
+
|
|
28
|
+
# Default experiment log path used when no log path is supplied.
DEFAULT_LOG_PATH = "experiments/log.jsonl"

# Names of the architecture operations this script knows about.
OPERATIONS = [
    "add-layer",
    "remove-layer",
    "widen",
    "narrow",
    "swap-activation",
    "add-skip",
    "add-norm",
    "deepen",
    "swap-objective",
]
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def plan_operation(
    operation: str,
    config: dict,
    hyperparams: dict,
    model_type: str,
    args: list[str] | None = None,
) -> dict:
    """Plan an architecture modification.

    Args:
        operation: Operation name, e.g. ``"widen"`` or ``"swap-activation"``.
        config: Full experiment config. Currently unused; kept so existing
            callers keep working.
        hyperparams: Current hyperparameters. Never mutated; the plan carries
            shallow copies as ``original_config`` and ``new_config``.
        model_type: Free-form model type string, matched case-insensitively
            by substring to decide between tree and neural handling.
        args: Optional positional operation arguments (strings from the CLI).

    Returns:
        A plan dict with keys ``operation``, ``model_type``,
        ``original_config``, ``new_config``, ``instructions`` (list of
        human-readable steps) and ``param_change`` (str or ``None``).

    Raises:
        ValueError: If ``widen``/``narrow`` receive a non-numeric factor.
    """
    args = args or []
    plan = {
        "operation": operation,
        "model_type": model_type,
        "original_config": hyperparams.copy(),
        "new_config": hyperparams.copy(),
        "instructions": [],
        "param_change": None,
    }
    new_config = plan["new_config"]
    instructions = plan["instructions"]

    # NOTE(review): substring matching is deliberately loose ("nn" catches
    # cnn/rnn/dnn) but it also matches names such as "knn" — confirm the
    # model_type vocabulary used in experiment logs.
    kind = model_type.lower()
    is_tree = any(t in kind for t in ("xgboost", "lightgbm", "forest", "gbm", "catboost"))
    is_neural = any(t in kind for t in ("mlp", "nn", "pytorch", "tensorflow", "transformer"))

    if operation == "widen":
        factor = float(args[0]) if args else 2.0
        if is_neural:
            hs = hyperparams.get("hidden_size", 256)
            new_hs = int(hs * factor)
            new_config["hidden_size"] = new_hs
            instructions.append(f"Multiply hidden dimensions: {hs} → {new_hs} ({factor}x)")
            # Explicit sign so a factor below 1 reads "-75%" rather than "+-75%".
            plan["param_change"] = f"{(factor**2 - 1)*100:+.0f}% parameters (quadratic in width)"
        elif is_tree:
            n = hyperparams.get("n_estimators", 100)
            new_n = int(n * factor)
            new_config["n_estimators"] = new_n
            instructions.append(f"Increase estimators: {n} → {new_n}")
            plan["param_change"] = f"{(factor - 1)*100:+.0f}% trees"
        else:
            instructions.append(f"Widen by {factor}x — adjust model-specific width parameters")

    elif operation == "narrow":
        factor = float(args[0]) if args else 0.5
        if is_neural:
            hs = hyperparams.get("hidden_size", 256)
            new_hs = max(8, int(hs * factor))  # never shrink below 8 units
            new_config["hidden_size"] = new_hs
            instructions.append(f"Reduce hidden dimensions: {hs} → {new_hs} ({factor}x)")
        elif is_tree:
            n = hyperparams.get("n_estimators", 100)
            new_n = max(1, int(n * factor))  # keep at least one estimator
            new_config["n_estimators"] = new_n
            instructions.append(f"Reduce estimators: {n} → {new_n}")
        else:
            # Mirror the "widen" fallback instead of returning an empty plan.
            instructions.append(f"Narrow by {factor}x — adjust model-specific width parameters")

    elif operation == "add-layer":
        if is_neural:
            n_layers = hyperparams.get("n_layers", hyperparams.get("layers", 3))
            new_config["n_layers"] = n_layers + 1
            instructions.extend([
                f"Add layer: {n_layers} → {n_layers + 1}",
                "New layer initialized with default weights",
                "Auto warm-start: existing layers loaded from source",
            ])
            plan["param_change"] = f"+1 layer ({n_layers} → {n_layers + 1})"
        else:
            instructions.append("add-layer not applicable for non-neural models")

    elif operation == "remove-layer":
        if is_neural:
            n_layers = hyperparams.get("n_layers", hyperparams.get("layers", 3))
            if n_layers > 1:
                new_config["n_layers"] = n_layers - 1
                instructions.append(f"Remove layer: {n_layers} → {n_layers - 1}")
            else:
                instructions.append("Cannot remove — only 1 layer remaining")
        else:
            instructions.append("remove-layer not applicable for non-neural models")

    elif operation == "deepen":
        if is_tree:
            depth = hyperparams.get("max_depth", 6)
            new_depth = depth + 2
            new_config["max_depth"] = new_depth
            instructions.append(f"Increase max depth: {depth} → {new_depth}")
        elif is_neural:
            # Same "layers" fallback key that add-layer / remove-layer honour.
            n_layers = hyperparams.get("n_layers", hyperparams.get("layers", 3))
            new_config["n_layers"] = n_layers + 2
            instructions.append(f"Add 2 layers: {n_layers} → {n_layers + 2}")
        else:
            instructions.append("deepen not applicable for this model type")

    elif operation == "swap-activation":
        current_act = hyperparams.get("activation", "relu")
        if len(args) >= 2:
            from_act, to_act = args[0], args[1]
        elif len(args) == 1:
            # A single argument is the target; the source is the current value.
            from_act, to_act = current_act, args[0]
        else:
            from_act, to_act = current_act, "gelu"
        new_config["activation"] = to_act
        instructions.append(f"Swap activation: {from_act} → {to_act}")

    elif operation == "add-skip":
        new_config["skip_connections"] = True
        instructions.append("Inject residual/skip connections between layers")

    elif operation == "add-norm":
        norm_type = args[0] if args else "batch_norm"
        new_config["normalization"] = norm_type
        instructions.append(f"Add {norm_type} after each layer")

    elif operation == "swap-objective":
        current_obj = hyperparams.get("objective", "logloss")
        if len(args) >= 2:
            from_obj, to_obj = args[0], args[1]
        elif len(args) == 1:
            # A single argument is the target; the source is the current value.
            from_obj, to_obj = current_obj, args[0]
        else:
            from_obj, to_obj = current_obj, "focal"
        new_config["objective"] = to_obj
        instructions.append(f"Swap objective: {from_obj} → {to_obj}")

    else:
        instructions.append(f"Unknown operation: {operation}")

    return plan
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def surgery_report(
    exp_id: str,
    operation: str,
    op_args: list[str] | None = None,
    config_path: str = "config.yaml",
    log_path: str = DEFAULT_LOG_PATH,
) -> dict:
    """Build the surgery report for one logged experiment.

    Looks up the first log record whose ``experiment_id`` equals ``exp_id``,
    plans ``operation`` against that record's hyperparameters, and wraps the
    plan with a UTC timestamp and the warm-start source.

    Args:
        exp_id: Experiment identifier to look up in the log.
        operation: Operation name passed through to ``plan_operation``.
        op_args: Optional operation arguments passed through unchanged.
        config_path: Accepted for interface compatibility; not read here.
        log_path: Path handed to ``load_experiments``.

    Returns:
        ``{"error": ...}`` when the experiment is not found, otherwise a dict
        with ``generated_at``, ``experiment_id``, ``plan`` and
        ``warm_start_from``.
    """
    record = None
    for candidate in load_experiments(log_path):
        if candidate.get("experiment_id") == exp_id:
            record = candidate
            break

    if not record:
        return {"error": f"Experiment {exp_id} not found"}

    exp_config = record.get("config", {})
    plan = plan_operation(
        operation,
        exp_config,
        exp_config.get("hyperparams", {}),
        exp_config.get("model_type", "unknown"),
        op_args,
    )

    report = {}
    report["generated_at"] = datetime.now(timezone.utc).isoformat()
    report["experiment_id"] = exp_id
    report["plan"] = plan
    # The modified experiment warm-starts from the experiment it was derived from.
    report["warm_start_from"] = exp_id
    return report
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def save_surgery_report(report: dict, output_dir: str = "experiments/surgery") -> Path:
    """Write ``report`` as YAML and return the path of the written file.

    The file is named ``<experiment_id>-<operation>.yaml`` inside
    ``output_dir``; missing values fall back to ``"unknown"``. The directory
    is created if it does not exist.
    """
    target_dir = Path(output_dir)
    target_dir.mkdir(parents=True, exist_ok=True)

    experiment = report.get("experiment_id", "unknown")
    operation = report.get("plan", {}).get("operation", "unknown")
    destination = target_dir / f"{experiment}-{operation}.yaml"

    with open(destination, "w") as handle:
        # sort_keys=False keeps the report's own key order in the file.
        yaml.dump(report, handle, default_flow_style=False, sort_keys=False)
    return destination
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
def format_surgery_report(report: dict) -> str:
    """Render a surgery report dict as Markdown text.

    An error report becomes a single ``ERROR: ...`` line. Otherwise the
    output has a title, timestamp (truncated to seconds), model type, a
    numbered instruction list, an optional parameter-change line, an optional
    list of changed config keys, and the warm-start source.
    """
    if "error" in report:
        return f"ERROR: {report['error']}"

    plan = report.get("plan", {})
    operation = plan.get("operation", "?")
    experiment = report.get("experiment_id", "?")
    # First 19 characters of an ISO timestamp are YYYY-MM-DDTHH:MM:SS.
    stamp = report.get("generated_at", "N/A")[:19]

    out = [
        f"# Surgery: {operation} ({experiment})",
        "",
        f"*Generated {stamp}*",
        f"**Model:** {plan.get('model_type', '?')}",
        "",
        "## Instructions",
        "",
    ]
    out += [
        f"{number}. {step}"
        for number, step in enumerate(plan.get("instructions", []), 1)
    ]
    out.append("")

    if plan.get("param_change"):
        out += [f"**Parameter change:** {plan['param_change']}", ""]

    before = plan.get("original_config", {})
    after = plan.get("new_config", {})
    # Only keys whose value differs from the original are listed.
    diffs = [
        (key, before.get(key), value)
        for key, value in after.items()
        if before.get(key) != value
    ]
    if diffs:
        out += ["## Config Changes", ""]
        out += [f"- `{key}`: {old} → {updated}" for key, old, updated in diffs]
        out.append("")

    out.append(f"**Warm-start from:** {report.get('warm_start_from', '?')}")
    return "\n".join(out)
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
def main() -> None:
    """Command-line entry point.

    Parses ``<exp_id> --op <operation> [op_args...]``, builds the surgery
    report, saves it as YAML when the experiment was found (the saved path
    is reported on stderr), and prints the report to stdout as JSON with
    ``--json`` or as Markdown otherwise.
    """
    parser = argparse.ArgumentParser(description="Architecture modification")
    parser.add_argument("exp_id")
    parser.add_argument("--op", required=True, help="Operation name")
    parser.add_argument("op_args", nargs="*", help="Operation arguments")
    parser.add_argument("--config", default="config.yaml")
    parser.add_argument("--log", default=DEFAULT_LOG_PATH)
    parser.add_argument("--json", action="store_true")

    # Positionals are split around --op ("exp-042 --op widen 2"). On
    # interpreters without the gh-59317 fix, parse_args() gives the "*"
    # positional an empty match before --op and then rejects the trailing
    # "2"; intermixed parsing collects all positionals after the optionals.
    args = parser.parse_intermixed_args()

    report = surgery_report(args.exp_id, args.op, args.op_args, args.config, args.log)
    if "error" not in report:
        fp = save_surgery_report(report)
        print(f"Saved to {fp}", file=sys.stderr)

    if args.json:
        # default=str stringifies anything json cannot encode natively.
        print(json.dumps(report, indent=2, default=str))
    else:
        print(format_surgery_report(report))


if __name__ == "__main__":
    main()
|