holoscript-trait-inference 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- holoscript_trait_inference-0.1.0/PKG-INFO +193 -0
- holoscript_trait_inference-0.1.0/README.md +168 -0
- holoscript_trait_inference-0.1.0/holoscript_trait_inference.egg-info/PKG-INFO +193 -0
- holoscript_trait_inference-0.1.0/holoscript_trait_inference.egg-info/SOURCES.txt +19 -0
- holoscript_trait_inference-0.1.0/holoscript_trait_inference.egg-info/dependency_links.txt +1 -0
- holoscript_trait_inference-0.1.0/holoscript_trait_inference.egg-info/entry_points.txt +2 -0
- holoscript_trait_inference-0.1.0/holoscript_trait_inference.egg-info/requires.txt +17 -0
- holoscript_trait_inference-0.1.0/holoscript_trait_inference.egg-info/top_level.txt +1 -0
- holoscript_trait_inference-0.1.0/pyproject.toml +59 -0
- holoscript_trait_inference-0.1.0/setup.cfg +4 -0
- holoscript_trait_inference-0.1.0/trait_inference/__init__.py +27 -0
- holoscript_trait_inference-0.1.0/trait_inference/baselines.py +238 -0
- holoscript_trait_inference-0.1.0/trait_inference/cli.py +498 -0
- holoscript_trait_inference-0.1.0/trait_inference/dataset.py +348 -0
- holoscript_trait_inference-0.1.0/trait_inference/eval/__init__.py +21 -0
- holoscript_trait_inference-0.1.0/trait_inference/eval/ablations.py +336 -0
- holoscript_trait_inference-0.1.0/trait_inference/metrics.py +291 -0
- holoscript_trait_inference-0.1.0/trait_inference/model/__init__.py +34 -0
- holoscript_trait_inference-0.1.0/trait_inference/model/decoder.py +192 -0
- holoscript_trait_inference-0.1.0/trait_inference/model/sweep.py +242 -0
- holoscript_trait_inference-0.1.0/trait_inference/model/trainer.py +237 -0
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: holoscript-trait-inference
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Paper 19 (ATI) — Automated Trait Inference for HoloScript .hsplus. Phase 3 training pipeline + baselines + eval harness.
|
|
5
|
+
Author: HoloScript Core
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Keywords: holoscript,trait-inference,paper-19,ml
|
|
8
|
+
Requires-Python: >=3.10
|
|
9
|
+
Description-Content-Type: text/markdown
|
|
10
|
+
Requires-Dist: numpy>=1.24
|
|
11
|
+
Requires-Dist: scikit-learn>=1.3
|
|
12
|
+
Requires-Dist: scipy>=1.11
|
|
13
|
+
Requires-Dist: pandas>=2.0
|
|
14
|
+
Provides-Extra: model
|
|
15
|
+
Requires-Dist: torch>=2.1; extra == "model"
|
|
16
|
+
Requires-Dist: transformers>=4.40; extra == "model"
|
|
17
|
+
Requires-Dist: sentence-transformers>=2.6; extra == "model"
|
|
18
|
+
Requires-Dist: outlines>=0.0.40; extra == "model"
|
|
19
|
+
Requires-Dist: accelerate>=0.30; extra == "model"
|
|
20
|
+
Provides-Extra: dev
|
|
21
|
+
Requires-Dist: pytest>=7.4; extra == "dev"
|
|
22
|
+
Requires-Dist: pytest-cov>=4.1; extra == "dev"
|
|
23
|
+
Requires-Dist: ruff>=0.1.0; extra == "dev"
|
|
24
|
+
Requires-Dist: mypy>=1.7; extra == "dev"
|
|
25
|
+
|
|
26
|
+
# trait-inference — Paper 19 (ATI) Phase 3 Pipeline
|
|
27
|
+
|
|
28
|
+
Python package implementing the **frozen** Paper 19 (Automated Trait
|
|
29
|
+
Inference) Phase 3 training pipeline + baselines + eval harness, per:
|
|
30
|
+
|
|
31
|
+
- Spec: `ai-ecosystem/research/paper-19-trait-inference/phase-1-spec.md`
|
|
32
|
+
- Pre-registration: `ai-ecosystem/research/paper-19-trait-inference/preregistration.md`
|
|
33
|
+
- Brain: `ai-ecosystem/compositions/trait-inference-brain.hsplus`
|
|
34
|
+
- GPU-claim ticket: `task_1777072040695_mrr3`
|
|
35
|
+
|
|
36
|
+
**Status (2026-04-24)**: Phase 1 (CPU pipeline) shipped — dataset
|
|
37
|
+
loader/audit/splits + 3 baselines (keyword + TF-IDF + Brittney-stub) +
|
|
38
|
+
eval metrics with bootstrap CI + CLI runner + Vast.ai launcher.
|
|
39
|
+
**Phase 2 (model module)** — sentence-transformer encoder +
|
|
40
|
+
constrained-decoder LLM, requires the `[model]` extra — pending follow-up commit.
|
|
41
|
+
|
|
42
|
+
---
|
|
43
|
+
|
|
44
|
+
## Quick start
|
|
45
|
+
|
|
46
|
+
### 1. Install (CPU baselines + eval only)
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
cd packages/trait-inference
|
|
50
|
+
pip install -e .
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
### 2. Smoke test (synthetic data, end-to-end)
|
|
54
|
+
|
|
55
|
+
Validates the pipeline runs without needing real data or GPU. ~2 min.
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
trait-inference smoke --n 200 --bootstrap-b 200
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
Should emit a JSON measurement bundle to stdout with `"smoke_test": true,
|
|
62
|
+
"passed": true`. Use this to validate a fresh install before committing
|
|
63
|
+
to a Vast.ai run.
|
|
64
|
+
|
|
65
|
+
### 3. Extract trait label space from HoloScript core
|
|
66
|
+
|
|
67
|
+
```bash
|
|
68
|
+
trait-inference extract-traits \
|
|
69
|
+
--constants-dir ../core/src/traits/constants/ \
|
|
70
|
+
--output trait_inference/data/trait_label_space.json \
|
|
71
|
+
--verbose
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
Reads the 113 TS constant files, extracts string-array exports, writes
|
|
75
|
+
a single JSON consumed by the dataset + model modules.
|
|
76
|
+
|
|
77
|
+
### 4. Audit a real dataset
|
|
78
|
+
|
|
79
|
+
```bash
|
|
80
|
+
trait-inference dataset audit data/atimark.jsonl --output measurements/audit.json
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
Exits with status 0 if the dataset passes spec §1.4 acceptance (≥2k pairs,
|
|
84
|
+
≥300 novel combinations, ≥500 from each major source, ≥200 negatives, no
|
|
85
|
+
novelty leak); otherwise exits with status 1 and reports an `issues` list.
|
|
86
|
+
|
|
87
|
+
### 5. Run baselines
|
|
88
|
+
|
|
89
|
+
```bash
|
|
90
|
+
trait-inference dataset split data/atimark.jsonl --output-dir splits/ --seed 42
|
|
91
|
+
trait-inference baseline run keyword --train splits/train.jsonl --eval splits/held_out_novel.jsonl --output measurements/keyword.json
|
|
92
|
+
trait-inference baseline run tfidf --train splits/train.jsonl --eval splits/held_out_novel.jsonl --val splits/val.jsonl --tune-threshold --output measurements/tfidf.json
|
|
93
|
+
trait-inference baseline run brittney --train splits/train.jsonl --eval splits/held_out_novel.jsonl --output measurements/brittney.json
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
Each emits `f1_macro`, `exact_match`, `bootstrap_ci`, sample predictions.
|
|
97
|
+
|
|
98
|
+
---
|
|
99
|
+
|
|
100
|
+
## Vast.ai GPU launch
|
|
101
|
+
|
|
102
|
+
Orchestration script: `scripts/vast-launch-paper-19.ps1` (PowerShell;
|
|
103
|
+
mirrors the existing `ai-ecosystem/scripts/vast-bench-runner.ps1`
|
|
104
|
+
pattern).
|
|
105
|
+
|
|
106
|
+
```powershell
|
|
107
|
+
# Cheapest end-to-end pipeline validation (~$0.30, ~5 min)
|
|
108
|
+
.\scripts\vast-launch-paper-19.ps1 -Phase smoke -Label paper19-smoke
|
|
109
|
+
|
|
110
|
+
# Run all 3 baselines on the real dataset (~$0.30, ~10 min)
|
|
111
|
+
.\scripts\vast-launch-paper-19.ps1 -Phase baseline `
|
|
112
|
+
-DatasetPath data/atimark.jsonl -Label paper19-baselines
|
|
113
|
+
|
|
114
|
+
# Full training run (REQUIRES preregistration.md frozen + Phase 2 model module shipped)
|
|
115
|
+
.\scripts\vast-launch-paper-19.ps1 -Phase train -GpuName RTX_4090 `
|
|
116
|
+
-DatasetPath data/atimark.jsonl -Label paper19-headline-cell-1
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
Pre-flight: requires `vastai set api-key` configured (see
|
|
120
|
+
`ai-ecosystem/.env` `VAST_API_KEY`); requires `~/.ssh/id_rsa` with the
|
|
121
|
+
matching public key registered on the Vast.ai account; requires
|
|
122
|
+
≥$0.50 credit for `train`.
|
|
123
|
+
|
|
124
|
+
---
|
|
125
|
+
|
|
126
|
+
## Cost estimate
|
|
127
|
+
|
|
128
|
+
Per `ai-ecosystem/research/paper-19-trait-inference/README.md` Phase 2-4 task table + GPU-claim ticket `_mrr3`:
|
|
129
|
+
|
|
130
|
+
| Job | GPU | Hours | Cost |
|
|
131
|
+
| --------------------------------------------- | -------- | ------------------------------: | -----: |
|
|
132
|
+
| Smoke test | RTX 4090 | 0.1 | $0.03 |
|
|
133
|
+
| Baselines (CPU-bound) | RTX 4090 | 0.2 | $0.06 |
|
|
134
|
+
| Single training cell | RTX 4090 | ~6 | ~$1.80 |
|
|
135
|
+
| Full sweep (30 cells × N=5 reseed = 150 runs) | RTX 4090 | ~900 (parallel: 30 GPUs × 30hr) | ~$240 |
|
|
136
|
+
|
|
137
|
+
(A100 estimates are roughly 4-8× higher; A100 supply is also tighter.
|
|
138
|
+
4090 is sufficient for ≤1B-param decoder per spec §3.1.)
|
|
139
|
+
|
|
140
|
+
---
|
|
141
|
+
|
|
142
|
+
## Per-spec deliverable map
|
|
143
|
+
|
|
144
|
+
| Spec section | Module | Status |
|
|
145
|
+
| ------------------------------------ | ------------------------------------------------------------------- | ------------------------------------------------ |
|
|
146
|
+
| §1.1 Sourcing 3-source mix | `dataset.py` Pair + Source | done (loader; data construction is Phase 2 task) |
|
|
147
|
+
| §1.2 Schema | `dataset.py` Pair dataclass | done |
|
|
148
|
+
| §1.3 Splits (train/val/indist/novel) | `dataset.py` make_splits | done |
|
|
149
|
+
| §1.4 Audit protocol | `dataset.py` audit + AuditReport | done |
|
|
150
|
+
| §2.1 Keyword baseline | `baselines.py` KeywordBaseline | done |
|
|
151
|
+
| §2.2 TF-IDF + LogReg baseline | `baselines.py` TfidfLogregBaseline | done |
|
|
152
|
+
| §2.3 Brittney few-shot baseline | `baselines.py` BrittneyFewShotBaseline | stub (real impl needs Brittney API integration) |
|
|
153
|
+
| §3.1 Constrained-decoder model | `model/` (Phase 2 commit) | pending |
|
|
154
|
+
| §3.2 Conditioning fields | `model/` (Phase 2 commit) | pending |
|
|
155
|
+
| §3.3 Hyperparameter sweep | `model/sweep.py` (Phase 2 commit) | pending |
|
|
156
|
+
| §4.1 Metric definitions | `metrics.py` f1_macro, f1_micro, exact_match_rate, bootstrap_ci | done |
|
|
157
|
+
| §4.2 Statistical protocol | `metrics.py` bootstrap_ci, evaluate_headline | done |
|
|
158
|
+
| §4.3 Ablation matrix | `eval/ablations.py` (Phase 2 commit) | pending |
|
|
159
|
+
| §4.4 Required user study | (separate UX-research task) | pending |
|
|
160
|
+
| §4.5 Pre-registration freeze | `ai-ecosystem/research/paper-19-trait-inference/preregistration.md` | FROZEN (do not edit) |
|
|
161
|
+
|
|
162
|
+
---
|
|
163
|
+
|
|
164
|
+
## Anti-pattern guards
|
|
165
|
+
|
|
166
|
+
Binding — inherited from `compositions/trait-inference-brain.hsplus`.
|
|
167
|
+
|
|
168
|
+
- **No train-set evaluation.** Headline metric on novel-combination split only.
|
|
169
|
+
- **No easy-split-only F1.** Reports include both indist (sanity) and novel (headline).
|
|
170
|
+
- **No single-source dataset.** Audit rejects datasets <500 from any of {existing, brittney, community}.
|
|
171
|
+
- **No optional user study.** §4.4 is required, not optional (per F.031).
|
|
172
|
+
- **No after-the-fact threshold-shopping.** preregistration.md is frozen before any Phase 3 board task is filed.
|
|
173
|
+
- **No qualitative-only claims.** ML venue requires numbers; pipeline emits structured measurements.
|
|
174
|
+
- **No validity gap as "scoped contribution"** — constrained-decoding architecture (Phase 2 module) bakes ≥90% validity into the decoder, not into a post-filter.
|
|
175
|
+
|
|
176
|
+
---
|
|
177
|
+
|
|
178
|
+
## Known limitations / future work
|
|
179
|
+
|
|
180
|
+
- Brittney few-shot baseline is a stub returning empty predictions; real impl needs HoloScript MCP integration (separate task).
|
|
181
|
+
- Constrained-decoder model module (`model/`) is the Phase 2 deliverable — not in this commit.
|
|
182
|
+
- Training loop + ablation matrix runner pending Phase 2.
|
|
183
|
+
- User study (Phase 4 §4.4) is a separate UX-research deliverable.
|
|
184
|
+
- The PowerShell Vast.ai launcher targets Windows; a bash equivalent for macOS/Linux is a follow-up.
|
|
185
|
+
|
|
186
|
+
---
|
|
187
|
+
|
|
188
|
+
## Provenance
|
|
189
|
+
|
|
190
|
+
- Authored by `trait-inference-brain` (`compositions/trait-inference-brain.hsplus`).
|
|
191
|
+
- GPU-claim ticket: `task_1777072040695_mrr3` (live on team_1775935947314_f0noxi board).
|
|
192
|
+
- Capability-build provenance commit: `fc294af` (lean-theorist-brain — sibling).
|
|
193
|
+
- F.031 pre-emptions baked into spec; constrained decoding ships in Phase 2 model module.
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
# trait-inference — Paper 19 (ATI) Phase 3 Pipeline
|
|
2
|
+
|
|
3
|
+
Python package implementing the **frozen** Paper 19 (Automated Trait
|
|
4
|
+
Inference) Phase 3 training pipeline + baselines + eval harness, per:
|
|
5
|
+
|
|
6
|
+
- Spec: `ai-ecosystem/research/paper-19-trait-inference/phase-1-spec.md`
|
|
7
|
+
- Pre-registration: `ai-ecosystem/research/paper-19-trait-inference/preregistration.md`
|
|
8
|
+
- Brain: `ai-ecosystem/compositions/trait-inference-brain.hsplus`
|
|
9
|
+
- GPU-claim ticket: `task_1777072040695_mrr3`
|
|
10
|
+
|
|
11
|
+
**Status (2026-04-24)**: Phase 1 (CPU pipeline) shipped — dataset
|
|
12
|
+
loader/audit/splits + 3 baselines (keyword + TF-IDF + Brittney-stub) +
|
|
13
|
+
eval metrics with bootstrap CI + CLI runner + Vast.ai launcher.
|
|
14
|
+
**Phase 2 (model module)** — sentence-transformer encoder +
|
|
15
|
+
constrained-decoder LLM, requires the `[model]` extra — pending follow-up commit.
|
|
16
|
+
|
|
17
|
+
---
|
|
18
|
+
|
|
19
|
+
## Quick start
|
|
20
|
+
|
|
21
|
+
### 1. Install (CPU baselines + eval only)
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
cd packages/trait-inference
|
|
25
|
+
pip install -e .
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
### 2. Smoke test (synthetic data, end-to-end)
|
|
29
|
+
|
|
30
|
+
Validates the pipeline runs without needing real data or GPU. ~2 min.
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
trait-inference smoke --n 200 --bootstrap-b 200
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
Should emit a JSON measurement bundle to stdout with `"smoke_test": true,
|
|
37
|
+
"passed": true`. Use this to validate a fresh install before committing
|
|
38
|
+
to a Vast.ai run.
|
|
39
|
+
|
|
40
|
+
### 3. Extract trait label space from HoloScript core
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
trait-inference extract-traits \
|
|
44
|
+
--constants-dir ../core/src/traits/constants/ \
|
|
45
|
+
--output trait_inference/data/trait_label_space.json \
|
|
46
|
+
--verbose
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
Reads the 113 TS constant files, extracts string-array exports, writes
|
|
50
|
+
a single JSON consumed by the dataset + model modules.
|
|
51
|
+
|
|
52
|
+
### 4. Audit a real dataset
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
trait-inference dataset audit data/atimark.jsonl --output measurements/audit.json
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
Exits with status 0 if the dataset passes spec §1.4 acceptance (≥2k pairs,
|
|
59
|
+
≥300 novel combinations, ≥500 from each major source, ≥200 negatives, no
|
|
60
|
+
novelty leak); otherwise exits with status 1 and reports an `issues` list.
|
|
61
|
+
|
|
62
|
+
### 5. Run baselines
|
|
63
|
+
|
|
64
|
+
```bash
|
|
65
|
+
trait-inference dataset split data/atimark.jsonl --output-dir splits/ --seed 42
|
|
66
|
+
trait-inference baseline run keyword --train splits/train.jsonl --eval splits/held_out_novel.jsonl --output measurements/keyword.json
|
|
67
|
+
trait-inference baseline run tfidf --train splits/train.jsonl --eval splits/held_out_novel.jsonl --val splits/val.jsonl --tune-threshold --output measurements/tfidf.json
|
|
68
|
+
trait-inference baseline run brittney --train splits/train.jsonl --eval splits/held_out_novel.jsonl --output measurements/brittney.json
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
Each emits `f1_macro`, `exact_match`, `bootstrap_ci`, sample predictions.
|
|
72
|
+
|
|
73
|
+
---
|
|
74
|
+
|
|
75
|
+
## Vast.ai GPU launch
|
|
76
|
+
|
|
77
|
+
Orchestration script: `scripts/vast-launch-paper-19.ps1` (PowerShell;
|
|
78
|
+
mirrors the existing `ai-ecosystem/scripts/vast-bench-runner.ps1`
|
|
79
|
+
pattern).
|
|
80
|
+
|
|
81
|
+
```powershell
|
|
82
|
+
# Cheapest end-to-end pipeline validation (~$0.30, ~5 min)
|
|
83
|
+
.\scripts\vast-launch-paper-19.ps1 -Phase smoke -Label paper19-smoke
|
|
84
|
+
|
|
85
|
+
# Run all 3 baselines on the real dataset (~$0.30, ~10 min)
|
|
86
|
+
.\scripts\vast-launch-paper-19.ps1 -Phase baseline `
|
|
87
|
+
-DatasetPath data/atimark.jsonl -Label paper19-baselines
|
|
88
|
+
|
|
89
|
+
# Full training run (REQUIRES preregistration.md frozen + Phase 2 model module shipped)
|
|
90
|
+
.\scripts\vast-launch-paper-19.ps1 -Phase train -GpuName RTX_4090 `
|
|
91
|
+
-DatasetPath data/atimark.jsonl -Label paper19-headline-cell-1
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
Pre-flight: requires `vastai set api-key` configured (see
|
|
95
|
+
`ai-ecosystem/.env` `VAST_API_KEY`); requires `~/.ssh/id_rsa` with the
|
|
96
|
+
matching public key registered on the Vast.ai account; requires
|
|
97
|
+
≥$0.50 credit for `train`.
|
|
98
|
+
|
|
99
|
+
---
|
|
100
|
+
|
|
101
|
+
## Cost estimate
|
|
102
|
+
|
|
103
|
+
Per `ai-ecosystem/research/paper-19-trait-inference/README.md` Phase 2-4 task table + GPU-claim ticket `_mrr3`:
|
|
104
|
+
|
|
105
|
+
| Job | GPU | Hours | Cost |
|
|
106
|
+
| --------------------------------------------- | -------- | ------------------------------: | -----: |
|
|
107
|
+
| Smoke test | RTX 4090 | 0.1 | $0.03 |
|
|
108
|
+
| Baselines (CPU-bound) | RTX 4090 | 0.2 | $0.06 |
|
|
109
|
+
| Single training cell | RTX 4090 | ~6 | ~$1.80 |
|
|
110
|
+
| Full sweep (30 cells × N=5 reseed = 150 runs) | RTX 4090 | ~900 (parallel: 30 GPUs × 30hr) | ~$240 |
|
|
111
|
+
|
|
112
|
+
(A100 estimates are roughly 4-8× higher; A100 supply is also tighter.
|
|
113
|
+
4090 is sufficient for ≤1B-param decoder per spec §3.1.)
|
|
114
|
+
|
|
115
|
+
---
|
|
116
|
+
|
|
117
|
+
## Per-spec deliverable map
|
|
118
|
+
|
|
119
|
+
| Spec section | Module | Status |
|
|
120
|
+
| ------------------------------------ | ------------------------------------------------------------------- | ------------------------------------------------ |
|
|
121
|
+
| §1.1 Sourcing 3-source mix | `dataset.py` Pair + Source | done (loader; data construction is Phase 2 task) |
|
|
122
|
+
| §1.2 Schema | `dataset.py` Pair dataclass | done |
|
|
123
|
+
| §1.3 Splits (train/val/indist/novel) | `dataset.py` make_splits | done |
|
|
124
|
+
| §1.4 Audit protocol | `dataset.py` audit + AuditReport | done |
|
|
125
|
+
| §2.1 Keyword baseline | `baselines.py` KeywordBaseline | done |
|
|
126
|
+
| §2.2 TF-IDF + LogReg baseline | `baselines.py` TfidfLogregBaseline | done |
|
|
127
|
+
| §2.3 Brittney few-shot baseline | `baselines.py` BrittneyFewShotBaseline | stub (real impl needs Brittney API integration) |
|
|
128
|
+
| §3.1 Constrained-decoder model | `model/` (Phase 2 commit) | pending |
|
|
129
|
+
| §3.2 Conditioning fields | `model/` (Phase 2 commit) | pending |
|
|
130
|
+
| §3.3 Hyperparameter sweep | `model/sweep.py` (Phase 2 commit) | pending |
|
|
131
|
+
| §4.1 Metric definitions | `metrics.py` f1_macro, f1_micro, exact_match_rate, bootstrap_ci | done |
|
|
132
|
+
| §4.2 Statistical protocol | `metrics.py` bootstrap_ci, evaluate_headline | done |
|
|
133
|
+
| §4.3 Ablation matrix | `eval/ablations.py` (Phase 2 commit) | pending |
|
|
134
|
+
| §4.4 Required user study | (separate UX-research task) | pending |
|
|
135
|
+
| §4.5 Pre-registration freeze | `ai-ecosystem/research/paper-19-trait-inference/preregistration.md` | FROZEN (do not edit) |
|
|
136
|
+
|
|
137
|
+
---
|
|
138
|
+
|
|
139
|
+
## Anti-pattern guards
|
|
140
|
+
|
|
141
|
+
Binding — inherited from `compositions/trait-inference-brain.hsplus`.
|
|
142
|
+
|
|
143
|
+
- **No train-set evaluation.** Headline metric on novel-combination split only.
|
|
144
|
+
- **No easy-split-only F1.** Reports include both indist (sanity) and novel (headline).
|
|
145
|
+
- **No single-source dataset.** Audit rejects datasets <500 from any of {existing, brittney, community}.
|
|
146
|
+
- **No optional user study.** §4.4 is required, not optional (per F.031).
|
|
147
|
+
- **No after-the-fact threshold-shopping.** preregistration.md is frozen before any Phase 3 board task is filed.
|
|
148
|
+
- **No qualitative-only claims.** ML venue requires numbers; pipeline emits structured measurements.
|
|
149
|
+
- **No validity gap as "scoped contribution"** — constrained-decoding architecture (Phase 2 module) bakes ≥90% validity into the decoder, not into a post-filter.
|
|
150
|
+
|
|
151
|
+
---
|
|
152
|
+
|
|
153
|
+
## Known limitations / future work
|
|
154
|
+
|
|
155
|
+
- Brittney few-shot baseline is a stub returning empty predictions; real impl needs HoloScript MCP integration (separate task).
|
|
156
|
+
- Constrained-decoder model module (`model/`) is the Phase 2 deliverable — not in this commit.
|
|
157
|
+
- Training loop + ablation matrix runner pending Phase 2.
|
|
158
|
+
- User study (Phase 4 §4.4) is a separate UX-research deliverable.
|
|
159
|
+
- The PowerShell Vast.ai launcher targets Windows; a bash equivalent for macOS/Linux is a follow-up.
|
|
160
|
+
|
|
161
|
+
---
|
|
162
|
+
|
|
163
|
+
## Provenance
|
|
164
|
+
|
|
165
|
+
- Authored by `trait-inference-brain` (`compositions/trait-inference-brain.hsplus`).
|
|
166
|
+
- GPU-claim ticket: `task_1777072040695_mrr3` (live on team_1775935947314_f0noxi board).
|
|
167
|
+
- Capability-build provenance commit: `fc294af` (lean-theorist-brain — sibling).
|
|
168
|
+
- F.031 pre-emptions baked into spec; constrained decoding ships in Phase 2 model module.
|
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: holoscript-trait-inference
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Paper 19 (ATI) — Automated Trait Inference for HoloScript .hsplus. Phase 3 training pipeline + baselines + eval harness.
|
|
5
|
+
Author: HoloScript Core
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Keywords: holoscript,trait-inference,paper-19,ml
|
|
8
|
+
Requires-Python: >=3.10
|
|
9
|
+
Description-Content-Type: text/markdown
|
|
10
|
+
Requires-Dist: numpy>=1.24
|
|
11
|
+
Requires-Dist: scikit-learn>=1.3
|
|
12
|
+
Requires-Dist: scipy>=1.11
|
|
13
|
+
Requires-Dist: pandas>=2.0
|
|
14
|
+
Provides-Extra: model
|
|
15
|
+
Requires-Dist: torch>=2.1; extra == "model"
|
|
16
|
+
Requires-Dist: transformers>=4.40; extra == "model"
|
|
17
|
+
Requires-Dist: sentence-transformers>=2.6; extra == "model"
|
|
18
|
+
Requires-Dist: outlines>=0.0.40; extra == "model"
|
|
19
|
+
Requires-Dist: accelerate>=0.30; extra == "model"
|
|
20
|
+
Provides-Extra: dev
|
|
21
|
+
Requires-Dist: pytest>=7.4; extra == "dev"
|
|
22
|
+
Requires-Dist: pytest-cov>=4.1; extra == "dev"
|
|
23
|
+
Requires-Dist: ruff>=0.1.0; extra == "dev"
|
|
24
|
+
Requires-Dist: mypy>=1.7; extra == "dev"
|
|
25
|
+
|
|
26
|
+
# trait-inference — Paper 19 (ATI) Phase 3 Pipeline
|
|
27
|
+
|
|
28
|
+
Python package implementing the **frozen** Paper 19 (Automated Trait
|
|
29
|
+
Inference) Phase 3 training pipeline + baselines + eval harness, per:
|
|
30
|
+
|
|
31
|
+
- Spec: `ai-ecosystem/research/paper-19-trait-inference/phase-1-spec.md`
|
|
32
|
+
- Pre-registration: `ai-ecosystem/research/paper-19-trait-inference/preregistration.md`
|
|
33
|
+
- Brain: `ai-ecosystem/compositions/trait-inference-brain.hsplus`
|
|
34
|
+
- GPU-claim ticket: `task_1777072040695_mrr3`
|
|
35
|
+
|
|
36
|
+
**Status (2026-04-24)**: Phase 1 (CPU pipeline) shipped — dataset
|
|
37
|
+
loader/audit/splits + 3 baselines (keyword + TF-IDF + Brittney-stub) +
|
|
38
|
+
eval metrics with bootstrap CI + CLI runner + Vast.ai launcher.
|
|
39
|
+
**Phase 2 (model module)** — sentence-transformer encoder +
|
|
40
|
+
constrained-decoder LLM, requires the `[model]` extra — pending follow-up commit.
|
|
41
|
+
|
|
42
|
+
---
|
|
43
|
+
|
|
44
|
+
## Quick start
|
|
45
|
+
|
|
46
|
+
### 1. Install (CPU baselines + eval only)
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
cd packages/trait-inference
|
|
50
|
+
pip install -e .
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
### 2. Smoke test (synthetic data, end-to-end)
|
|
54
|
+
|
|
55
|
+
Validates the pipeline runs without needing real data or GPU. ~2 min.
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
trait-inference smoke --n 200 --bootstrap-b 200
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
Should emit a JSON measurement bundle to stdout with `"smoke_test": true,
|
|
62
|
+
"passed": true`. Use this to validate a fresh install before committing
|
|
63
|
+
to a Vast.ai run.
|
|
64
|
+
|
|
65
|
+
### 3. Extract trait label space from HoloScript core
|
|
66
|
+
|
|
67
|
+
```bash
|
|
68
|
+
trait-inference extract-traits \
|
|
69
|
+
--constants-dir ../core/src/traits/constants/ \
|
|
70
|
+
--output trait_inference/data/trait_label_space.json \
|
|
71
|
+
--verbose
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
Reads the 113 TS constant files, extracts string-array exports, writes
|
|
75
|
+
a single JSON consumed by the dataset + model modules.
|
|
76
|
+
|
|
77
|
+
### 4. Audit a real dataset
|
|
78
|
+
|
|
79
|
+
```bash
|
|
80
|
+
trait-inference dataset audit data/atimark.jsonl --output measurements/audit.json
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
Exits with status 0 if the dataset passes spec §1.4 acceptance (≥2k pairs,
|
|
84
|
+
≥300 novel combinations, ≥500 from each major source, ≥200 negatives, no
|
|
85
|
+
novelty leak); otherwise exits with status 1 and reports an `issues` list.
|
|
86
|
+
|
|
87
|
+
### 5. Run baselines
|
|
88
|
+
|
|
89
|
+
```bash
|
|
90
|
+
trait-inference dataset split data/atimark.jsonl --output-dir splits/ --seed 42
|
|
91
|
+
trait-inference baseline run keyword --train splits/train.jsonl --eval splits/held_out_novel.jsonl --output measurements/keyword.json
|
|
92
|
+
trait-inference baseline run tfidf --train splits/train.jsonl --eval splits/held_out_novel.jsonl --val splits/val.jsonl --tune-threshold --output measurements/tfidf.json
|
|
93
|
+
trait-inference baseline run brittney --train splits/train.jsonl --eval splits/held_out_novel.jsonl --output measurements/brittney.json
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
Each emits `f1_macro`, `exact_match`, `bootstrap_ci`, sample predictions.
|
|
97
|
+
|
|
98
|
+
---
|
|
99
|
+
|
|
100
|
+
## Vast.ai GPU launch
|
|
101
|
+
|
|
102
|
+
Orchestration script: `scripts/vast-launch-paper-19.ps1` (PowerShell;
|
|
103
|
+
mirrors the existing `ai-ecosystem/scripts/vast-bench-runner.ps1`
|
|
104
|
+
pattern).
|
|
105
|
+
|
|
106
|
+
```powershell
|
|
107
|
+
# Cheapest end-to-end pipeline validation (~$0.30, ~5 min)
|
|
108
|
+
.\scripts\vast-launch-paper-19.ps1 -Phase smoke -Label paper19-smoke
|
|
109
|
+
|
|
110
|
+
# Run all 3 baselines on the real dataset (~$0.30, ~10 min)
|
|
111
|
+
.\scripts\vast-launch-paper-19.ps1 -Phase baseline `
|
|
112
|
+
-DatasetPath data/atimark.jsonl -Label paper19-baselines
|
|
113
|
+
|
|
114
|
+
# Full training run (REQUIRES preregistration.md frozen + Phase 2 model module shipped)
|
|
115
|
+
.\scripts\vast-launch-paper-19.ps1 -Phase train -GpuName RTX_4090 `
|
|
116
|
+
-DatasetPath data/atimark.jsonl -Label paper19-headline-cell-1
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
Pre-flight: requires `vastai set api-key` configured (see
|
|
120
|
+
`ai-ecosystem/.env` `VAST_API_KEY`); requires `~/.ssh/id_rsa` with the
|
|
121
|
+
matching public key registered on the Vast.ai account; requires
|
|
122
|
+
≥$0.50 credit for `train`.
|
|
123
|
+
|
|
124
|
+
---
|
|
125
|
+
|
|
126
|
+
## Cost estimate
|
|
127
|
+
|
|
128
|
+
Per `ai-ecosystem/research/paper-19-trait-inference/README.md` Phase 2-4 task table + GPU-claim ticket `_mrr3`:
|
|
129
|
+
|
|
130
|
+
| Job | GPU | Hours | Cost |
|
|
131
|
+
| --------------------------------------------- | -------- | ------------------------------: | -----: |
|
|
132
|
+
| Smoke test | RTX 4090 | 0.1 | $0.03 |
|
|
133
|
+
| Baselines (CPU-bound) | RTX 4090 | 0.2 | $0.06 |
|
|
134
|
+
| Single training cell | RTX 4090 | ~6 | ~$1.80 |
|
|
135
|
+
| Full sweep (30 cells × N=5 reseed = 150 runs) | RTX 4090 | ~900 (parallel: 30 GPUs × 30hr) | ~$240 |
|
|
136
|
+
|
|
137
|
+
(A100 estimates are roughly 4-8× higher; A100 supply is also tighter.
|
|
138
|
+
4090 is sufficient for ≤1B-param decoder per spec §3.1.)
|
|
139
|
+
|
|
140
|
+
---
|
|
141
|
+
|
|
142
|
+
## Per-spec deliverable map
|
|
143
|
+
|
|
144
|
+
| Spec section | Module | Status |
|
|
145
|
+
| ------------------------------------ | ------------------------------------------------------------------- | ------------------------------------------------ |
|
|
146
|
+
| §1.1 Sourcing 3-source mix | `dataset.py` Pair + Source | done (loader; data construction is Phase 2 task) |
|
|
147
|
+
| §1.2 Schema | `dataset.py` Pair dataclass | done |
|
|
148
|
+
| §1.3 Splits (train/val/indist/novel) | `dataset.py` make_splits | done |
|
|
149
|
+
| §1.4 Audit protocol | `dataset.py` audit + AuditReport | done |
|
|
150
|
+
| §2.1 Keyword baseline | `baselines.py` KeywordBaseline | done |
|
|
151
|
+
| §2.2 TF-IDF + LogReg baseline | `baselines.py` TfidfLogregBaseline | done |
|
|
152
|
+
| §2.3 Brittney few-shot baseline | `baselines.py` BrittneyFewShotBaseline | stub (real impl needs Brittney API integration) |
|
|
153
|
+
| §3.1 Constrained-decoder model | `model/` (Phase 2 commit) | pending |
|
|
154
|
+
| §3.2 Conditioning fields | `model/` (Phase 2 commit) | pending |
|
|
155
|
+
| §3.3 Hyperparameter sweep | `model/sweep.py` (Phase 2 commit) | pending |
|
|
156
|
+
| §4.1 Metric definitions | `metrics.py` f1_macro, f1_micro, exact_match_rate, bootstrap_ci | done |
|
|
157
|
+
| §4.2 Statistical protocol | `metrics.py` bootstrap_ci, evaluate_headline | done |
|
|
158
|
+
| §4.3 Ablation matrix | `eval/ablations.py` (Phase 2 commit) | pending |
|
|
159
|
+
| §4.4 Required user study | (separate UX-research task) | pending |
|
|
160
|
+
| §4.5 Pre-registration freeze | `ai-ecosystem/research/paper-19-trait-inference/preregistration.md` | FROZEN (do not edit) |
|
|
161
|
+
|
|
162
|
+
---
|
|
163
|
+
|
|
164
|
+
## Anti-pattern guards
|
|
165
|
+
|
|
166
|
+
Binding — inherited from `compositions/trait-inference-brain.hsplus`.
|
|
167
|
+
|
|
168
|
+
- **No train-set evaluation.** Headline metric on novel-combination split only.
|
|
169
|
+
- **No easy-split-only F1.** Reports include both indist (sanity) and novel (headline).
|
|
170
|
+
- **No single-source dataset.** Audit rejects datasets <500 from any of {existing, brittney, community}.
|
|
171
|
+
- **No optional user study.** §4.4 is required, not optional (per F.031).
|
|
172
|
+
- **No after-the-fact threshold-shopping.** preregistration.md is frozen before any Phase 3 board task is filed.
|
|
173
|
+
- **No qualitative-only claims.** ML venue requires numbers; pipeline emits structured measurements.
|
|
174
|
+
- **No validity gap as "scoped contribution"** — constrained-decoding architecture (Phase 2 module) bakes ≥90% validity into the decoder, not into a post-filter.
|
|
175
|
+
|
|
176
|
+
---
|
|
177
|
+
|
|
178
|
+
## Known limitations / future work
|
|
179
|
+
|
|
180
|
+
- Brittney few-shot baseline is a stub returning empty predictions; real impl needs HoloScript MCP integration (separate task).
|
|
181
|
+
- Constrained-decoder model module (`model/`) is the Phase 2 deliverable — not in this commit.
|
|
182
|
+
- Training loop + ablation matrix runner pending Phase 2.
|
|
183
|
+
- User study (Phase 4 §4.4) is a separate UX-research deliverable.
|
|
184
|
+
- The PowerShell Vast.ai launcher targets Windows; a bash equivalent for macOS/Linux is a follow-up.
|
|
185
|
+
|
|
186
|
+
---
|
|
187
|
+
|
|
188
|
+
## Provenance
|
|
189
|
+
|
|
190
|
+
- Authored by `trait-inference-brain` (`compositions/trait-inference-brain.hsplus`).
|
|
191
|
+
- GPU-claim ticket: `task_1777072040695_mrr3` (live on team_1775935947314_f0noxi board).
|
|
192
|
+
- Capability-build provenance commit: `fc294af` (lean-theorist-brain — sibling).
|
|
193
|
+
- F.031 pre-emptions baked into spec; constrained decoding ships in Phase 2 model module.
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
pyproject.toml
|
|
3
|
+
holoscript_trait_inference.egg-info/PKG-INFO
|
|
4
|
+
holoscript_trait_inference.egg-info/SOURCES.txt
|
|
5
|
+
holoscript_trait_inference.egg-info/dependency_links.txt
|
|
6
|
+
holoscript_trait_inference.egg-info/entry_points.txt
|
|
7
|
+
holoscript_trait_inference.egg-info/requires.txt
|
|
8
|
+
holoscript_trait_inference.egg-info/top_level.txt
|
|
9
|
+
trait_inference/__init__.py
|
|
10
|
+
trait_inference/baselines.py
|
|
11
|
+
trait_inference/cli.py
|
|
12
|
+
trait_inference/dataset.py
|
|
13
|
+
trait_inference/metrics.py
|
|
14
|
+
trait_inference/eval/__init__.py
|
|
15
|
+
trait_inference/eval/ablations.py
|
|
16
|
+
trait_inference/model/__init__.py
|
|
17
|
+
trait_inference/model/decoder.py
|
|
18
|
+
trait_inference/model/sweep.py
|
|
19
|
+
trait_inference/model/trainer.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
numpy>=1.24
|
|
2
|
+
scikit-learn>=1.3
|
|
3
|
+
scipy>=1.11
|
|
4
|
+
pandas>=2.0
|
|
5
|
+
|
|
6
|
+
[dev]
|
|
7
|
+
pytest>=7.4
|
|
8
|
+
pytest-cov>=4.1
|
|
9
|
+
ruff>=0.1.0
|
|
10
|
+
mypy>=1.7
|
|
11
|
+
|
|
12
|
+
[model]
|
|
13
|
+
torch>=2.1
|
|
14
|
+
transformers>=4.40
|
|
15
|
+
sentence-transformers>=2.6
|
|
16
|
+
outlines>=0.0.40
|
|
17
|
+
accelerate>=0.30
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
trait_inference
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=77.0.3", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "holoscript-trait-inference"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Paper 19 (ATI) — Automated Trait Inference for HoloScript .hsplus. Phase 3 training pipeline + baselines + eval harness."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.10"
|
|
11
|
+
license = "MIT"
|
|
12
|
+
authors = [{ name = "HoloScript Core" }]
|
|
13
|
+
keywords = ["holoscript", "trait-inference", "paper-19", "ml"]
|
|
14
|
+
|
|
15
|
+
dependencies = [
|
|
16
|
+
# Core ML
|
|
17
|
+
"numpy>=1.24",
|
|
18
|
+
"scikit-learn>=1.3", # TF-IDF + LogReg baseline
|
|
19
|
+
"scipy>=1.11", # bootstrap CI, statistical tests
|
|
20
|
+
|
|
21
|
+
# Optional GPU/contribution model — install via [model] extra to keep CPU-only
|
|
22
|
+
# baselines + eval lightweight.
|
|
23
|
+
|
|
24
|
+
# Eval + diagnostics
|
|
25
|
+
"pandas>=2.0", # measurement table aggregation
|
|
26
|
+
]
|
|
27
|
+
|
|
28
|
+
[project.optional-dependencies]
|
|
29
|
+
model = [
|
|
30
|
+
# Heavy deps — required only for the contribution model (Phase 3).
|
|
31
|
+
# Baselines (keyword, tfidf) and eval (metrics, bootstrap) work without these.
|
|
32
|
+
"torch>=2.1",
|
|
33
|
+
"transformers>=4.40",
|
|
34
|
+
"sentence-transformers>=2.6",
|
|
35
|
+
"outlines>=0.0.40", # constrained decoding over .holo grammar
|
|
36
|
+
"accelerate>=0.30", # multi-GPU training
|
|
37
|
+
]
|
|
38
|
+
|
|
39
|
+
dev = [
|
|
40
|
+
"pytest>=7.4",
|
|
41
|
+
"pytest-cov>=4.1",
|
|
42
|
+
"ruff>=0.1.0",
|
|
43
|
+
"mypy>=1.7",
|
|
44
|
+
]
|
|
45
|
+
|
|
46
|
+
[project.scripts]
|
|
47
|
+
trait-inference = "trait_inference.cli:main"
|
|
48
|
+
|
|
49
|
+
[tool.setuptools.packages.find]
|
|
50
|
+
where = ["."]
|
|
51
|
+
include = ["trait_inference*"]
|
|
52
|
+
|
|
53
|
+
[tool.ruff]
|
|
54
|
+
line-length = 100
|
|
55
|
+
target-version = "py310"
|
|
56
|
+
|
|
57
|
+
[tool.pytest.ini_options]
|
|
58
|
+
testpaths = ["tests"]
|
|
59
|
+
addopts = "-v --tb=short"
|