holoscript-trait-inference 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (21) hide show
  1. holoscript_trait_inference-0.1.0/PKG-INFO +193 -0
  2. holoscript_trait_inference-0.1.0/README.md +168 -0
  3. holoscript_trait_inference-0.1.0/holoscript_trait_inference.egg-info/PKG-INFO +193 -0
  4. holoscript_trait_inference-0.1.0/holoscript_trait_inference.egg-info/SOURCES.txt +19 -0
  5. holoscript_trait_inference-0.1.0/holoscript_trait_inference.egg-info/dependency_links.txt +1 -0
  6. holoscript_trait_inference-0.1.0/holoscript_trait_inference.egg-info/entry_points.txt +2 -0
  7. holoscript_trait_inference-0.1.0/holoscript_trait_inference.egg-info/requires.txt +17 -0
  8. holoscript_trait_inference-0.1.0/holoscript_trait_inference.egg-info/top_level.txt +1 -0
  9. holoscript_trait_inference-0.1.0/pyproject.toml +59 -0
  10. holoscript_trait_inference-0.1.0/setup.cfg +4 -0
  11. holoscript_trait_inference-0.1.0/trait_inference/__init__.py +27 -0
  12. holoscript_trait_inference-0.1.0/trait_inference/baselines.py +238 -0
  13. holoscript_trait_inference-0.1.0/trait_inference/cli.py +498 -0
  14. holoscript_trait_inference-0.1.0/trait_inference/dataset.py +348 -0
  15. holoscript_trait_inference-0.1.0/trait_inference/eval/__init__.py +21 -0
  16. holoscript_trait_inference-0.1.0/trait_inference/eval/ablations.py +336 -0
  17. holoscript_trait_inference-0.1.0/trait_inference/metrics.py +291 -0
  18. holoscript_trait_inference-0.1.0/trait_inference/model/__init__.py +34 -0
  19. holoscript_trait_inference-0.1.0/trait_inference/model/decoder.py +192 -0
  20. holoscript_trait_inference-0.1.0/trait_inference/model/sweep.py +242 -0
  21. holoscript_trait_inference-0.1.0/trait_inference/model/trainer.py +237 -0
@@ -0,0 +1,193 @@
1
+ Metadata-Version: 2.4
2
+ Name: holoscript-trait-inference
3
+ Version: 0.1.0
4
+ Summary: Paper 19 (ATI) — Automated Trait Inference for HoloScript .hsplus. Phase 3 training pipeline + baselines + eval harness.
5
+ Author: HoloScript Core
6
+ License-Expression: MIT
7
+ Keywords: holoscript,trait-inference,paper-19,ml
8
+ Requires-Python: >=3.10
9
+ Description-Content-Type: text/markdown
10
+ Requires-Dist: numpy>=1.24
11
+ Requires-Dist: scikit-learn>=1.3
12
+ Requires-Dist: scipy>=1.11
13
+ Requires-Dist: pandas>=2.0
14
+ Provides-Extra: model
15
+ Requires-Dist: torch>=2.1; extra == "model"
16
+ Requires-Dist: transformers>=4.40; extra == "model"
17
+ Requires-Dist: sentence-transformers>=2.6; extra == "model"
18
+ Requires-Dist: outlines>=0.0.40; extra == "model"
19
+ Requires-Dist: accelerate>=0.30; extra == "model"
20
+ Provides-Extra: dev
21
+ Requires-Dist: pytest>=7.4; extra == "dev"
22
+ Requires-Dist: pytest-cov>=4.1; extra == "dev"
23
+ Requires-Dist: ruff>=0.1.0; extra == "dev"
24
+ Requires-Dist: mypy>=1.7; extra == "dev"
25
+
26
+ # trait-inference — Paper 19 (ATI) Phase 3 Pipeline
27
+
28
+ Python package implementing the **frozen** Paper 19 (Automated Trait
29
+ Inference) Phase 3 training pipeline + baselines + eval harness, per:
30
+
31
+ - Spec: `ai-ecosystem/research/paper-19-trait-inference/phase-1-spec.md`
32
+ - Pre-registration: `ai-ecosystem/research/paper-19-trait-inference/preregistration.md`
33
+ - Brain: `ai-ecosystem/compositions/trait-inference-brain.hsplus`
34
+ - GPU-claim ticket: `task_1777072040695_mrr3`
35
+
36
+ **Status (2026-04-24)**: Phase 1 (CPU pipeline) shipped — dataset
37
+ loader/audit/splits + 3 baselines (keyword + TF-IDF + Brittney-stub) +
38
+ eval metrics with bootstrap CI + CLI runner + Vast.ai launcher.
39
+ **Phase 2 (model module)** — sentence-transformer encoder + constrained-
40
+ decoder LLM, requires `[model]` extra — pending follow-up commit.
41
+
42
+ ---
43
+
44
+ ## Quick start
45
+
46
+ ### 1. Install (CPU baselines + eval only)
47
+
48
+ ```bash
49
+ cd packages/trait-inference
50
+ pip install -e .
51
+ ```
52
+
53
+ ### 2. Smoke test (synthetic data, end-to-end)
54
+
55
+ Validates the pipeline runs without needing real data or GPU. ~2 min.
56
+
57
+ ```bash
58
+ trait-inference smoke --n 200 --bootstrap-b 200
59
+ ```
60
+
61
+ Should emit a JSON measurement bundle to stdout with `"smoke_test": true,
62
+ "passed": true`. Use this to validate a fresh install before committing
63
+ to a Vast.ai run.
64
+
65
+ ### 3. Extract trait label space from HoloScript core
66
+
67
+ ```bash
68
+ trait-inference extract-traits \
69
+ --constants-dir ../core/src/traits/constants/ \
70
+ --output trait_inference/data/trait_label_space.json \
71
+ --verbose
72
+ ```
73
+
74
+ Reads the 113 TS constant files, extracts string-array exports, writes
75
+ a single JSON consumed by the dataset + model modules.
76
+
77
+ ### 4. Audit a real dataset
78
+
79
+ ```bash
80
+ trait-inference dataset audit data/atimark.jsonl --output measurements/audit.json
81
+ ```
82
+
83
+ Returns exit 0 if the dataset passes spec §1.4 acceptance (≥2k pairs,
84
+ ≥300 novel combinations, ≥500 each major source, ≥200 negatives, no
85
+ novelty leak); exit 1 with `issues` list otherwise.
86
+
87
+ ### 5. Run baselines
88
+
89
+ ```bash
90
+ trait-inference dataset split data/atimark.jsonl --output-dir splits/ --seed 42
91
+ trait-inference baseline run keyword --train splits/train.jsonl --eval splits/held_out_novel.jsonl --output measurements/keyword.json
92
+ trait-inference baseline run tfidf --train splits/train.jsonl --eval splits/held_out_novel.jsonl --val splits/val.jsonl --tune-threshold --output measurements/tfidf.json
93
+ trait-inference baseline run brittney --train splits/train.jsonl --eval splits/held_out_novel.jsonl --output measurements/brittney.json
94
+ ```
95
+
96
+ Each emits `f1_macro`, `exact_match`, `bootstrap_ci`, sample predictions.
97
+
98
+ ---
99
+
100
+ ## Vast.ai GPU launch
101
+
102
+ Orchestration script: `scripts/vast-launch-paper-19.ps1` (PowerShell;
103
+ mirrors the existing `ai-ecosystem/scripts/vast-bench-runner.ps1`
104
+ pattern).
105
+
106
+ ```powershell
107
+ # Cheapest end-to-end pipeline validation (~$0.30, ~5 min)
108
+ .\scripts\vast-launch-paper-19.ps1 -Phase smoke -Label paper19-smoke
109
+
110
+ # Run all 3 baselines on the real dataset (~$0.30, ~10 min)
111
+ .\scripts\vast-launch-paper-19.ps1 -Phase baseline `
112
+ -DatasetPath data/atimark.jsonl -Label paper19-baselines
113
+
114
+ # Full training run (REQUIRES preregistration.md frozen + Phase 2 model module shipped)
115
+ .\scripts\vast-launch-paper-19.ps1 -Phase train -GpuName RTX_4090 `
116
+ -DatasetPath data/atimark.jsonl -Label paper19-headline-cell-1
117
+ ```
118
+
119
+ Pre-flight: requires `vastai set api-key` configured (see
120
+ `ai-ecosystem/.env` `VAST_API_KEY`); requires `~/.ssh/id_rsa` with the
121
+ matching public key registered on the Vast.ai account; requires
122
+ ≥$0.50 credit for `train`.
123
+
124
+ ---
125
+
126
+ ## Cost estimate
127
+
128
+ Per `ai-ecosystem/research/paper-19-trait-inference/README.md` Phase 2-4 task table + GPU-claim ticket `_mrr3`.
129
+
130
+ | Job | GPU | Hours | Cost |
131
+ | --------------------------------------------- | -------- | ------------------------------: | -----: |
132
+ | Smoke test | RTX 4090 | 0.1 | $0.03 |
133
+ | Baselines (CPU-bound) | RTX 4090 | 0.2 | $0.06 |
134
+ | Single training cell | RTX 4090 | ~6 | ~$1.80 |
135
+ | Full sweep (30 cells × N=5 reseed = 150 runs) | RTX 4090 | ~900 (parallel: 30 GPUs × 30hr) | ~$240 |
136
+
137
+ (A100 estimates are roughly 4-8× higher; A100 supply is also tighter.
138
+ 4090 is sufficient for ≤1B-param decoder per spec §3.1.)
139
+
140
+ ---
141
+
142
+ ## Per-spec deliverable map
143
+
144
+ | Spec section | Module | Status |
145
+ | ------------------------------------ | ------------------------------------------------------------------- | ------------------------------------------------ |
146
+ | §1.1 Sourcing 3-source mix | `dataset.py` Pair + Source | done (loader; data construction is Phase 2 task) |
147
+ | §1.2 Schema | `dataset.py` Pair dataclass | done |
148
+ | §1.3 Splits (train/val/indist/novel) | `dataset.py` make_splits | done |
149
+ | §1.4 Audit protocol | `dataset.py` audit + AuditReport | done |
150
+ | §2.1 Keyword baseline | `baselines.py` KeywordBaseline | done |
151
+ | §2.2 TF-IDF + LogReg baseline | `baselines.py` TfidfLogregBaseline | done |
152
+ | §2.3 Brittney few-shot baseline | `baselines.py` BrittneyFewShotBaseline | stub (real impl needs Brittney API integration) |
153
+ | §3.1 Constrained-decoder model | `model/` (Phase 2 commit) | pending |
154
+ | §3.2 Conditioning fields | `model/` (Phase 2 commit) | pending |
155
+ | §3.3 Hyperparameter sweep | `model/sweep.py` (Phase 2 commit) | pending |
156
+ | §4.1 Metric definitions | `metrics.py` f1_macro, f1_micro, exact_match_rate, bootstrap_ci | done |
157
+ | §4.2 Statistical protocol | `metrics.py` bootstrap_ci, evaluate_headline | done |
158
+ | §4.3 Ablation matrix | `eval/ablations.py` (Phase 2 commit) | pending |
159
+ | §4.4 Required user study | (separate UX-research task) | pending |
160
+ | §4.5 Pre-registration freeze | `ai-ecosystem/research/paper-19-trait-inference/preregistration.md` | FROZEN (do not edit) |
161
+
162
+ ---
163
+
164
+ ## Anti-pattern guards
165
+
166
+ Binding — inherited from `compositions/trait-inference-brain.hsplus`.
167
+
168
+ - **No train-set evaluation.** Headline metric on novel-combination split only.
169
+ - **No easy-split-only F1.** Reports include both indist (sanity) and novel (headline).
170
+ - **No single-source dataset.** Audit rejects datasets with <500 pairs from any of {existing, brittney, community}.
171
+ - **No optional user study.** §4.4 is required, not optional (per F.031).
172
+ - **No after-the-fact threshold-shopping.** preregistration.md is frozen before any Phase 3 board task is filed.
173
+ - **No qualitative-only claims.** ML venue requires numbers; pipeline emits structured measurements.
174
+ - **No validity gap as "scoped contribution"** — constrained-decoding architecture (Phase 2 module) bakes ≥90% validity into the decoder, not into a post-filter.
175
+
176
+ ---
177
+
178
+ ## Known limitations / future work
179
+
180
+ - Brittney few-shot baseline is a stub returning empty predictions; real impl needs HoloScript MCP integration (separate task).
181
+ - Constrained-decoder model module (`model/`) is the Phase 2 deliverable — not in this commit.
182
+ - Training loop + ablation matrix runner pending Phase 2.
183
+ - User study (Phase 4 §4.4) is a separate UX-research deliverable.
184
+ - The PowerShell Vast.ai launcher targets Windows; a bash equivalent for macOS/Linux is a follow-up.
185
+
186
+ ---
187
+
188
+ ## Provenance
189
+
190
+ - Authored by `trait-inference-brain` (`compositions/trait-inference-brain.hsplus`).
191
+ - GPU-claim ticket: `task_1777072040695_mrr3` (live on team_1775935947314_f0noxi board).
192
+ - Capability-build provenance commit: `fc294af` (lean-theorist-brain — sibling).
193
+ - F.031 pre-emptions baked into spec; constrained decoding ships in Phase 2 model module.
@@ -0,0 +1,168 @@
1
+ # trait-inference — Paper 19 (ATI) Phase 3 Pipeline
2
+
3
+ Python package implementing the **frozen** Paper 19 (Automated Trait
4
+ Inference) Phase 3 training pipeline + baselines + eval harness, per:
5
+
6
+ - Spec: `ai-ecosystem/research/paper-19-trait-inference/phase-1-spec.md`
7
+ - Pre-registration: `ai-ecosystem/research/paper-19-trait-inference/preregistration.md`
8
+ - Brain: `ai-ecosystem/compositions/trait-inference-brain.hsplus`
9
+ - GPU-claim ticket: `task_1777072040695_mrr3`
10
+
11
+ **Status (2026-04-24)**: Phase 1 (CPU pipeline) shipped — dataset
12
+ loader/audit/splits + 3 baselines (keyword + TF-IDF + Brittney-stub) +
13
+ eval metrics with bootstrap CI + CLI runner + Vast.ai launcher.
14
+ **Phase 2 (model module)** — sentence-transformer encoder + constrained-
15
+ decoder LLM, requires `[model]` extra — pending follow-up commit.
16
+
17
+ ---
18
+
19
+ ## Quick start
20
+
21
+ ### 1. Install (CPU baselines + eval only)
22
+
23
+ ```bash
24
+ cd packages/trait-inference
25
+ pip install -e .
26
+ ```
27
+
28
+ ### 2. Smoke test (synthetic data, end-to-end)
29
+
30
+ Validates the pipeline runs without needing real data or GPU. ~2 min.
31
+
32
+ ```bash
33
+ trait-inference smoke --n 200 --bootstrap-b 200
34
+ ```
35
+
36
+ Should emit a JSON measurement bundle to stdout with `"smoke_test": true,
37
+ "passed": true`. Use this to validate a fresh install before committing
38
+ to a Vast.ai run.
39
+
40
+ ### 3. Extract trait label space from HoloScript core
41
+
42
+ ```bash
43
+ trait-inference extract-traits \
44
+ --constants-dir ../core/src/traits/constants/ \
45
+ --output trait_inference/data/trait_label_space.json \
46
+ --verbose
47
+ ```
48
+
49
+ Reads the 113 TS constant files, extracts string-array exports, writes
50
+ a single JSON consumed by the dataset + model modules.
51
+
52
+ ### 4. Audit a real dataset
53
+
54
+ ```bash
55
+ trait-inference dataset audit data/atimark.jsonl --output measurements/audit.json
56
+ ```
57
+
58
+ Returns exit 0 if the dataset passes spec §1.4 acceptance (≥2k pairs,
59
+ ≥300 novel combinations, ≥500 each major source, ≥200 negatives, no
60
+ novelty leak); exit 1 with `issues` list otherwise.
61
+
62
+ ### 5. Run baselines
63
+
64
+ ```bash
65
+ trait-inference dataset split data/atimark.jsonl --output-dir splits/ --seed 42
66
+ trait-inference baseline run keyword --train splits/train.jsonl --eval splits/held_out_novel.jsonl --output measurements/keyword.json
67
+ trait-inference baseline run tfidf --train splits/train.jsonl --eval splits/held_out_novel.jsonl --val splits/val.jsonl --tune-threshold --output measurements/tfidf.json
68
+ trait-inference baseline run brittney --train splits/train.jsonl --eval splits/held_out_novel.jsonl --output measurements/brittney.json
69
+ ```
70
+
71
+ Each emits `f1_macro`, `exact_match`, `bootstrap_ci`, sample predictions.
72
+
73
+ ---
74
+
75
+ ## Vast.ai GPU launch
76
+
77
+ Orchestration script: `scripts/vast-launch-paper-19.ps1` (PowerShell;
78
+ mirrors the existing `ai-ecosystem/scripts/vast-bench-runner.ps1`
79
+ pattern).
80
+
81
+ ```powershell
82
+ # Cheapest end-to-end pipeline validation (~$0.30, ~5 min)
83
+ .\scripts\vast-launch-paper-19.ps1 -Phase smoke -Label paper19-smoke
84
+
85
+ # Run all 3 baselines on the real dataset (~$0.30, ~10 min)
86
+ .\scripts\vast-launch-paper-19.ps1 -Phase baseline `
87
+ -DatasetPath data/atimark.jsonl -Label paper19-baselines
88
+
89
+ # Full training run (REQUIRES preregistration.md frozen + Phase 2 model module shipped)
90
+ .\scripts\vast-launch-paper-19.ps1 -Phase train -GpuName RTX_4090 `
91
+ -DatasetPath data/atimark.jsonl -Label paper19-headline-cell-1
92
+ ```
93
+
94
+ Pre-flight: requires `vastai set api-key` configured (see
95
+ `ai-ecosystem/.env` `VAST_API_KEY`); requires `~/.ssh/id_rsa` with the
96
+ matching public key registered on the Vast.ai account; requires
97
+ ≥$0.50 credit for `train`.
98
+
99
+ ---
100
+
101
+ ## Cost estimate
102
+
103
+ Per `ai-ecosystem/research/paper-19-trait-inference/README.md` Phase 2-4 task table + GPU-claim ticket `_mrr3`.
104
+
105
+ | Job | GPU | Hours | Cost |
106
+ | --------------------------------------------- | -------- | ------------------------------: | -----: |
107
+ | Smoke test | RTX 4090 | 0.1 | $0.03 |
108
+ | Baselines (CPU-bound) | RTX 4090 | 0.2 | $0.06 |
109
+ | Single training cell | RTX 4090 | ~6 | ~$1.80 |
110
+ | Full sweep (30 cells × N=5 reseed = 150 runs) | RTX 4090 | ~900 (parallel: 30 GPUs × 30hr) | ~$240 |
111
+
112
+ (A100 estimates are roughly 4-8× higher; A100 supply is also tighter.
113
+ 4090 is sufficient for ≤1B-param decoder per spec §3.1.)
114
+
115
+ ---
116
+
117
+ ## Per-spec deliverable map
118
+
119
+ | Spec section | Module | Status |
120
+ | ------------------------------------ | ------------------------------------------------------------------- | ------------------------------------------------ |
121
+ | §1.1 Sourcing 3-source mix | `dataset.py` Pair + Source | done (loader; data construction is Phase 2 task) |
122
+ | §1.2 Schema | `dataset.py` Pair dataclass | done |
123
+ | §1.3 Splits (train/val/indist/novel) | `dataset.py` make_splits | done |
124
+ | §1.4 Audit protocol | `dataset.py` audit + AuditReport | done |
125
+ | §2.1 Keyword baseline | `baselines.py` KeywordBaseline | done |
126
+ | §2.2 TF-IDF + LogReg baseline | `baselines.py` TfidfLogregBaseline | done |
127
+ | §2.3 Brittney few-shot baseline | `baselines.py` BrittneyFewShotBaseline | stub (real impl needs Brittney API integration) |
128
+ | §3.1 Constrained-decoder model | `model/` (Phase 2 commit) | pending |
129
+ | §3.2 Conditioning fields | `model/` (Phase 2 commit) | pending |
130
+ | §3.3 Hyperparameter sweep | `model/sweep.py` (Phase 2 commit) | pending |
131
+ | §4.1 Metric definitions | `metrics.py` f1_macro, f1_micro, exact_match_rate, bootstrap_ci | done |
132
+ | §4.2 Statistical protocol | `metrics.py` bootstrap_ci, evaluate_headline | done |
133
+ | §4.3 Ablation matrix | `eval/ablations.py` (Phase 2 commit) | pending |
134
+ | §4.4 Required user study | (separate UX-research task) | pending |
135
+ | §4.5 Pre-registration freeze | `ai-ecosystem/research/paper-19-trait-inference/preregistration.md` | FROZEN (do not edit) |
136
+
137
+ ---
138
+
139
+ ## Anti-pattern guards
140
+
141
+ Binding — inherited from `compositions/trait-inference-brain.hsplus`.
142
+
143
+ - **No train-set evaluation.** Headline metric on novel-combination split only.
144
+ - **No easy-split-only F1.** Reports include both indist (sanity) and novel (headline).
145
+ - **No single-source dataset.** Audit rejects datasets with <500 pairs from any of {existing, brittney, community}.
146
+ - **No optional user study.** §4.4 is required, not optional (per F.031).
147
+ - **No after-the-fact threshold-shopping.** preregistration.md is frozen before any Phase 3 board task is filed.
148
+ - **No qualitative-only claims.** ML venue requires numbers; pipeline emits structured measurements.
149
+ - **No validity gap as "scoped contribution"** — constrained-decoding architecture (Phase 2 module) bakes ≥90% validity into the decoder, not into a post-filter.
150
+
151
+ ---
152
+
153
+ ## Known limitations / future work
154
+
155
+ - Brittney few-shot baseline is a stub returning empty predictions; real impl needs HoloScript MCP integration (separate task).
156
+ - Constrained-decoder model module (`model/`) is the Phase 2 deliverable — not in this commit.
157
+ - Training loop + ablation matrix runner pending Phase 2.
158
+ - User study (Phase 4 §4.4) is a separate UX-research deliverable.
159
+ - The PowerShell Vast.ai launcher targets Windows; a bash equivalent for macOS/Linux is a follow-up.
160
+
161
+ ---
162
+
163
+ ## Provenance
164
+
165
+ - Authored by `trait-inference-brain` (`compositions/trait-inference-brain.hsplus`).
166
+ - GPU-claim ticket: `task_1777072040695_mrr3` (live on team_1775935947314_f0noxi board).
167
+ - Capability-build provenance commit: `fc294af` (lean-theorist-brain — sibling).
168
+ - F.031 pre-emptions baked into spec; constrained decoding ships in Phase 2 model module.
@@ -0,0 +1,193 @@
1
+ Metadata-Version: 2.4
2
+ Name: holoscript-trait-inference
3
+ Version: 0.1.0
4
+ Summary: Paper 19 (ATI) — Automated Trait Inference for HoloScript .hsplus. Phase 3 training pipeline + baselines + eval harness.
5
+ Author: HoloScript Core
6
+ License-Expression: MIT
7
+ Keywords: holoscript,trait-inference,paper-19,ml
8
+ Requires-Python: >=3.10
9
+ Description-Content-Type: text/markdown
10
+ Requires-Dist: numpy>=1.24
11
+ Requires-Dist: scikit-learn>=1.3
12
+ Requires-Dist: scipy>=1.11
13
+ Requires-Dist: pandas>=2.0
14
+ Provides-Extra: model
15
+ Requires-Dist: torch>=2.1; extra == "model"
16
+ Requires-Dist: transformers>=4.40; extra == "model"
17
+ Requires-Dist: sentence-transformers>=2.6; extra == "model"
18
+ Requires-Dist: outlines>=0.0.40; extra == "model"
19
+ Requires-Dist: accelerate>=0.30; extra == "model"
20
+ Provides-Extra: dev
21
+ Requires-Dist: pytest>=7.4; extra == "dev"
22
+ Requires-Dist: pytest-cov>=4.1; extra == "dev"
23
+ Requires-Dist: ruff>=0.1.0; extra == "dev"
24
+ Requires-Dist: mypy>=1.7; extra == "dev"
25
+
26
+ # trait-inference — Paper 19 (ATI) Phase 3 Pipeline
27
+
28
+ Python package implementing the **frozen** Paper 19 (Automated Trait
29
+ Inference) Phase 3 training pipeline + baselines + eval harness, per:
30
+
31
+ - Spec: `ai-ecosystem/research/paper-19-trait-inference/phase-1-spec.md`
32
+ - Pre-registration: `ai-ecosystem/research/paper-19-trait-inference/preregistration.md`
33
+ - Brain: `ai-ecosystem/compositions/trait-inference-brain.hsplus`
34
+ - GPU-claim ticket: `task_1777072040695_mrr3`
35
+
36
+ **Status (2026-04-24)**: Phase 1 (CPU pipeline) shipped — dataset
37
+ loader/audit/splits + 3 baselines (keyword + TF-IDF + Brittney-stub) +
38
+ eval metrics with bootstrap CI + CLI runner + Vast.ai launcher.
39
+ **Phase 2 (model module)** — sentence-transformer encoder + constrained-
40
+ decoder LLM, requires `[model]` extra — pending follow-up commit.
41
+
42
+ ---
43
+
44
+ ## Quick start
45
+
46
+ ### 1. Install (CPU baselines + eval only)
47
+
48
+ ```bash
49
+ cd packages/trait-inference
50
+ pip install -e .
51
+ ```
52
+
53
+ ### 2. Smoke test (synthetic data, end-to-end)
54
+
55
+ Validates the pipeline runs without needing real data or GPU. ~2 min.
56
+
57
+ ```bash
58
+ trait-inference smoke --n 200 --bootstrap-b 200
59
+ ```
60
+
61
+ Should emit a JSON measurement bundle to stdout with `"smoke_test": true,
62
+ "passed": true`. Use this to validate a fresh install before committing
63
+ to a Vast.ai run.
64
+
65
+ ### 3. Extract trait label space from HoloScript core
66
+
67
+ ```bash
68
+ trait-inference extract-traits \
69
+ --constants-dir ../core/src/traits/constants/ \
70
+ --output trait_inference/data/trait_label_space.json \
71
+ --verbose
72
+ ```
73
+
74
+ Reads the 113 TS constant files, extracts string-array exports, writes
75
+ a single JSON consumed by the dataset + model modules.
76
+
77
+ ### 4. Audit a real dataset
78
+
79
+ ```bash
80
+ trait-inference dataset audit data/atimark.jsonl --output measurements/audit.json
81
+ ```
82
+
83
+ Returns exit 0 if the dataset passes spec §1.4 acceptance (≥2k pairs,
84
+ ≥300 novel combinations, ≥500 each major source, ≥200 negatives, no
85
+ novelty leak); exit 1 with `issues` list otherwise.
86
+
87
+ ### 5. Run baselines
88
+
89
+ ```bash
90
+ trait-inference dataset split data/atimark.jsonl --output-dir splits/ --seed 42
91
+ trait-inference baseline run keyword --train splits/train.jsonl --eval splits/held_out_novel.jsonl --output measurements/keyword.json
92
+ trait-inference baseline run tfidf --train splits/train.jsonl --eval splits/held_out_novel.jsonl --val splits/val.jsonl --tune-threshold --output measurements/tfidf.json
93
+ trait-inference baseline run brittney --train splits/train.jsonl --eval splits/held_out_novel.jsonl --output measurements/brittney.json
94
+ ```
95
+
96
+ Each emits `f1_macro`, `exact_match`, `bootstrap_ci`, sample predictions.
97
+
98
+ ---
99
+
100
+ ## Vast.ai GPU launch
101
+
102
+ Orchestration script: `scripts/vast-launch-paper-19.ps1` (PowerShell;
103
+ mirrors the existing `ai-ecosystem/scripts/vast-bench-runner.ps1`
104
+ pattern).
105
+
106
+ ```powershell
107
+ # Cheapest end-to-end pipeline validation (~$0.30, ~5 min)
108
+ .\scripts\vast-launch-paper-19.ps1 -Phase smoke -Label paper19-smoke
109
+
110
+ # Run all 3 baselines on the real dataset (~$0.30, ~10 min)
111
+ .\scripts\vast-launch-paper-19.ps1 -Phase baseline `
112
+ -DatasetPath data/atimark.jsonl -Label paper19-baselines
113
+
114
+ # Full training run (REQUIRES preregistration.md frozen + Phase 2 model module shipped)
115
+ .\scripts\vast-launch-paper-19.ps1 -Phase train -GpuName RTX_4090 `
116
+ -DatasetPath data/atimark.jsonl -Label paper19-headline-cell-1
117
+ ```
118
+
119
+ Pre-flight: requires `vastai set api-key` configured (see
120
+ `ai-ecosystem/.env` `VAST_API_KEY`); requires `~/.ssh/id_rsa` with the
121
+ matching public key registered on the Vast.ai account; requires
122
+ ≥$0.50 credit for `train`.
123
+
124
+ ---
125
+
126
+ ## Cost estimate
127
+
128
+ Per `ai-ecosystem/research/paper-19-trait-inference/README.md` Phase 2-4 task table + GPU-claim ticket `_mrr3`.
129
+
130
+ | Job | GPU | Hours | Cost |
131
+ | --------------------------------------------- | -------- | ------------------------------: | -----: |
132
+ | Smoke test | RTX 4090 | 0.1 | $0.03 |
133
+ | Baselines (CPU-bound) | RTX 4090 | 0.2 | $0.06 |
134
+ | Single training cell | RTX 4090 | ~6 | ~$1.80 |
135
+ | Full sweep (30 cells × N=5 reseed = 150 runs) | RTX 4090 | ~900 (parallel: 30 GPUs × 30hr) | ~$240 |
136
+
137
+ (A100 estimates are roughly 4-8× higher; A100 supply is also tighter.
138
+ 4090 is sufficient for ≤1B-param decoder per spec §3.1.)
139
+
140
+ ---
141
+
142
+ ## Per-spec deliverable map
143
+
144
+ | Spec section | Module | Status |
145
+ | ------------------------------------ | ------------------------------------------------------------------- | ------------------------------------------------ |
146
+ | §1.1 Sourcing 3-source mix | `dataset.py` Pair + Source | done (loader; data construction is Phase 2 task) |
147
+ | §1.2 Schema | `dataset.py` Pair dataclass | done |
148
+ | §1.3 Splits (train/val/indist/novel) | `dataset.py` make_splits | done |
149
+ | §1.4 Audit protocol | `dataset.py` audit + AuditReport | done |
150
+ | §2.1 Keyword baseline | `baselines.py` KeywordBaseline | done |
151
+ | §2.2 TF-IDF + LogReg baseline | `baselines.py` TfidfLogregBaseline | done |
152
+ | §2.3 Brittney few-shot baseline | `baselines.py` BrittneyFewShotBaseline | stub (real impl needs Brittney API integration) |
153
+ | §3.1 Constrained-decoder model | `model/` (Phase 2 commit) | pending |
154
+ | §3.2 Conditioning fields | `model/` (Phase 2 commit) | pending |
155
+ | §3.3 Hyperparameter sweep | `model/sweep.py` (Phase 2 commit) | pending |
156
+ | §4.1 Metric definitions | `metrics.py` f1_macro, f1_micro, exact_match_rate, bootstrap_ci | done |
157
+ | §4.2 Statistical protocol | `metrics.py` bootstrap_ci, evaluate_headline | done |
158
+ | §4.3 Ablation matrix | `eval/ablations.py` (Phase 2 commit) | pending |
159
+ | §4.4 Required user study | (separate UX-research task) | pending |
160
+ | §4.5 Pre-registration freeze | `ai-ecosystem/research/paper-19-trait-inference/preregistration.md` | FROZEN (do not edit) |
161
+
162
+ ---
163
+
164
+ ## Anti-pattern guards
165
+
166
+ Binding — inherited from `compositions/trait-inference-brain.hsplus`.
167
+
168
+ - **No train-set evaluation.** Headline metric on novel-combination split only.
169
+ - **No easy-split-only F1.** Reports include both indist (sanity) and novel (headline).
170
+ - **No single-source dataset.** Audit rejects datasets with <500 pairs from any of {existing, brittney, community}.
171
+ - **No optional user study.** §4.4 is required, not optional (per F.031).
172
+ - **No after-the-fact threshold-shopping.** preregistration.md is frozen before any Phase 3 board task is filed.
173
+ - **No qualitative-only claims.** ML venue requires numbers; pipeline emits structured measurements.
174
+ - **No validity gap as "scoped contribution"** — constrained-decoding architecture (Phase 2 module) bakes ≥90% validity into the decoder, not into a post-filter.
175
+
176
+ ---
177
+
178
+ ## Known limitations / future work
179
+
180
+ - Brittney few-shot baseline is a stub returning empty predictions; real impl needs HoloScript MCP integration (separate task).
181
+ - Constrained-decoder model module (`model/`) is the Phase 2 deliverable — not in this commit.
182
+ - Training loop + ablation matrix runner pending Phase 2.
183
+ - User study (Phase 4 §4.4) is a separate UX-research deliverable.
184
+ - The PowerShell Vast.ai launcher targets Windows; a bash equivalent for macOS/Linux is a follow-up.
185
+
186
+ ---
187
+
188
+ ## Provenance
189
+
190
+ - Authored by `trait-inference-brain` (`compositions/trait-inference-brain.hsplus`).
191
+ - GPU-claim ticket: `task_1777072040695_mrr3` (live on team_1775935947314_f0noxi board).
192
+ - Capability-build provenance commit: `fc294af` (lean-theorist-brain — sibling).
193
+ - F.031 pre-emptions baked into spec; constrained decoding ships in Phase 2 model module.
@@ -0,0 +1,19 @@
1
+ README.md
2
+ pyproject.toml
3
+ holoscript_trait_inference.egg-info/PKG-INFO
4
+ holoscript_trait_inference.egg-info/SOURCES.txt
5
+ holoscript_trait_inference.egg-info/dependency_links.txt
6
+ holoscript_trait_inference.egg-info/entry_points.txt
7
+ holoscript_trait_inference.egg-info/requires.txt
8
+ holoscript_trait_inference.egg-info/top_level.txt
9
+ trait_inference/__init__.py
10
+ trait_inference/baselines.py
11
+ trait_inference/cli.py
12
+ trait_inference/dataset.py
13
+ trait_inference/metrics.py
14
+ trait_inference/eval/__init__.py
15
+ trait_inference/eval/ablations.py
16
+ trait_inference/model/__init__.py
17
+ trait_inference/model/decoder.py
18
+ trait_inference/model/sweep.py
19
+ trait_inference/model/trainer.py
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ trait-inference = trait_inference.cli:main
@@ -0,0 +1,17 @@
1
+ numpy>=1.24
2
+ scikit-learn>=1.3
3
+ scipy>=1.11
4
+ pandas>=2.0
5
+
6
+ [dev]
7
+ pytest>=7.4
8
+ pytest-cov>=4.1
9
+ ruff>=0.1.0
10
+ mypy>=1.7
11
+
12
+ [model]
13
+ torch>=2.1
14
+ transformers>=4.40
15
+ sentence-transformers>=2.6
16
+ outlines>=0.0.40
17
+ accelerate>=0.30
@@ -0,0 +1,59 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "holoscript-trait-inference"
7
+ version = "0.1.0"
8
+ description = "Paper 19 (ATI) — Automated Trait Inference for HoloScript .hsplus. Phase 3 training pipeline + baselines + eval harness."
9
+ readme = "README.md"
10
+ requires-python = ">=3.10"
11
+ license = "MIT"
12
+ authors = [{ name = "HoloScript Core" }]
13
+ keywords = ["holoscript", "trait-inference", "paper-19", "ml"]
14
+
15
+ dependencies = [
16
+ # Core ML
17
+ "numpy>=1.24",
18
+ "scikit-learn>=1.3", # TF-IDF + LogReg baseline
19
+ "scipy>=1.11", # bootstrap CI, statistical tests
20
+
21
+ # Optional GPU/contribution model — install via [model] extra to keep CPU-only
22
+ # baselines + eval lightweight.
23
+
24
+ # Eval + diagnostics
25
+ "pandas>=2.0", # measurement table aggregation
26
+ ]
27
+
28
+ [project.optional-dependencies]
29
+ model = [
30
+ # Heavy deps — required only for the contribution model (Phase 3).
31
+ # Baselines (keyword, tfidf) and eval (metrics, bootstrap) work without these.
32
+ "torch>=2.1",
33
+ "transformers>=4.40",
34
+ "sentence-transformers>=2.6",
35
+ "outlines>=0.0.40", # constrained decoding over .holo grammar
36
+ "accelerate>=0.30", # multi-GPU training
37
+ ]
38
+
39
+ dev = [
40
+ "pytest>=7.4",
41
+ "pytest-cov>=4.1",
42
+ "ruff>=0.1.0",
43
+ "mypy>=1.7",
44
+ ]
45
+
46
+ [project.scripts]
47
+ trait-inference = "trait_inference.cli:main"
48
+
49
+ [tool.setuptools.packages.find]
50
+ where = ["."]
51
+ include = ["trait_inference*"]
52
+
53
+ [tool.ruff]
54
+ line-length = 100
55
+ target-version = "py310"
56
+
57
+ [tool.pytest.ini_options]
58
+ testpaths = ["tests"]
59
+ addopts = "-v --tb=short"
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+