@zigrivers/scaffold 3.13.0 → 3.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (180) hide show
  1. package/README.md +32 -10
  2. package/content/knowledge/research/research-architecture.md +385 -0
  3. package/content/knowledge/research/research-conventions.md +248 -0
  4. package/content/knowledge/research/research-dev-environment.md +303 -0
  5. package/content/knowledge/research/research-experiment-loop.md +429 -0
  6. package/content/knowledge/research/research-experiment-tracking.md +336 -0
  7. package/content/knowledge/research/research-ml-architecture-search.md +383 -0
  8. package/content/knowledge/research/research-ml-evaluation.md +407 -0
  9. package/content/knowledge/research/research-ml-experiment-tracking.md +466 -0
  10. package/content/knowledge/research/research-ml-training-patterns.md +413 -0
  11. package/content/knowledge/research/research-observability.md +395 -0
  12. package/content/knowledge/research/research-overfitting-prevention.md +306 -0
  13. package/content/knowledge/research/research-project-structure.md +264 -0
  14. package/content/knowledge/research/research-quant-backtesting.md +326 -0
  15. package/content/knowledge/research/research-quant-market-data.md +366 -0
  16. package/content/knowledge/research/research-quant-metrics.md +335 -0
  17. package/content/knowledge/research/research-quant-requirements.md +223 -0
  18. package/content/knowledge/research/research-quant-risk.md +469 -0
  19. package/content/knowledge/research/research-quant-strategy-patterns.md +412 -0
  20. package/content/knowledge/research/research-requirements.md +201 -0
  21. package/content/knowledge/research/research-security.md +374 -0
  22. package/content/knowledge/research/research-sim-compute-management.md +538 -0
  23. package/content/knowledge/research/research-sim-engine-patterns.md +448 -0
  24. package/content/knowledge/research/research-sim-parameter-spaces.md +425 -0
  25. package/content/knowledge/research/research-sim-validation.md +456 -0
  26. package/content/knowledge/research/research-testing.md +334 -0
  27. package/content/methodology/research-ml-research.yml +23 -0
  28. package/content/methodology/research-overlay.yml +65 -0
  29. package/content/methodology/research-quant-finance.yml +29 -0
  30. package/content/methodology/research-simulation.yml +23 -0
  31. package/dist/cli/commands/adopt.d.ts.map +1 -1
  32. package/dist/cli/commands/adopt.js +30 -8
  33. package/dist/cli/commands/adopt.js.map +1 -1
  34. package/dist/cli/commands/adopt.serialization.test.js +49 -0
  35. package/dist/cli/commands/adopt.serialization.test.js.map +1 -1
  36. package/dist/cli/commands/adopt.test.js +8 -0
  37. package/dist/cli/commands/adopt.test.js.map +1 -1
  38. package/dist/cli/commands/build.d.ts.map +1 -1
  39. package/dist/cli/commands/build.js +191 -180
  40. package/dist/cli/commands/build.js.map +1 -1
  41. package/dist/cli/commands/complete.d.ts.map +1 -1
  42. package/dist/cli/commands/complete.js +16 -12
  43. package/dist/cli/commands/complete.js.map +1 -1
  44. package/dist/cli/commands/complete.test.js +14 -5
  45. package/dist/cli/commands/complete.test.js.map +1 -1
  46. package/dist/cli/commands/init.d.ts +4 -0
  47. package/dist/cli/commands/init.d.ts.map +1 -1
  48. package/dist/cli/commands/init.js +75 -51
  49. package/dist/cli/commands/init.js.map +1 -1
  50. package/dist/cli/commands/init.test.js +33 -27
  51. package/dist/cli/commands/init.test.js.map +1 -1
  52. package/dist/cli/commands/reset.d.ts.map +1 -1
  53. package/dist/cli/commands/reset.js +44 -40
  54. package/dist/cli/commands/reset.js.map +1 -1
  55. package/dist/cli/commands/reset.test.js +42 -20
  56. package/dist/cli/commands/reset.test.js.map +1 -1
  57. package/dist/cli/commands/rework.d.ts.map +1 -1
  58. package/dist/cli/commands/rework.js +16 -12
  59. package/dist/cli/commands/rework.js.map +1 -1
  60. package/dist/cli/commands/rework.test.js +12 -3
  61. package/dist/cli/commands/rework.test.js.map +1 -1
  62. package/dist/cli/commands/run.d.ts.map +1 -1
  63. package/dist/cli/commands/run.js +318 -298
  64. package/dist/cli/commands/run.js.map +1 -1
  65. package/dist/cli/commands/run.test.js +92 -120
  66. package/dist/cli/commands/run.test.js.map +1 -1
  67. package/dist/cli/commands/skip.d.ts.map +1 -1
  68. package/dist/cli/commands/skip.js +19 -15
  69. package/dist/cli/commands/skip.js.map +1 -1
  70. package/dist/cli/commands/skip.test.js +22 -11
  71. package/dist/cli/commands/skip.test.js.map +1 -1
  72. package/dist/cli/commands/update.d.ts.map +1 -1
  73. package/dist/cli/commands/update.js +3 -1
  74. package/dist/cli/commands/update.js.map +1 -1
  75. package/dist/cli/commands/update.test.js +8 -4
  76. package/dist/cli/commands/update.test.js.map +1 -1
  77. package/dist/cli/commands/version.d.ts.map +1 -1
  78. package/dist/cli/commands/version.js +3 -1
  79. package/dist/cli/commands/version.js.map +1 -1
  80. package/dist/cli/commands/version.test.js +9 -5
  81. package/dist/cli/commands/version.test.js.map +1 -1
  82. package/dist/cli/index.d.ts.map +1 -1
  83. package/dist/cli/index.js +2 -0
  84. package/dist/cli/index.js.map +1 -1
  85. package/dist/cli/init-flag-families.d.ts +6 -1
  86. package/dist/cli/init-flag-families.d.ts.map +1 -1
  87. package/dist/cli/init-flag-families.js +32 -1
  88. package/dist/cli/init-flag-families.js.map +1 -1
  89. package/dist/cli/init-flag-families.test.js +47 -0
  90. package/dist/cli/init-flag-families.test.js.map +1 -1
  91. package/dist/cli/output/interactive.d.ts +1 -0
  92. package/dist/cli/output/interactive.d.ts.map +1 -1
  93. package/dist/cli/output/interactive.js +5 -0
  94. package/dist/cli/output/interactive.js.map +1 -1
  95. package/dist/cli/shutdown.d.ts +51 -0
  96. package/dist/cli/shutdown.d.ts.map +1 -0
  97. package/dist/cli/shutdown.js +199 -0
  98. package/dist/cli/shutdown.js.map +1 -0
  99. package/dist/cli/shutdown.test.d.ts +2 -0
  100. package/dist/cli/shutdown.test.d.ts.map +1 -0
  101. package/dist/cli/shutdown.test.js +316 -0
  102. package/dist/cli/shutdown.test.js.map +1 -0
  103. package/dist/config/schema.d.ts +272 -16
  104. package/dist/config/schema.d.ts.map +1 -1
  105. package/dist/config/schema.js +25 -1
  106. package/dist/config/schema.js.map +1 -1
  107. package/dist/config/schema.test.js +103 -3
  108. package/dist/config/schema.test.js.map +1 -1
  109. package/dist/core/assembly/overlay-loader.d.ts +12 -0
  110. package/dist/core/assembly/overlay-loader.d.ts.map +1 -1
  111. package/dist/core/assembly/overlay-loader.js +30 -0
  112. package/dist/core/assembly/overlay-loader.js.map +1 -1
  113. package/dist/core/assembly/overlay-loader.test.js +66 -1
  114. package/dist/core/assembly/overlay-loader.test.js.map +1 -1
  115. package/dist/core/assembly/overlay-state-resolver.d.ts.map +1 -1
  116. package/dist/core/assembly/overlay-state-resolver.js +48 -19
  117. package/dist/core/assembly/overlay-state-resolver.js.map +1 -1
  118. package/dist/core/assembly/overlay-state-resolver.test.js +80 -0
  119. package/dist/core/assembly/overlay-state-resolver.test.js.map +1 -1
  120. package/dist/e2e/init.test.js +5 -4
  121. package/dist/e2e/init.test.js.map +1 -1
  122. package/dist/e2e/project-type-overlays.test.js +119 -0
  123. package/dist/e2e/project-type-overlays.test.js.map +1 -1
  124. package/dist/project/adopt.d.ts.map +1 -1
  125. package/dist/project/adopt.js +3 -1
  126. package/dist/project/adopt.js.map +1 -1
  127. package/dist/project/detectors/disambiguate.js +1 -1
  128. package/dist/project/detectors/disambiguate.js.map +1 -1
  129. package/dist/project/detectors/index.d.ts.map +1 -1
  130. package/dist/project/detectors/index.js +2 -1
  131. package/dist/project/detectors/index.js.map +1 -1
  132. package/dist/project/detectors/ml.d.ts.map +1 -1
  133. package/dist/project/detectors/ml.js +2 -6
  134. package/dist/project/detectors/ml.js.map +1 -1
  135. package/dist/project/detectors/research.d.ts +4 -0
  136. package/dist/project/detectors/research.d.ts.map +1 -0
  137. package/dist/project/detectors/research.js +141 -0
  138. package/dist/project/detectors/research.js.map +1 -0
  139. package/dist/project/detectors/research.test.d.ts +2 -0
  140. package/dist/project/detectors/research.test.d.ts.map +1 -0
  141. package/dist/project/detectors/research.test.js +235 -0
  142. package/dist/project/detectors/research.test.js.map +1 -0
  143. package/dist/project/detectors/shared-signals.d.ts +3 -0
  144. package/dist/project/detectors/shared-signals.d.ts.map +1 -0
  145. package/dist/project/detectors/shared-signals.js +9 -0
  146. package/dist/project/detectors/shared-signals.js.map +1 -0
  147. package/dist/project/detectors/types.d.ts +6 -2
  148. package/dist/project/detectors/types.d.ts.map +1 -1
  149. package/dist/project/detectors/types.js.map +1 -1
  150. package/dist/state/lock-manager.d.ts +1 -0
  151. package/dist/state/lock-manager.d.ts.map +1 -1
  152. package/dist/state/lock-manager.js +1 -1
  153. package/dist/state/lock-manager.js.map +1 -1
  154. package/dist/types/config.d.ts +7 -1
  155. package/dist/types/config.d.ts.map +1 -1
  156. package/dist/wizard/copy/core.d.ts.map +1 -1
  157. package/dist/wizard/copy/core.js +4 -0
  158. package/dist/wizard/copy/core.js.map +1 -1
  159. package/dist/wizard/copy/index.d.ts.map +1 -1
  160. package/dist/wizard/copy/index.js +2 -0
  161. package/dist/wizard/copy/index.js.map +1 -1
  162. package/dist/wizard/copy/research.d.ts +3 -0
  163. package/dist/wizard/copy/research.d.ts.map +1 -0
  164. package/dist/wizard/copy/research.js +27 -0
  165. package/dist/wizard/copy/research.js.map +1 -0
  166. package/dist/wizard/copy/types.d.ts +5 -1
  167. package/dist/wizard/copy/types.d.ts.map +1 -1
  168. package/dist/wizard/flags.d.ts +7 -1
  169. package/dist/wizard/flags.d.ts.map +1 -1
  170. package/dist/wizard/questions.d.ts +4 -2
  171. package/dist/wizard/questions.d.ts.map +1 -1
  172. package/dist/wizard/questions.js +27 -1
  173. package/dist/wizard/questions.js.map +1 -1
  174. package/dist/wizard/questions.test.js +51 -0
  175. package/dist/wizard/questions.test.js.map +1 -1
  176. package/dist/wizard/wizard.d.ts +3 -2
  177. package/dist/wizard/wizard.d.ts.map +1 -1
  178. package/dist/wizard/wizard.js +3 -1
  179. package/dist/wizard/wizard.js.map +1 -1
  180. package/package.json +1 -1
package/README.md CHANGED
@@ -29,7 +29,7 @@ Either way, Scaffold constructs the prompt and the target AI tool does the work.
29
29
 
30
30
  **Assembly engine** — At execution time, Scaffold builds a 7-section prompt from: system metadata, the meta-prompt, knowledge base entries, project context (artifacts from prior steps), methodology settings, layered instructions, and depth-specific execution guidance.
31
31
 
32
- **Knowledge base** — 194 domain expertise entries in `content/knowledge/` organized in sixteen categories (core, product, review, validation, finalization, execution, tools, game, web-app, backend, cli, library, mobile-app, data-pipeline, ml, browser-extension) covering testing strategy, domain modeling, API design, security best practices, eval craft, TDD execution, task claiming, worktree management, release management, rendering strategies, data stores, CLI patterns, game engines, library bundling, mobile deployment, batch and streaming pipelines, model training and serving, browser extension manifests and service workers, and more. These get injected into prompts based on each step's `knowledge-base` frontmatter field. Knowledge files with a `## Deep Guidance` section are optimized for CLI assembly — only the deep guidance content is loaded, avoiding redundancy with the prompt text. Teams can add project-local overrides in `.scaffold/knowledge/` that layer on top of the global entries.
32
+ **Knowledge base** — 222 domain expertise entries in `content/knowledge/` organized in seventeen categories (core, product, review, validation, finalization, execution, tools, game, web-app, backend, cli, library, mobile-app, data-pipeline, ml, browser-extension, research) covering testing strategy, domain modeling, API design, security best practices, eval craft, TDD execution, task claiming, worktree management, release management, rendering strategies, data stores, CLI patterns, game engines, library bundling, mobile deployment, batch and streaming pipelines, model training and serving, browser extension manifests and service workers, and more. These get injected into prompts based on each step's `knowledge-base` frontmatter field. Knowledge files with a `## Deep Guidance` section are optimized for CLI assembly — only the deep guidance content is loaded, avoiding redundancy with the prompt text. Teams can add project-local overrides in `.scaffold/knowledge/` that layer on top of the global entries.
33
33
 
34
34
  **Methodology presets** — Three built-in presets control which steps run and how deep the analysis goes:
35
35
  - **deep** (depth 5) — all steps enabled, exhaustive analysis
@@ -40,7 +40,7 @@ Either way, Scaffold constructs the prompt and the target AI tool does the work.
40
40
 
41
41
  **Multi-model validation** — At depth 4-5, all 19 review and validation steps can dispatch independent reviews to Codex and/or Gemini CLIs. Two independent models catch more blind spots than one. When both CLIs are available, findings are reconciled by confidence level (both agree = high confidence, single model P0 = still actionable). When a channel is unavailable, a compensating Claude self-review pass runs in its place (labeled `[compensating: Codex-equivalent]` or `[compensating: Gemini-equivalent]`, single-source confidence). CLI commands must always run in the foreground — background execution produces empty output. See the [Multi-Model Review](#multi-model-review) section.
42
42
 
43
- **State management** — Pipeline progress is tracked in `.scaffold/state.json` with atomic file writes and crash recovery. An advisory lock prevents concurrent runs. Decisions are logged to an append-only `decisions.jsonl`.
43
+ **State management** — Pipeline progress is tracked in `.scaffold/state.json` with atomic file writes and crash recovery. An advisory lock prevents concurrent runs. Decisions are logged to an append-only `decisions.jsonl`. Pressing Ctrl+C during any command exits cleanly with an informative message — no stack traces, no orphaned locks, no corrupted state.
44
44
 
45
45
  **Dependency graph** — Steps declare their prerequisites in frontmatter. Scaffold builds a DAG, runs topological sort (Kahn's algorithm), detects cycles, and computes which steps are eligible at any point.
46
46
 
@@ -368,7 +368,7 @@ Every `scaffold init` wizard question can be answered via CLI flags, making scaf
368
368
  | `--depth` | 1-5 | Custom methodology depth (requires `--methodology custom`) |
369
369
  | `--adapters` | comma-sep | AI adapters: claude-code, codex, gemini |
370
370
  | `--traits` | comma-sep | Project traits: web, mobile |
371
- | `--project-type` | string | web-app, mobile-app, backend, cli, library, game, data-pipeline, ml, browser-extension |
371
+ | `--project-type` | string | web-app, mobile-app, backend, cli, library, game, data-pipeline, ml, browser-extension, research |
372
372
  | `--auto` | boolean | Non-interactive mode (uses Zod defaults for unset flags) |
373
373
 
374
374
  #### Web-App Config Flags (require `--project-type web-app` or auto-set it)
@@ -445,6 +445,15 @@ Every `scaffold init` wizard question can be answered via CLI flags, making scaf
445
445
  | `--ext-content-script` | boolean | `--ext-content-script` / `--no-ext-content-script` |
446
446
  | `--ext-background-worker` | boolean | `--ext-background-worker` / `--no-ext-background-worker` |
447
447
 
448
+ #### Research Config Flags (require `--project-type research` or auto-set it)
449
+
450
+ | Flag | Type | Values |
451
+ |------|------|--------|
452
+ | `--research-driver` | string | code-driven, config-driven, api-driven, notebook-driven |
453
+ | `--research-interaction` | string | autonomous, checkpoint-gated, human-guided |
454
+ | `--research-domain` | string | none, quant-finance, ml-research, simulation |
455
+ | `--research-tracking` | boolean | `--research-tracking` / `--no-research-tracking` |
456
+
448
457
  #### Game Config Flags (require `--project-type game` or auto-set it)
449
458
 
450
459
  | Flag | Type | Values |
@@ -467,9 +476,9 @@ Every `scaffold init` wizard question can be answered via CLI flags, making scaf
467
476
 
468
477
  - **Flag > auto > interactive**: Flags always take highest precedence. `--auto --engine unreal` uses defaults for everything except engine.
469
478
  - **Partial flags + interactive**: Provide some flags and the wizard asks only the remaining questions. `scaffold init --project-type game --engine unreal` prompts interactively for multiplayer, platforms, etc.
470
- - **Type-specific flags auto-set project type**: `--engine unity` automatically sets `--project-type game`, `--web-rendering ssr` sets `--project-type web-app`, `--backend-api-style rest` sets `--project-type backend`, `--cli-interactivity hybrid` sets `--project-type cli`, `--lib-visibility public` sets `--project-type library`, `--mobile-platform ios` sets `--project-type mobile-app`, `--pipeline-processing batch` sets `--project-type data-pipeline`, `--ml-phase training` sets `--project-type ml`, `--ext-manifest 3` sets `--project-type browser-extension`. Error if conflicting type.
471
- - **Cannot mix flag families**: `--web-rendering ssr --backend-api-style rest` is an error. Each flag family (`--web-*`, `--backend-*`, `--cli-*`, `--lib-*`, `--mobile-*`, `--pipeline-*`, `--ml-*`, `--ext-*`, game) is exclusive.
472
- - **Validation**: `--depth` requires `--methodology custom`. `--online-services` requires `--multiplayer online` or `hybrid`. SSR/hybrid rendering is incompatible with static deploy target. Session auth requires server state (not static). ML inference projects must specify a serving pattern. Browser extensions must declare at least one capability (UI surface, content script, or background worker).
479
+ - **Type-specific flags auto-set project type**: `--engine unity` automatically sets `--project-type game`, `--web-rendering ssr` sets `--project-type web-app`, `--backend-api-style rest` sets `--project-type backend`, `--cli-interactivity hybrid` sets `--project-type cli`, `--lib-visibility public` sets `--project-type library`, `--mobile-platform ios` sets `--project-type mobile-app`, `--pipeline-processing batch` sets `--project-type data-pipeline`, `--ml-phase training` sets `--project-type ml`, `--ext-manifest 3` sets `--project-type browser-extension`, `--research-driver code-driven` sets `--project-type research`. Error if conflicting type.
480
+ - **Cannot mix flag families**: `--web-rendering ssr --backend-api-style rest` is an error. Each flag family (`--web-*`, `--backend-*`, `--cli-*`, `--lib-*`, `--mobile-*`, `--pipeline-*`, `--ml-*`, `--research-*`, `--ext-*`, game) is exclusive.
481
+ - **Validation**: `--depth` requires `--methodology custom`. `--online-services` requires `--multiplayer online` or `hybrid`. SSR/hybrid rendering is incompatible with static deploy target. Session auth requires server state (not static). ML inference projects must specify a serving pattern. Browser extensions must declare at least one capability (UI surface, content script, or background worker). Notebook-driven research cannot be fully autonomous.
473
482
 
474
483
  #### CI Examples
475
484
 
@@ -550,6 +559,16 @@ scaffold init --auto --methodology mvp --project-type browser-extension \
550
559
  --ext-manifest 3 --ext-ui-surfaces devtools \
551
560
  --no-ext-content-script
552
561
 
562
+ # Autonomous quant-finance research (trading strategy optimization)
563
+ scaffold init --auto --methodology deep --project-type research \
564
+ --research-driver code-driven --research-interaction autonomous \
565
+ --research-domain quant-finance
566
+
567
+ # Checkpoint-gated ML architecture search
568
+ scaffold init --auto --methodology deep --project-type research \
569
+ --research-driver config-driven --research-interaction checkpoint-gated \
570
+ --research-domain ml-research
571
+
553
572
  # Multiplayer mobile game with Unity
554
573
  scaffold init --project-type game --methodology deep --auto \
555
574
  --engine unity --multiplayer online --target-platforms ios,android \
@@ -576,7 +595,7 @@ Scaffold supports **project-type overlays** — domain-specific knowledge and pi
576
595
 
577
596
  - **Injects domain knowledge** into existing pipeline steps (e.g., SSR caching strategies into `tech-stack`, API pagination patterns into `coding-standards`)
578
597
 
579
- The game overlay additionally adjusts step enablement, remaps artifact references, and adds dependency overrides (because game development has fundamentally different artifacts). The web-app, backend, CLI, library, mobile-app, data-pipeline, ML, and browser-extension overlays are **knowledge-only** — they inject domain expertise into existing steps without changing which steps run or how they depend on each other.
598
+ The game overlay additionally adjusts step enablement, remaps artifact references, and adds dependency overrides (because game development has fundamentally different artifacts). The web-app, backend, CLI, library, mobile-app, data-pipeline, ML, browser-extension, and research overlays are **knowledge-only** — they inject domain expertise into existing steps without changing which steps run or how they depend on each other. The research type additionally supports **domain sub-overlays** (quant-finance, ml-research, simulation) that layer domain-specific knowledge on top of the core research overlay.
580
599
 
581
600
  Overlays are composable with methodology presets. An MVP web-app gets fewer steps at lower depth; a deep backend project gets exhaustive analysis of every architectural decision.
582
601
 
@@ -590,6 +609,7 @@ Overlays are composable with methodology presets. An MVP web-app gets fewer step
590
609
  | `data-pipeline` | `data-pipeline-overlay.yml` | 12 entries (architecture, batch and streaming patterns, orchestration, schema management, quality, testing, security) | Processing model, orchestration, data quality strategy, schema management, data catalog |
591
610
  | `ml` | `ml-overlay.yml` | 12 entries (architecture, training and serving patterns, experiment tracking, model evaluation, observability, testing, security) | Project phase, model type, serving pattern, experiment tracking |
592
611
  | `browser-extension` | `browser-extension-overlay.yml` | 12 entries (architecture, manifest configuration, service workers, content scripts, cross-browser, store submission, testing, security) | Manifest version, UI surfaces, content script, background worker |
612
+ | `research` | `research-overlay.yml` + domain sub-overlays | 25 entries (experiment loop, tracking, overfitting prevention, backtesting, risk metrics, architecture search, simulation) | Experiment driver, interaction mode, domain, experiment tracking |
593
613
  | `game` | `game-overlay.yml` | 24 entries (engines, networking, audio, VR/AR, economy, save systems, certification) | Engine, multiplayer, platforms, economy, narrative, and 6 more |
594
614
 
595
615
  ### Game Development
@@ -675,7 +695,7 @@ These answers control which conditional steps activate. A single-player puzzle g
675
695
 
676
696
  #### Multi-type Detection
677
697
 
678
- `scaffold adopt` detects 9 project types from manifest files and directory layouts:
698
+ `scaffold adopt` detects 10 project types from manifest files and directory layouts:
679
699
 
680
700
  | Type | Key Signals |
681
701
  |------|-------------|
@@ -688,6 +708,7 @@ These answers control which conditional steps activate. A single-player puzzle g
688
708
  | `data-pipeline` | `dags/` dir, Airflow/Prefect/Dagster deps, Spark configs |
689
709
  | `ml` | `training/`/`models/` dirs, PyTorch/TensorFlow deps, MLflow configs |
690
710
  | `browser-extension` | `manifest.json` with `manifest_version` field |
711
+ | `research` | `program.md` + `results.tsv`, backtest/strategy files with trading deps, optimization deps + experiment dirs, simulation framework deps |
691
712
 
692
713
  Each detector returns a confidence tier (high/medium/low) with evidence trails. Override detection with `--project-type <type>`.
693
714
 
@@ -1316,7 +1337,7 @@ scaffold dashboard
1316
1337
 
1317
1338
  ## Knowledge System
1318
1339
 
1319
- Scaffold ships with 194 domain expertise entries organized in sixteen categories:
1340
+ Scaffold ships with 222 domain expertise entries organized in seventeen categories:
1320
1341
 
1321
1342
  - **core/** (26 entries) — eval craft, testing strategy, domain modeling, API design, database design, system architecture, ADR craft, security best practices, operations, task decomposition, user stories, UX specification, design system tokens, user story innovation, AI memory management, coding conventions, tech stack selection, project structure patterns, task tracking, CLAUDE.md patterns, multi-model review dispatch, review step template, dev environment, git workflow patterns, automated review tooling, vision craft
1322
1343
  - **product/** (5 entries) — PRD craft, PRD innovation, gap analysis, vision craft, vision innovation
@@ -1334,6 +1355,7 @@ Scaffold ships with 194 domain expertise entries organized in sixteen categories
1334
1355
  - **data-pipeline/** (12 entries) — batch/streaming/hybrid patterns, orchestration (DAG/event-driven/scheduled), data quality, schema management, lineage, pipeline testing
1335
1356
  - **ml/** (12 entries) — training and inference patterns, model types (classical/deep-learning/llm), serving patterns, experiment tracking, model evaluation, MLOps observability
1336
1357
  - **browser-extension/** (12 entries) — Manifest V3, content scripts, service workers, cross-browser compatibility, extension security, store submission
1358
+ - **research/** (25 entries) — experiment loop architecture, parameter optimization, overfitting prevention, experiment tracking, security/sandboxing; domain knowledge for quant-finance (backtesting, risk metrics, market data, strategy patterns), ML-research (architecture search, ablation studies, evaluation), and simulation (engine integration, parameter spaces, compute management)
1337
1359
 
1338
1360
  Each pipeline step declares which knowledge entries it needs in its frontmatter. The assembly engine injects them automatically. Knowledge files with a `## Deep Guidance` section are optimized for the CLI — only the deep guidance content is loaded into the assembled prompt, skipping the summary to avoid redundancy with the prompt text.
1339
1361
 
@@ -1540,7 +1562,7 @@ All build inputs live under `content/`:
1540
1562
  content/
1541
1563
  ├── pipeline/ # 60 meta-prompts organized by 16 phases (phases 0-15, including build)
1542
1564
  ├── tools/ # 10 tool meta-prompts (stateless, category: tool)
1543
- ├── knowledge/ # 194 domain expertise entries (core, product, review, validation, finalization, execution, tools, game, web-app, backend, cli, library, mobile-app, data-pipeline, ml, browser-extension)
1565
+ ├── knowledge/ # 222 domain expertise entries (core, product, review, validation, finalization, execution, tools, game, web-app, backend, cli, library, mobile-app, data-pipeline, ml, browser-extension, research)
1544
1566
  ├── methodology/ # 3 YAML presets (deep, mvp, custom)
1545
1567
  └── skills/ # Skill templates with {{markers}} for multi-platform resolution (includes mmr)
1546
1568
  ```
@@ -0,0 +1,385 @@
1
+ ---
2
+ name: research-architecture
3
+ description: Experiment runner architecture including pluggable experiment and evaluation interfaces, state management patterns, and result persistence
4
+ topics: [research, architecture, experiment-runner, state-management, interfaces, persistence]
5
+ ---
6
+
7
+ The experiment runner is the central architectural component of a research project. It orchestrates the loop of loading configuration, executing experiments, evaluating results, and deciding whether to keep or discard each run. The runner must be completely decoupled from the specific experiment logic (strategies, models, parameter spaces) so that it can drive any experiment without modification. This separation is what makes autonomous iteration possible -- the agent modifies experiment code while the runner infrastructure remains stable.
8
+
9
+ ## Summary
10
+
11
+ Build the experiment runner around three pluggable interfaces: Strategy (executes an experiment given config), Evaluator (computes metrics from raw results), and Tracker (records results for comparison). Use a state manager to track the current best result, iteration history, and budget consumption. Persist all state to disk so that the runner can resume after crashes. The runner never imports specific strategy code -- it discovers strategies via a registry or config-specified entry point.
12
+
13
+ ## Deep Guidance
14
+
15
+ ### Core Architecture
16
+
17
+ ```
18
+ ┌──────────────────────┐
19
+ │ ExperimentRunner │
20
+ │ ┌────────────────┐ │
21
+ Config ──────────►│ │ State Manager │ │
22
+ │ │ (best, history)│ │
23
+ │ └───────┬────────┘ │
24
+ │ │ │
25
+ │ ┌───────▼────────┐ │
26
+ │ │ Budget Checker │ │
27
+ │ └───────┬────────┘ │
28
+ │ │ │
29
+ │ ┌───────▼────────┐ │
30
+ │ │ Strategy │◄─┼── Registry lookup
31
+ │ │ (pluggable) │ │
32
+ │ └───────┬────────┘ │
33
+ │ │ │
34
+ │ ┌───────▼────────┐ │
35
+ │ │ Evaluator │ │
36
+ │ │ (pluggable) │ │
37
+ │ └───────┬────────┘ │
38
+ │ │ │
39
+ │ ┌───────▼────────┐ │
40
+ │ │ Tracker │ │
41
+ │ │ (pluggable) │ │
42
+ │ └────────────────┘ │
43
+ └──────────────────────┘
44
+ ```
45
+
46
+ ### Pluggable Interface Design
47
+
48
+ The three core interfaces use Python's Protocol type for structural subtyping. This means strategies do not need to inherit from a base class -- they only need to implement the required methods:
49
+
50
+ ```python
51
+ # src/interfaces.py
52
+ from typing import Protocol, Any, runtime_checkable
53
+
54
+ @runtime_checkable
55
+ class Strategy(Protocol):
56
+ """Interface for experiment execution strategies."""
57
+
58
+ @property
59
+ def name(self) -> str:
60
+ """Unique identifier for this strategy."""
61
+ ...
62
+
63
+ def execute(self, config: dict[str, Any]) -> dict[str, Any]:
64
+ """
65
+ Execute the experiment and return raw results.
66
+
67
+ Args:
68
+ config: Experiment configuration dict.
69
+
70
+ Returns:
71
+ Raw results dict. Structure is strategy-specific but must
72
+ contain enough information for the Evaluator to compute metrics.
73
+ """
74
+ ...
75
+
76
+ @runtime_checkable
77
+ class Evaluator(Protocol):
78
+ """Interface for result evaluation."""
79
+
80
+ def evaluate(self, raw_results: dict[str, Any]) -> dict[str, float]:
81
+ """
82
+ Compute metrics from raw experiment results.
83
+
84
+ Args:
85
+ raw_results: Output from Strategy.execute().
86
+
87
+ Returns:
88
+ Dict mapping metric names to float values.
89
+ """
90
+ ...
91
+
92
+ def is_improvement(self, current: dict[str, float],
93
+ best: dict[str, float]) -> bool:
94
+ """
95
+ Determine if current results improve on the best so far.
96
+
97
+ Args:
98
+ current: Metrics from the current run.
99
+ best: Metrics from the best run so far.
100
+
101
+ Returns:
102
+ True if current should replace best.
103
+ """
104
+ ...
105
+
106
+ @runtime_checkable
107
+ class Tracker(Protocol):
108
+ """Interface for experiment result tracking."""
109
+
110
+ def log_run(self, run_id: str, config: dict, metrics: dict[str, float],
111
+ artifacts: dict[str, Any] | None = None) -> None:
112
+ """Record a single experiment run."""
113
+ ...
114
+
115
+ def get_history(self) -> list[dict]:
116
+ """Return all recorded runs."""
117
+ ...
118
+ ```
119
+
120
+ ### Strategy Registry
121
+
122
+ The registry pattern allows the runner to instantiate strategies by name without importing them directly:
123
+
124
+ ```python
125
+ # src/strategies/registry.py
126
+ from typing import Type
127
+ from src.interfaces import Strategy
128
+
129
class StrategyRegistry:
    """Name-to-class lookup table for experiment strategies.

    The table lives on the class itself, so every caller shares one registry.
    """

    _registry: dict[str, Type[Strategy]] = {}

    @classmethod
    def register(cls, name: str):
        """Return a class decorator that files the decorated class under ``name``.

        Raises:
            ValueError: (when the decorator is applied) if ``name`` is taken.
        """
        def _bind(strategy_cls: Type[Strategy]):
            table = cls._registry
            if name in table:
                raise ValueError(f"Strategy '{name}' already registered")
            table[name] = strategy_cls
            # Hand the class back untouched so the decorator is transparent.
            return strategy_cls
        return _bind

    @classmethod
    def get(cls, name: str) -> Type[Strategy]:
        """Fetch the strategy class registered under ``name``.

        Raises:
            KeyError: if nothing is registered under ``name``; the message
                lists the names that are available.
        """
        table = cls._registry
        if name in table:
            return table[name]
        available = ", ".join(sorted(table))
        raise KeyError(
            f"Strategy '{name}' not found. Available: {available}"
        )

    @classmethod
    def list_strategies(cls) -> list[str]:
        """Return all registered strategy names in sorted order."""
        return sorted(cls._registry)
157
+
158
+
159
+ # Usage in a strategy file:
160
+ # src/strategies/momentum.py
161
+ from src.strategies.registry import StrategyRegistry
162
+
163
@StrategyRegistry.register("momentum_crossover")
class MomentumCrossover:
    """Example strategy, registered under the name "momentum_crossover".

    It does not inherit from anything; it exposes ``name`` and ``execute``
    and is added to the registry by the decorator above.
    """
    name = "momentum_crossover"

    def __init__(self, lookback: int = 20, **kwargs):
        # Any extra keyword arguments are accepted and ignored.
        self.lookback = lookback

    def execute(self, config: dict) -> dict:
        # ... run the momentum crossover strategy ...
        # NOTE: `trades` and `equity` are placeholders for values produced by
        # the elided strategy logic; they are not defined in this snippet.
        return {"trades": trades, "equity_curve": equity}
173
+ ```
174
+
175
+ ### State Management
176
+
177
+ The state manager tracks the experiment loop's progress and enables resume-after-crash:
178
+
179
+ ```python
180
+ # src/runner/state.py
181
+ import json
182
+ from pathlib import Path
183
+ from dataclasses import dataclass, field, asdict
184
+ from typing import Any
185
+
186
@dataclass
class RunRecord:
    """Everything remembered about one experiment run.

    Attributes:
        run_id: Identifier of the run.
        config: Configuration the run was executed with.
        metrics: Metric name -> value mapping for the run.
        is_best: Whether the run was flagged as the best so far.
        decision: "keep" or "discard".
        reason: Free-text explanation for the decision.
    """
    # Required fields.
    run_id: str
    config: dict[str, Any]
    metrics: dict[str, float]
    # Optional fields, filled in once the run has been judged.
    is_best: bool = False
    decision: str = ""  # "keep" or "discard"
    reason: str = ""
195
+
196
@dataclass
class ExperimentState:
    """Persistent state for the experiment loop.

    Attributes:
        experiment_id: Identifier of the experiment this state belongs to.
        total_runs: Number of runs recorded so far.
        best_run: The most recent run recorded with ``is_best`` set, if any.
        history: Every recorded run, in the order it was recorded.
        runs_since_improvement: Runs recorded since ``best_run`` last changed.
    """
    experiment_id: str
    total_runs: int = 0
    best_run: RunRecord | None = None
    history: list[RunRecord] = field(default_factory=list)
    runs_since_improvement: int = 0

    def record_run(self, run: RunRecord) -> None:
        """Record a completed run and update the counters.

        A run flagged ``is_best`` becomes ``best_run`` and resets
        ``runs_since_improvement``; any other run increments it.
        """
        self.total_runs += 1
        self.history.append(run)

        if run.is_best:
            self.best_run = run
            self.runs_since_improvement = 0
        else:
            self.runs_since_improvement += 1

    def save(self, path: Path) -> None:
        """Persist state to disk for crash recovery.

        The JSON is written to a sibling temporary file which is then moved
        over ``path``. A crash in the middle of the write therefore leaves
        the previous state file intact instead of a truncated one that
        ``load`` could not parse.
        """
        path.parent.mkdir(parents=True, exist_ok=True)
        tmp_path = path.with_name(path.name + ".tmp")
        with open(tmp_path, "w", encoding="utf-8") as f:
            # default=str stringifies any value json cannot encode natively.
            json.dump(asdict(self), f, indent=2, default=str)
        # Same-directory rename: replaces the old file in a single step.
        tmp_path.replace(path)

    @classmethod
    def load(cls, path: Path) -> "ExperimentState":
        """Load state from disk.

        Returns:
            The saved state, or an empty state with ``experiment_id`` set to
            "unknown" when ``path`` does not exist.
        """
        if not path.exists():
            return cls(experiment_id="unknown")
        with open(path, encoding="utf-8") as f:
            data = json.load(f)
        state = cls(experiment_id=data["experiment_id"])
        state.total_runs = data["total_runs"]
        state.runs_since_improvement = data["runs_since_improvement"]
        # asdict() flattened the records to plain dicts; rebuild them.
        state.history = [RunRecord(**r) for r in data["history"]]
        if data["best_run"]:
            state.best_run = RunRecord(**data["best_run"])
        return state
236
+ ```
237
+
238
+ ### The Experiment Runner
239
+
240
+ The runner ties the interfaces together:
241
+
242
+ ```python
243
+ # src/runner/experiment_runner.py
244
+ import logging
245
+ from pathlib import Path
246
+ from src.interfaces import Strategy, Evaluator, Tracker
247
+ from src.runner.state import ExperimentState, RunRecord
248
+ from src.runner.budget import IterationBudget
249
+ from src.config import load_config
250
+ from src.seed import set_seed, capture_environment
251
+ from src.strategies.registry import StrategyRegistry
252
+
253
+ logger = logging.getLogger(__name__)
254
+
255
class ExperimentRunner:
    """Drive the experiment loop: execute, evaluate, record, repeat.

    The strategy is looked up by name in the StrategyRegistry, and progress
    is persisted after every run so an interrupted experiment can resume
    from its saved state.
    """

    def __init__(self, config_path: str):
        """Build the runner from a config file.

        Args:
            config_path: Path to the experiment config. Its file stem is
                used as the experiment id.
        """
        self.config = load_config(config_path)
        self.experiment_id = Path(config_path).stem
        self.results_dir = Path(self.config["logging"]["results_dir"]) / self.experiment_id

        # Load pluggable components.
        # NOTE(review): _build_evaluator / _build_tracker are not shown in
        # this snippet and must be provided elsewhere on this class.
        strategy_cls = StrategyRegistry.get(self.config["strategy"]["type"])
        self.strategy: Strategy = strategy_cls(**self.config["strategy"].get("params", {}))
        self.evaluator: Evaluator = self._build_evaluator()
        self.tracker: Tracker = self._build_tracker()
        self.budget = IterationBudget(**self.config.get("budget", {}))

        # Load or initialize state
        self.state_path = self.results_dir / "state.json"
        self.state = ExperimentState.load(self.state_path)
        self.state.experiment_id = self.experiment_id

    def run_loop(self) -> ExperimentState:
        """Run the full experiment loop until the budget reports exhaustion.

        Returns:
            The final experiment state.
        """
        logger.info("Starting experiment %s (resuming from run %d)",
                    self.experiment_id, self.state.total_runs)

        while True:
            # Check budget
            exhausted, reason = self.budget.is_exhausted(
                runs=self.state.total_runs,
                runs_since_improvement=self.state.runs_since_improvement,
            )
            if exhausted:
                logger.info("Stopping: %s", reason)
                break

            # Execute one iteration. The seed is offset by the run count so
            # each run gets a distinct seed that is reproducible on resume.
            run_id = f"run-{self.state.total_runs + 1:04d}"
            set_seed(self.config["experiment"]["seed"] + self.state.total_runs)

            try:
                raw_results = self.strategy.execute(self.config)
                metrics = self.evaluator.evaluate(raw_results)
            except Exception as e:
                logger.exception("Run %s failed: %s", run_id, e)
                # A failed run must still advance the state. Otherwise the
                # run count, the seed and the budget check are identical on
                # the next pass and a deterministic failure loops forever.
                failed_run = RunRecord(
                    run_id=run_id,
                    config=self.config,
                    metrics={},
                    is_best=False,
                    decision="discard",
                    reason=f"Run failed: {e}",
                )
                self.state.record_run(failed_run)
                self.state.save(self.state_path)
                continue

            # Evaluate improvement: the first successful run is always best.
            is_best = (
                self.state.best_run is None
                or self.evaluator.is_improvement(metrics, self.state.best_run.metrics)
            )
            decision = "keep" if is_best else "discard"

            run = RunRecord(
                run_id=run_id,
                config=self.config,
                metrics=metrics,
                is_best=is_best,
                decision=decision,
                reason="New best" if is_best else "No improvement",
            )

            # Record and persist
            self.state.record_run(run)
            self.tracker.log_run(run_id, self.config, metrics)
            self.state.save(self.state_path)

            logger.info(
                "Run %s: %s (metrics: %s)",
                run_id, decision,
                {k: f"{v:.4f}" for k, v in metrics.items()},
            )

        return self.state
327
+ ```
328
+
329
+ ### Result Persistence
330
+
331
+ Results are persisted at two levels:
332
+
333
+ 1. **Per-run**: Each run's config, metrics, and artifacts are saved to `results/{experiment_id}/{run_id}/`.
334
+ 2. **Experiment state**: The full experiment state (history, best run, budget consumption) is saved to `results/{experiment_id}/state.json` after every run.
335
+
336
+ ```python
337
+ # src/tracking/file_tracker.py
338
+ import json
339
+ from pathlib import Path
340
+ from src.interfaces import Tracker
341
+
342
+ class FileTracker:
343
+ """Simple file-based experiment tracker."""
344
+
345
+ def __init__(self, results_dir: str):
346
+ self.results_dir = Path(results_dir)
347
+ self.results_dir.mkdir(parents=True, exist_ok=True)
348
+
349
+ def log_run(self, run_id: str, config: dict, metrics: dict[str, float],
350
+ artifacts: dict | None = None) -> None:
351
+ run_dir = self.results_dir / run_id
352
+ run_dir.mkdir(parents=True, exist_ok=True)
353
+
354
+ with open(run_dir / "config.json", "w") as f:
355
+ json.dump(config, f, indent=2, default=str)
356
+ with open(run_dir / "metrics.json", "w") as f:
357
+ json.dump(metrics, f, indent=2)
358
+
359
+ if artifacts:
360
+ artifact_dir = run_dir / "artifacts"
361
+ artifact_dir.mkdir(exist_ok=True)
362
+ for name, data in artifacts.items():
363
+ with open(artifact_dir / name, "w") as f:
364
+ json.dump(data, f, indent=2, default=str)
365
+
366
+ def get_history(self) -> list[dict]:
367
+ runs = []
368
+ for run_dir in sorted(self.results_dir.iterdir()):
369
+ if run_dir.is_dir() and (run_dir / "metrics.json").exists():
370
+ with open(run_dir / "metrics.json") as f:
371
+ metrics = json.load(f)
372
+ runs.append({"run_id": run_dir.name, "metrics": metrics})
373
+ return runs
374
+ ```
375
+
376
+ ### Architecture Decision: When to Use Each Driver
377
+
378
+ | Driver | Architecture Pattern | Use When |
379
+ |--------|---------------------|----------|
380
+ | Code-driven | Git state machine, agent modifies source | Exploring algorithmic variations, strategy development |
381
+ | Config-driven | Fixed runner, parameterised configs | Hyperparameter sweeps, systematic parameter search |
382
+ | API-driven | Client wrapper, parameter serialization | External backtest engines, cloud simulation APIs |
383
+ | Notebook-driven | Papermill execution, cell-level tracking | Exploratory research, visualization-heavy analysis |
384
+
385
+ The runner architecture remains the same across all drivers. What changes is the Strategy implementation: code-driven strategies contain the algorithm directly, config-driven strategies delegate to a parameterised engine, API-driven strategies wrap HTTP calls, and notebook-driven strategies use papermill to execute notebooks.