@zigrivers/scaffold 3.13.0 → 3.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +32 -10
- package/content/knowledge/research/research-architecture.md +385 -0
- package/content/knowledge/research/research-conventions.md +248 -0
- package/content/knowledge/research/research-dev-environment.md +303 -0
- package/content/knowledge/research/research-experiment-loop.md +429 -0
- package/content/knowledge/research/research-experiment-tracking.md +336 -0
- package/content/knowledge/research/research-ml-architecture-search.md +383 -0
- package/content/knowledge/research/research-ml-evaluation.md +407 -0
- package/content/knowledge/research/research-ml-experiment-tracking.md +466 -0
- package/content/knowledge/research/research-ml-training-patterns.md +413 -0
- package/content/knowledge/research/research-observability.md +395 -0
- package/content/knowledge/research/research-overfitting-prevention.md +306 -0
- package/content/knowledge/research/research-project-structure.md +264 -0
- package/content/knowledge/research/research-quant-backtesting.md +326 -0
- package/content/knowledge/research/research-quant-market-data.md +366 -0
- package/content/knowledge/research/research-quant-metrics.md +335 -0
- package/content/knowledge/research/research-quant-requirements.md +223 -0
- package/content/knowledge/research/research-quant-risk.md +469 -0
- package/content/knowledge/research/research-quant-strategy-patterns.md +412 -0
- package/content/knowledge/research/research-requirements.md +201 -0
- package/content/knowledge/research/research-security.md +374 -0
- package/content/knowledge/research/research-sim-compute-management.md +538 -0
- package/content/knowledge/research/research-sim-engine-patterns.md +448 -0
- package/content/knowledge/research/research-sim-parameter-spaces.md +425 -0
- package/content/knowledge/research/research-sim-validation.md +456 -0
- package/content/knowledge/research/research-testing.md +334 -0
- package/content/methodology/research-ml-research.yml +23 -0
- package/content/methodology/research-overlay.yml +65 -0
- package/content/methodology/research-quant-finance.yml +29 -0
- package/content/methodology/research-simulation.yml +23 -0
- package/dist/cli/commands/adopt.d.ts.map +1 -1
- package/dist/cli/commands/adopt.js +30 -8
- package/dist/cli/commands/adopt.js.map +1 -1
- package/dist/cli/commands/adopt.serialization.test.js +49 -0
- package/dist/cli/commands/adopt.serialization.test.js.map +1 -1
- package/dist/cli/commands/adopt.test.js +8 -0
- package/dist/cli/commands/adopt.test.js.map +1 -1
- package/dist/cli/commands/build.d.ts.map +1 -1
- package/dist/cli/commands/build.js +191 -180
- package/dist/cli/commands/build.js.map +1 -1
- package/dist/cli/commands/complete.d.ts.map +1 -1
- package/dist/cli/commands/complete.js +16 -12
- package/dist/cli/commands/complete.js.map +1 -1
- package/dist/cli/commands/complete.test.js +14 -5
- package/dist/cli/commands/complete.test.js.map +1 -1
- package/dist/cli/commands/init.d.ts +4 -0
- package/dist/cli/commands/init.d.ts.map +1 -1
- package/dist/cli/commands/init.js +75 -51
- package/dist/cli/commands/init.js.map +1 -1
- package/dist/cli/commands/init.test.js +33 -27
- package/dist/cli/commands/init.test.js.map +1 -1
- package/dist/cli/commands/reset.d.ts.map +1 -1
- package/dist/cli/commands/reset.js +44 -40
- package/dist/cli/commands/reset.js.map +1 -1
- package/dist/cli/commands/reset.test.js +42 -20
- package/dist/cli/commands/reset.test.js.map +1 -1
- package/dist/cli/commands/rework.d.ts.map +1 -1
- package/dist/cli/commands/rework.js +16 -12
- package/dist/cli/commands/rework.js.map +1 -1
- package/dist/cli/commands/rework.test.js +12 -3
- package/dist/cli/commands/rework.test.js.map +1 -1
- package/dist/cli/commands/run.d.ts.map +1 -1
- package/dist/cli/commands/run.js +318 -298
- package/dist/cli/commands/run.js.map +1 -1
- package/dist/cli/commands/run.test.js +92 -120
- package/dist/cli/commands/run.test.js.map +1 -1
- package/dist/cli/commands/skip.d.ts.map +1 -1
- package/dist/cli/commands/skip.js +19 -15
- package/dist/cli/commands/skip.js.map +1 -1
- package/dist/cli/commands/skip.test.js +22 -11
- package/dist/cli/commands/skip.test.js.map +1 -1
- package/dist/cli/commands/update.d.ts.map +1 -1
- package/dist/cli/commands/update.js +3 -1
- package/dist/cli/commands/update.js.map +1 -1
- package/dist/cli/commands/update.test.js +8 -4
- package/dist/cli/commands/update.test.js.map +1 -1
- package/dist/cli/commands/version.d.ts.map +1 -1
- package/dist/cli/commands/version.js +3 -1
- package/dist/cli/commands/version.js.map +1 -1
- package/dist/cli/commands/version.test.js +9 -5
- package/dist/cli/commands/version.test.js.map +1 -1
- package/dist/cli/index.d.ts.map +1 -1
- package/dist/cli/index.js +2 -0
- package/dist/cli/index.js.map +1 -1
- package/dist/cli/init-flag-families.d.ts +6 -1
- package/dist/cli/init-flag-families.d.ts.map +1 -1
- package/dist/cli/init-flag-families.js +32 -1
- package/dist/cli/init-flag-families.js.map +1 -1
- package/dist/cli/init-flag-families.test.js +47 -0
- package/dist/cli/init-flag-families.test.js.map +1 -1
- package/dist/cli/output/interactive.d.ts +1 -0
- package/dist/cli/output/interactive.d.ts.map +1 -1
- package/dist/cli/output/interactive.js +5 -0
- package/dist/cli/output/interactive.js.map +1 -1
- package/dist/cli/shutdown.d.ts +51 -0
- package/dist/cli/shutdown.d.ts.map +1 -0
- package/dist/cli/shutdown.js +199 -0
- package/dist/cli/shutdown.js.map +1 -0
- package/dist/cli/shutdown.test.d.ts +2 -0
- package/dist/cli/shutdown.test.d.ts.map +1 -0
- package/dist/cli/shutdown.test.js +316 -0
- package/dist/cli/shutdown.test.js.map +1 -0
- package/dist/config/schema.d.ts +272 -16
- package/dist/config/schema.d.ts.map +1 -1
- package/dist/config/schema.js +25 -1
- package/dist/config/schema.js.map +1 -1
- package/dist/config/schema.test.js +103 -3
- package/dist/config/schema.test.js.map +1 -1
- package/dist/core/assembly/overlay-loader.d.ts +12 -0
- package/dist/core/assembly/overlay-loader.d.ts.map +1 -1
- package/dist/core/assembly/overlay-loader.js +30 -0
- package/dist/core/assembly/overlay-loader.js.map +1 -1
- package/dist/core/assembly/overlay-loader.test.js +66 -1
- package/dist/core/assembly/overlay-loader.test.js.map +1 -1
- package/dist/core/assembly/overlay-state-resolver.d.ts.map +1 -1
- package/dist/core/assembly/overlay-state-resolver.js +48 -19
- package/dist/core/assembly/overlay-state-resolver.js.map +1 -1
- package/dist/core/assembly/overlay-state-resolver.test.js +80 -0
- package/dist/core/assembly/overlay-state-resolver.test.js.map +1 -1
- package/dist/e2e/init.test.js +5 -4
- package/dist/e2e/init.test.js.map +1 -1
- package/dist/e2e/project-type-overlays.test.js +119 -0
- package/dist/e2e/project-type-overlays.test.js.map +1 -1
- package/dist/project/adopt.d.ts.map +1 -1
- package/dist/project/adopt.js +3 -1
- package/dist/project/adopt.js.map +1 -1
- package/dist/project/detectors/disambiguate.js +1 -1
- package/dist/project/detectors/disambiguate.js.map +1 -1
- package/dist/project/detectors/index.d.ts.map +1 -1
- package/dist/project/detectors/index.js +2 -1
- package/dist/project/detectors/index.js.map +1 -1
- package/dist/project/detectors/ml.d.ts.map +1 -1
- package/dist/project/detectors/ml.js +2 -6
- package/dist/project/detectors/ml.js.map +1 -1
- package/dist/project/detectors/research.d.ts +4 -0
- package/dist/project/detectors/research.d.ts.map +1 -0
- package/dist/project/detectors/research.js +141 -0
- package/dist/project/detectors/research.js.map +1 -0
- package/dist/project/detectors/research.test.d.ts +2 -0
- package/dist/project/detectors/research.test.d.ts.map +1 -0
- package/dist/project/detectors/research.test.js +235 -0
- package/dist/project/detectors/research.test.js.map +1 -0
- package/dist/project/detectors/shared-signals.d.ts +3 -0
- package/dist/project/detectors/shared-signals.d.ts.map +1 -0
- package/dist/project/detectors/shared-signals.js +9 -0
- package/dist/project/detectors/shared-signals.js.map +1 -0
- package/dist/project/detectors/types.d.ts +6 -2
- package/dist/project/detectors/types.d.ts.map +1 -1
- package/dist/project/detectors/types.js.map +1 -1
- package/dist/state/lock-manager.d.ts +1 -0
- package/dist/state/lock-manager.d.ts.map +1 -1
- package/dist/state/lock-manager.js +1 -1
- package/dist/state/lock-manager.js.map +1 -1
- package/dist/types/config.d.ts +7 -1
- package/dist/types/config.d.ts.map +1 -1
- package/dist/wizard/copy/core.d.ts.map +1 -1
- package/dist/wizard/copy/core.js +4 -0
- package/dist/wizard/copy/core.js.map +1 -1
- package/dist/wizard/copy/index.d.ts.map +1 -1
- package/dist/wizard/copy/index.js +2 -0
- package/dist/wizard/copy/index.js.map +1 -1
- package/dist/wizard/copy/research.d.ts +3 -0
- package/dist/wizard/copy/research.d.ts.map +1 -0
- package/dist/wizard/copy/research.js +27 -0
- package/dist/wizard/copy/research.js.map +1 -0
- package/dist/wizard/copy/types.d.ts +5 -1
- package/dist/wizard/copy/types.d.ts.map +1 -1
- package/dist/wizard/flags.d.ts +7 -1
- package/dist/wizard/flags.d.ts.map +1 -1
- package/dist/wizard/questions.d.ts +4 -2
- package/dist/wizard/questions.d.ts.map +1 -1
- package/dist/wizard/questions.js +27 -1
- package/dist/wizard/questions.js.map +1 -1
- package/dist/wizard/questions.test.js +51 -0
- package/dist/wizard/questions.test.js.map +1 -1
- package/dist/wizard/wizard.d.ts +3 -2
- package/dist/wizard/wizard.d.ts.map +1 -1
- package/dist/wizard/wizard.js +3 -1
- package/dist/wizard/wizard.js.map +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -29,7 +29,7 @@ Either way, Scaffold constructs the prompt and the target AI tool does the work.
|
|
|
29
29
|
|
|
30
30
|
**Assembly engine** — At execution time, Scaffold builds a 7-section prompt from: system metadata, the meta-prompt, knowledge base entries, project context (artifacts from prior steps), methodology settings, layered instructions, and depth-specific execution guidance.
|
|
31
31
|
|
|
32
|
-
**Knowledge base** —
|
|
32
|
+
**Knowledge base** — 222 domain expertise entries in `content/knowledge/` organized in seventeen categories (core, product, review, validation, finalization, execution, tools, game, web-app, backend, cli, library, mobile-app, data-pipeline, ml, browser-extension, research) covering testing strategy, domain modeling, API design, security best practices, eval craft, TDD execution, task claiming, worktree management, release management, rendering strategies, data stores, CLI patterns, game engines, library bundling, mobile deployment, batch and streaming pipelines, model training and serving, browser extension manifests and service workers, and more. These get injected into prompts based on each step's `knowledge-base` frontmatter field. Knowledge files with a `## Deep Guidance` section are optimized for CLI assembly — only the deep guidance content is loaded, avoiding redundancy with the prompt text. Teams can add project-local overrides in `.scaffold/knowledge/` that layer on top of the global entries.
|
|
33
33
|
|
|
34
34
|
**Methodology presets** — Three built-in presets control which steps run and how deep the analysis goes:
|
|
35
35
|
- **deep** (depth 5) — all steps enabled, exhaustive analysis
|
|
@@ -40,7 +40,7 @@ Either way, Scaffold constructs the prompt and the target AI tool does the work.
|
|
|
40
40
|
|
|
41
41
|
**Multi-model validation** — At depth 4-5, all 19 review and validation steps can dispatch independent reviews to Codex and/or Gemini CLIs. Two independent models catch more blind spots than one. When both CLIs are available, findings are reconciled by confidence level (both agree = high confidence, single model P0 = still actionable). When a channel is unavailable, a compensating Claude self-review pass runs in its place (labeled `[compensating: Codex-equivalent]` or `[compensating: Gemini-equivalent]`, single-source confidence). CLI commands must always run in the foreground — background execution produces empty output. See the [Multi-Model Review](#multi-model-review) section.
|
|
42
42
|
|
|
43
|
-
**State management** — Pipeline progress is tracked in `.scaffold/state.json` with atomic file writes and crash recovery. An advisory lock prevents concurrent runs. Decisions are logged to an append-only `decisions.jsonl`.
|
|
43
|
+
**State management** — Pipeline progress is tracked in `.scaffold/state.json` with atomic file writes and crash recovery. An advisory lock prevents concurrent runs. Decisions are logged to an append-only `decisions.jsonl`. Pressing Ctrl+C during any command exits cleanly with an informative message — no stack traces, no orphaned locks, no corrupted state.
|
|
44
44
|
|
|
45
45
|
**Dependency graph** — Steps declare their prerequisites in frontmatter. Scaffold builds a DAG, runs topological sort (Kahn's algorithm), detects cycles, and computes which steps are eligible at any point.
|
|
46
46
|
|
|
@@ -368,7 +368,7 @@ Every `scaffold init` wizard question can be answered via CLI flags, making scaf
|
|
|
368
368
|
| `--depth` | 1-5 | Custom methodology depth (requires `--methodology custom`) |
|
|
369
369
|
| `--adapters` | comma-sep | AI adapters: claude-code, codex, gemini |
|
|
370
370
|
| `--traits` | comma-sep | Project traits: web, mobile |
|
|
371
|
-
| `--project-type` | string | web-app, mobile-app, backend, cli, library, game, data-pipeline, ml, browser-extension |
|
|
371
|
+
| `--project-type` | string | web-app, mobile-app, backend, cli, library, game, data-pipeline, ml, browser-extension, research |
|
|
372
372
|
| `--auto` | boolean | Non-interactive mode (uses Zod defaults for unset flags) |
|
|
373
373
|
|
|
374
374
|
#### Web-App Config Flags (require `--project-type web-app` or auto-set it)
|
|
@@ -445,6 +445,15 @@ Every `scaffold init` wizard question can be answered via CLI flags, making scaf
|
|
|
445
445
|
| `--ext-content-script` | boolean | `--ext-content-script` / `--no-ext-content-script` |
|
|
446
446
|
| `--ext-background-worker` | boolean | `--ext-background-worker` / `--no-ext-background-worker` |
|
|
447
447
|
|
|
448
|
+
#### Research Config Flags (require `--project-type research` or auto-set it)
|
|
449
|
+
|
|
450
|
+
| Flag | Type | Values |
|
|
451
|
+
|------|------|--------|
|
|
452
|
+
| `--research-driver` | string | code-driven, config-driven, api-driven, notebook-driven |
|
|
453
|
+
| `--research-interaction` | string | autonomous, checkpoint-gated, human-guided |
|
|
454
|
+
| `--research-domain` | string | none, quant-finance, ml-research, simulation |
|
|
455
|
+
| `--research-tracking` | boolean | `--research-tracking` / `--no-research-tracking` |
|
|
456
|
+
|
|
448
457
|
#### Game Config Flags (require `--project-type game` or auto-set it)
|
|
449
458
|
|
|
450
459
|
| Flag | Type | Values |
|
|
@@ -467,9 +476,9 @@ Every `scaffold init` wizard question can be answered via CLI flags, making scaf
|
|
|
467
476
|
|
|
468
477
|
- **Flag > auto > interactive**: Flags always take highest precedence. `--auto --engine unreal` uses defaults for everything except engine.
|
|
469
478
|
- **Partial flags + interactive**: Provide some flags and the wizard asks only the remaining questions. `scaffold init --project-type game --engine unreal` prompts interactively for multiplayer, platforms, etc.
|
|
470
|
-
- **Type-specific flags auto-set project type**: `--engine unity` automatically sets `--project-type game`, `--web-rendering ssr` sets `--project-type web-app`, `--backend-api-style rest` sets `--project-type backend`, `--cli-interactivity hybrid` sets `--project-type cli`, `--lib-visibility public` sets `--project-type library`, `--mobile-platform ios` sets `--project-type mobile-app`, `--pipeline-processing batch` sets `--project-type data-pipeline`, `--ml-phase training` sets `--project-type ml`, `--ext-manifest 3` sets `--project-type browser-extension`. Error if conflicting type.
|
|
471
|
-
- **Cannot mix flag families**: `--web-rendering ssr --backend-api-style rest` is an error. Each flag family (`--web-*`, `--backend-*`, `--cli-*`, `--lib-*`, `--mobile-*`, `--pipeline-*`, `--ml-*`, `--ext-*`, game) is exclusive.
|
|
472
|
-
- **Validation**: `--depth` requires `--methodology custom`. `--online-services` requires `--multiplayer online` or `hybrid`. SSR/hybrid rendering is incompatible with static deploy target. Session auth requires server state (not static). ML inference projects must specify a serving pattern. Browser extensions must declare at least one capability (UI surface, content script, or background worker).
|
|
479
|
+
- **Type-specific flags auto-set project type**: `--engine unity` automatically sets `--project-type game`, `--web-rendering ssr` sets `--project-type web-app`, `--backend-api-style rest` sets `--project-type backend`, `--cli-interactivity hybrid` sets `--project-type cli`, `--lib-visibility public` sets `--project-type library`, `--mobile-platform ios` sets `--project-type mobile-app`, `--pipeline-processing batch` sets `--project-type data-pipeline`, `--ml-phase training` sets `--project-type ml`, `--ext-manifest 3` sets `--project-type browser-extension`, `--research-driver code-driven` sets `--project-type research`. Error if conflicting type.
|
|
480
|
+
- **Cannot mix flag families**: `--web-rendering ssr --backend-api-style rest` is an error. Each flag family (`--web-*`, `--backend-*`, `--cli-*`, `--lib-*`, `--mobile-*`, `--pipeline-*`, `--ml-*`, `--research-*`, `--ext-*`, game) is exclusive.
|
|
481
|
+
- **Validation**: `--depth` requires `--methodology custom`. `--online-services` requires `--multiplayer online` or `hybrid`. SSR/hybrid rendering is incompatible with static deploy target. Session auth requires server state (not static). ML inference projects must specify a serving pattern. Browser extensions must declare at least one capability (UI surface, content script, or background worker). Notebook-driven research cannot be fully autonomous.
|
|
473
482
|
|
|
474
483
|
#### CI Examples
|
|
475
484
|
|
|
@@ -550,6 +559,16 @@ scaffold init --auto --methodology mvp --project-type browser-extension \
|
|
|
550
559
|
--ext-manifest 3 --ext-ui-surfaces devtools \
|
|
551
560
|
--no-ext-content-script
|
|
552
561
|
|
|
562
|
+
# Autonomous quant-finance research (trading strategy optimization)
|
|
563
|
+
scaffold init --auto --methodology deep --project-type research \
|
|
564
|
+
--research-driver code-driven --research-interaction autonomous \
|
|
565
|
+
--research-domain quant-finance
|
|
566
|
+
|
|
567
|
+
# Checkpoint-gated ML architecture search
|
|
568
|
+
scaffold init --auto --methodology deep --project-type research \
|
|
569
|
+
--research-driver config-driven --research-interaction checkpoint-gated \
|
|
570
|
+
--research-domain ml-research
|
|
571
|
+
|
|
553
572
|
# Multiplayer mobile game with Unity
|
|
554
573
|
scaffold init --project-type game --methodology deep --auto \
|
|
555
574
|
--engine unity --multiplayer online --target-platforms ios,android \
|
|
@@ -576,7 +595,7 @@ Scaffold supports **project-type overlays** — domain-specific knowledge and pi
|
|
|
576
595
|
|
|
577
596
|
- **Injects domain knowledge** into existing pipeline steps (e.g., SSR caching strategies into `tech-stack`, API pagination patterns into `coding-standards`)
|
|
578
597
|
|
|
579
|
-
The game overlay additionally adjusts step enablement, remaps artifact references, and adds dependency overrides (because game development has fundamentally different artifacts). The web-app, backend, CLI, library, mobile-app, data-pipeline, ML,
|
|
598
|
+
The game overlay additionally adjusts step enablement, remaps artifact references, and adds dependency overrides (because game development has fundamentally different artifacts). The web-app, backend, CLI, library, mobile-app, data-pipeline, ML, browser-extension, and research overlays are **knowledge-only** — they inject domain expertise into existing steps without changing which steps run or how they depend on each other. The research type additionally supports **domain sub-overlays** (quant-finance, ml-research, simulation) that layer domain-specific knowledge on top of the core research overlay.
|
|
580
599
|
|
|
581
600
|
Overlays are composable with methodology presets. An MVP web-app gets fewer steps at lower depth; a deep backend project gets exhaustive analysis of every architectural decision.
|
|
582
601
|
|
|
@@ -590,6 +609,7 @@ Overlays are composable with methodology presets. An MVP web-app gets fewer step
|
|
|
590
609
|
| `data-pipeline` | `data-pipeline-overlay.yml` | 12 entries (architecture, batch and streaming patterns, orchestration, schema management, quality, testing, security) | Processing model, orchestration, data quality strategy, schema management, data catalog |
|
|
591
610
|
| `ml` | `ml-overlay.yml` | 12 entries (architecture, training and serving patterns, experiment tracking, model evaluation, observability, testing, security) | Project phase, model type, serving pattern, experiment tracking |
|
|
592
611
|
| `browser-extension` | `browser-extension-overlay.yml` | 12 entries (architecture, manifest configuration, service workers, content scripts, cross-browser, store submission, testing, security) | Manifest version, UI surfaces, content script, background worker |
|
|
612
|
+
| `research` | `research-overlay.yml` + domain sub-overlays | 25 entries (experiment loop, tracking, overfitting prevention, backtesting, risk metrics, architecture search, simulation) | Experiment driver, interaction mode, domain, experiment tracking |
|
|
593
613
|
| `game` | `game-overlay.yml` | 24 entries (engines, networking, audio, VR/AR, economy, save systems, certification) | Engine, multiplayer, platforms, economy, narrative, and 6 more |
|
|
594
614
|
|
|
595
615
|
### Game Development
|
|
@@ -675,7 +695,7 @@ These answers control which conditional steps activate. A single-player puzzle g
|
|
|
675
695
|
|
|
676
696
|
#### Multi-type Detection
|
|
677
697
|
|
|
678
|
-
`scaffold adopt` detects
|
|
698
|
+
`scaffold adopt` detects 10 project types from manifest files and directory layouts:
|
|
679
699
|
|
|
680
700
|
| Type | Key Signals |
|
|
681
701
|
|------|-------------|
|
|
@@ -688,6 +708,7 @@ These answers control which conditional steps activate. A single-player puzzle g
|
|
|
688
708
|
| `data-pipeline` | `dags/` dir, Airflow/Prefect/Dagster deps, Spark configs |
|
|
689
709
|
| `ml` | `training/`/`models/` dirs, PyTorch/TensorFlow deps, MLflow configs |
|
|
690
710
|
| `browser-extension` | `manifest.json` with `manifest_version` field |
|
|
711
|
+
| `research` | `program.md` + `results.tsv`, backtest/strategy files with trading deps, optimization deps + experiment dirs, simulation framework deps |
|
|
691
712
|
|
|
692
713
|
Each detector returns a confidence tier (high/medium/low) with evidence trails. Override detection with `--project-type <type>`.
|
|
693
714
|
|
|
@@ -1316,7 +1337,7 @@ scaffold dashboard
|
|
|
1316
1337
|
|
|
1317
1338
|
## Knowledge System
|
|
1318
1339
|
|
|
1319
|
-
Scaffold ships with
|
|
1340
|
+
Scaffold ships with 222 domain expertise entries organized in seventeen categories:
|
|
1320
1341
|
|
|
1321
1342
|
- **core/** (26 entries) — eval craft, testing strategy, domain modeling, API design, database design, system architecture, ADR craft, security best practices, operations, task decomposition, user stories, UX specification, design system tokens, user story innovation, AI memory management, coding conventions, tech stack selection, project structure patterns, task tracking, CLAUDE.md patterns, multi-model review dispatch, review step template, dev environment, git workflow patterns, automated review tooling, vision craft
|
|
1322
1343
|
- **product/** (5 entries) — PRD craft, PRD innovation, gap analysis, vision craft, vision innovation
|
|
@@ -1334,6 +1355,7 @@ Scaffold ships with 194 domain expertise entries organized in sixteen categories
|
|
|
1334
1355
|
- **data-pipeline/** (12 entries) — batch/streaming/hybrid patterns, orchestration (DAG/event-driven/scheduled), data quality, schema management, lineage, pipeline testing
|
|
1335
1356
|
- **ml/** (12 entries) — training and inference patterns, model types (classical/deep-learning/llm), serving patterns, experiment tracking, model evaluation, MLOps observability
|
|
1336
1357
|
- **browser-extension/** (12 entries) — Manifest V3, content scripts, service workers, cross-browser compatibility, extension security, store submission
|
|
1358
|
+
- **research/** (25 entries) — experiment loop architecture, parameter optimization, overfitting prevention, experiment tracking, security/sandboxing; domain knowledge for quant-finance (backtesting, risk metrics, market data, strategy patterns), ML-research (architecture search, ablation studies, evaluation), and simulation (engine integration, parameter spaces, compute management)
|
|
1337
1359
|
|
|
1338
1360
|
Each pipeline step declares which knowledge entries it needs in its frontmatter. The assembly engine injects them automatically. Knowledge files with a `## Deep Guidance` section are optimized for the CLI — only the deep guidance content is loaded into the assembled prompt, skipping the summary to avoid redundancy with the prompt text.
|
|
1339
1361
|
|
|
@@ -1540,7 +1562,7 @@ All build inputs live under `content/`:
|
|
|
1540
1562
|
content/
|
|
1541
1563
|
├── pipeline/ # 60 meta-prompts organized by 16 phases (phases 0-15, including build)
|
|
1542
1564
|
├── tools/ # 10 tool meta-prompts (stateless, category: tool)
|
|
1543
|
-
├── knowledge/ #
|
|
1565
|
+
├── knowledge/ # 222 domain expertise entries (core, product, review, validation, finalization, execution, tools, game, web-app, backend, cli, library, mobile-app, data-pipeline, ml, browser-extension, research)
|
|
1544
1566
|
├── methodology/ # 3 YAML presets (deep, mvp, custom)
|
|
1545
1567
|
└── skills/ # Skill templates with {{markers}} for multi-platform resolution (includes mmr)
|
|
1546
1568
|
```
|
|
@@ -0,0 +1,385 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: research-architecture
|
|
3
|
+
description: Experiment runner architecture including pluggable experiment and evaluation interfaces, state management patterns, and result persistence
|
|
4
|
+
topics: [research, architecture, experiment-runner, state-management, interfaces, persistence]
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
The experiment runner is the central architectural component of a research project. It orchestrates the loop of loading configuration, executing experiments, evaluating results, and deciding whether to keep or discard each run. The runner must be completely decoupled from the specific experiment logic (strategies, models, parameter spaces) so that it can drive any experiment without modification. This separation is what makes autonomous iteration possible -- the agent modifies experiment code while the runner infrastructure remains stable.
|
|
8
|
+
|
|
9
|
+
## Summary
|
|
10
|
+
|
|
11
|
+
Build the experiment runner around three pluggable interfaces: Strategy (executes an experiment given config), Evaluator (computes metrics from raw results), and Tracker (records results for comparison). Use a state manager to track the current best result, iteration history, and budget consumption. Persist all state to disk so that the runner can resume after crashes. The runner never imports specific strategy code -- it discovers strategies via a registry or config-specified entry point.
|
|
12
|
+
|
|
13
|
+
## Deep Guidance
|
|
14
|
+
|
|
15
|
+
### Core Architecture
|
|
16
|
+
|
|
17
|
+
```
|
|
18
|
+
┌──────────────────────┐
|
|
19
|
+
│ ExperimentRunner │
|
|
20
|
+
│ ┌────────────────┐ │
|
|
21
|
+
Config ──────────►│ │ State Manager │ │
|
|
22
|
+
│ │ (best, history)│ │
|
|
23
|
+
│ └───────┬────────┘ │
|
|
24
|
+
│ │ │
|
|
25
|
+
│ ┌───────▼────────┐ │
|
|
26
|
+
│ │ Budget Checker │ │
|
|
27
|
+
│ └───────┬────────┘ │
|
|
28
|
+
│ │ │
|
|
29
|
+
│ ┌───────▼────────┐ │
|
|
30
|
+
│ │ Strategy │◄─┼── Registry lookup
|
|
31
|
+
│ │ (pluggable) │ │
|
|
32
|
+
│ └───────┬────────┘ │
|
|
33
|
+
│ │ │
|
|
34
|
+
│ ┌───────▼────────┐ │
|
|
35
|
+
│ │ Evaluator │ │
|
|
36
|
+
│ │ (pluggable) │ │
|
|
37
|
+
│ └───────┬────────┘ │
|
|
38
|
+
│ │ │
|
|
39
|
+
│ ┌───────▼────────┐ │
|
|
40
|
+
│ │ Tracker │ │
|
|
41
|
+
│ │ (pluggable) │ │
|
|
42
|
+
│ └────────────────┘ │
|
|
43
|
+
└──────────────────────┘
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
### Pluggable Interface Design
|
|
47
|
+
|
|
48
|
+
The three core interfaces use Python's Protocol type for structural subtyping. This means strategies do not need to inherit from a base class -- they only need to implement the required methods:
|
|
49
|
+
|
|
50
|
+
```python
|
|
51
|
+
# src/interfaces.py
|
|
52
|
+
from typing import Protocol, Any, runtime_checkable
|
|
53
|
+
|
|
54
|
+
@runtime_checkable
|
|
55
|
+
class Strategy(Protocol):
|
|
56
|
+
"""Interface for experiment execution strategies."""
|
|
57
|
+
|
|
58
|
+
@property
|
|
59
|
+
def name(self) -> str:
|
|
60
|
+
"""Unique identifier for this strategy."""
|
|
61
|
+
...
|
|
62
|
+
|
|
63
|
+
def execute(self, config: dict[str, Any]) -> dict[str, Any]:
|
|
64
|
+
"""
|
|
65
|
+
Execute the experiment and return raw results.
|
|
66
|
+
|
|
67
|
+
Args:
|
|
68
|
+
config: Experiment configuration dict.
|
|
69
|
+
|
|
70
|
+
Returns:
|
|
71
|
+
Raw results dict. Structure is strategy-specific but must
|
|
72
|
+
contain enough information for the Evaluator to compute metrics.
|
|
73
|
+
"""
|
|
74
|
+
...
|
|
75
|
+
|
|
76
|
+
@runtime_checkable
|
|
77
|
+
class Evaluator(Protocol):
|
|
78
|
+
"""Interface for result evaluation."""
|
|
79
|
+
|
|
80
|
+
def evaluate(self, raw_results: dict[str, Any]) -> dict[str, float]:
|
|
81
|
+
"""
|
|
82
|
+
Compute metrics from raw experiment results.
|
|
83
|
+
|
|
84
|
+
Args:
|
|
85
|
+
raw_results: Output from Strategy.execute().
|
|
86
|
+
|
|
87
|
+
Returns:
|
|
88
|
+
Dict mapping metric names to float values.
|
|
89
|
+
"""
|
|
90
|
+
...
|
|
91
|
+
|
|
92
|
+
def is_improvement(self, current: dict[str, float],
|
|
93
|
+
best: dict[str, float]) -> bool:
|
|
94
|
+
"""
|
|
95
|
+
Determine if current results improve on the best so far.
|
|
96
|
+
|
|
97
|
+
Args:
|
|
98
|
+
current: Metrics from the current run.
|
|
99
|
+
best: Metrics from the best run so far.
|
|
100
|
+
|
|
101
|
+
Returns:
|
|
102
|
+
True if current should replace best.
|
|
103
|
+
"""
|
|
104
|
+
...
|
|
105
|
+
|
|
106
|
+
@runtime_checkable
|
|
107
|
+
class Tracker(Protocol):
|
|
108
|
+
"""Interface for experiment result tracking."""
|
|
109
|
+
|
|
110
|
+
def log_run(self, run_id: str, config: dict, metrics: dict[str, float],
|
|
111
|
+
artifacts: dict[str, Any] | None = None) -> None:
|
|
112
|
+
"""Record a single experiment run."""
|
|
113
|
+
...
|
|
114
|
+
|
|
115
|
+
def get_history(self) -> list[dict]:
|
|
116
|
+
"""Return all recorded runs."""
|
|
117
|
+
...
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
### Strategy Registry
|
|
121
|
+
|
|
122
|
+
The registry pattern allows the runner to instantiate strategies by name without importing them directly:
|
|
123
|
+
|
|
124
|
+
```python
|
|
125
|
+
# src/strategies/registry.py
|
|
126
|
+
from typing import Type
|
|
127
|
+
from src.interfaces import Strategy
|
|
128
|
+
|
|
129
|
+
class StrategyRegistry:
    """Name-to-class lookup table for experiment strategies.

    The table lives on the class itself, so every caller shares the same
    set of registered strategies.
    """

    _registry: dict[str, Type[Strategy]] = {}

    @classmethod
    def register(cls, name: str):
        """Build a class decorator that files the decorated class under *name*.

        Raises:
            ValueError: When the decorator is applied and *name* is already
                taken by a previously registered strategy.
        """

        def _bind(strategy_cls: Type[Strategy]):
            table = cls._registry
            if name in table:
                raise ValueError(f"Strategy '{name}' already registered")
            table[name] = strategy_cls
            # Hand the class back untouched so it stays usable by its own name.
            return strategy_cls

        return _bind

    @classmethod
    def get(cls, name: str) -> Type[Strategy]:
        """Return the strategy class registered under *name*.

        Raises:
            KeyError: If nothing is registered under *name*; the message
                lists the names that are available.
        """
        table = cls._registry
        if name in table:
            return table[name]
        available = ", ".join(sorted(table))
        raise KeyError(f"Strategy '{name}' not found. Available: {available}")

    @classmethod
    def list_strategies(cls) -> list[str]:
        """Return all registered strategy names in sorted order."""
        return sorted(cls._registry)
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
# Usage in a strategy file:
|
|
160
|
+
# src/strategies/momentum.py
|
|
161
|
+
from src.strategies.registry import StrategyRegistry
|
|
162
|
+
|
|
163
|
+
@StrategyRegistry.register("momentum_crossover")
class MomentumCrossover:
    """Example strategy registered under the name ``momentum_crossover``.

    Illustrates how a strategy file plugs into the registry; the trading
    logic itself is left as a placeholder.
    """

    name = "momentum_crossover"

    def __init__(self, lookback: int = 20, **kwargs):
        """Store the lookback setting.

        Args:
            lookback: Lookback setting kept on the instance.
            **kwargs: Accepted and ignored, so extra params passed by the
                caller do not raise.
        """
        self.lookback = lookback

    def execute(self, config: dict) -> dict:
        """Run the strategy and return its raw results.

        Returns:
            Dict with the keys ``"trades"`` and ``"equity_curve"``.
        """
        # Placeholder outputs so the example runs as written instead of
        # raising NameError; a real implementation fills these in.
        trades: list = []
        equity: list = []
        # ... run the momentum crossover strategy ...
        return {"trades": trades, "equity_curve": equity}
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
### State Management
|
|
176
|
+
|
|
177
|
+
The state manager tracks the experiment loop's progress and enables resume-after-crash:
|
|
178
|
+
|
|
179
|
+
```python
|
|
180
|
+
# src/runner/state.py
|
|
181
|
+
import json
|
|
182
|
+
from pathlib import Path
|
|
183
|
+
from dataclasses import dataclass, field, asdict
|
|
184
|
+
from typing import Any
|
|
185
|
+
|
|
186
|
+
@dataclass
|
|
187
|
+
class RunRecord:
|
|
188
|
+
"""Record of a single experiment run."""
|
|
189
|
+
run_id: str
|
|
190
|
+
config: dict[str, Any]
|
|
191
|
+
metrics: dict[str, float]
|
|
192
|
+
is_best: bool = False
|
|
193
|
+
decision: str = "" # "keep" or "discard"
|
|
194
|
+
reason: str = ""
|
|
195
|
+
|
|
196
|
+
@dataclass
|
|
197
|
+
class ExperimentState:
|
|
198
|
+
"""Persistent state for the experiment loop."""
|
|
199
|
+
experiment_id: str
|
|
200
|
+
total_runs: int = 0
|
|
201
|
+
best_run: RunRecord | None = None
|
|
202
|
+
history: list[RunRecord] = field(default_factory=list)
|
|
203
|
+
runs_since_improvement: int = 0
|
|
204
|
+
|
|
205
|
+
def record_run(self, run: RunRecord) -> None:
|
|
206
|
+
"""Record a completed run and update state."""
|
|
207
|
+
self.total_runs += 1
|
|
208
|
+
self.history.append(run)
|
|
209
|
+
|
|
210
|
+
if run.is_best:
|
|
211
|
+
self.best_run = run
|
|
212
|
+
self.runs_since_improvement = 0
|
|
213
|
+
else:
|
|
214
|
+
self.runs_since_improvement += 1
|
|
215
|
+
|
|
216
|
+
def save(self, path: Path) -> None:
|
|
217
|
+
"""Persist state to disk for crash recovery."""
|
|
218
|
+
path.parent.mkdir(parents=True, exist_ok=True)
|
|
219
|
+
with open(path, "w") as f:
|
|
220
|
+
json.dump(asdict(self), f, indent=2, default=str)
|
|
221
|
+
|
|
222
|
+
@classmethod
|
|
223
|
+
def load(cls, path: Path) -> "ExperimentState":
|
|
224
|
+
"""Load state from disk. Returns empty state if file missing."""
|
|
225
|
+
if not path.exists():
|
|
226
|
+
return cls(experiment_id="unknown")
|
|
227
|
+
with open(path) as f:
|
|
228
|
+
data = json.load(f)
|
|
229
|
+
state = cls(experiment_id=data["experiment_id"])
|
|
230
|
+
state.total_runs = data["total_runs"]
|
|
231
|
+
state.runs_since_improvement = data["runs_since_improvement"]
|
|
232
|
+
state.history = [RunRecord(**r) for r in data["history"]]
|
|
233
|
+
if data["best_run"]:
|
|
234
|
+
state.best_run = RunRecord(**data["best_run"])
|
|
235
|
+
return state
|
|
236
|
+
```
|
|
237
|
+
|
|
238
|
+
### The Experiment Runner
|
|
239
|
+
|
|
240
|
+
The runner ties the interfaces together:
|
|
241
|
+
|
|
242
|
+
```python
|
|
243
|
+
# src/runner/experiment_runner.py
|
|
244
|
+
import logging
|
|
245
|
+
from pathlib import Path
|
|
246
|
+
from src.interfaces import Strategy, Evaluator, Tracker
|
|
247
|
+
from src.runner.state import ExperimentState, RunRecord
|
|
248
|
+
from src.runner.budget import IterationBudget
|
|
249
|
+
from src.config import load_config
|
|
250
|
+
from src.seed import set_seed, capture_environment
|
|
251
|
+
from src.strategies.registry import StrategyRegistry
|
|
252
|
+
|
|
253
|
+
logger = logging.getLogger(__name__)
|
|
254
|
+
|
|
255
|
+
class ExperimentRunner:
    """Drive the execute -> evaluate -> record loop for one experiment.

    NOTE(review): ``_build_evaluator`` and ``_build_tracker`` are called in
    ``__init__`` but are not defined in this snippet; they must be supplied
    (for example by a subclass) before the runner can be constructed.
    """

    def __init__(self, config_path: str):
        """Load the config, build the pluggable components, restore state.

        Args:
            config_path: Path to the experiment config; its file stem is
                used as the experiment id.
        """
        self.config = load_config(config_path)
        self.experiment_id = Path(config_path).stem
        self.results_dir = Path(self.config["logging"]["results_dir"]) / self.experiment_id

        # Load pluggable components
        strategy_cls = StrategyRegistry.get(self.config["strategy"]["type"])
        self.strategy: Strategy = strategy_cls(**self.config["strategy"].get("params", {}))
        self.evaluator: Evaluator = self._build_evaluator()
        self.tracker: Tracker = self._build_tracker()
        self.budget = IterationBudget(**self.config.get("budget", {}))

        # Load or initialize state
        self.state_path = self.results_dir / "state.json"
        self.state = ExperimentState.load(self.state_path)
        self.state.experiment_id = self.experiment_id

    def run_loop(self) -> ExperimentState:
        """Run the full experiment loop until budget exhaustion or convergence.

        A run whose strategy or evaluator raises is logged, recorded as a
        discarded run and persisted, so it advances the run counter, run id
        and seed instead of being retried identically forever.

        Returns:
            The experiment state after the loop stops.
        """
        logger.info("Starting experiment %s (resuming from run %d)",
                    self.experiment_id, self.state.total_runs)

        while True:
            # Check budget
            exhausted, reason = self.budget.is_exhausted(
                runs=self.state.total_runs,
                runs_since_improvement=self.state.runs_since_improvement,
            )
            if exhausted:
                logger.info("Stopping: %s", reason)
                break

            # Execute one iteration
            run_id = f"run-{self.state.total_runs + 1:04d}"
            set_seed(self.config["experiment"]["seed"] + self.state.total_runs)

            try:
                raw_results = self.strategy.execute(self.config)
                metrics = self.evaluator.evaluate(raw_results)
            except Exception as e:
                # Keep the traceback; the failure still has to count against
                # the budget or the loop could never terminate.
                logger.exception("Run %s failed: %s", run_id, e)
                self._record_failure(run_id, e)
                continue

            # Evaluate improvement
            is_best = (
                self.state.best_run is None
                or self.evaluator.is_improvement(metrics, self.state.best_run.metrics)
            )
            decision = "keep" if is_best else "discard"

            run = RunRecord(
                run_id=run_id,
                config=self.config,
                metrics=metrics,
                is_best=is_best,
                decision=decision,
                reason=f"{'New best' if is_best else 'No improvement'}",
            )

            # Record and persist
            self.state.record_run(run)
            self.tracker.log_run(run_id, self.config, metrics)
            self.state.save(self.state_path)

            logger.info(
                "Run %s: %s (metrics: %s)",
                run_id, decision,
                {k: f"{v:.4f}" for k, v in metrics.items()},
            )

        return self.state

    def _record_failure(self, run_id: str, error: Exception) -> None:
        """Record a crashed run as discarded and persist the updated state."""
        self.state.record_run(
            RunRecord(
                run_id=run_id,
                config=self.config,
                metrics={},
                is_best=False,
                decision="discard",
                reason=f"Run failed: {error}",
            )
        )
        self.state.save(self.state_path)
|
|
327
|
+
```
|
|
328
|
+
|
|
329
|
+
### Result Persistence
|
|
330
|
+
|
|
331
|
+
Results are persisted at two levels:
|
|
332
|
+
|
|
333
|
+
1. **Per-run**: Each run's config, metrics, and artifacts are saved to `results/{experiment_id}/{run_id}/`.
|
|
334
|
+
2. **Experiment state**: The full experiment state (history, best run, budget consumption) is saved to `results/{experiment_id}/state.json` after every run.
|
|
335
|
+
|
|
336
|
+
```python
|
|
337
|
+
# src/tracking/file_tracker.py
|
|
338
|
+
import json
|
|
339
|
+
from pathlib import Path
|
|
340
|
+
from src.interfaces import Tracker
|
|
341
|
+
|
|
342
|
+
class FileTracker:
|
|
343
|
+
"""Simple file-based experiment tracker."""
|
|
344
|
+
|
|
345
|
+
def __init__(self, results_dir: str):
|
|
346
|
+
self.results_dir = Path(results_dir)
|
|
347
|
+
self.results_dir.mkdir(parents=True, exist_ok=True)
|
|
348
|
+
|
|
349
|
+
def log_run(self, run_id: str, config: dict, metrics: dict[str, float],
|
|
350
|
+
artifacts: dict | None = None) -> None:
|
|
351
|
+
run_dir = self.results_dir / run_id
|
|
352
|
+
run_dir.mkdir(parents=True, exist_ok=True)
|
|
353
|
+
|
|
354
|
+
with open(run_dir / "config.json", "w") as f:
|
|
355
|
+
json.dump(config, f, indent=2, default=str)
|
|
356
|
+
with open(run_dir / "metrics.json", "w") as f:
|
|
357
|
+
json.dump(metrics, f, indent=2)
|
|
358
|
+
|
|
359
|
+
if artifacts:
|
|
360
|
+
artifact_dir = run_dir / "artifacts"
|
|
361
|
+
artifact_dir.mkdir(exist_ok=True)
|
|
362
|
+
for name, data in artifacts.items():
|
|
363
|
+
with open(artifact_dir / name, "w") as f:
|
|
364
|
+
json.dump(data, f, indent=2, default=str)
|
|
365
|
+
|
|
366
|
+
def get_history(self) -> list[dict]:
|
|
367
|
+
runs = []
|
|
368
|
+
for run_dir in sorted(self.results_dir.iterdir()):
|
|
369
|
+
if run_dir.is_dir() and (run_dir / "metrics.json").exists():
|
|
370
|
+
with open(run_dir / "metrics.json") as f:
|
|
371
|
+
metrics = json.load(f)
|
|
372
|
+
runs.append({"run_id": run_dir.name, "metrics": metrics})
|
|
373
|
+
return runs
|
|
374
|
+
```
|
|
375
|
+
|
|
376
|
+
### Architecture Decision: When to Use Each Driver
|
|
377
|
+
|
|
378
|
+
| Driver | Architecture Pattern | Use When |
|
|
379
|
+
|--------|---------------------|----------|
|
|
380
|
+
| Code-driven | Git state machine, agent modifies source | Exploring algorithmic variations, strategy development |
|
|
381
|
+
| Config-driven | Fixed runner, parameterised configs | Hyperparameter sweeps, systematic parameter search |
|
|
382
|
+
| API-driven | Client wrapper, parameter serialization | External backtest engines, cloud simulation APIs |
|
|
383
|
+
| Notebook-driven | Papermill execution, cell-level tracking | Exploratory research, visualization-heavy analysis |
|
|
384
|
+
|
|
385
|
+
The runner architecture remains the same across all drivers. What changes is the Strategy implementation: code-driven strategies contain the algorithm directly, config-driven strategies delegate to a parameterised engine, API-driven strategies wrap HTTP calls, and notebook-driven strategies use papermill to execute notebooks.
|