npm - @zigrivers/scaffold - Versions diffs - 3.13.0 → 3.15.0 - Mend

@zigrivers/scaffold 3.13.0 → 3.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (180) hide show

package/README.md +32 -10
package/content/knowledge/research/research-architecture.md +385 -0
package/content/knowledge/research/research-conventions.md +248 -0
package/content/knowledge/research/research-dev-environment.md +303 -0
package/content/knowledge/research/research-experiment-loop.md +429 -0
package/content/knowledge/research/research-experiment-tracking.md +336 -0
package/content/knowledge/research/research-ml-architecture-search.md +383 -0
package/content/knowledge/research/research-ml-evaluation.md +407 -0
package/content/knowledge/research/research-ml-experiment-tracking.md +466 -0
package/content/knowledge/research/research-ml-training-patterns.md +413 -0
package/content/knowledge/research/research-observability.md +395 -0
package/content/knowledge/research/research-overfitting-prevention.md +306 -0
package/content/knowledge/research/research-project-structure.md +264 -0
package/content/knowledge/research/research-quant-backtesting.md +326 -0
package/content/knowledge/research/research-quant-market-data.md +366 -0
package/content/knowledge/research/research-quant-metrics.md +335 -0
package/content/knowledge/research/research-quant-requirements.md +223 -0
package/content/knowledge/research/research-quant-risk.md +469 -0
package/content/knowledge/research/research-quant-strategy-patterns.md +412 -0
package/content/knowledge/research/research-requirements.md +201 -0
package/content/knowledge/research/research-security.md +374 -0
package/content/knowledge/research/research-sim-compute-management.md +538 -0
package/content/knowledge/research/research-sim-engine-patterns.md +448 -0
package/content/knowledge/research/research-sim-parameter-spaces.md +425 -0
package/content/knowledge/research/research-sim-validation.md +456 -0
package/content/knowledge/research/research-testing.md +334 -0
package/content/methodology/research-ml-research.yml +23 -0
package/content/methodology/research-overlay.yml +65 -0
package/content/methodology/research-quant-finance.yml +29 -0
package/content/methodology/research-simulation.yml +23 -0
package/dist/cli/commands/adopt.d.ts.map +1 -1
package/dist/cli/commands/adopt.js +30 -8
package/dist/cli/commands/adopt.js.map +1 -1
package/dist/cli/commands/adopt.serialization.test.js +49 -0
package/dist/cli/commands/adopt.serialization.test.js.map +1 -1
package/dist/cli/commands/adopt.test.js +8 -0
package/dist/cli/commands/adopt.test.js.map +1 -1
package/dist/cli/commands/build.d.ts.map +1 -1
package/dist/cli/commands/build.js +191 -180
package/dist/cli/commands/build.js.map +1 -1
package/dist/cli/commands/complete.d.ts.map +1 -1
package/dist/cli/commands/complete.js +16 -12
package/dist/cli/commands/complete.js.map +1 -1
package/dist/cli/commands/complete.test.js +14 -5
package/dist/cli/commands/complete.test.js.map +1 -1
package/dist/cli/commands/init.d.ts +4 -0
package/dist/cli/commands/init.d.ts.map +1 -1
package/dist/cli/commands/init.js +75 -51
package/dist/cli/commands/init.js.map +1 -1
package/dist/cli/commands/init.test.js +33 -27
package/dist/cli/commands/init.test.js.map +1 -1
package/dist/cli/commands/reset.d.ts.map +1 -1
package/dist/cli/commands/reset.js +44 -40
package/dist/cli/commands/reset.js.map +1 -1
package/dist/cli/commands/reset.test.js +42 -20
package/dist/cli/commands/reset.test.js.map +1 -1
package/dist/cli/commands/rework.d.ts.map +1 -1
package/dist/cli/commands/rework.js +16 -12
package/dist/cli/commands/rework.js.map +1 -1
package/dist/cli/commands/rework.test.js +12 -3
package/dist/cli/commands/rework.test.js.map +1 -1
package/dist/cli/commands/run.d.ts.map +1 -1
package/dist/cli/commands/run.js +318 -298
package/dist/cli/commands/run.js.map +1 -1
package/dist/cli/commands/run.test.js +92 -120
package/dist/cli/commands/run.test.js.map +1 -1
package/dist/cli/commands/skip.d.ts.map +1 -1
package/dist/cli/commands/skip.js +19 -15
package/dist/cli/commands/skip.js.map +1 -1
package/dist/cli/commands/skip.test.js +22 -11
package/dist/cli/commands/skip.test.js.map +1 -1
package/dist/cli/commands/update.d.ts.map +1 -1
package/dist/cli/commands/update.js +3 -1
package/dist/cli/commands/update.js.map +1 -1
package/dist/cli/commands/update.test.js +8 -4
package/dist/cli/commands/update.test.js.map +1 -1
package/dist/cli/commands/version.d.ts.map +1 -1
package/dist/cli/commands/version.js +3 -1
package/dist/cli/commands/version.js.map +1 -1
package/dist/cli/commands/version.test.js +9 -5
package/dist/cli/commands/version.test.js.map +1 -1
package/dist/cli/index.d.ts.map +1 -1
package/dist/cli/index.js +2 -0
package/dist/cli/index.js.map +1 -1
package/dist/cli/init-flag-families.d.ts +6 -1
package/dist/cli/init-flag-families.d.ts.map +1 -1
package/dist/cli/init-flag-families.js +32 -1
package/dist/cli/init-flag-families.js.map +1 -1
package/dist/cli/init-flag-families.test.js +47 -0
package/dist/cli/init-flag-families.test.js.map +1 -1
package/dist/cli/output/interactive.d.ts +1 -0
package/dist/cli/output/interactive.d.ts.map +1 -1
package/dist/cli/output/interactive.js +5 -0
package/dist/cli/output/interactive.js.map +1 -1
package/dist/cli/shutdown.d.ts +51 -0
package/dist/cli/shutdown.d.ts.map +1 -0
package/dist/cli/shutdown.js +199 -0
package/dist/cli/shutdown.js.map +1 -0
package/dist/cli/shutdown.test.d.ts +2 -0
package/dist/cli/shutdown.test.d.ts.map +1 -0
package/dist/cli/shutdown.test.js +316 -0
package/dist/cli/shutdown.test.js.map +1 -0
package/dist/config/schema.d.ts +272 -16
package/dist/config/schema.d.ts.map +1 -1
package/dist/config/schema.js +25 -1
package/dist/config/schema.js.map +1 -1
package/dist/config/schema.test.js +103 -3
package/dist/config/schema.test.js.map +1 -1
package/dist/core/assembly/overlay-loader.d.ts +12 -0
package/dist/core/assembly/overlay-loader.d.ts.map +1 -1
package/dist/core/assembly/overlay-loader.js +30 -0
package/dist/core/assembly/overlay-loader.js.map +1 -1
package/dist/core/assembly/overlay-loader.test.js +66 -1
package/dist/core/assembly/overlay-loader.test.js.map +1 -1
package/dist/core/assembly/overlay-state-resolver.d.ts.map +1 -1
package/dist/core/assembly/overlay-state-resolver.js +48 -19
package/dist/core/assembly/overlay-state-resolver.js.map +1 -1
package/dist/core/assembly/overlay-state-resolver.test.js +80 -0
package/dist/core/assembly/overlay-state-resolver.test.js.map +1 -1
package/dist/e2e/init.test.js +5 -4
package/dist/e2e/init.test.js.map +1 -1
package/dist/e2e/project-type-overlays.test.js +119 -0
package/dist/e2e/project-type-overlays.test.js.map +1 -1
package/dist/project/adopt.d.ts.map +1 -1
package/dist/project/adopt.js +3 -1
package/dist/project/adopt.js.map +1 -1
package/dist/project/detectors/disambiguate.js +1 -1
package/dist/project/detectors/disambiguate.js.map +1 -1
package/dist/project/detectors/index.d.ts.map +1 -1
package/dist/project/detectors/index.js +2 -1
package/dist/project/detectors/index.js.map +1 -1
package/dist/project/detectors/ml.d.ts.map +1 -1
package/dist/project/detectors/ml.js +2 -6
package/dist/project/detectors/ml.js.map +1 -1
package/dist/project/detectors/research.d.ts +4 -0
package/dist/project/detectors/research.d.ts.map +1 -0
package/dist/project/detectors/research.js +141 -0
package/dist/project/detectors/research.js.map +1 -0
package/dist/project/detectors/research.test.d.ts +2 -0
package/dist/project/detectors/research.test.d.ts.map +1 -0
package/dist/project/detectors/research.test.js +235 -0
package/dist/project/detectors/research.test.js.map +1 -0
package/dist/project/detectors/shared-signals.d.ts +3 -0
package/dist/project/detectors/shared-signals.d.ts.map +1 -0
package/dist/project/detectors/shared-signals.js +9 -0
package/dist/project/detectors/shared-signals.js.map +1 -0
package/dist/project/detectors/types.d.ts +6 -2
package/dist/project/detectors/types.d.ts.map +1 -1
package/dist/project/detectors/types.js.map +1 -1
package/dist/state/lock-manager.d.ts +1 -0
package/dist/state/lock-manager.d.ts.map +1 -1
package/dist/state/lock-manager.js +1 -1
package/dist/state/lock-manager.js.map +1 -1
package/dist/types/config.d.ts +7 -1
package/dist/types/config.d.ts.map +1 -1
package/dist/wizard/copy/core.d.ts.map +1 -1
package/dist/wizard/copy/core.js +4 -0
package/dist/wizard/copy/core.js.map +1 -1
package/dist/wizard/copy/index.d.ts.map +1 -1
package/dist/wizard/copy/index.js +2 -0
package/dist/wizard/copy/index.js.map +1 -1
package/dist/wizard/copy/research.d.ts +3 -0
package/dist/wizard/copy/research.d.ts.map +1 -0
package/dist/wizard/copy/research.js +27 -0
package/dist/wizard/copy/research.js.map +1 -0
package/dist/wizard/copy/types.d.ts +5 -1
package/dist/wizard/copy/types.d.ts.map +1 -1
package/dist/wizard/flags.d.ts +7 -1
package/dist/wizard/flags.d.ts.map +1 -1
package/dist/wizard/questions.d.ts +4 -2
package/dist/wizard/questions.d.ts.map +1 -1
package/dist/wizard/questions.js +27 -1
package/dist/wizard/questions.js.map +1 -1
package/dist/wizard/questions.test.js +51 -0
package/dist/wizard/questions.test.js.map +1 -1
package/dist/wizard/wizard.d.ts +3 -2
package/dist/wizard/wizard.d.ts.map +1 -1
package/dist/wizard/wizard.js +3 -1
package/dist/wizard/wizard.js.map +1 -1
package/package.json +1 -1

package/README.md CHANGED Viewed

@@ -29,7 +29,7 @@ Either way, Scaffold constructs the prompt and the target AI tool does the work.
 **Assembly engine** — At execution time, Scaffold builds a 7-section prompt from: system metadata, the meta-prompt, knowledge base entries, project context (artifacts from prior steps), methodology settings, layered instructions, and depth-specific execution guidance.
-**Knowledge base** — 194 domain expertise entries in `content/knowledge/` organized in sixteen categories (core, product, review, validation, finalization, execution, tools, game, web-app, backend, cli, library, mobile-app, data-pipeline, ml, browser-extension) covering testing strategy, domain modeling, API design, security best practices, eval craft, TDD execution, task claiming, worktree management, release management, rendering strategies, data stores, CLI patterns, game engines, library bundling, mobile deployment, batch and streaming pipelines, model training and serving, browser extension manifests and service workers, and more. These get injected into prompts based on each step's `knowledge-base` frontmatter field. Knowledge files with a `## Deep Guidance` section are optimized for CLI assembly — only the deep guidance content is loaded, avoiding redundancy with the prompt text. Teams can add project-local overrides in `.scaffold/knowledge/` that layer on top of the global entries.
+**Knowledge base** — 222 domain expertise entries in `content/knowledge/` organized in seventeen categories (core, product, review, validation, finalization, execution, tools, game, web-app, backend, cli, library, mobile-app, data-pipeline, ml, browser-extension, research) covering testing strategy, domain modeling, API design, security best practices, eval craft, TDD execution, task claiming, worktree management, release management, rendering strategies, data stores, CLI patterns, game engines, library bundling, mobile deployment, batch and streaming pipelines, model training and serving, browser extension manifests and service workers, and more. These get injected into prompts based on each step's `knowledge-base` frontmatter field. Knowledge files with a `## Deep Guidance` section are optimized for CLI assembly — only the deep guidance content is loaded, avoiding redundancy with the prompt text. Teams can add project-local overrides in `.scaffold/knowledge/` that layer on top of the global entries.
 **Methodology presets** — Three built-in presets control which steps run and how deep the analysis goes:
 - **deep** (depth 5) — all steps enabled, exhaustive analysis
@@ -40,7 +40,7 @@ Either way, Scaffold constructs the prompt and the target AI tool does the work.
 **Multi-model validation** — At depth 4-5, all 19 review and validation steps can dispatch independent reviews to Codex and/or Gemini CLIs. Two independent models catch more blind spots than one. When both CLIs are available, findings are reconciled by confidence level (both agree = high confidence, single model P0 = still actionable). When a channel is unavailable, a compensating Claude self-review pass runs in its place (labeled `[compensating: Codex-equivalent]` or `[compensating: Gemini-equivalent]`, single-source confidence). CLI commands must always run in the foreground — background execution produces empty output. See the [Multi-Model Review](#multi-model-review) section.
-**State management** — Pipeline progress is tracked in `.scaffold/state.json` with atomic file writes and crash recovery. An advisory lock prevents concurrent runs. Decisions are logged to an append-only `decisions.jsonl`.
+**State management** — Pipeline progress is tracked in `.scaffold/state.json` with atomic file writes and crash recovery. An advisory lock prevents concurrent runs. Decisions are logged to an append-only `decisions.jsonl`. Pressing Ctrl+C during any command exits cleanly with an informative message — no stack traces, no orphaned locks, no corrupted state.
 **Dependency graph** — Steps declare their prerequisites in frontmatter. Scaffold builds a DAG, runs topological sort (Kahn's algorithm), detects cycles, and computes which steps are eligible at any point.
@@ -368,7 +368,7 @@ Every `scaffold init` wizard question can be answered via CLI flags, making scaf
 | `--depth` | 1-5 | Custom methodology depth (requires `--methodology custom`) |
 | `--adapters` | comma-sep | AI adapters: claude-code, codex, gemini |
 | `--traits` | comma-sep | Project traits: web, mobile |
-| `--project-type` | string | web-app, mobile-app, backend, cli, library, game, data-pipeline, ml, browser-extension |
+| `--project-type` | string | web-app, mobile-app, backend, cli, library, game, data-pipeline, ml, browser-extension, research |
 | `--auto` | boolean | Non-interactive mode (uses Zod defaults for unset flags) |
 #### Web-App Config Flags (require `--project-type web-app` or auto-set it)
@@ -445,6 +445,15 @@ Every `scaffold init` wizard question can be answered via CLI flags, making scaf
 | `--ext-content-script` | boolean | `--ext-content-script` / `--no-ext-content-script` |
 | `--ext-background-worker` | boolean | `--ext-background-worker` / `--no-ext-background-worker` |
+#### Research Config Flags (require `--project-type research` or auto-set it)
+| Flag | Type | Values |
+|------|------|--------|
+| `--research-driver` | string | code-driven, config-driven, api-driven, notebook-driven |
+| `--research-interaction` | string | autonomous, checkpoint-gated, human-guided |
+| `--research-domain` | string | none, quant-finance, ml-research, simulation |
+| `--research-tracking` | boolean | `--research-tracking` / `--no-research-tracking` |
 #### Game Config Flags (require `--project-type game` or auto-set it)
 | Flag | Type | Values |
@@ -467,9 +476,9 @@ Every `scaffold init` wizard question can be answered via CLI flags, making scaf
 - **Flag > auto > interactive**: Flags always take highest precedence. `--auto --engine unreal` uses defaults for everything except engine.
 - **Partial flags + interactive**: Provide some flags and the wizard asks only the remaining questions. `scaffold init --project-type game --engine unreal` prompts interactively for multiplayer, platforms, etc.
-- **Type-specific flags auto-set project type**: `--engine unity` automatically sets `--project-type game`, `--web-rendering ssr` sets `--project-type web-app`, `--backend-api-style rest` sets `--project-type backend`, `--cli-interactivity hybrid` sets `--project-type cli`, `--lib-visibility public` sets `--project-type library`, `--mobile-platform ios` sets `--project-type mobile-app`, `--pipeline-processing batch` sets `--project-type data-pipeline`, `--ml-phase training` sets `--project-type ml`, `--ext-manifest 3` sets `--project-type browser-extension`. Error if conflicting type.
-- **Cannot mix flag families**: `--web-rendering ssr --backend-api-style rest` is an error. Each flag family (`--web-*`, `--backend-*`, `--cli-*`, `--lib-*`, `--mobile-*`, `--pipeline-*`, `--ml-*`, `--ext-*`, game) is exclusive.
-- **Validation**: `--depth` requires `--methodology custom`. `--online-services` requires `--multiplayer online` or `hybrid`. SSR/hybrid rendering is incompatible with static deploy target. Session auth requires server state (not static). ML inference projects must specify a serving pattern. Browser extensions must declare at least one capability (UI surface, content script, or background worker).
+- **Type-specific flags auto-set project type**: `--engine unity` automatically sets `--project-type game`, `--web-rendering ssr` sets `--project-type web-app`, `--backend-api-style rest` sets `--project-type backend`, `--cli-interactivity hybrid` sets `--project-type cli`, `--lib-visibility public` sets `--project-type library`, `--mobile-platform ios` sets `--project-type mobile-app`, `--pipeline-processing batch` sets `--project-type data-pipeline`, `--ml-phase training` sets `--project-type ml`, `--ext-manifest 3` sets `--project-type browser-extension`, `--research-driver code-driven` sets `--project-type research`. Error if conflicting type.
+- **Cannot mix flag families**: `--web-rendering ssr --backend-api-style rest` is an error. Each flag family (`--web-*`, `--backend-*`, `--cli-*`, `--lib-*`, `--mobile-*`, `--pipeline-*`, `--ml-*`, `--research-*`, `--ext-*`, game) is exclusive.
+- **Validation**: `--depth` requires `--methodology custom`. `--online-services` requires `--multiplayer online` or `hybrid`. SSR/hybrid rendering is incompatible with static deploy target. Session auth requires server state (not static). ML inference projects must specify a serving pattern. Browser extensions must declare at least one capability (UI surface, content script, or background worker). Notebook-driven research cannot be fully autonomous.
 #### CI Examples
@@ -550,6 +559,16 @@ scaffold init --auto --methodology mvp --project-type browser-extension \
   --ext-manifest 3 --ext-ui-surfaces devtools \
   --no-ext-content-script
+# Autonomous quant-finance research (trading strategy optimization)
+scaffold init --auto --methodology deep --project-type research \
+  --research-driver code-driven --research-interaction autonomous \
+  --research-domain quant-finance
+# Checkpoint-gated ML architecture search
+scaffold init --auto --methodology deep --project-type research \
+  --research-driver config-driven --research-interaction checkpoint-gated \
+  --research-domain ml-research
 # Multiplayer mobile game with Unity
 scaffold init --project-type game --methodology deep --auto \
   --engine unity --multiplayer online --target-platforms ios,android \
@@ -576,7 +595,7 @@ Scaffold supports **project-type overlays** — domain-specific knowledge and pi
 - **Injects domain knowledge** into existing pipeline steps (e.g., SSR caching strategies into `tech-stack`, API pagination patterns into `coding-standards`)
-The game overlay additionally adjusts step enablement, remaps artifact references, and adds dependency overrides (because game development has fundamentally different artifacts). The web-app, backend, CLI, library, mobile-app, data-pipeline, ML, and browser-extension overlays are **knowledge-only** — they inject domain expertise into existing steps without changing which steps run or how they depend on each other.
+The game overlay additionally adjusts step enablement, remaps artifact references, and adds dependency overrides (because game development has fundamentally different artifacts). The web-app, backend, CLI, library, mobile-app, data-pipeline, ML, browser-extension, and research overlays are **knowledge-only** — they inject domain expertise into existing steps without changing which steps run or how they depend on each other. The research type additionally supports **domain sub-overlays** (quant-finance, ml-research, simulation) that layer domain-specific knowledge on top of the core research overlay.
 Overlays are composable with methodology presets. An MVP web-app gets fewer steps at lower depth; a deep backend project gets exhaustive analysis of every architectural decision.
@@ -590,6 +609,7 @@ Overlays are composable with methodology presets. An MVP web-app gets fewer step
 | `data-pipeline` | `data-pipeline-overlay.yml` | 12 entries (architecture, batch and streaming patterns, orchestration, schema management, quality, testing, security) | Processing model, orchestration, data quality strategy, schema management, data catalog |
 | `ml` | `ml-overlay.yml` | 12 entries (architecture, training and serving patterns, experiment tracking, model evaluation, observability, testing, security) | Project phase, model type, serving pattern, experiment tracking |
 | `browser-extension` | `browser-extension-overlay.yml` | 12 entries (architecture, manifest configuration, service workers, content scripts, cross-browser, store submission, testing, security) | Manifest version, UI surfaces, content script, background worker |
+| `research` | `research-overlay.yml` + domain sub-overlays | 25 entries (experiment loop, tracking, overfitting prevention, backtesting, risk metrics, architecture search, simulation) | Experiment driver, interaction mode, domain, experiment tracking |
 | `game` | `game-overlay.yml` | 24 entries (engines, networking, audio, VR/AR, economy, save systems, certification) | Engine, multiplayer, platforms, economy, narrative, and 6 more |
 ### Game Development
@@ -675,7 +695,7 @@ These answers control which conditional steps activate. A single-player puzzle g
 #### Multi-type Detection
-`scaffold adopt` detects 9 project types from manifest files and directory layouts:
+`scaffold adopt` detects 10 project types from manifest files and directory layouts:
 | Type | Key Signals |
 |------|-------------|
@@ -688,6 +708,7 @@ These answers control which conditional steps activate. A single-player puzzle g
 | `data-pipeline` | `dags/` dir, Airflow/Prefect/Dagster deps, Spark configs |
 | `ml` | `training/`/`models/` dirs, PyTorch/TensorFlow deps, MLflow configs |
 | `browser-extension` | `manifest.json` with `manifest_version` field |
+| `research` | `program.md` + `results.tsv`, backtest/strategy files with trading deps, optimization deps + experiment dirs, simulation framework deps |
 Each detector returns a confidence tier (high/medium/low) with evidence trails. Override detection with `--project-type <type>`.
@@ -1316,7 +1337,7 @@ scaffold dashboard
 ## Knowledge System
-Scaffold ships with 194 domain expertise entries organized in sixteen categories:
+Scaffold ships with 222 domain expertise entries organized in seventeen categories:
 - **core/** (26 entries) — eval craft, testing strategy, domain modeling, API design, database design, system architecture, ADR craft, security best practices, operations, task decomposition, user stories, UX specification, design system tokens, user story innovation, AI memory management, coding conventions, tech stack selection, project structure patterns, task tracking, CLAUDE.md patterns, multi-model review dispatch, review step template, dev environment, git workflow patterns, automated review tooling, vision craft
 - **product/** (5 entries) — PRD craft, PRD innovation, gap analysis, vision craft, vision innovation
@@ -1334,6 +1355,7 @@ Scaffold ships with 194 domain expertise entries organized in sixteen categories
 - **data-pipeline/** (12 entries) — batch/streaming/hybrid patterns, orchestration (DAG/event-driven/scheduled), data quality, schema management, lineage, pipeline testing
 - **ml/** (12 entries) — training and inference patterns, model types (classical/deep-learning/llm), serving patterns, experiment tracking, model evaluation, MLOps observability
 - **browser-extension/** (12 entries) — Manifest V3, content scripts, service workers, cross-browser compatibility, extension security, store submission
+- **research/** (25 entries) — experiment loop architecture, parameter optimization, overfitting prevention, experiment tracking, security/sandboxing; domain knowledge for quant-finance (backtesting, risk metrics, market data, strategy patterns), ML-research (architecture search, ablation studies, evaluation), and simulation (engine integration, parameter spaces, compute management)
 Each pipeline step declares which knowledge entries it needs in its frontmatter. The assembly engine injects them automatically. Knowledge files with a `## Deep Guidance` section are optimized for the CLI — only the deep guidance content is loaded into the assembled prompt, skipping the summary to avoid redundancy with the prompt text.
@@ -1540,7 +1562,7 @@ All build inputs live under `content/`:
 content/
 ├── pipeline/         # 60 meta-prompts organized by 16 phases (phases 0-15, including build)
 ├── tools/            # 10 tool meta-prompts (stateless, category: tool)
-├── knowledge/        # 194 domain expertise entries (core, product, review, validation, finalization, execution, tools, game, web-app, backend, cli, library, mobile-app, data-pipeline, ml, browser-extension)
+├── knowledge/        # 222 domain expertise entries (core, product, review, validation, finalization, execution, tools, game, web-app, backend, cli, library, mobile-app, data-pipeline, ml, browser-extension, research)
 ├── methodology/      # 3 YAML presets (deep, mvp, custom)
 └── skills/           # Skill templates with {{markers}} for multi-platform resolution (includes mmr)
 ```

package/content/knowledge/research/research-architecture.md ADDED Viewed

@@ -0,0 +1,385 @@
+---
+name: research-architecture
+description: Experiment runner architecture including pluggable experiment and evaluation interfaces, state management patterns, and result persistence
+topics: [research, architecture, experiment-runner, state-management, interfaces, persistence]
+---
+The experiment runner is the central architectural component of a research project. It orchestrates the loop of loading configuration, executing experiments, evaluating results, and deciding whether to keep or discard each run. The runner must be completely decoupled from the specific experiment logic (strategies, models, parameter spaces) so that it can drive any experiment without modification. This separation is what makes autonomous iteration possible -- the agent modifies experiment code while the runner infrastructure remains stable.
+## Summary
+Build the experiment runner around three pluggable interfaces: Strategy (executes an experiment given config), Evaluator (computes metrics from raw results), and Tracker (records results for comparison). Use a state manager to track the current best result, iteration history, and budget consumption. Persist all state to disk so that the runner can resume after crashes. The runner never imports specific strategy code -- it discovers strategies via a registry or config-specified entry point.
+## Deep Guidance
+### Core Architecture
+```
+                    ┌──────────────────────┐
+                    │   ExperimentRunner    │
+                    │  ┌────────────────┐  │
+  Config ──────────►│  │ State Manager  │  │
+                    │  │ (best, history)│  │
+                    │  └───────┬────────┘  │
+                    │          │            │
+                    │  ┌───────▼────────┐  │
+                    │  │ Budget Checker │  │
+                    │  └───────┬────────┘  │
+                    │          │            │
+                    │  ┌───────▼────────┐  │
+                    │  │   Strategy     │◄─┼── Registry lookup
+                    │  │  (pluggable)   │  │
+                    │  └───────┬────────┘  │
+                    │          │            │
+                    │  ┌───────▼────────┐  │
+                    │  │   Evaluator    │  │
+                    │  │  (pluggable)   │  │
+                    │  └───────┬────────┘  │
+                    │          │            │
+                    │  ┌───────▼────────┐  │
+                    │  │   Tracker      │  │
+                    │  │  (pluggable)   │  │
+                    │  └────────────────┘  │
+                    └──────────────────────┘
+```
+### Pluggable Interface Design
+The three core interfaces use Python's Protocol type for structural subtyping. This means strategies do not need to inherit from a base class -- they only need to implement the required methods:
+```python
+# src/interfaces.py
+from typing import Protocol, Any, runtime_checkable
+@runtime_checkable
+class Strategy(Protocol):
+    """Interface for experiment execution strategies."""
+    @property
+    def name(self) -> str:
+        """Unique identifier for this strategy."""
+        ...
+    def execute(self, config: dict[str, Any]) -> dict[str, Any]:
+        """
+        Execute the experiment and return raw results.
+        Args:
+            config: Experiment configuration dict.
+        Returns:
+            Raw results dict. Structure is strategy-specific but must
+            contain enough information for the Evaluator to compute metrics.
+        """
+        ...
+@runtime_checkable
+class Evaluator(Protocol):
+    """Interface for result evaluation."""
+    def evaluate(self, raw_results: dict[str, Any]) -> dict[str, float]:
+        """
+        Compute metrics from raw experiment results.
+        Args:
+            raw_results: Output from Strategy.execute().
+        Returns:
+            Dict mapping metric names to float values.
+        """
+        ...
+    def is_improvement(self, current: dict[str, float],
+                        best: dict[str, float]) -> bool:
+        """
+        Determine if current results improve on the best so far.
+        Args:
+            current: Metrics from the current run.
+            best: Metrics from the best run so far.
+        Returns:
+            True if current should replace best.
+        """
+        ...
+@runtime_checkable
+class Tracker(Protocol):
+    """Interface for experiment result tracking."""
+    def log_run(self, run_id: str, config: dict, metrics: dict[str, float],
+                artifacts: dict[str, Any] | None = None) -> None:
+        """Record a single experiment run."""
+        ...
+    def get_history(self) -> list[dict]:
+        """Return all recorded runs."""
+        ...
+```
+### Strategy Registry
+The registry pattern allows the runner to instantiate strategies by name without importing them directly:
+```python
+# src/strategies/registry.py
+from typing import Type
+from src.interfaces import Strategy
+class StrategyRegistry:
+    """Registry for experiment strategy classes."""
+    _registry: dict[str, Type[Strategy]] = {}
+    @classmethod
+    def register(cls, name: str):
+        """Decorator to register a strategy class."""
+        def decorator(strategy_cls: Type[Strategy]):
+            if name in cls._registry:
+                raise ValueError(f"Strategy '{name}' already registered")
+            cls._registry[name] = strategy_cls
+            return strategy_cls
+        return decorator
+    @classmethod
+    def get(cls, name: str) -> Type[Strategy]:
+        """Look up a strategy by name."""
+        if name not in cls._registry:
+            available = ", ".join(sorted(cls._registry.keys()))
+            raise KeyError(
+                f"Strategy '{name}' not found. Available: {available}"
+            )
+        return cls._registry[name]
+    @classmethod
+    def list_strategies(cls) -> list[str]:
+        return sorted(cls._registry.keys())
+# Usage in a strategy file:
+# src/strategies/momentum.py
+from src.strategies.registry import StrategyRegistry
+@StrategyRegistry.register("momentum_crossover")
+class MomentumCrossover:
+    """Example strategy, registered under the name "momentum_crossover"."""
+    name = "momentum_crossover"
+    def __init__(self, lookback: int = 20, **kwargs):
+        # Any extra keyword arguments are accepted but not stored.
+        self.lookback = lookback
+    def execute(self, config: dict) -> dict:
+        """Run the strategy and return its raw results as a dict.
+        NOTE(review): ``trades`` and ``equity`` are not defined in this
+        snippet; they stand for values produced by the elided strategy
+        logic and must be computed before the return in a real strategy.
+        """
+        # ... run the momentum crossover strategy ...
+        return {"trades": trades, "equity_curve": equity}
+```
+### State Management
+The state manager tracks the experiment loop's progress and enables resume-after-crash:
+```python
+# src/runner/state.py
+import json
+from pathlib import Path
+from dataclasses import dataclass, field, asdict
+from typing import Any
+@dataclass
+class RunRecord:
+    """Record of a single experiment run."""
+    run_id: str  # identifier of the run
+    config: dict[str, Any]  # configuration associated with the run
+    metrics: dict[str, float]  # metric values keyed by metric name
+    is_best: bool = False  # True when this run was marked as the best so far
+    decision: str = ""  # "keep" or "discard"
+    reason: str = ""  # short explanation for the decision
+@dataclass
+class ExperimentState:
+    """Persistent state for the experiment loop."""
+    experiment_id: str
+    total_runs: int = 0
+    best_run: RunRecord | None = None
+    history: list[RunRecord] = field(default_factory=list)
+    runs_since_improvement: int = 0
+    def record_run(self, run: RunRecord) -> None:
+        """Record a completed run and update state."""
+        self.total_runs += 1
+        self.history.append(run)
+        if run.is_best:
+            self.best_run = run
+            self.runs_since_improvement = 0
+        else:
+            self.runs_since_improvement += 1
+    def save(self, path: Path) -> None:
+        """Persist state to disk for crash recovery."""
+        path.parent.mkdir(parents=True, exist_ok=True)
+        with open(path, "w") as f:
+            json.dump(asdict(self), f, indent=2, default=str)
+    @classmethod
+    def load(cls, path: Path) -> "ExperimentState":
+        """Load state from disk. Returns empty state if file missing."""
+        if not path.exists():
+            return cls(experiment_id="unknown")
+        with open(path) as f:
+            data = json.load(f)
+        state = cls(experiment_id=data["experiment_id"])
+        state.total_runs = data["total_runs"]
+        state.runs_since_improvement = data["runs_since_improvement"]
+        state.history = [RunRecord(**r) for r in data["history"]]
+        if data["best_run"]:
+            state.best_run = RunRecord(**data["best_run"])
+        return state
+```
+### The Experiment Runner
+The runner ties the interfaces together:
+```python
+# src/runner/experiment_runner.py
+import logging
+from pathlib import Path
+from src.interfaces import Strategy, Evaluator, Tracker
+from src.runner.state import ExperimentState, RunRecord
+from src.runner.budget import IterationBudget
+from src.config import load_config
+from src.seed import set_seed, capture_environment
+from src.strategies.registry import StrategyRegistry
+logger = logging.getLogger(__name__)
+class ExperimentRunner:
+    def __init__(self, config_path: str):
+        self.config = load_config(config_path)
+        self.experiment_id = Path(config_path).stem
+        self.results_dir = Path(self.config["logging"]["results_dir"]) / self.experiment_id
+        # Load pluggable components
+        strategy_cls = StrategyRegistry.get(self.config["strategy"]["type"])
+        self.strategy: Strategy = strategy_cls(**self.config["strategy"].get("params", {}))
+        self.evaluator: Evaluator = self._build_evaluator()
+        self.tracker: Tracker = self._build_tracker()
+        self.budget = IterationBudget(**self.config.get("budget", {}))
+        # Load or initialize state
+        self.state_path = self.results_dir / "state.json"
+        self.state = ExperimentState.load(self.state_path)
+        self.state.experiment_id = self.experiment_id
+    def run_loop(self) -> ExperimentState:
+        """Run the full experiment loop until budget exhaustion or convergence."""
+        logger.info("Starting experiment %s (resuming from run %d)",
+                     self.experiment_id, self.state.total_runs)
+        while True:
+            # Check budget
+            exhausted, reason = self.budget.is_exhausted(
+                runs=self.state.total_runs,
+                runs_since_improvement=self.state.runs_since_improvement,
+            )
+            if exhausted:
+                logger.info("Stopping: %s", reason)
+                break
+            # Execute one iteration
+            run_id = f"run-{self.state.total_runs + 1:04d}"
+            set_seed(self.config["experiment"]["seed"] + self.state.total_runs)
+            try:
+                raw_results = self.strategy.execute(self.config)
+                metrics = self.evaluator.evaluate(raw_results)
+            except Exception as e:
+                logger.error("Run %s failed: %s", run_id, e)
+                continue
+            # Evaluate improvement
+            is_best = (
+                self.state.best_run is None
+                or self.evaluator.is_improvement(metrics, self.state.best_run.metrics)
+            )
+            decision = "keep" if is_best else "discard"
+            run = RunRecord(
+                run_id=run_id,
+                config=self.config,
+                metrics=metrics,
+                is_best=is_best,
+                decision=decision,
+                reason=f"{'New best' if is_best else 'No improvement'}",
+            )
+            # Record and persist
+            self.state.record_run(run)
+            self.tracker.log_run(run_id, self.config, metrics)
+            self.state.save(self.state_path)
+            logger.info(
+                "Run %s: %s (metrics: %s)",
+                run_id, decision,
+                {k: f"{v:.4f}" for k, v in metrics.items()},
+            )
+        return self.state
+```
+### Result Persistence
+Results are persisted at two levels:
+1. **Per-run**: Each run's config, metrics, and artifacts are saved to `results/{experiment_id}/{run_id}/`.
+2. **Experiment state**: The full experiment state (history, best run, budget consumption) is saved to `results/{experiment_id}/state.json` after every run.
+```python
+# src/tracking/file_tracker.py
+import json
+from pathlib import Path
+from src.interfaces import Tracker
+class FileTracker:
+    """Simple file-based experiment tracker."""
+    def __init__(self, results_dir: str):
+        self.results_dir = Path(results_dir)
+        self.results_dir.mkdir(parents=True, exist_ok=True)
+    def log_run(self, run_id: str, config: dict, metrics: dict[str, float],
+                artifacts: dict | None = None) -> None:
+        run_dir = self.results_dir / run_id
+        run_dir.mkdir(parents=True, exist_ok=True)
+        with open(run_dir / "config.json", "w") as f:
+            json.dump(config, f, indent=2, default=str)
+        with open(run_dir / "metrics.json", "w") as f:
+            json.dump(metrics, f, indent=2)
+        if artifacts:
+            artifact_dir = run_dir / "artifacts"
+            artifact_dir.mkdir(exist_ok=True)
+            for name, data in artifacts.items():
+                with open(artifact_dir / name, "w") as f:
+                    json.dump(data, f, indent=2, default=str)
+    def get_history(self) -> list[dict]:
+        runs = []
+        for run_dir in sorted(self.results_dir.iterdir()):
+            if run_dir.is_dir() and (run_dir / "metrics.json").exists():
+                with open(run_dir / "metrics.json") as f:
+                    metrics = json.load(f)
+                runs.append({"run_id": run_dir.name, "metrics": metrics})
+        return runs
+```
+### Architecture Decision: When to Use Each Driver
+| Driver | Architecture Pattern | Use When |
+|--------|---------------------|----------|
+| Code-driven | Git state machine, agent modifies source | Exploring algorithmic variations, strategy development |
+| Config-driven | Fixed runner, parameterised configs | Hyperparameter sweeps, systematic parameter search |
+| API-driven | Client wrapper, parameter serialization | External backtest engines, cloud simulation APIs |
+| Notebook-driven | Papermill execution, cell-level tracking | Exploratory research, visualization-heavy analysis |
+The runner architecture remains the same across all drivers. What changes is the Strategy implementation: code-driven strategies contain the algorithm directly, config-driven strategies delegate to a parameterised engine, API-driven strategies wrap HTTP calls, and notebook-driven strategies use papermill to execute notebooks.