forgecraft-mcp 1.2.0 → 1.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136)
  1. package/README.md +525 -525
  2. package/dist/cli/help.js +44 -44
  3. package/dist/registry/renderer-skeletons.js +92 -92
  4. package/dist/shared/gs-score-logger.js +6 -6
  5. package/dist/tools/add-module.js +123 -123
  6. package/dist/tools/advice-registry.js +18 -18
  7. package/dist/tools/check-cascade-report.js +64 -64
  8. package/dist/tools/configure-mcp.d.ts +3 -0
  9. package/dist/tools/configure-mcp.d.ts.map +1 -1
  10. package/dist/tools/configure-mcp.js +10 -0
  11. package/dist/tools/configure-mcp.js.map +1 -1
  12. package/dist/tools/forgecraft-dispatch.d.ts.map +1 -1
  13. package/dist/tools/forgecraft-dispatch.js +3 -0
  14. package/dist/tools/forgecraft-dispatch.js.map +1 -1
  15. package/dist/tools/forgecraft-schema-params.d.ts +9 -0
  16. package/dist/tools/forgecraft-schema-params.d.ts.map +1 -1
  17. package/dist/tools/forgecraft-schema-params.js +21 -0
  18. package/dist/tools/forgecraft-schema-params.js.map +1 -1
  19. package/dist/tools/forgecraft-schema.d.ts +9 -0
  20. package/dist/tools/forgecraft-schema.d.ts.map +1 -1
  21. package/dist/tools/refresh-output.js +14 -14
  22. package/dist/tools/scaffold-spec-stubs.js +115 -115
  23. package/dist/tools/scaffold-templates.js +62 -62
  24. package/dist/tools/setup-artifact-writers.d.ts +30 -0
  25. package/dist/tools/setup-artifact-writers.d.ts.map +1 -1
  26. package/dist/tools/setup-artifact-writers.js +120 -8
  27. package/dist/tools/setup-artifact-writers.js.map +1 -1
  28. package/dist/tools/setup-phase1.d.ts +3 -0
  29. package/dist/tools/setup-phase1.d.ts.map +1 -1
  30. package/dist/tools/setup-phase1.js +79 -35
  31. package/dist/tools/setup-phase1.js.map +1 -1
  32. package/dist/tools/setup-phase2.d.ts +2 -0
  33. package/dist/tools/setup-phase2.d.ts.map +1 -1
  34. package/dist/tools/setup-phase2.js +10 -1
  35. package/dist/tools/setup-phase2.js.map +1 -1
  36. package/dist/tools/setup-project.d.ts +18 -0
  37. package/dist/tools/setup-project.d.ts.map +1 -1
  38. package/dist/tools/setup-project.js +77 -1
  39. package/dist/tools/setup-project.js.map +1 -1
  40. package/dist/tools/spec-parser-tags.d.ts +9 -0
  41. package/dist/tools/spec-parser-tags.d.ts.map +1 -1
  42. package/dist/tools/spec-parser-tags.js +92 -0
  43. package/dist/tools/spec-parser-tags.js.map +1 -1
  44. package/package.json +89 -86
  45. package/templates/analytics/instructions.yaml +37 -37
  46. package/templates/analytics/mcp-servers.yaml +11 -11
  47. package/templates/analytics/structure.yaml +25 -25
  48. package/templates/api/instructions.yaml +231 -231
  49. package/templates/api/mcp-servers.yaml +22 -13
  50. package/templates/api/nfr.yaml +23 -23
  51. package/templates/api/review.yaml +103 -103
  52. package/templates/api/structure.yaml +34 -34
  53. package/templates/api/verification.yaml +132 -132
  54. package/templates/cli/instructions.yaml +31 -31
  55. package/templates/cli/mcp-servers.yaml +11 -11
  56. package/templates/cli/review.yaml +53 -53
  57. package/templates/cli/structure.yaml +16 -16
  58. package/templates/data-lineage/instructions.yaml +28 -28
  59. package/templates/data-lineage/mcp-servers.yaml +22 -22
  60. package/templates/data-pipeline/instructions.yaml +84 -84
  61. package/templates/data-pipeline/mcp-servers.yaml +13 -13
  62. package/templates/data-pipeline/nfr.yaml +39 -39
  63. package/templates/data-pipeline/structure.yaml +23 -23
  64. package/templates/fintech/hooks.yaml +55 -55
  65. package/templates/fintech/instructions.yaml +112 -112
  66. package/templates/fintech/mcp-servers.yaml +13 -13
  67. package/templates/fintech/nfr.yaml +46 -46
  68. package/templates/fintech/playbook.yaml +210 -210
  69. package/templates/fintech/verification.yaml +239 -239
  70. package/templates/game/instructions.yaml +289 -289
  71. package/templates/game/mcp-servers.yaml +38 -38
  72. package/templates/game/nfr.yaml +64 -64
  73. package/templates/game/playbook.yaml +214 -214
  74. package/templates/game/review.yaml +97 -97
  75. package/templates/game/structure.yaml +67 -67
  76. package/templates/game/verification.yaml +174 -174
  77. package/templates/healthcare/instructions.yaml +42 -42
  78. package/templates/healthcare/mcp-servers.yaml +13 -13
  79. package/templates/healthcare/nfr.yaml +47 -47
  80. package/templates/hipaa/instructions.yaml +41 -41
  81. package/templates/hipaa/mcp-servers.yaml +13 -13
  82. package/templates/infra/instructions.yaml +104 -104
  83. package/templates/infra/mcp-servers.yaml +20 -20
  84. package/templates/infra/nfr.yaml +46 -46
  85. package/templates/infra/review.yaml +65 -65
  86. package/templates/infra/structure.yaml +25 -25
  87. package/templates/library/instructions.yaml +36 -36
  88. package/templates/library/mcp-servers.yaml +20 -20
  89. package/templates/library/review.yaml +56 -56
  90. package/templates/library/structure.yaml +19 -19
  91. package/templates/medallion-architecture/instructions.yaml +41 -41
  92. package/templates/medallion-architecture/mcp-servers.yaml +22 -22
  93. package/templates/ml/instructions.yaml +85 -85
  94. package/templates/ml/mcp-servers.yaml +11 -11
  95. package/templates/ml/nfr.yaml +39 -39
  96. package/templates/ml/structure.yaml +25 -25
  97. package/templates/ml/verification.yaml +156 -156
  98. package/templates/mobile/instructions.yaml +44 -44
  99. package/templates/mobile/mcp-servers.yaml +11 -11
  100. package/templates/mobile/nfr.yaml +49 -49
  101. package/templates/mobile/structure.yaml +27 -27
  102. package/templates/mobile/verification.yaml +121 -121
  103. package/templates/observability-xray/instructions.yaml +40 -40
  104. package/templates/observability-xray/mcp-servers.yaml +15 -15
  105. package/templates/realtime/instructions.yaml +42 -42
  106. package/templates/realtime/mcp-servers.yaml +13 -13
  107. package/templates/soc2/instructions.yaml +41 -41
  108. package/templates/soc2/mcp-servers.yaml +24 -24
  109. package/templates/social/instructions.yaml +43 -43
  110. package/templates/social/mcp-servers.yaml +24 -24
  111. package/templates/state-machine/instructions.yaml +42 -42
  112. package/templates/state-machine/mcp-servers.yaml +11 -11
  113. package/templates/tools-registry.yaml +164 -164
  114. package/templates/universal/hooks.yaml +531 -531
  115. package/templates/universal/instructions.yaml +1692 -1692
  116. package/templates/universal/mcp-servers.yaml +50 -50
  117. package/templates/universal/nfr.yaml +197 -197
  118. package/templates/universal/reference.yaml +326 -326
  119. package/templates/universal/review.yaml +204 -204
  120. package/templates/universal/skills.yaml +262 -262
  121. package/templates/universal/structure.yaml +67 -67
  122. package/templates/universal/verification.yaml +416 -416
  123. package/templates/web-react/hooks.yaml +44 -44
  124. package/templates/web-react/instructions.yaml +207 -207
  125. package/templates/web-react/mcp-servers.yaml +20 -20
  126. package/templates/web-react/nfr.yaml +27 -27
  127. package/templates/web-react/review.yaml +94 -94
  128. package/templates/web-react/structure.yaml +46 -46
  129. package/templates/web-react/verification.yaml +126 -126
  130. package/templates/web-static/instructions.yaml +115 -115
  131. package/templates/web-static/mcp-servers.yaml +20 -20
  132. package/templates/web3/instructions.yaml +44 -44
  133. package/templates/web3/mcp-servers.yaml +11 -11
  134. package/templates/web3/verification.yaml +159 -159
  135. package/templates/zero-trust/instructions.yaml +41 -41
  136. package/templates/zero-trust/mcp-servers.yaml +15 -15
@@ -1,210 +1,210 @@
1
- tag: FINTECH
2
- section: playbook
3
- title: "Quantitative Model Development Pipeline"
4
- description: >
5
- A structured, agent-driven pipeline for building production-grade financial models.
6
- Covers formula research and selection, state-machine design, parametrization,
7
- heuristic search with pruning, and simulation with insight distillation.
8
- Run this playbook before writing any pricing, risk, or execution model code.
9
-
10
- phases:
11
-
12
- - id: formula-research
13
- title: "Formula Research & Selection"
14
- rationale: >
15
- The agent must survey the domain literature first — using web search, arxiv,
16
- and internal docs — before choosing a model. Committing to the wrong formula
17
- is the most expensive mistake in quant work.
18
- steps:
19
- - id: enumerate-candidates
20
- instruction: >
21
- Search academic and industry sources for all formulas/models relevant
22
- to the problem domain (e.g., pricing, risk, signal generation).
23
- Produce a candidate list with: name, formula notation, assumptions, and original source.
24
- expected_output: "Markdown table: Name | Formula | Key Assumptions | Source"
25
- tools: ["web_search", "fetch_webpage"]
26
-
27
- - id: evaluate-candidates
28
- instruction: >
29
- Score each candidate on: mathematical tractability, data requirements,
30
- known failure modes, computational cost, and regulatory acceptability.
31
- Eliminate candidates that fail hard constraints (e.g., negative price assumption).
32
- expected_output: "Scored comparison table; candidates ranked with pass/fail on hard constraints"
33
- tools: ["web_search"]
34
-
35
- - id: select-and-justify
36
- instruction: >
37
- Select the winning formula. Write a one-page justification covering:
38
- why it beats alternatives, what it assumes, and what conditions would
39
- invalidate it. This becomes ADR content.
40
- expected_output: "ADR draft with title, status=Proposed, context, decision, consequences"
41
-
42
- - id: state-machine-design
43
- title: "State Machine Design"
44
- rationale: >
45
- Financial workflows (order lifecycle, position states, settlement stages)
46
- are inherently state machines. Formalising them before code prevents
47
- impossible state transitions, race conditions, and audit gaps.
48
- steps:
49
- - id: enumerate-states
50
- instruction: >
51
- List every possible state for the entity (e.g., order: NEW, PENDING, PARTIALLY_FILLED,
52
- FILLED, CANCELLED, REJECTED, EXPIRED). For each state, describe: what it means,
53
- who/what causes entry, and what invariants must hold while in this state.
54
- expected_output: "States table: State | Entry Condition | Invariants | Exit Triggers"
55
-
56
- - id: enumerate-transitions
57
- instruction: >
58
- For each state, enumerate all valid transitions and the events/guards that trigger them.
59
- Mark transitions that require idempotency guarantees or external settlement confirmation.
60
- expected_output: "Transition table: From | Event | Guard | To | Side Effects"
61
-
62
- - id: draw-diagram
63
- instruction: >
64
- Generate a Mermaid stateDiagram-v2 diagram from the state and transition tables.
65
- Include notes on parallel states or guard conditions where relevant.
66
- expected_output: "Mermaid stateDiagram-v2 code block that compiles without errors"
67
- tools: ["run_in_terminal"]
68
-
69
- - id: validate-completeness
70
- instruction: >
71
- Verify the diagram satisfies: (1) no dead-end states except terminal ones,
72
- (2) every state reachable from the initial state, (3) every external event handled.
73
- Flag any gaps.
74
- expected_output: "Completeness checklist: pass/fail per criterion; gaps listed"
75
-
76
- - id: parametrization
77
- title: "Model Parametrization"
78
- rationale: >
79
- Hard-coded constants are a maintenance liability and a back-test overfitting risk.
80
- Every numeric knob must be named, typed, range-validated, and externally configurable.
81
- steps:
82
- - id: extract-parameters
83
- instruction: >
84
- From the selected formula and state machine, extract every numeric constant
85
- or threshold. For each: name it, specify its type (rate, multiplier, count,
86
- duration), its valid range, and its economic meaning.
87
- expected_output: "Parameter registry table: Name | Type | Valid Range | Default | Meaning"
88
-
89
- - id: design-config-schema
90
- instruction: >
91
- Define a Zod (or Pydantic) schema for the full parameter set.
92
- Include range validators, cross-parameter constraints (e.g., stop_loss < take_profit),
93
- and environment-specific override docs.
94
- expected_output: "Schema source file with all validators and inline JSDoc"
95
-
96
- - id: sensitivity-matrix
97
- instruction: >
98
- For each parameter, estimate first-order sensitivity: how much does the model
99
- output change for a ±10% change in the parameter? Identify the top 3 most
100
- sensitive parameters — these need the tightest validation and monitoring.
101
- expected_output: "Sensitivity table: Parameter | Δ+10% impact | Δ-10% impact | Risk rank"
102
-
103
- - id: heuristic-search
104
- title: "Heuristic Search with Pruning"
105
- rationale: >
106
- Exhaustive grid search over parameter space is computationally prohibitive.
107
- Structured heuristic search (Bayesian optimisation, evolutionary algorithms,
108
- or domain-guided hill climbing) with aggressive pruning finds good solutions
109
- orders of magnitude faster.
110
- steps:
111
- - id: define-objective
112
- instruction: >
113
- Define the objective function: what scalar metric are we optimising?
114
- (Sharpe ratio, max drawdown %, expected profit per unit risk, etc.)
115
- Document: formula, units, whether higher or lower is better, and any
116
- multi-objective tradeoff weights.
117
- expected_output: "Objective function definition with formula, direction, and tradeoff weights"
118
-
119
- - id: select-search-strategy
120
- instruction: >
121
- Choose a search strategy appropriate to parameter count and evaluation cost:
122
- - ≤5 params, cheap eval → exhaustive grid or random search
123
- - ≤15 params → Bayesian optimisation (optuna, hyperopt)
124
- - >15 params or expensive eval → evolutionary (CMA-ES, NSGA-II)
125
- Justify the choice and configure the initial seed and budget.
126
- expected_output: "Strategy selection with justification and search budget (eval count)"
127
-
128
- - id: implement-pruning
129
- instruction: >
130
- Implement pruning callbacks that abort parameter sets early if:
131
- (1) intermediate metrics fall below a floor threshold,
132
- (2) required constraints are violated (e.g., max drawdown breached before run ends).
133
- Log all pruned candidates with their termination reason.
134
- expected_output: "Pruning callback code + test showing early termination fires correctly"
135
- tools: ["run_in_terminal"]
136
-
137
- - id: run-search
138
- instruction: >
139
- Execute the heuristic search. Checkpoint results every N evaluations.
140
- Produce: top-10 parameter sets by objective score, pruning rate, and
141
- the marginal gain curve (how much did adding more evaluations help?).
142
- expected_output: "Search results: top-10 table, pruning rate %, convergence plot data"
143
- tools: ["run_in_terminal"]
144
-
145
- - id: simulation-and-distillation
146
- title: "Simulation & Insight Distillation"
147
- rationale: >
148
- Optimised parameters must be stress-tested against edge cases and adverse scenarios
149
- before deployment. Simulations should produce compact, decision-relevant summaries —
150
- not raw data dumps.
151
- steps:
152
- - id: define-scenarios
153
- instruction: >
154
- Define the simulation scenario set: at minimum, include (1) historical base case,
155
- (2) fat-tail / crisis scenario (2008, 2020-03, etc.), (3) low-liquidity scenario,
156
- (4) mean-reversion failure scenario. Add domain-specific stress scenarios.
157
- expected_output: "Scenario catalogue: Name | Description | Key parameter overrides"
158
-
159
- - id: run-simulations
160
- instruction: >
161
- Run Monte Carlo or historical replay simulations for each scenario using the
162
- top-3 parameter sets from the search phase. Use at least 10,000 paths where
163
- stochastic. Record: P&L distribution, max drawdown, Sharpe, Sortino, hit rate,
164
- and worst consecutive loss streak.
165
- expected_output: "Per-scenario stats table for each of the top-3 parameter sets"
166
- tools: ["run_in_terminal"]
167
-
168
- - id: distill-insights
169
- instruction: >
170
- From simulation results, extract the 5–7 most important insights.
171
- For each: state the insight in one sentence, the evidence from the data,
172
- and the actionable implication for deployment (e.g., reduce position size in
173
- low-liquidity regimes). Discard raw data; keep only insights.
174
- expected_output: "Insight list: Insight | Evidence | Deployment Implication"
175
-
176
- - id: define-guardrails
177
- instruction: >
178
- From the simulation results, define production guardrails as concrete,
179
- machine-checkable conditions: circuit breakers (halt if drawdown > X%),
180
- position size limits per regime, and anomaly alerts (volume spike, spread widening).
181
- These become config values in the parameter schema.
182
- expected_output: "Guardrails config additions + monitoring alert definitions"
183
-
184
- - id: implementation-handoff
185
- title: "Implementation Handoff"
186
- rationale: >
187
- All upstream artefacts (ADR, schema, state machine, guardrails) must be in place
188
- before a line of production code is written. This phase locks the specification.
189
- steps:
190
- - id: finalize-adr
191
- instruction: >
192
- Promote the ADR draft from Proposed → Accepted. Add a summary of the
193
- simulation results that confirms the decision. Record the parameter set
194
- selected for deployment.
195
- expected_output: "docs/adrs/NNNN-<model-name>.md with Status: Accepted"
196
-
197
- - id: write-acceptance-tests
198
- instruction: >
199
- Translate the simulation guardrails and state machine transitions into
200
- acceptance tests. These must pass before any code is merged to main.
201
- Tests should use the production Zod/Pydantic schema for inputs.
202
- expected_output: "Test file with at minimum: 1 test per state transition + 1 per guardrail"
203
- tools: ["run_in_terminal"]
204
-
205
- - id: checklist-sign-off
206
- instruction: >
207
- Verify all items are complete: ADR accepted, schema merged, state machine
208
- diagram committed, acceptance tests green, simulation results committed to
209
- docs/simulations/. Only then is implementation cleared to start.
210
- expected_output: "Sign-off checklist: all items checked"
1
+ tag: FINTECH
2
+ section: playbook
3
+ title: "Quantitative Model Development Pipeline"
4
+ description: >
5
+ A structured, agent-driven pipeline for building production-grade financial models.
6
+ Covers formula research and selection, state-machine design, parametrization,
7
+ heuristic search with pruning, and simulation with insight distillation.
8
+ Run this playbook before writing any pricing, risk, or execution model code.
9
+
10
+ phases:
11
+
12
+ - id: formula-research
13
+ title: "Formula Research & Selection"
14
+ rationale: >
15
+ The agent must survey the domain literature first — using web search, arxiv,
16
+ and internal docs — before choosing a model. Committing to the wrong formula
17
+ is the most expensive mistake in quant work.
18
+ steps:
19
+ - id: enumerate-candidates
20
+ instruction: >
21
+ Search academic and industry sources for all formulas/models relevant
22
+ to the problem domain (e.g., pricing, risk, signal generation).
23
+ Produce a candidate list with: name, formula notation, assumptions, and original source.
24
+ expected_output: "Markdown table: Name | Formula | Key Assumptions | Source"
25
+ tools: ["web_search", "fetch_webpage"]
26
+
27
+ - id: evaluate-candidates
28
+ instruction: >
29
+ Score each candidate on: mathematical tractability, data requirements,
30
+ known failure modes, computational cost, and regulatory acceptability.
31
+ Eliminate candidates that fail hard constraints (e.g., negative price assumption).
32
+ expected_output: "Scored comparison table; candidates ranked with pass/fail on hard constraints"
33
+ tools: ["web_search"]
34
+
35
+ - id: select-and-justify
36
+ instruction: >
37
+ Select the winning formula. Write a one-page justification covering:
38
+ why it beats alternatives, what it assumes, and what conditions would
39
+ invalidate it. This becomes ADR content.
40
+ expected_output: "ADR draft with title, status=Proposed, context, decision, consequences"
41
+
42
+ - id: state-machine-design
43
+ title: "State Machine Design"
44
+ rationale: >
45
+ Financial workflows (order lifecycle, position states, settlement stages)
46
+ are inherently state machines. Formalising them before code prevents
47
+ impossible state transitions, race conditions, and audit gaps.
48
+ steps:
49
+ - id: enumerate-states
50
+ instruction: >
51
+ List every possible state for the entity (e.g., order: NEW, PENDING, PARTIALLY_FILLED,
52
+ FILLED, CANCELLED, REJECTED, EXPIRED). For each state, describe: what it means,
53
+ who/what causes entry, and what invariants must hold while in this state.
54
+ expected_output: "States table: State | Entry Condition | Invariants | Exit Triggers"
55
+
56
+ - id: enumerate-transitions
57
+ instruction: >
58
+ For each state, enumerate all valid transitions and the events/guards that trigger them.
59
+ Mark transitions that require idempotency guarantees or external settlement confirmation.
60
+ expected_output: "Transition table: From | Event | Guard | To | Side Effects"
61
+
62
+ - id: draw-diagram
63
+ instruction: >
64
+ Generate a Mermaid stateDiagram-v2 diagram from the state and transition tables.
65
+ Include notes on parallel states or guard conditions where relevant.
66
+ expected_output: "Mermaid stateDiagram-v2 code block that compiles without errors"
67
+ tools: ["run_in_terminal"]
68
+
69
+ - id: validate-completeness
70
+ instruction: >
71
+ Verify the diagram satisfies: (1) no dead-end states except terminal ones,
72
+ (2) every state reachable from the initial state, (3) every external event handled.
73
+ Flag any gaps.
74
+ expected_output: "Completeness checklist: pass/fail per criterion; gaps listed"
75
+
76
+ - id: parametrization
77
+ title: "Model Parametrization"
78
+ rationale: >
79
+ Hard-coded constants are a maintenance liability and a back-test overfitting risk.
80
+ Every numeric knob must be named, typed, range-validated, and externally configurable.
81
+ steps:
82
+ - id: extract-parameters
83
+ instruction: >
84
+ From the selected formula and state machine, extract every numeric constant
85
+ or threshold. For each: name it, specify its type (rate, multiplier, count,
86
+ duration), its valid range, and its economic meaning.
87
+ expected_output: "Parameter registry table: Name | Type | Valid Range | Default | Meaning"
88
+
89
+ - id: design-config-schema
90
+ instruction: >
91
+ Define a Zod (or Pydantic) schema for the full parameter set.
92
+ Include range validators, cross-parameter constraints (e.g., stop_loss < take_profit),
93
+ and environment-specific override docs.
94
+ expected_output: "Schema source file with all validators and inline JSDoc"
95
+
96
+ - id: sensitivity-matrix
97
+ instruction: >
98
+ For each parameter, estimate first-order sensitivity: how much does the model
99
+ output change for a ±10% change in the parameter? Identify the top 3 most
100
+ sensitive parameters — these need the tightest validation and monitoring.
101
+ expected_output: "Sensitivity table: Parameter | Δ+10% impact | Δ-10% impact | Risk rank"
102
+
103
+ - id: heuristic-search
104
+ title: "Heuristic Search with Pruning"
105
+ rationale: >
106
+ Exhaustive grid search over parameter space is computationally prohibitive.
107
+ Structured heuristic search (Bayesian optimisation, evolutionary algorithms,
108
+ or domain-guided hill climbing) with aggressive pruning finds good solutions
109
+ orders of magnitude faster.
110
+ steps:
111
+ - id: define-objective
112
+ instruction: >
113
+ Define the objective function: what scalar metric are we optimising?
114
+ (Sharpe ratio, max drawdown %, expected profit per unit risk, etc.)
115
+ Document: formula, units, whether higher or lower is better, and any
116
+ multi-objective tradeoff weights.
117
+ expected_output: "Objective function definition with formula, direction, and tradeoff weights"
118
+
119
+ - id: select-search-strategy
120
+ instruction: >
121
+ Choose a search strategy appropriate to parameter count and evaluation cost:
122
+ - ≤5 params, cheap eval → exhaustive grid or random search
123
+ - ≤15 params → Bayesian optimisation (optuna, hyperopt)
124
+ - >15 params or expensive eval → evolutionary (CMA-ES, NSGA-II)
125
+ Justify the choice and configure the initial seed and budget.
126
+ expected_output: "Strategy selection with justification and search budget (eval count)"
127
+
128
+ - id: implement-pruning
129
+ instruction: >
130
+ Implement pruning callbacks that abort parameter sets early if:
131
+ (1) intermediate metrics fall below a floor threshold,
132
+ (2) required constraints are violated (e.g., max drawdown breached before run ends).
133
+ Log all pruned candidates with their termination reason.
134
+ expected_output: "Pruning callback code + test showing early termination fires correctly"
135
+ tools: ["run_in_terminal"]
136
+
137
+ - id: run-search
138
+ instruction: >
139
+ Execute the heuristic search. Checkpoint results every N evaluations.
140
+ Produce: top-10 parameter sets by objective score, pruning rate, and
141
+ the marginal gain curve (how much did adding more evaluations help?).
142
+ expected_output: "Search results: top-10 table, pruning rate %, convergence plot data"
143
+ tools: ["run_in_terminal"]
144
+
145
+ - id: simulation-and-distillation
146
+ title: "Simulation & Insight Distillation"
147
+ rationale: >
148
+ Optimised parameters must be stress-tested against edge cases and adverse scenarios
149
+ before deployment. Simulations should produce compact, decision-relevant summaries —
150
+ not raw data dumps.
151
+ steps:
152
+ - id: define-scenarios
153
+ instruction: >
154
+ Define the simulation scenario set: at minimum, include (1) historical base case,
155
+ (2) fat-tail / crisis scenario (2008, 2020-03, etc.), (3) low-liquidity scenario,
156
+ (4) mean-reversion failure scenario. Add domain-specific stress scenarios.
157
+ expected_output: "Scenario catalogue: Name | Description | Key parameter overrides"
158
+
159
+ - id: run-simulations
160
+ instruction: >
161
+ Run Monte Carlo or historical replay simulations for each scenario using the
162
+ top-3 parameter sets from the search phase. Use at least 10,000 paths where
163
+ stochastic. Record: P&L distribution, max drawdown, Sharpe, Sortino, hit rate,
164
+ and worst consecutive loss streak.
165
+ expected_output: "Per-scenario stats table for each of the top-3 parameter sets"
166
+ tools: ["run_in_terminal"]
167
+
168
+ - id: distill-insights
169
+ instruction: >
170
+ From simulation results, extract the 5–7 most important insights.
171
+ For each: state the insight in one sentence, the evidence from the data,
172
+ and the actionable implication for deployment (e.g., reduce position size in
173
+ low-liquidity regimes). Discard raw data; keep only insights.
174
+ expected_output: "Insight list: Insight | Evidence | Deployment Implication"
175
+
176
+ - id: define-guardrails
177
+ instruction: >
178
+ From the simulation results, define production guardrails as concrete,
179
+ machine-checkable conditions: circuit breakers (halt if drawdown > X%),
180
+ position size limits per regime, and anomaly alerts (volume spike, spread widening).
181
+ These become config values in the parameter schema.
182
+ expected_output: "Guardrails config additions + monitoring alert definitions"
183
+
184
+ - id: implementation-handoff
185
+ title: "Implementation Handoff"
186
+ rationale: >
187
+ All upstream artefacts (ADR, schema, state machine, guardrails) must be in place
188
+ before a line of production code is written. This phase locks the specification.
189
+ steps:
190
+ - id: finalize-adr
191
+ instruction: >
192
+ Promote the ADR draft from Proposed → Accepted. Add a summary of the
193
+ simulation results that confirms the decision. Record the parameter set
194
+ selected for deployment.
195
+ expected_output: "docs/adrs/NNNN-<model-name>.md with Status: Accepted"
196
+
197
+ - id: write-acceptance-tests
198
+ instruction: >
199
+ Translate the simulation guardrails and state machine transitions into
200
+ acceptance tests. These must pass before any code is merged to main.
201
+ Tests should use the production Zod/Pydantic schema for inputs.
202
+ expected_output: "Test file with at minimum: 1 test per state transition + 1 per guardrail"
203
+ tools: ["run_in_terminal"]
204
+
205
+ - id: checklist-sign-off
206
+ instruction: >
207
+ Verify all items are complete: ADR accepted, schema merged, state machine
208
+ diagram committed, acceptance tests green, simulation results committed to
209
+ docs/simulations/. Only then is implementation cleared to start.
210
+ expected_output: "Sign-off checklist: all items checked"