forgecraft-mcp 1.2.0 → 1.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +525 -525
- package/dist/cli/help.js +44 -44
- package/dist/registry/renderer-skeletons.js +92 -92
- package/dist/shared/gs-score-logger.js +6 -6
- package/dist/tools/add-module.js +123 -123
- package/dist/tools/advice-registry.js +18 -18
- package/dist/tools/check-cascade-report.js +64 -64
- package/dist/tools/configure-mcp.d.ts +3 -0
- package/dist/tools/configure-mcp.d.ts.map +1 -1
- package/dist/tools/configure-mcp.js +10 -0
- package/dist/tools/configure-mcp.js.map +1 -1
- package/dist/tools/forgecraft-dispatch.d.ts.map +1 -1
- package/dist/tools/forgecraft-dispatch.js +3 -0
- package/dist/tools/forgecraft-dispatch.js.map +1 -1
- package/dist/tools/forgecraft-schema-params.d.ts +9 -0
- package/dist/tools/forgecraft-schema-params.d.ts.map +1 -1
- package/dist/tools/forgecraft-schema-params.js +21 -0
- package/dist/tools/forgecraft-schema-params.js.map +1 -1
- package/dist/tools/forgecraft-schema.d.ts +9 -0
- package/dist/tools/forgecraft-schema.d.ts.map +1 -1
- package/dist/tools/refresh-output.js +14 -14
- package/dist/tools/scaffold-spec-stubs.js +115 -115
- package/dist/tools/scaffold-templates.js +62 -62
- package/dist/tools/setup-artifact-writers.d.ts +30 -0
- package/dist/tools/setup-artifact-writers.d.ts.map +1 -1
- package/dist/tools/setup-artifact-writers.js +120 -8
- package/dist/tools/setup-artifact-writers.js.map +1 -1
- package/dist/tools/setup-phase1.d.ts +3 -0
- package/dist/tools/setup-phase1.d.ts.map +1 -1
- package/dist/tools/setup-phase1.js +79 -35
- package/dist/tools/setup-phase1.js.map +1 -1
- package/dist/tools/setup-phase2.d.ts +2 -0
- package/dist/tools/setup-phase2.d.ts.map +1 -1
- package/dist/tools/setup-phase2.js +10 -1
- package/dist/tools/setup-phase2.js.map +1 -1
- package/dist/tools/setup-project.d.ts +18 -0
- package/dist/tools/setup-project.d.ts.map +1 -1
- package/dist/tools/setup-project.js +77 -1
- package/dist/tools/setup-project.js.map +1 -1
- package/dist/tools/spec-parser-tags.d.ts +9 -0
- package/dist/tools/spec-parser-tags.d.ts.map +1 -1
- package/dist/tools/spec-parser-tags.js +92 -0
- package/dist/tools/spec-parser-tags.js.map +1 -1
- package/package.json +89 -86
- package/templates/analytics/instructions.yaml +37 -37
- package/templates/analytics/mcp-servers.yaml +11 -11
- package/templates/analytics/structure.yaml +25 -25
- package/templates/api/instructions.yaml +231 -231
- package/templates/api/mcp-servers.yaml +22 -13
- package/templates/api/nfr.yaml +23 -23
- package/templates/api/review.yaml +103 -103
- package/templates/api/structure.yaml +34 -34
- package/templates/api/verification.yaml +132 -132
- package/templates/cli/instructions.yaml +31 -31
- package/templates/cli/mcp-servers.yaml +11 -11
- package/templates/cli/review.yaml +53 -53
- package/templates/cli/structure.yaml +16 -16
- package/templates/data-lineage/instructions.yaml +28 -28
- package/templates/data-lineage/mcp-servers.yaml +22 -22
- package/templates/data-pipeline/instructions.yaml +84 -84
- package/templates/data-pipeline/mcp-servers.yaml +13 -13
- package/templates/data-pipeline/nfr.yaml +39 -39
- package/templates/data-pipeline/structure.yaml +23 -23
- package/templates/fintech/hooks.yaml +55 -55
- package/templates/fintech/instructions.yaml +112 -112
- package/templates/fintech/mcp-servers.yaml +13 -13
- package/templates/fintech/nfr.yaml +46 -46
- package/templates/fintech/playbook.yaml +210 -210
- package/templates/fintech/verification.yaml +239 -239
- package/templates/game/instructions.yaml +289 -289
- package/templates/game/mcp-servers.yaml +38 -38
- package/templates/game/nfr.yaml +64 -64
- package/templates/game/playbook.yaml +214 -214
- package/templates/game/review.yaml +97 -97
- package/templates/game/structure.yaml +67 -67
- package/templates/game/verification.yaml +174 -174
- package/templates/healthcare/instructions.yaml +42 -42
- package/templates/healthcare/mcp-servers.yaml +13 -13
- package/templates/healthcare/nfr.yaml +47 -47
- package/templates/hipaa/instructions.yaml +41 -41
- package/templates/hipaa/mcp-servers.yaml +13 -13
- package/templates/infra/instructions.yaml +104 -104
- package/templates/infra/mcp-servers.yaml +20 -20
- package/templates/infra/nfr.yaml +46 -46
- package/templates/infra/review.yaml +65 -65
- package/templates/infra/structure.yaml +25 -25
- package/templates/library/instructions.yaml +36 -36
- package/templates/library/mcp-servers.yaml +20 -20
- package/templates/library/review.yaml +56 -56
- package/templates/library/structure.yaml +19 -19
- package/templates/medallion-architecture/instructions.yaml +41 -41
- package/templates/medallion-architecture/mcp-servers.yaml +22 -22
- package/templates/ml/instructions.yaml +85 -85
- package/templates/ml/mcp-servers.yaml +11 -11
- package/templates/ml/nfr.yaml +39 -39
- package/templates/ml/structure.yaml +25 -25
- package/templates/ml/verification.yaml +156 -156
- package/templates/mobile/instructions.yaml +44 -44
- package/templates/mobile/mcp-servers.yaml +11 -11
- package/templates/mobile/nfr.yaml +49 -49
- package/templates/mobile/structure.yaml +27 -27
- package/templates/mobile/verification.yaml +121 -121
- package/templates/observability-xray/instructions.yaml +40 -40
- package/templates/observability-xray/mcp-servers.yaml +15 -15
- package/templates/realtime/instructions.yaml +42 -42
- package/templates/realtime/mcp-servers.yaml +13 -13
- package/templates/soc2/instructions.yaml +41 -41
- package/templates/soc2/mcp-servers.yaml +24 -24
- package/templates/social/instructions.yaml +43 -43
- package/templates/social/mcp-servers.yaml +24 -24
- package/templates/state-machine/instructions.yaml +42 -42
- package/templates/state-machine/mcp-servers.yaml +11 -11
- package/templates/tools-registry.yaml +164 -164
- package/templates/universal/hooks.yaml +531 -531
- package/templates/universal/instructions.yaml +1692 -1692
- package/templates/universal/mcp-servers.yaml +50 -50
- package/templates/universal/nfr.yaml +197 -197
- package/templates/universal/reference.yaml +326 -326
- package/templates/universal/review.yaml +204 -204
- package/templates/universal/skills.yaml +262 -262
- package/templates/universal/structure.yaml +67 -67
- package/templates/universal/verification.yaml +416 -416
- package/templates/web-react/hooks.yaml +44 -44
- package/templates/web-react/instructions.yaml +207 -207
- package/templates/web-react/mcp-servers.yaml +20 -20
- package/templates/web-react/nfr.yaml +27 -27
- package/templates/web-react/review.yaml +94 -94
- package/templates/web-react/structure.yaml +46 -46
- package/templates/web-react/verification.yaml +126 -126
- package/templates/web-static/instructions.yaml +115 -115
- package/templates/web-static/mcp-servers.yaml +20 -20
- package/templates/web3/instructions.yaml +44 -44
- package/templates/web3/mcp-servers.yaml +11 -11
- package/templates/web3/verification.yaml +159 -159
- package/templates/zero-trust/instructions.yaml +41 -41
- package/templates/zero-trust/mcp-servers.yaml +15 -15
|
@@ -1,210 +1,210 @@
|
|
|
1
|
-
tag: FINTECH
|
|
2
|
-
section: playbook
|
|
3
|
-
title: "Quantitative Model Development Pipeline"
|
|
4
|
-
description: >
|
|
5
|
-
A structured, agent-driven pipeline for building production-grade financial models.
|
|
6
|
-
Covers formula research and selection, state-machine design, parametrization,
|
|
7
|
-
heuristic search with pruning, and simulation with insight distillation.
|
|
8
|
-
Run this playbook before writing any pricing, risk, or execution model code.
|
|
9
|
-
|
|
10
|
-
phases:
|
|
11
|
-
|
|
12
|
-
- id: formula-research
|
|
13
|
-
title: "Formula Research & Selection"
|
|
14
|
-
rationale: >
|
|
15
|
-
The agent must survey the domain literature first — using web search, arxiv,
|
|
16
|
-
and internal docs — before choosing a model. Committing to the wrong formula
|
|
17
|
-
is the most expensive mistake in quant work.
|
|
18
|
-
steps:
|
|
19
|
-
- id: enumerate-candidates
|
|
20
|
-
instruction: >
|
|
21
|
-
Search academic and industry sources for all formulas/models relevant
|
|
22
|
-
to the problem domain (e.g., pricing, risk, signal generation).
|
|
23
|
-
Produce a candidate list with: name, formula notation, assumptions, and original source.
|
|
24
|
-
expected_output: "Markdown table: Name | Formula | Key Assumptions | Source"
|
|
25
|
-
tools: ["web_search", "fetch_webpage"]
|
|
26
|
-
|
|
27
|
-
- id: evaluate-candidates
|
|
28
|
-
instruction: >
|
|
29
|
-
Score each candidate on: mathematical tractability, data requirements,
|
|
30
|
-
known failure modes, computational cost, and regulatory acceptability.
|
|
31
|
-
Eliminate candidates that fail hard constraints (e.g., negative price assumption).
|
|
32
|
-
expected_output: "Scored comparison table; candidates ranked with pass/fail on hard constraints"
|
|
33
|
-
tools: ["web_search"]
|
|
34
|
-
|
|
35
|
-
- id: select-and-justify
|
|
36
|
-
instruction: >
|
|
37
|
-
Select the winning formula. Write a one-page justification covering:
|
|
38
|
-
why it beats alternatives, what it assumes, and what conditions would
|
|
39
|
-
invalidate it. This becomes ADR content.
|
|
40
|
-
expected_output: "ADR draft with title, status=Proposed, context, decision, consequences"
|
|
41
|
-
|
|
42
|
-
- id: state-machine-design
|
|
43
|
-
title: "State Machine Design"
|
|
44
|
-
rationale: >
|
|
45
|
-
Financial workflows (order lifecycle, position states, settlement stages)
|
|
46
|
-
are inherently state machines. Formalising them before code prevents
|
|
47
|
-
impossible state transitions, race conditions, and audit gaps.
|
|
48
|
-
steps:
|
|
49
|
-
- id: enumerate-states
|
|
50
|
-
instruction: >
|
|
51
|
-
List every possible state for the entity (e.g., order: NEW, PENDING, PARTIALLY_FILLED,
|
|
52
|
-
FILLED, CANCELLED, REJECTED, EXPIRED). For each state, describe: what it means,
|
|
53
|
-
who/what causes entry, and what invariants must hold while in this state.
|
|
54
|
-
expected_output: "States table: State | Entry Condition | Invariants | Exit Triggers"
|
|
55
|
-
|
|
56
|
-
- id: enumerate-transitions
|
|
57
|
-
instruction: >
|
|
58
|
-
For each state, enumerate all valid transitions and the events/guards that trigger them.
|
|
59
|
-
Mark transitions that require idempotency guarantees or external settlement confirmation.
|
|
60
|
-
expected_output: "Transition table: From | Event | Guard | To | Side Effects"
|
|
61
|
-
|
|
62
|
-
- id: draw-diagram
|
|
63
|
-
instruction: >
|
|
64
|
-
Generate a Mermaid stateDiagram-v2 diagram from the state and transition tables.
|
|
65
|
-
Include notes on parallel states or guard conditions where relevant.
|
|
66
|
-
expected_output: "Mermaid stateDiagram-v2 code block that compiles without errors"
|
|
67
|
-
tools: ["run_in_terminal"]
|
|
68
|
-
|
|
69
|
-
- id: validate-completeness
|
|
70
|
-
instruction: >
|
|
71
|
-
Verify the diagram satisfies: (1) no dead-end states except terminal ones,
|
|
72
|
-
(2) every state reachable from the initial state, (3) every external event handled.
|
|
73
|
-
Flag any gaps.
|
|
74
|
-
expected_output: "Completeness checklist: pass/fail per criterion; gaps listed"
|
|
75
|
-
|
|
76
|
-
- id: parametrization
|
|
77
|
-
title: "Model Parametrization"
|
|
78
|
-
rationale: >
|
|
79
|
-
Hard-coded constants are a maintenance liability and a back-test overfitting risk.
|
|
80
|
-
Every numeric knob must be named, typed, range-validated, and externally configurable.
|
|
81
|
-
steps:
|
|
82
|
-
- id: extract-parameters
|
|
83
|
-
instruction: >
|
|
84
|
-
From the selected formula and state machine, extract every numeric constant
|
|
85
|
-
or threshold. For each: name it, specify its type (rate, multiplier, count,
|
|
86
|
-
duration), its valid range, and its economic meaning.
|
|
87
|
-
expected_output: "Parameter registry table: Name | Type | Valid Range | Default | Meaning"
|
|
88
|
-
|
|
89
|
-
- id: design-config-schema
|
|
90
|
-
instruction: >
|
|
91
|
-
Define a Zod (or Pydantic) schema for the full parameter set.
|
|
92
|
-
Include range validators, cross-parameter constraints (e.g., stop_loss < take_profit),
|
|
93
|
-
and environment-specific override docs.
|
|
94
|
-
expected_output: "Schema source file with all validators and inline JSDoc"
|
|
95
|
-
|
|
96
|
-
- id: sensitivity-matrix
|
|
97
|
-
instruction: >
|
|
98
|
-
For each parameter, estimate first-order sensitivity: how much does the model
|
|
99
|
-
output change for a ±10% change in the parameter? Identify the top 3 most
|
|
100
|
-
sensitive parameters — these need the tightest validation and monitoring.
|
|
101
|
-
expected_output: "Sensitivity table: Parameter | Δ+10% impact | Δ-10% impact | Risk rank"
|
|
102
|
-
|
|
103
|
-
- id: heuristic-search
|
|
104
|
-
title: "Heuristic Search with Pruning"
|
|
105
|
-
rationale: >
|
|
106
|
-
Exhaustive grid search over parameter space is computationally prohibitive.
|
|
107
|
-
Structured heuristic search (Bayesian optimisation, evolutionary algorithms,
|
|
108
|
-
or domain-guided hill climbing) with aggressive pruning finds good solutions
|
|
109
|
-
orders of magnitude faster.
|
|
110
|
-
steps:
|
|
111
|
-
- id: define-objective
|
|
112
|
-
instruction: >
|
|
113
|
-
Define the objective function: what scalar metric are we optimising?
|
|
114
|
-
(Sharpe ratio, max drawdown %, expected profit per unit risk, etc.)
|
|
115
|
-
Document: formula, units, whether higher or lower is better, and any
|
|
116
|
-
multi-objective tradeoff weights.
|
|
117
|
-
expected_output: "Objective function definition with formula, direction, and tradeoff weights"
|
|
118
|
-
|
|
119
|
-
- id: select-search-strategy
|
|
120
|
-
instruction: >
|
|
121
|
-
Choose a search strategy appropriate to parameter count and evaluation cost:
|
|
122
|
-
- ≤5 params, cheap eval → exhaustive grid or random search
|
|
123
|
-
- ≤15 params → Bayesian optimisation (optuna, hyperopt)
|
|
124
|
-
- >15 params or expensive eval → evolutionary (CMA-ES, NSGA-II)
|
|
125
|
-
Justify the choice and configure the initial seed and budget.
|
|
126
|
-
expected_output: "Strategy selection with justification and search budget (eval count)"
|
|
127
|
-
|
|
128
|
-
- id: implement-pruning
|
|
129
|
-
instruction: >
|
|
130
|
-
Implement pruning callbacks that abort parameter sets early if:
|
|
131
|
-
(1) intermediate metrics fall below a floor threshold,
|
|
132
|
-
(2) required constraints are violated (e.g., max drawdown breached before run ends).
|
|
133
|
-
Log all pruned candidates with their termination reason.
|
|
134
|
-
expected_output: "Pruning callback code + test showing early termination fires correctly"
|
|
135
|
-
tools: ["run_in_terminal"]
|
|
136
|
-
|
|
137
|
-
- id: run-search
|
|
138
|
-
instruction: >
|
|
139
|
-
Execute the heuristic search. Checkpoint results every N evaluations.
|
|
140
|
-
Produce: top-10 parameter sets by objective score, pruning rate, and
|
|
141
|
-
the marginal gain curve (how much did adding more evaluations help?).
|
|
142
|
-
expected_output: "Search results: top-10 table, pruning rate %, convergence plot data"
|
|
143
|
-
tools: ["run_in_terminal"]
|
|
144
|
-
|
|
145
|
-
- id: simulation-and-distillation
|
|
146
|
-
title: "Simulation & Insight Distillation"
|
|
147
|
-
rationale: >
|
|
148
|
-
Optimised parameters must be stress-tested against edge cases and adverse scenarios
|
|
149
|
-
before deployment. Simulations should produce compact, decision-relevant summaries —
|
|
150
|
-
not raw data dumps.
|
|
151
|
-
steps:
|
|
152
|
-
- id: define-scenarios
|
|
153
|
-
instruction: >
|
|
154
|
-
Define the simulation scenario set: at minimum, include (1) historical base case,
|
|
155
|
-
(2) fat-tail / crisis scenario (2008, 2020-03, etc.), (3) low-liquidity scenario,
|
|
156
|
-
(4) mean-reversion failure scenario. Add domain-specific stress scenarios.
|
|
157
|
-
expected_output: "Scenario catalogue: Name | Description | Key parameter overrides"
|
|
158
|
-
|
|
159
|
-
- id: run-simulations
|
|
160
|
-
instruction: >
|
|
161
|
-
Run Monte Carlo or historical replay simulations for each scenario using the
|
|
162
|
-
top-3 parameter sets from the search phase. Use at least 10,000 paths where
|
|
163
|
-
stochastic. Record: P&L distribution, max drawdown, Sharpe, Sortino, hit rate,
|
|
164
|
-
and worst consecutive loss streak.
|
|
165
|
-
expected_output: "Per-scenario stats table for each of the top-3 parameter sets"
|
|
166
|
-
tools: ["run_in_terminal"]
|
|
167
|
-
|
|
168
|
-
- id: distill-insights
|
|
169
|
-
instruction: >
|
|
170
|
-
From simulation results, extract the 5–7 most important insights.
|
|
171
|
-
For each: state the insight in one sentence, the evidence from the data,
|
|
172
|
-
and the actionable implication for deployment (e.g., reduce position size in
|
|
173
|
-
low-liquidity regimes). Discard raw data; keep only insights.
|
|
174
|
-
expected_output: "Insight list: Insight | Evidence | Deployment Implication"
|
|
175
|
-
|
|
176
|
-
- id: define-guardrails
|
|
177
|
-
instruction: >
|
|
178
|
-
From the simulation results, define production guardrails as concrete,
|
|
179
|
-
machine-checkable conditions: circuit breakers (halt if drawdown > X%),
|
|
180
|
-
position size limits per regime, and anomaly alerts (volume spike, spread widening).
|
|
181
|
-
These become config values in the parameter schema.
|
|
182
|
-
expected_output: "Guardrails config additions + monitoring alert definitions"
|
|
183
|
-
|
|
184
|
-
- id: implementation-handoff
|
|
185
|
-
title: "Implementation Handoff"
|
|
186
|
-
rationale: >
|
|
187
|
-
All upstream artefacts (ADR, schema, state machine, guardrails) must be in place
|
|
188
|
-
before a line of production code is written. This phase locks the specification.
|
|
189
|
-
steps:
|
|
190
|
-
- id: finalize-adr
|
|
191
|
-
instruction: >
|
|
192
|
-
Promote the ADR draft from Proposed → Accepted. Add a summary of the
|
|
193
|
-
simulation results that confirms the decision. Record the parameter set
|
|
194
|
-
selected for deployment.
|
|
195
|
-
expected_output: "docs/adrs/NNNN-<model-name>.md with Status: Accepted"
|
|
196
|
-
|
|
197
|
-
- id: write-acceptance-tests
|
|
198
|
-
instruction: >
|
|
199
|
-
Translate the simulation guardrails and state machine transitions into
|
|
200
|
-
acceptance tests. These must pass before any code is merged to main.
|
|
201
|
-
Tests should use the production Zod/Pydantic schema for inputs.
|
|
202
|
-
expected_output: "Test file with at minimum: 1 test per state transition + 1 per guardrail"
|
|
203
|
-
tools: ["run_in_terminal"]
|
|
204
|
-
|
|
205
|
-
- id: checklist-sign-off
|
|
206
|
-
instruction: >
|
|
207
|
-
Verify all items are complete: ADR accepted, schema merged, state machine
|
|
208
|
-
diagram committed, acceptance tests green, simulation results committed to
|
|
209
|
-
docs/simulations/. Only then is implementation cleared to start.
|
|
210
|
-
expected_output: "Sign-off checklist: all items checked"
|
|
1
|
+
tag: FINTECH
|
|
2
|
+
section: playbook
|
|
3
|
+
title: "Quantitative Model Development Pipeline"
|
|
4
|
+
description: >
|
|
5
|
+
A structured, agent-driven pipeline for building production-grade financial models.
|
|
6
|
+
Covers formula research and selection, state-machine design, parametrization,
|
|
7
|
+
heuristic search with pruning, and simulation with insight distillation.
|
|
8
|
+
Run this playbook before writing any pricing, risk, or execution model code.
|
|
9
|
+
|
|
10
|
+
phases:
|
|
11
|
+
|
|
12
|
+
- id: formula-research
|
|
13
|
+
title: "Formula Research & Selection"
|
|
14
|
+
rationale: >
|
|
15
|
+
The agent must survey the domain literature first — using web search, arxiv,
|
|
16
|
+
and internal docs — before choosing a model. Committing to the wrong formula
|
|
17
|
+
is the most expensive mistake in quant work.
|
|
18
|
+
steps:
|
|
19
|
+
- id: enumerate-candidates
|
|
20
|
+
instruction: >
|
|
21
|
+
Search academic and industry sources for all formulas/models relevant
|
|
22
|
+
to the problem domain (e.g., pricing, risk, signal generation).
|
|
23
|
+
Produce a candidate list with: name, formula notation, assumptions, and original source.
|
|
24
|
+
expected_output: "Markdown table: Name | Formula | Key Assumptions | Source"
|
|
25
|
+
tools: ["web_search", "fetch_webpage"]
|
|
26
|
+
|
|
27
|
+
- id: evaluate-candidates
|
|
28
|
+
instruction: >
|
|
29
|
+
Score each candidate on: mathematical tractability, data requirements,
|
|
30
|
+
known failure modes, computational cost, and regulatory acceptability.
|
|
31
|
+
Eliminate candidates that fail hard constraints (e.g., negative price assumption).
|
|
32
|
+
expected_output: "Scored comparison table; candidates ranked with pass/fail on hard constraints"
|
|
33
|
+
tools: ["web_search"]
|
|
34
|
+
|
|
35
|
+
- id: select-and-justify
|
|
36
|
+
instruction: >
|
|
37
|
+
Select the winning formula. Write a one-page justification covering:
|
|
38
|
+
why it beats alternatives, what it assumes, and what conditions would
|
|
39
|
+
invalidate it. This becomes ADR content.
|
|
40
|
+
expected_output: "ADR draft with title, status=Proposed, context, decision, consequences"
|
|
41
|
+
|
|
42
|
+
- id: state-machine-design
|
|
43
|
+
title: "State Machine Design"
|
|
44
|
+
rationale: >
|
|
45
|
+
Financial workflows (order lifecycle, position states, settlement stages)
|
|
46
|
+
are inherently state machines. Formalising them before code prevents
|
|
47
|
+
impossible state transitions, race conditions, and audit gaps.
|
|
48
|
+
steps:
|
|
49
|
+
- id: enumerate-states
|
|
50
|
+
instruction: >
|
|
51
|
+
List every possible state for the entity (e.g., order: NEW, PENDING, PARTIALLY_FILLED,
|
|
52
|
+
FILLED, CANCELLED, REJECTED, EXPIRED). For each state, describe: what it means,
|
|
53
|
+
who/what causes entry, and what invariants must hold while in this state.
|
|
54
|
+
expected_output: "States table: State | Entry Condition | Invariants | Exit Triggers"
|
|
55
|
+
|
|
56
|
+
- id: enumerate-transitions
|
|
57
|
+
instruction: >
|
|
58
|
+
For each state, enumerate all valid transitions and the events/guards that trigger them.
|
|
59
|
+
Mark transitions that require idempotency guarantees or external settlement confirmation.
|
|
60
|
+
expected_output: "Transition table: From | Event | Guard | To | Side Effects"
|
|
61
|
+
|
|
62
|
+
- id: draw-diagram
|
|
63
|
+
instruction: >
|
|
64
|
+
Generate a Mermaid stateDiagram-v2 diagram from the state and transition tables.
|
|
65
|
+
Include notes on parallel states or guard conditions where relevant.
|
|
66
|
+
expected_output: "Mermaid stateDiagram-v2 code block that compiles without errors"
|
|
67
|
+
tools: ["run_in_terminal"]
|
|
68
|
+
|
|
69
|
+
- id: validate-completeness
|
|
70
|
+
instruction: >
|
|
71
|
+
Verify the diagram satisfies: (1) no dead-end states except terminal ones,
|
|
72
|
+
(2) every state reachable from the initial state, (3) every external event handled.
|
|
73
|
+
Flag any gaps.
|
|
74
|
+
expected_output: "Completeness checklist: pass/fail per criterion; gaps listed"
|
|
75
|
+
|
|
76
|
+
- id: parametrization
|
|
77
|
+
title: "Model Parametrization"
|
|
78
|
+
rationale: >
|
|
79
|
+
Hard-coded constants are a maintenance liability and a back-test overfitting risk.
|
|
80
|
+
Every numeric knob must be named, typed, range-validated, and externally configurable.
|
|
81
|
+
steps:
|
|
82
|
+
- id: extract-parameters
|
|
83
|
+
instruction: >
|
|
84
|
+
From the selected formula and state machine, extract every numeric constant
|
|
85
|
+
or threshold. For each: name it, specify its type (rate, multiplier, count,
|
|
86
|
+
duration), its valid range, and its economic meaning.
|
|
87
|
+
expected_output: "Parameter registry table: Name | Type | Valid Range | Default | Meaning"
|
|
88
|
+
|
|
89
|
+
- id: design-config-schema
|
|
90
|
+
instruction: >
|
|
91
|
+
Define a Zod (or Pydantic) schema for the full parameter set.
|
|
92
|
+
Include range validators, cross-parameter constraints (e.g., stop_loss < take_profit),
|
|
93
|
+
and environment-specific override docs.
|
|
94
|
+
expected_output: "Schema source file with all validators and inline JSDoc"
|
|
95
|
+
|
|
96
|
+
- id: sensitivity-matrix
|
|
97
|
+
instruction: >
|
|
98
|
+
For each parameter, estimate first-order sensitivity: how much does the model
|
|
99
|
+
output change for a ±10% change in the parameter? Identify the top 3 most
|
|
100
|
+
sensitive parameters — these need the tightest validation and monitoring.
|
|
101
|
+
expected_output: "Sensitivity table: Parameter | Δ+10% impact | Δ-10% impact | Risk rank"
|
|
102
|
+
|
|
103
|
+
- id: heuristic-search
|
|
104
|
+
title: "Heuristic Search with Pruning"
|
|
105
|
+
rationale: >
|
|
106
|
+
Exhaustive grid search over parameter space is computationally prohibitive.
|
|
107
|
+
Structured heuristic search (Bayesian optimisation, evolutionary algorithms,
|
|
108
|
+
or domain-guided hill climbing) with aggressive pruning finds good solutions
|
|
109
|
+
orders of magnitude faster.
|
|
110
|
+
steps:
|
|
111
|
+
- id: define-objective
|
|
112
|
+
instruction: >
|
|
113
|
+
Define the objective function: what scalar metric are we optimising?
|
|
114
|
+
(Sharpe ratio, max drawdown %, expected profit per unit risk, etc.)
|
|
115
|
+
Document: formula, units, whether higher or lower is better, and any
|
|
116
|
+
multi-objective tradeoff weights.
|
|
117
|
+
expected_output: "Objective function definition with formula, direction, and tradeoff weights"
|
|
118
|
+
|
|
119
|
+
- id: select-search-strategy
|
|
120
|
+
instruction: >
|
|
121
|
+
Choose a search strategy appropriate to parameter count and evaluation cost:
|
|
122
|
+
- ≤5 params, cheap eval → exhaustive grid or random search
|
|
123
|
+
- ≤15 params → Bayesian optimisation (optuna, hyperopt)
|
|
124
|
+
- >15 params or expensive eval → evolutionary (CMA-ES, NSGA-II)
|
|
125
|
+
Justify the choice and configure the initial seed and budget.
|
|
126
|
+
expected_output: "Strategy selection with justification and search budget (eval count)"
|
|
127
|
+
|
|
128
|
+
- id: implement-pruning
|
|
129
|
+
instruction: >
|
|
130
|
+
Implement pruning callbacks that abort parameter sets early if:
|
|
131
|
+
(1) intermediate metrics fall below a floor threshold,
|
|
132
|
+
(2) required constraints are violated (e.g., max drawdown breached before run ends).
|
|
133
|
+
Log all pruned candidates with their termination reason.
|
|
134
|
+
expected_output: "Pruning callback code + test showing early termination fires correctly"
|
|
135
|
+
tools: ["run_in_terminal"]
|
|
136
|
+
|
|
137
|
+
- id: run-search
|
|
138
|
+
instruction: >
|
|
139
|
+
Execute the heuristic search. Checkpoint results every N evaluations.
|
|
140
|
+
Produce: top-10 parameter sets by objective score, pruning rate, and
|
|
141
|
+
the marginal gain curve (how much did adding more evaluations help?).
|
|
142
|
+
expected_output: "Search results: top-10 table, pruning rate %, convergence plot data"
|
|
143
|
+
tools: ["run_in_terminal"]
|
|
144
|
+
|
|
145
|
+
- id: simulation-and-distillation
|
|
146
|
+
title: "Simulation & Insight Distillation"
|
|
147
|
+
rationale: >
|
|
148
|
+
Optimised parameters must be stress-tested against edge cases and adverse scenarios
|
|
149
|
+
before deployment. Simulations should produce compact, decision-relevant summaries —
|
|
150
|
+
not raw data dumps.
|
|
151
|
+
steps:
|
|
152
|
+
- id: define-scenarios
|
|
153
|
+
instruction: >
|
|
154
|
+
Define the simulation scenario set: at minimum, include (1) historical base case,
|
|
155
|
+
(2) fat-tail / crisis scenario (2008, 2020-03, etc.), (3) low-liquidity scenario,
|
|
156
|
+
(4) mean-reversion failure scenario. Add domain-specific stress scenarios.
|
|
157
|
+
expected_output: "Scenario catalogue: Name | Description | Key parameter overrides"
|
|
158
|
+
|
|
159
|
+
- id: run-simulations
|
|
160
|
+
instruction: >
|
|
161
|
+
Run Monte Carlo or historical replay simulations for each scenario using the
|
|
162
|
+
top-3 parameter sets from the search phase. Use at least 10,000 paths where
|
|
163
|
+
stochastic. Record: P&L distribution, max drawdown, Sharpe, Sortino, hit rate,
|
|
164
|
+
and worst consecutive loss streak.
|
|
165
|
+
expected_output: "Per-scenario stats table for each of the top-3 parameter sets"
|
|
166
|
+
tools: ["run_in_terminal"]
|
|
167
|
+
|
|
168
|
+
- id: distill-insights
|
|
169
|
+
instruction: >
|
|
170
|
+
From simulation results, extract the 5–7 most important insights.
|
|
171
|
+
For each: state the insight in one sentence, the evidence from the data,
|
|
172
|
+
and the actionable implication for deployment (e.g., reduce position size in
|
|
173
|
+
low-liquidity regimes). Discard raw data; keep only insights.
|
|
174
|
+
expected_output: "Insight list: Insight | Evidence | Deployment Implication"
|
|
175
|
+
|
|
176
|
+
- id: define-guardrails
|
|
177
|
+
instruction: >
|
|
178
|
+
From the simulation results, define production guardrails as concrete,
|
|
179
|
+
machine-checkable conditions: circuit breakers (halt if drawdown > X%),
|
|
180
|
+
position size limits per regime, and anomaly alerts (volume spike, spread widening).
|
|
181
|
+
These become config values in the parameter schema.
|
|
182
|
+
expected_output: "Guardrails config additions + monitoring alert definitions"
|
|
183
|
+
|
|
184
|
+
- id: implementation-handoff
|
|
185
|
+
title: "Implementation Handoff"
|
|
186
|
+
rationale: >
|
|
187
|
+
All upstream artefacts (ADR, schema, state machine, guardrails) must be in place
|
|
188
|
+
before a line of production code is written. This phase locks the specification.
|
|
189
|
+
steps:
|
|
190
|
+
- id: finalize-adr
|
|
191
|
+
instruction: >
|
|
192
|
+
Promote the ADR draft from Proposed → Accepted. Add a summary of the
|
|
193
|
+
simulation results that confirms the decision. Record the parameter set
|
|
194
|
+
selected for deployment.
|
|
195
|
+
expected_output: "docs/adrs/NNNN-<model-name>.md with Status: Accepted"
|
|
196
|
+
|
|
197
|
+
- id: write-acceptance-tests
|
|
198
|
+
instruction: >
|
|
199
|
+
Translate the simulation guardrails and state machine transitions into
|
|
200
|
+
acceptance tests. These must pass before any code is merged to main.
|
|
201
|
+
Tests should use the production Zod/Pydantic schema for inputs.
|
|
202
|
+
expected_output: "Test file with at minimum: 1 test per state transition + 1 per guardrail"
|
|
203
|
+
tools: ["run_in_terminal"]
|
|
204
|
+
|
|
205
|
+
- id: checklist-sign-off
|
|
206
|
+
instruction: >
|
|
207
|
+
Verify all items are complete: ADR accepted, schema merged, state machine
|
|
208
|
+
diagram committed, acceptance tests green, simulation results committed to
|
|
209
|
+
docs/simulations/. Only then is implementation cleared to start.
|
|
210
|
+
expected_output: "Sign-off checklist: all items checked"
|