@infinitedusky/indusk-mcp 1.20.0 → 1.22.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/hooks/validate-impl-structure.js +75 -2
- package/package.json +1 -1
- package/skills/planner.md +140 -10
|
@@ -311,9 +311,10 @@ for (const phase of phases) {
|
|
|
311
311
|
const trajectoryRequiredFrontmatter = /trajectory:\s*required/.test(frontmatter);
|
|
312
312
|
const hasTrajectoryHeading = /^##\s+Test Trajectory\b/m.test(body);
|
|
313
313
|
const trajectoryValidationEnabled = trajectoryRequiredFrontmatter || hasTrajectoryHeading;
|
|
314
|
+
const rationaleRequiredFrontmatter = /rationale:\s*required/.test(frontmatter);
|
|
314
315
|
|
|
315
316
|
if (trajectoryValidationEnabled) {
|
|
316
|
-
const trajectoryErrors = validateTrajectory(body);
|
|
317
|
+
const trajectoryErrors = validateTrajectory(body, rationaleRequiredFrontmatter);
|
|
317
318
|
if (trajectoryErrors.length > 0) {
|
|
318
319
|
process.stderr.write(
|
|
319
320
|
`Test Trajectory validation failed (policy: ${gatePolicy}):\n${trajectoryErrors.map((e) => ` [${e.rule}] ${e.message}`).join("\n")}\n\nSee .indusk/planning/tests-first-planning/adr.md Sections 3-6 for the Test Trajectory shape and validator rules.\n`,
|
|
@@ -346,7 +347,7 @@ process.exit(0);
|
|
|
346
347
|
// apps/indusk-mcp/src/lib/trajectory/validator.ts and parser.ts)
|
|
347
348
|
// ------------------------------------------------------------------
|
|
348
349
|
|
|
349
|
-
function validateTrajectory(implBody) {
|
|
350
|
+
function validateTrajectory(implBody, rationaleRequired) {
|
|
350
351
|
const errors = [];
|
|
351
352
|
|
|
352
353
|
// Rule 1: trajectory presence
|
|
@@ -363,6 +364,9 @@ function validateTrajectory(implBody) {
|
|
|
363
364
|
errors.push(...validateCrossReferenceIntegrity(implBody, trajectory));
|
|
364
365
|
errors.push(...validateTemporalCoherence(trajectory));
|
|
365
366
|
errors.push(...validateDeferredCompleteness(trajectory));
|
|
367
|
+
if (rationaleRequired) {
|
|
368
|
+
errors.push(...validateRationaleCompleteness(implBody, trajectory));
|
|
369
|
+
}
|
|
366
370
|
return errors;
|
|
367
371
|
}
|
|
368
372
|
|
|
@@ -632,3 +636,72 @@ function validateDeferredCompleteness(trajectory) {
|
|
|
632
636
|
}
|
|
633
637
|
return errors;
|
|
634
638
|
}
|
|
639
|
+
|
|
640
|
+
// ------------------------------------------------------------------
|
|
641
|
+
// Rationale validation (earliest-writable discipline)
|
|
642
|
+
//
|
|
643
|
+
// When frontmatter has `rationale: required`, the impl must contain a
|
|
644
|
+
// `### Trajectory Rationale` subsection with an entry per trajectory row whose `Writable at` is later than Phase 0.
|
|
645
|
+
// Each entry names what prevents authoring the test at Phase 0 (pre-plan).
|
|
646
|
+
// Read the entries together: shared weak excuses signal over-sequencing.
|
|
647
|
+
// ------------------------------------------------------------------
|
|
648
|
+
|
|
649
|
+
/**
 * Check that the impl's `### Trajectory Rationale` subsection covers every
 * trajectory row whose `Writable at` is later than Phase 0.
 *
 * Three conditions are reported, all under the `rationale-completeness` rule:
 *   1. rows need rationale but the subsection heading is absent;
 *   2. the subsection exists but some rows that need rationale have no entry;
 *   3. the subsection has entries for IDs that are not in the trajectory table.
 *
 * Rows with `writableAt` of 0 (or a non-finite value) never need an entry.
 *
 * @param {string} implBody - Markdown body of the impl document.
 * @param {{ rows: Array<{ id: string, writableAt: number }> }} trajectory -
 *   Parsed trajectory table; only `id` and `writableAt` are read here.
 * @returns {Array<{ rule: string, message: string }>} Validation errors (empty when valid).
 */
function validateRationaleCompleteness(implBody, trajectory) {
const errors = [];

const rowsNeedingRationale = trajectory.rows.filter(
(r) => Number.isFinite(r.writableAt) && r.writableAt > 0,
);
const hasSubsection = /^###\s+Trajectory Rationale\b/m.test(implBody);

// Without the subsection there are no entries to compare against, so the
// only possible finding is the missing heading itself.
if (!hasSubsection) {
if (rowsNeedingRationale.length > 0) {
errors.push({
rule: "rationale-completeness",
message: `\`rationale: required\` is set and ${rowsNeedingRationale.length} trajectory row(s) have \`Writable at\` later than Phase 0, but the impl is missing the \`### Trajectory Rationale\` subsection. Phase 0 rows don't need rationale; rows where authoring waits on plan code do — add an entry for ${rowsNeedingRationale.map((r) => r.id).join(", ")}.`,
});
}
return errors;
}

const rationaleIds = parseRationaleBlock(implBody);

const missing = rowsNeedingRationale
.filter((row) => !rationaleIds.has(row.id))
.map((row) => row.id);
if (missing.length > 0) {
errors.push({
rule: "rationale-completeness",
message: `Trajectory rows with \`Writable at\` later than Phase 0 missing from \`### Trajectory Rationale\`: ${missing.join(", ")}. Every row whose authoring waits on plan code needs a \`- **TN** \`Writable at: Phase N\` — {reason}\` entry. Phase 0 rows (writable today against the current stack) do not need rationale.`,
});
}

// Build the ID lookup once instead of scanning every row per rationale entry.
const knownIds = new Set(trajectory.rows.map((r) => r.id));
const extra = [...rationaleIds].filter((id) => !knownIds.has(id));
if (extra.length > 0) {
errors.push({
rule: "rationale-completeness",
message: `\`### Trajectory Rationale\` contains entries for IDs not present in the trajectory table: ${extra.join(", ")}. Remove the stale entries or add the missing trajectory rows.`,
});
}

return errors;
}
|
|
687
|
+
|
|
688
|
+
/**
 * Collect the trajectory IDs that have an entry under `### Trajectory Rationale`.
 *
 * Scanning starts on the line after the heading and stops at the next heading
 * of depth 1-3. Deeper headings (`####` and below) stay inside the section.
 * An entry is a line that begins with `- **TN**`, where N is one or more digits.
 *
 * Lines starting with `#` inside a fenced code block (``` or ~~~) in the
 * section are not treated as headings, so a shell comment in an example does
 * not cut the section short.
 *
 * @param {string} implBody - Markdown body of the impl document.
 * @returns {Set<string>} IDs (e.g. "T3") found in the rationale section, in document order.
 */
function parseRationaleBlock(implBody) {
const ids = new Set();
let inRationale = false;
// Fence character ("`" or "~") of the currently open code fence, or null.
let openFence = null;

for (const line of implBody.split("\n")) {
if (/^###\s+Trajectory Rationale\b/.test(line)) {
inRationale = true;
continue;
}
if (!inRationale) continue;

// Track fenced code blocks inside the section; a fence is closed only by
// the same marker character that opened it.
const fence = line.match(/^ {0,3}(`{3,}|~{3,})/);
if (fence) {
const marker = fence[1][0];
if (openFence === null) openFence = marker;
else if (openFence === marker) openFence = null;
continue;
}

// Break on next heading of depth 1-3 (new section starts), unless the
// line is code inside an open fence.
if (openFence === null && /^#{1,3}\s+/.test(line)) break;

// Match `- **TN**` at the start of a rationale entry
const match = line.match(/^-\s+\*\*(T\d+)\*\*/);
if (match) ids.add(match[1]);
}

return ids;
}
|
package/package.json
CHANGED
package/skills/planner.md
CHANGED
|
@@ -11,17 +11,17 @@ You know how to plan work in this project.
|
|
|
11
11
|
Every plan lives in `.indusk/planning/{kebab-case-name}/` and follows the same document lifecycle:
|
|
12
12
|
|
|
13
13
|
```
|
|
14
|
-
research.md → brief.md → adr.md → impl.md → retrospective.md
|
|
14
|
+
research.md → brief.md → test-plan.md → adr.md → impl.md → retrospective.md
|
|
15
15
|
```
|
|
16
16
|
|
|
17
|
-
Each document builds on the ones before it. Not every plan needs all
|
|
17
|
+
Each document builds on the ones before it. Not every plan needs all six — use the guide below to decide what's needed:
|
|
18
18
|
|
|
19
19
|
| Situation | Documents |
|
|
20
20
|
|---|---|
|
|
21
21
|
| Quick config change or bug fix | brief + impl |
|
|
22
|
-
| Architecture or technology decision | research + brief + adr + impl |
|
|
22
|
+
| Architecture or technology decision | research + brief + test-plan + adr + impl |
|
|
23
23
|
| Exploratory spike (no commitment) | research only |
|
|
24
|
-
| Large feature or system change | all
|
|
24
|
+
| Large feature or system change | all six |
|
|
25
25
|
|
|
26
26
|
The order is always preserved — never write an ADR before the brief, or an impl before the ADR (when both exist).
|
|
27
27
|
|
|
@@ -36,7 +36,7 @@ The first argument to `/planner` can optionally be a workflow type that controls
|
|
|
36
36
|
| `/planner bugfix auth-expiry` | bugfix | brief + impl only |
|
|
37
37
|
| `/planner refactor extract-auth` | refactor | brief + impl (with boundary map) |
|
|
38
38
|
| `/planner spike redis-options` | spike | research only |
|
|
39
|
-
| `/planner feature payment-flow` | feature | full lifecycle (default) |
|
|
39
|
+
| `/planner feature payment-flow` | feature | full lifecycle (default — includes test-plan between brief and ADR) |
|
|
40
40
|
| `/planner payment-flow` | feature | same — no type defaults to feature |
|
|
41
41
|
|
|
42
42
|
Parse the input: if the first word is `bugfix`, `refactor`, `spike`, or `feature`, use that workflow. Otherwise, default to `feature`. The remaining words become the plan name (kebab-cased).
|
|
@@ -84,7 +84,44 @@ Workflow templates are in `templates/workflows/` in the package. They describe w
|
|
|
84
84
|
```
|
|
85
85
|
The working agent does not write Graphiti episodes directly. The eval agent reads unprocessed highlights (via `highlights_unprocessed`), extracts the full Problem + Proposed Direction + Scope context from the transcript, writes a structured episode into the project group, and marks the highlight processed. Skip silently if `mcp__indusk__highlight` is unavailable — highlights are best-effort and must not fail brief acceptance. See [`apps/indusk-docs/src/reference/tools/highlights.md`](../../indusk-docs/src/reference/tools/highlights.md) for the full flow.
|
|
86
86
|
|
|
87
|
-
5. **If brief is accepted** and the workflow includes
|
|
87
|
+
5. **If brief is accepted** and the workflow includes a test plan (feature only), write the test plan. The test plan is the bridge between the brief (what we want and why) and the ADR (architectural decision). It lists the **behavioral assertions** that must be true for the feature to be working, and for each assertion names **how it will be tested** — not the test code itself, but the test mechanism (vitest unit, vitest integration, end-to-end script, manual user test, manual smoke against running stack, etc.).
|
|
88
|
+
|
|
89
|
+
The discipline this produces: when you walk into the ADR with a test plan in hand, the architectural decision is constrained by "what makes all these assertions true?" rather than invented from intuition. The ADR's "We decided for" / "And against" clauses gain teeth because alternatives can be rejected against specific assertions. The impl's Test Trajectory rows derive directly from the test plan's assertions — one trajectory row per assertion, with the `Writable at` / `Passes at` columns added during impl authoring.
|
|
90
|
+
|
|
91
|
+
**CRITICAL: assertions must be BEHAVIORAL, not functional.** This is the single most important authoring discipline for the test plan. A behavioral assertion describes *what an outside observer sees the system do* — a user action, a visible outcome, an externally-observable state change. A functional assertion describes *how the system does it internally* — function calls, return types, internal state, method signatures. Functional assertions belong in unit tests inside the impl phase, not in the test plan.
|
|
92
|
+
|
|
93
|
+
The phrasing test: read the assertion aloud to a non-engineer stakeholder. If they understand it without you having to explain a function name or type, it's behavioral. If you have to say "this is the function that…", it's functional — rewrite at the user-facing level.
|
|
94
|
+
|
|
95
|
+
**Behavioral (good)** — describes what the user / outside observer experiences:
|
|
96
|
+
- "User can sign in with Google."
|
|
97
|
+
- "Sign-in with an invalid password shows the error 'Invalid credentials'."
|
|
98
|
+
- "Forgotten-password email arrives in the user's inbox within 60 seconds."
|
|
99
|
+
- "Settled match results appear in the user's history within 5 seconds of on-chain confirmation."
|
|
100
|
+
- "Migration from rooms → tables preserves every existing row's primary key."
|
|
101
|
+
- "Withdrawing $50 of chips returns $50 to the wallet within 5 seconds."
|
|
102
|
+
|
|
103
|
+
**Functional (bad — rewrite)** — describes implementation details:
|
|
104
|
+
- ❌ "googleAuth() returns a JWT" → behavioral: "User can sign in with Google"
|
|
105
|
+
- ❌ "POST /api/login validates the request body schema" → behavioral: "Sign-in with malformed payload returns 400"
|
|
106
|
+
- ❌ "jwt.sign() is called with the correct payload" → behavioral: "Authenticated requests survive a server restart"
|
|
107
|
+
- ❌ "tablesRepository.create() inserts a row" → behavioral: "After creating a table, it appears in the table list"
|
|
108
|
+
- ❌ "The reconstructFromDb() method reads the new column" → behavioral: "Restarting the server preserves in-progress hands"
|
|
109
|
+
|
|
110
|
+
The mechanism column is the right place for "vitest unit" or "manual smoke" or "end-to-end script" — the *how to test*. The assertion column stays at the *what should be true* level. If naming a function or type creeps into the assertion, you've leaked the implementation across the boundary the test plan is meant to enforce.
|
|
111
|
+
|
|
112
|
+
**Present the test plan for review.** Walk the user through the assertions: "Here's everything I think must be true for this to work, and how I'd test each one. Anything missing? Anything we'd test differently?" The user signs off before you proceed to the ADR. If they push back on assertions, that's the plan working — better to discover scope gaps here than at impl time. If you catch yourself writing functional-sounding assertions, stop and re-phrase before presenting.
|
|
113
|
+
|
|
114
|
+
**When the test plan moves from `draft` to `accepted`**, write a highlight:
|
|
115
|
+
```
|
|
116
|
+
mcp__indusk__highlight({
|
|
117
|
+
tag: "test-plan-accepted",
|
|
118
|
+
note: "{plan-name}: {N} assertions covering {one-line summary of feature scope}",
|
|
119
|
+
level: "important"
|
|
120
|
+
})
|
|
121
|
+
```
|
|
122
|
+
Skip silently on highlight unavailability.
|
|
123
|
+
|
|
124
|
+
6. **If test plan is accepted** and the workflow includes an ADR (feature only), write the ADR. The ADR formalizes the decisions that were discussed during research and led to the brief. It records what was chosen, what was rejected, and why. **After the ADR is accepted**, add a one-liner to CLAUDE.md's Key Decisions section per the context skill: `- {decision summary} — see .indusk/planning/{plan}/adr.md`
|
|
88
125
|
|
|
89
126
|
**When the ADR moves from `proposed` to `accepted`**, write a highlight so the eval agent can turn it into a structured Y-statement episode:
|
|
90
127
|
```
|
|
@@ -96,9 +133,46 @@ Workflow templates are in `templates/workflows/` in the package. They describe w
|
|
|
96
133
|
```
|
|
97
134
|
The eval agent reads the highlight, pulls the full Y-statement from the ADR file, writes a structured episode into the project group, and marks it processed. Graphiti's entity extraction will pick up the chosen option, rejected alternatives, constraint, and rationale, and will detect contradictions if a later ADR overrides this one. The working agent does not write the episode directly. Skip silently on highlight unavailability — degrade gracefully.
|
|
98
135
|
|
|
99
|
-
|
|
136
|
+
7. **If ADR is accepted** (or brief is accepted for bugfix/refactor), write the impl. Break into phased checklists with concrete tasks. For refactor workflows, include a `## Boundary Map` section. For multi-phase impls of any type, consider adding a boundary map.
|
|
100
137
|
|
|
101
|
-
**
|
|
138
|
+
**Derive the Test Trajectory from the test plan.** Every new impl opens with a `## Test Trajectory` table (after `## Boundary Map`, before `## Checklist`) that enumerates the tests the plan commits to. Columns: `ID | Asserts | Writable at | Passes at | State` (plus optional `Kind`, `Scope`). For feature plans, walk the test plan's assertion list — each assertion becomes a trajectory row, with the assertion text becoming the `Asserts` column and the test plan's mechanism informing the optional `Kind`/`Scope` columns. Then walk each planned phase and assign `Writable at` / `Passes at`. Every phase's Verification block references test IDs from the trajectory rather than restating the checks. For bugfix/refactor workflows without a test plan, walk the ADR's Decision section (or the brief's Success Criteria) and ask "what test would prove this works?" for each item.
|
|
139
|
+
|
|
140
|
+
**Writable at is the earliest possible phase, not the fix phase.** The rule: *if it is possible to write a test, write it — then let it pass when it will.* The validator only enforces `Writable at ≤ Passes at` (a floor); the real discipline is `Writable at = earliest feasible phase`. A test authored in the same phase as its fix is a rubber stamp — nothing proves intermediate phases didn't break it or fix it by accident. A test that goes red early and stays red through intermediate phases until its fix lands is a live tripwire: any intermediate phase that turns it green prematurely signals unexpected coupling; any intermediate phase that breaks an unrelated passing test signals regression.
|
|
141
|
+
|
|
142
|
+
Honest shapes:
|
|
143
|
+
- **Regression tests for reported bugs**: `Writable at: Phase 0` (the stack runs, the bug is reproducible today, no plan code needed to author). Passes at = the phase that lands the fix.
|
|
144
|
+
- **End-to-end scenarios via HTTP/WS**: `Writable at: Phase 0` if the test can be a script hitting current endpoints (404 today is real-red). Passes at = the phase that closes the last gap. Only move later if authoring requires a not-yet-existing TypeScript symbol or constructor signature.
|
|
145
|
+
- **Reconstruction / persistence tests**: `Writable at: Phase 0` if the test is a "restart-and-check" script (today fails because state doesn't persist, which is real-red). Move later only if the assertion references a not-yet-existing symbol.
|
|
146
|
+
- **Unit tests for new code**: `Writable at = Passes at` is legitimate when the test's subject is a TypeScript symbol (schema file, new function, new enum value) introduced in that phase — the test file would not compile today.
|
|
147
|
+
- **Grep-the-thing-is-gone tests**: `Writable at: Phase 0` (the old identifier exists today; the grep finds it, which is the red state). Passes at = the phase that removes the identifier.
|
|
148
|
+
|
|
149
|
+
Challenge each row before you write it down: *"could this test be authored earlier than the phase that makes it pass?"* If yes, `Writable at` must point to that earlier phase. The Writable-phase's Verification block gains a `(write red)` item that commits the test against the current implementation and asserts the expected failure symptom; the Passes-phase's Verification block keeps its `(goes green)` item. Both reference the same test ID — the validator accepts multiple phase references to one trajectory row.
|
|
150
|
+
|
|
151
|
+
**Phase 0 is the default; rationale is required only for Phase 1+ rows.** Every new impl sets `rationale: required` in its frontmatter. The `### Trajectory Rationale` subsection (placed after `### Deferred Verification`) is required ONLY when at least one trajectory row has `Writable at` later than Phase 0. Phase 0 means "writable today against the current stack, before any plan code lands" — it's the default and needs no justification. We only require rationale when a test will be authored AFTER some plan implementation has happened (Writable at: Phase 1+). This keeps the subsection from filling with "trivially writable today" boilerplate when most rows are correctly Phase 0.
|
|
152
|
+
|
|
153
|
+
The `validate-impl-structure.js` hook enforces completeness: every Phase 1+ T-ID must appear as a `- **TN** \`Writable at: Phase N\` — {reason}` entry, the subsection itself must exist when any Phase 1+ row exists, and stale entries (entries for IDs not in the trajectory table) are flagged.
|
|
154
|
+
|
|
155
|
+
Entry shape: `- **TN** \`Writable at: Phase N\` — {one-sentence reason}`. Examples:
|
|
156
|
+
- `- **T22** \`Writable at: Phase 0\` — Bug is reproducible today against the running stack; test is authorable against current behavior and fails red.` *(no rationale entry needed; included here only as a reminder of the Phase 0 default)*
|
|
157
|
+
- `- **T14** \`Writable at: Phase 5\` — Subject is the zod schema file authored in Phase 5; no import target exists before then.` *(needs rationale)*
|
|
158
|
+
- `- **T20** \`Writable at: Phase 6\` — Test constructs PokerV2Room with a settings argument; the constructor signature gains the settings parameter in Phase 6, so TypeScript rejects the test source today.` *(needs rationale)*
|
|
159
|
+
|
|
160
|
+
**The rationale-quality test:** *Does this rationale describe a compile error against today's symbols, or does it describe an uninteresting failure mode?* If the latter, the row is a rubber-stamp — move it to Phase 0.
|
|
161
|
+
|
|
162
|
+
- **Legitimate `Writable > Phase 0` (compile error against today's symbols):**
|
|
163
|
+
- Test imports a not-yet-exported TypeScript symbol — `import { pokerTableSettingsSchema } from "@numero/types"` when the export doesn't exist. The import line is a compile error; the test file cannot be authored.
|
|
164
|
+
- Test constructs an object using a constructor signature that doesn't exist — `new PokerV2Room({ settings: {...} })` when the constructor doesn't take `settings`. TypeScript rejects.
|
|
165
|
+
- Test asserts against an enum value that doesn't exist — `expect(result.phase).toBe(GamePhase.CollectingBlinds)` when `CollectingBlinds` isn't in the enum.
|
|
166
|
+
- **Rubber-stamp `Writable > Phase 0` (red for an uninteresting reason — move to Phase 0):**
|
|
167
|
+
- "Assertion checks for error code `X` which is introduced in Phase N." → String comparison. Authorable today; fails because today's response is silent-swallow or a different error code. Stays red until the convention lands.
|
|
168
|
+
- "Endpoint doesn't exist yet." → HTTP request returns 404. Authorable today; 404-red is real-red.
|
|
169
|
+
- "Column doesn't exist yet." → SQL query errors. Authorable today; query-error-red is real-red.
|
|
170
|
+
- "Reconstruction code doesn't read from this column yet." → Restart-and-check script. Authorable today; whatever signal emerges is real.
|
|
171
|
+
- "Migration script doesn't exist yet." → Migration runner returns "migration NNNN not found." Authorable today.
|
|
172
|
+
|
|
173
|
+
The line is *can the test source code be authored today*, not *would it fail for a satisfying reason*. Red-for-uninteresting-reason is the whole point of `Writable at = Phase 0`: the test stays red through every intermediate phase, and any phase that turns it green prematurely or breaks an unrelated test surfaces a regression you'd otherwise miss.
|
|
174
|
+
|
|
175
|
+
Why it matters: read the rationales as a set after authoring. If multiple rows share the same weak excuse ("depends on the fix landing", "endpoint doesn't exist yet", "error code not defined yet"), the plan is over-sequenced and those tests should move earlier. The rationale subsection is the discipline tool — the validator enforces its presence; the human judgment is whether each rationale describes a real compile error or a rubber-stamped failure mode.
|
|
102
176
|
|
|
103
177
|
**Trajectory sizing:** 3–5 tests for a bugfix or small feature, 10–25 for a multi-phase infrastructure plan. Prefer one high-level property test over five example tests where possible. If your trajectory has more rows than lines of new code, the plan is over-specified — consolidate. If it has fewer than one row per phase, you probably have untested phases — add rows or declare `(no tests flip at this phase — reason: {schema-only|delete|refactor|infra})` in the phase's Verification.
|
|
104
178
|
|
|
@@ -116,9 +190,9 @@ Workflow templates are in `templates/workflows/` in the package. They describe w
|
|
|
116
190
|
|
|
117
191
|
**OTel gate is conditional on `otel.role`.** Read `.indusk/config.json` for the project's `otel.role` field (or use the `shouldEmitOtelGate(projectRoot)` helper from `apps/indusk-mcp/src/lib/config.ts`). The OTel gate fires for projects whose `otel.role` is unset or `"service"` — these are user-facing apps that produce telemetry you want to collect. **Do NOT write `#### Phase N OTel` sections** for projects whose `otel.role` is `"library"`, `"tool"`, or `"none"` — these are libraries, CLIs, or scripts that should never emit telemetry and writing OTel gates for them is friction without value. The `validate-impl-structure` and `check-gates` hooks apply the same rule. The other gates (verify, context, document) always apply regardless of `otel.role`.
|
|
118
192
|
|
|
119
|
-
|
|
193
|
+
8. **If impl is completed** (all items checked off by `/work`), invoke the retrospective skill (`/retrospective {plan-name}`). This handles the structured audit (docs, tests, quality, context), knowledge handoff to the docs site, and archival. Do not write a freeform retrospective — use the skill. (Bugfix and refactor workflows may skip retrospective for small changes — user's call.)
|
|
120
194
|
|
|
121
|
-
|
|
195
|
+
9. **Always present each document for review** before moving to the next stage. The user signs off on each step.
|
|
122
196
|
|
|
123
197
|
## Cross-Referencing Between Plans
|
|
124
198
|
|
|
@@ -192,6 +266,51 @@ status: draft | accepted
|
|
|
192
266
|
- {Plans that are waiting on this one — e.g., `.indusk/planning/electric-ledger-sync/`}
|
|
193
267
|
```
|
|
194
268
|
|
|
269
|
+
### test-plan.md
|
|
270
|
+
|
|
271
|
+
The test plan is the bridge between the brief and the ADR. It enumerates the **behavioral assertions** that must be true for the feature to be working, plus the **mechanism** by which each assertion will be tested. It does NOT contain test code — only the contract the implementation must satisfy and the kind of test that will verify it.
|
|
272
|
+
|
|
273
|
+
**Behavioral, not functional.** Every assertion must describe what an outside observer (typically a user) experiences — not what an internal function does. "User can sign in with Google" not "googleAuth() returns a JWT." See step 5 above for the full bad-vs-good list. If an assertion mentions a function name, type name, internal endpoint name, repository method, or other implementation detail, rewrite it at the user-facing level before saving.
|
|
274
|
+
|
|
275
|
+
```markdown
|
|
276
|
+
---
|
|
277
|
+
title: "{Title} — Test Plan"
|
|
278
|
+
date: {YYYY-MM-DD}
|
|
279
|
+
status: draft | accepted
|
|
280
|
+
---
|
|
281
|
+
|
|
282
|
+
# {Title} — Test Plan
|
|
283
|
+
|
|
284
|
+
## Purpose
|
|
285
|
+
|
|
286
|
+
This document lists the behavioral assertions that, taken together, mean the feature is working. Each assertion names the mechanism by which it will be tested — not the test code, but the test approach (vitest unit / vitest integration / end-to-end script / manual user test / manual smoke / etc.). When all assertions can be made true by an architecture, we have a feature; when all assertions are passing in code, the feature is shipped.
|
|
287
|
+
|
|
288
|
+
The assertions here become the source rows for the impl's `## Test Trajectory` table. The ADR that follows this document is constrained by "what makes all these assertions true?" rather than invented from intuition.
|
|
289
|
+
|
|
290
|
+
## Behavioral Assertions
|
|
291
|
+
|
|
292
|
+
**Every assertion must be observable from outside the system.** Describe what the user sees, what the API returns to a caller, what an external observer measures — never internal function calls, return types, or method signatures. If a non-engineer stakeholder couldn't read an assertion and understand it, rewrite it.
|
|
293
|
+
|
|
294
|
+
| ID | Assertion (user-visible behavior) | Mechanism |
|
|
295
|
+
|----|-----------------------------------|-----------|
|
|
296
|
+
| A1 | {Behavioral fact — e.g., "User can sign in with Google."} | {vitest unit / vitest integration / e2e script / manual user test / manual smoke} |
|
|
297
|
+
| A2 | {Behavioral fact — e.g., "Sign-in with invalid password shows the error 'Invalid credentials'."} | vitest integration |
|
|
298
|
+
| A3 | {Behavioral fact — e.g., "Forgotten-password email arrives in inbox within 60 seconds."} | manual smoke (account on staging) |
|
|
299
|
+
|
|
300
|
+
## Untestable Assertions
|
|
301
|
+
|
|
302
|
+
{Optional. Include only if the feature has behaviors that cannot be tested within this plan — LLM output quality, paid third-party integrations, UX judgment, behaviors only observable in production traffic. For each, name the reason and what compensating control covers it.}
|
|
303
|
+
|
|
304
|
+
| ID | Assertion | Reason untestable | Compensating control |
|
|
305
|
+
|----|-----------|-------------------|----------------------|
|
|
306
|
+
| U1 | {behavior} | {why no test} | {alert / scheduled review / canary / downstream plan} |
|
|
307
|
+
|
|
308
|
+
## Notes
|
|
309
|
+
|
|
310
|
+
- {Open questions about the test approach}
|
|
311
|
+
- {Known mechanism choices that may need revisiting}
|
|
312
|
+
```
|
|
313
|
+
|
|
195
314
|
### adr.md
|
|
196
315
|
```markdown
|
|
197
316
|
---
|
|
@@ -286,6 +405,7 @@ title: "{Title}"
|
|
|
286
405
|
date: {YYYY-MM-DD}
|
|
287
406
|
status: draft | approved | in-progress | completed | abandoned
|
|
288
407
|
trajectory: required
|
|
408
|
+
rationale: required
|
|
289
409
|
gate_policy: ask
|
|
290
410
|
---
|
|
291
411
|
|
|
@@ -325,6 +445,15 @@ For multi-phase impls, include a boundary map showing what each phase produces a
|
|
|
325
445
|
- would require: {what would unlock a proper test — a new environment, a future plan, production data}
|
|
326
446
|
- mitigation: {compensating control — telemetry alert, scheduled review, downstream plan, canary procedure, feedback signal}
|
|
327
447
|
|
|
448
|
+
### Trajectory Rationale
|
|
449
|
+
|
|
450
|
+
**Starting assumption: every test is writable at Phase 0 (pre-plan) against the current stack — Phase 0 rows need no rationale.** This subsection is required ONLY when one or more rows have `Writable at` later than Phase 0. List one entry per Phase 1+ row, naming what prevents authoring the test before plan code lands. Read the entries together — if multiple rows share the same weak excuse, the plan is over-sequenced.
|
|
451
|
+
|
|
452
|
+
- **T3** `Writable at: Phase 2` — {one-sentence reason — typically because the subject under test is a TypeScript symbol authored in Phase 2 and the test file would not compile against today's stack}
|
|
453
|
+
- **T14** `Writable at: Phase 5` — {reason — e.g., "subject is the zod schema introduced in Phase 5; the test's import line is a compile error today"}
|
|
454
|
+
|
|
455
|
+
The `validate-impl-structure.js` hook enforces that every Phase 1+ T-ID from the trajectory table appears as an entry here. Phase 0 rows are exempt. Stale entries (rationale entries for IDs not in the trajectory) are flagged.
|
|
456
|
+
|
|
328
457
|
## Checklist
|
|
329
458
|
### Phase 1: {Name}
|
|
330
459
|
- [ ] {Task — include code snippets when syntax matters}
|
|
@@ -410,6 +539,7 @@ date: {YYYY-MM-DD}
|
|
|
410
539
|
├── {plan-name}/
|
|
411
540
|
│ ├── research.md
|
|
412
541
|
│ ├── brief.md
|
|
542
|
+
│ ├── test-plan.md
|
|
413
543
|
│ ├── adr.md
|
|
414
544
|
│ ├── impl.md
|
|
415
545
|
│ └── retrospective.md
|