mandrel 1.63.0 → 1.65.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agents/scripts/agents-bootstrap-github.js +40 -48
- package/.agents/scripts/bootstrap.js +74 -60
- package/.agents/scripts/lib/bootstrap/branch-protection.js +8 -8
- package/.agents/scripts/lib/bootstrap/gh-preflight.js +3 -3
- package/.agents/scripts/lib/bootstrap/hitl-confirm.js +2 -2
- package/.agents/scripts/lib/bootstrap/merge-methods.js +7 -7
- package/.agents/scripts/lib/bootstrap/preflight.js +18 -15
- package/.agents/scripts/lib/bootstrap/project-bootstrap.js +5 -5
- package/.agents/scripts/lib/bootstrap/prompt.js +5 -1
- package/.agents/scripts/lib/detect-package-manager.js +2 -2
- package/.agents/scripts/lib/onboard/init-tail.js +60 -69
- package/.agents/scripts/lib/test-tiers.js +25 -5
- package/.agents/scripts/providers/github/tickets.js +1 -1
- package/.agents/workflows/helpers/deliver-stories.md +24 -2
- package/.agents/workflows/helpers/single-story-deliver.md +84 -1
- package/.agents/workflows/qa-assist.md +67 -9
- package/.agents/workflows/qa-explore.md +77 -11
- package/docs/CHANGELOG.md +23 -0
- package/lib/cli/init.js +66 -21
- package/lib/cli/sync.js +3 -3
- package/package.json +2 -1
- package/.agents/scripts/lib/onboard/detect-stack.js +0 -300
|
@@ -277,7 +277,7 @@ export function ensureDependenciesInstalled(ctx) {
|
|
|
277
277
|
});
|
|
278
278
|
if (result.status !== 0) {
|
|
279
279
|
throw new Error(
|
|
280
|
-
`[
|
|
280
|
+
`[Bootstrap] ${manager} install failed (exit ${result.status}). Resolve the install error and re-run.`,
|
|
281
281
|
);
|
|
282
282
|
}
|
|
283
283
|
return { ran: true, manager, skipped: false };
|
|
@@ -462,7 +462,7 @@ export function runSyncCommands(ctx) {
|
|
|
462
462
|
});
|
|
463
463
|
if (result.status !== 0) {
|
|
464
464
|
throw new Error(
|
|
465
|
-
`[
|
|
465
|
+
`[Bootstrap] sync-claude-commands.js failed (exit ${result.status}): ${(
|
|
466
466
|
result.stderr ?? ''
|
|
467
467
|
)
|
|
468
468
|
.trim()
|
|
@@ -615,12 +615,12 @@ export function checkWindowsGitPerf(ctx) {
|
|
|
615
615
|
const fatalNodeCheck = (result) =>
|
|
616
616
|
result.ok
|
|
617
617
|
? null
|
|
618
|
-
: `[
|
|
618
|
+
: `[Bootstrap] Node ${result.version} is below required ${result.required}. Upgrade Node and re-run.`;
|
|
619
619
|
|
|
620
620
|
const fatalValidation = (result) =>
|
|
621
621
|
result.ok
|
|
622
622
|
? null
|
|
623
|
-
: `[
|
|
623
|
+
: `[Bootstrap] .agentrc.json failed schema validation: ${JSON.stringify(
|
|
624
624
|
result.errors,
|
|
625
625
|
null,
|
|
626
626
|
2,
|
|
@@ -629,7 +629,7 @@ const fatalValidation = (result) =>
|
|
|
629
629
|
const fatalParity = (result) =>
|
|
630
630
|
result.ok
|
|
631
631
|
? null
|
|
632
|
-
: `[
|
|
632
|
+
: `[Bootstrap] Parity check failed — workflows missing commands: ${
|
|
633
633
|
result.missingCommand.join(', ') || '(none)'
|
|
634
634
|
}; orphan commands: ${result.orphanCommand.join(', ') || '(none)'}`;
|
|
635
635
|
|
|
@@ -286,7 +286,11 @@ export async function resolveFromPicker(ctx) {
|
|
|
286
286
|
|
|
287
287
|
const normalized = choices.map(normalizePickerChoice);
|
|
288
288
|
const rl = await ctx.getRl();
|
|
289
|
-
|
|
289
|
+
// The picker header uses `pickerMessage` when set, so a question can show
|
|
290
|
+
// list-oriented guidance here (e.g. "Select existing or press ENTER to create
|
|
291
|
+
// new one") while the manual-entry fall-through prompt (`askOnce`) uses the
|
|
292
|
+
// shorter `message` (e.g. "New GitHub repo name"). Falls back to `message`.
|
|
293
|
+
ctx.output.write(`${ctx.q.pickerMessage ?? ctx.q.message}:\n`);
|
|
290
294
|
normalized.forEach((choice, index) => {
|
|
291
295
|
ctx.output.write(` ${index + 1}) ${choice.label}\n`);
|
|
292
296
|
});
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* detect-package-manager — shared lockfile-probe helper (Story #4048 B3).
|
|
3
3
|
*
|
|
4
|
-
*
|
|
4
|
+
* Several independent copies of this lockfile probe existed across the
|
|
5
|
+
* codebase before this consolidation:
|
|
5
6
|
* - `lib/cli/update.js#detectPackageManager`
|
|
6
7
|
* - `lib/bootstrap/project-bootstrap.js#detectPackageManager`
|
|
7
8
|
* - `lib/runtime-deps/preflight.js#detectPackageManager`
|
|
8
|
-
* - `lib/onboard/detect-stack.js#detectPackageManager`
|
|
9
9
|
* - `lib/worktree/node-modules-strategy.js#selectInstallCommand` (inline)
|
|
10
10
|
*
|
|
11
11
|
* This module is the single authoritative implementation. It uses the
|
|
@@ -2,18 +2,17 @@
|
|
|
2
2
|
* init-tail.js — post-bootstrap onboarding tail for `mandrel init`.
|
|
3
3
|
*
|
|
4
4
|
* Called by `mandrel init` after `bootstrap.js` completes successfully on
|
|
5
|
-
* the "configure now" path. Sequences the
|
|
5
|
+
* the "configure now" path. Sequences the three phases that walk an operator
|
|
6
6
|
* from a freshly bootstrapped project to a ready-to-plan workspace:
|
|
7
7
|
*
|
|
8
|
-
* Phase 1 —
|
|
9
|
-
* Phase 2 —
|
|
10
|
-
* Phase 3 —
|
|
11
|
-
* Phase 4 — Print the /plan handoff next-step text.
|
|
8
|
+
* Phase 1 — Offer to scaffold missing docsContextFiles (scaffold-docs.js).
|
|
9
|
+
* Phase 2 — Run `mandrel doctor` as a readiness gate.
|
|
10
|
+
* Phase 3 — Print the /plan handoff next-step text.
|
|
12
11
|
*
|
|
13
12
|
* The whole tail is idempotent: re-running after an already-onboarded project
|
|
14
|
-
* re-
|
|
15
|
-
*
|
|
16
|
-
*
|
|
13
|
+
* re-checks and re-offers scaffolding without duplicating stubs (the scaffolder
|
|
14
|
+
* only writes genuinely absent files), and the doctor gate modifies nothing (doctor is
|
|
15
|
+
* read-only).
|
|
17
16
|
*
|
|
18
17
|
* Injectable seams: `runDoctor`, `stdout`, `confirmScaffold`, and `isTTY`
|
|
19
18
|
* allow the unit suite to drive every branch without real I/O.
|
|
@@ -22,10 +21,9 @@
|
|
|
22
21
|
*/
|
|
23
22
|
|
|
24
23
|
import { spawnSync as defaultSpawnSync } from 'node:child_process';
|
|
25
|
-
import fs from 'node:fs';
|
|
26
24
|
import path from 'node:path';
|
|
25
|
+
import readline from 'node:readline/promises';
|
|
27
26
|
|
|
28
|
-
import { detectStack } from './detect-stack.js';
|
|
29
27
|
import { STUB_MARKER, scaffoldDocs } from './scaffold-docs.js';
|
|
30
28
|
|
|
31
29
|
// ---------------------------------------------------------------------------
|
|
@@ -38,7 +36,7 @@ import { STUB_MARKER, scaffoldDocs } from './scaffold-docs.js';
|
|
|
38
36
|
* @type {string}
|
|
39
37
|
*/
|
|
40
38
|
export const PLAN_HANDOFF_TEXT =
|
|
41
|
-
'\n✅ Mandrel is ready. Start your first
|
|
39
|
+
'\n✅ Mandrel is ready. Start your first project:\n\n' +
|
|
42
40
|
' /plan --idea "<one-line description of what you want to build>"\n\n' +
|
|
43
41
|
'Or, if you already have a `type::epic` Issue open:\n\n' +
|
|
44
42
|
' /plan <epicId>\n';
|
|
@@ -47,24 +45,6 @@ export const PLAN_HANDOFF_TEXT =
|
|
|
47
45
|
// Internal helpers
|
|
48
46
|
// ---------------------------------------------------------------------------
|
|
49
47
|
|
|
50
|
-
/**
|
|
51
|
-
* Format a stack-detection result as a human-readable report line.
|
|
52
|
-
*
|
|
53
|
-
* @param {{ packageManager: string|null, testRunner: string|null, primaryLanguage: string|null }} stack
|
|
54
|
-
* @returns {string}
|
|
55
|
-
*/
|
|
56
|
-
function formatStackReport(stack) {
|
|
57
|
-
const pm = stack.packageManager ?? '(unknown)';
|
|
58
|
-
const runner = stack.testRunner ?? '(unknown)';
|
|
59
|
-
const lang = stack.primaryLanguage ?? '(unknown)';
|
|
60
|
-
return (
|
|
61
|
-
'\n[init] Stack detection:\n' +
|
|
62
|
-
` Package manager : ${pm}\n` +
|
|
63
|
-
` Test runner : ${runner}\n` +
|
|
64
|
-
` Primary language: ${lang}\n`
|
|
65
|
-
);
|
|
66
|
-
}
|
|
67
|
-
|
|
68
48
|
/**
|
|
69
49
|
* Format a list of missing docs as a human-readable report (no prompt).
|
|
70
50
|
*
|
|
@@ -75,30 +55,54 @@ function formatMissingList(missing) {
|
|
|
75
55
|
if (missing.length === 0) return '';
|
|
76
56
|
const list = missing.map((f) => ` • ${f}`).join('\n');
|
|
77
57
|
return (
|
|
78
|
-
'\n[
|
|
79
|
-
'
|
|
58
|
+
'\n[Final Checks] The following docsContextFiles are missing,\n' +
|
|
59
|
+
'agents will load degraded context until you create them:\n' +
|
|
80
60
|
`${list}\n`
|
|
81
61
|
);
|
|
82
62
|
}
|
|
83
63
|
|
|
84
64
|
/** Prompt text shown only on a TTY when asking to scaffold. */
|
|
85
|
-
const SCAFFOLD_PROMPT = '\
|
|
65
|
+
const SCAFFOLD_PROMPT = '\nCreate placeholders? [Y/n]: ';
|
|
86
66
|
|
|
87
67
|
/**
|
|
88
|
-
*
|
|
89
|
-
*
|
|
68
|
+
* Async Y/n read from stdin via `node:readline` (mirrors the prompt mechanism
|
|
69
|
+
* in `bootstrap.js`). Returns on Enter and never blocks waiting for EOF the way
|
|
70
|
+
* `fs.readFileSync(0)` did — that EOF-blocking read hung `mandrel init` on an
|
|
71
|
+
* interactive TTY. Yes is the default (`[Y/n]`): a bare Enter — or anything but
|
|
72
|
+
* an explicit `n`/`no` — resolves to `true` (create the placeholders), since the
|
|
73
|
+
* missing docs are known-needed and the stubs carry a `MANDREL:STUB` marker the
|
|
74
|
+
* `/plan` preflight still flags until they are fleshed out. A read error
|
|
75
|
+
* resolves to `false` so a genuine I/O failure never writes unattended. The
|
|
76
|
+
* prompt text is written by the caller via `stdout`, so the question string
|
|
77
|
+
* passed here is empty.
|
|
78
|
+
*
|
|
79
|
+
* `terminal: false` is **load-bearing**: with terminal mode on (the default
|
|
80
|
+
* when stdout is a TTY) readline emits cursor-control escapes
|
|
81
|
+
* (`\x1b[1G\x1b[0J`) that erase the `Create placeholders? [Y/n]:` prompt already
|
|
82
|
+
* written via the caller's `stdout`, leaving the operator staring at a blank,
|
|
83
|
+
* dead-looking line. Disabling terminal mode preserves the pre-written prompt
|
|
84
|
+
* and reads the line via the TTY's cooked-mode echo. `createInterface` is
|
|
85
|
+
* injectable so a test can assert this option is set (regression guard).
|
|
90
86
|
*
|
|
91
|
-
* @
|
|
87
|
+
* @param {{ createInterface?: typeof readline.createInterface }} [opts]
|
|
88
|
+
* @returns {Promise<boolean>}
|
|
92
89
|
*/
|
|
93
|
-
function
|
|
94
|
-
|
|
90
|
+
export async function readConfirm({
|
|
91
|
+
createInterface = readline.createInterface,
|
|
92
|
+
} = {}) {
|
|
93
|
+
const rl = createInterface({
|
|
94
|
+
input: process.stdin,
|
|
95
|
+
output: process.stdout,
|
|
96
|
+
terminal: false,
|
|
97
|
+
});
|
|
95
98
|
try {
|
|
96
|
-
const
|
|
97
|
-
answer
|
|
99
|
+
const answer = (await rl.question('')).trim().toLowerCase();
|
|
100
|
+
return answer !== 'n' && answer !== 'no';
|
|
98
101
|
} catch {
|
|
99
|
-
|
|
102
|
+
return false;
|
|
103
|
+
} finally {
|
|
104
|
+
rl.close();
|
|
100
105
|
}
|
|
101
|
-
return answer === 'y' || answer === 'yes';
|
|
102
106
|
}
|
|
103
107
|
|
|
104
108
|
// ---------------------------------------------------------------------------
|
|
@@ -119,14 +123,13 @@ function syncConfirm() {
|
|
|
119
123
|
* - Run `mandrel doctor`; injectable for tests.
|
|
120
124
|
* @param {boolean} [opts.isTTY] - Whether stdin is a TTY (defaults to
|
|
121
125
|
* `Boolean(process.stdin.isTTY)`).
|
|
122
|
-
* @returns {{
|
|
123
|
-
* stack: { packageManager: string|null, testRunner: string|null, primaryLanguage: string|null },
|
|
126
|
+
* @returns {Promise<{
|
|
124
127
|
* scaffoldResult: object,
|
|
125
128
|
* doctorStatus: number,
|
|
126
129
|
* ok: boolean,
|
|
127
|
-
* }}
|
|
130
|
+
* }>}
|
|
128
131
|
*/
|
|
129
|
-
export function runInitTail({
|
|
132
|
+
export async function runInitTail({
|
|
130
133
|
root,
|
|
131
134
|
stdout = (s) => process.stdout.write(s),
|
|
132
135
|
confirmScaffold,
|
|
@@ -140,7 +143,7 @@ export function runInitTail({
|
|
|
140
143
|
// it. When using the default, auto-decline on non-TTY so the scaffolder
|
|
141
144
|
// never writes unattended.
|
|
142
145
|
const usingDefaultConfirm = confirmScaffold == null;
|
|
143
|
-
const confirmFn = confirmScaffold ??
|
|
146
|
+
const confirmFn = confirmScaffold ?? readConfirm;
|
|
144
147
|
|
|
145
148
|
// Default doctor runner — spawns `mandrel doctor` via the locally installed
|
|
146
149
|
// bin; inherits stdio so the report streams to the terminal.
|
|
@@ -159,21 +162,12 @@ export function runInitTail({
|
|
|
159
162
|
|
|
160
163
|
const doctorFn = runDoctor ?? defaultRunDoctor;
|
|
161
164
|
|
|
162
|
-
// --- Phase 1:
|
|
163
|
-
let stack;
|
|
164
|
-
try {
|
|
165
|
-
stack = detectStack(projectRoot);
|
|
166
|
-
} catch {
|
|
167
|
-
stack = { packageManager: null, testRunner: null, primaryLanguage: null };
|
|
168
|
-
}
|
|
169
|
-
stdout(formatStackReport(stack));
|
|
170
|
-
|
|
171
|
-
// --- Phase 2: Offer to scaffold missing docsContextFiles -----------------
|
|
165
|
+
// --- Phase 1: Offer to scaffold missing docsContextFiles -----------------
|
|
172
166
|
const preview = scaffoldDocs({ root: projectRoot, write: false });
|
|
173
167
|
let scaffoldResult = preview;
|
|
174
168
|
|
|
175
169
|
if (preview.missing.length === 0) {
|
|
176
|
-
stdout('\n[
|
|
170
|
+
stdout('\n[Final Checks] All docsContextFiles are present.\n');
|
|
177
171
|
} else {
|
|
178
172
|
stdout(formatMissingList(preview.missing));
|
|
179
173
|
// On non-TTY without an injected confirm, auto-decline so the scaffolder
|
|
@@ -181,38 +175,35 @@ export function runInitTail({
|
|
|
181
175
|
// the prompt and consult the confirm function.
|
|
182
176
|
const canPrompt = tty || !usingDefaultConfirm;
|
|
183
177
|
if (canPrompt) stdout(SCAFFOLD_PROMPT);
|
|
184
|
-
const accepted = canPrompt ? confirmFn() : false;
|
|
178
|
+
const accepted = canPrompt ? await confirmFn() : false;
|
|
185
179
|
if (accepted) {
|
|
186
180
|
scaffoldResult = scaffoldDocs({ root: projectRoot, write: true });
|
|
187
181
|
if (scaffoldResult.created.length > 0) {
|
|
188
182
|
stdout(
|
|
189
|
-
`[
|
|
183
|
+
`[Final Checks] Scaffolded ${scaffoldResult.created.length} stub(s). ` +
|
|
190
184
|
`Each carries a \`${STUB_MARKER}\` marker — replace placeholder ` +
|
|
191
185
|
'content before planning.\n',
|
|
192
186
|
);
|
|
193
187
|
}
|
|
194
188
|
} else {
|
|
195
|
-
stdout(
|
|
196
|
-
'[init] Scaffolding declined. docsContextFiles are still missing — ' +
|
|
197
|
-
'agents will load degraded context until you create them.\n',
|
|
198
|
-
);
|
|
189
|
+
stdout('[Final Checks] Placeholders declined.\n');
|
|
199
190
|
}
|
|
200
191
|
}
|
|
201
192
|
|
|
202
|
-
// --- Phase
|
|
203
|
-
stdout('\n[
|
|
193
|
+
// --- Phase 2: Readiness gate (mandrel doctor) ----------------------------
|
|
194
|
+
stdout('\n[Final Checks] Final installation summary via mandrel doctor…\n');
|
|
204
195
|
const doctorResult = doctorFn();
|
|
205
196
|
const doctorStatus = doctorResult?.status ?? 1;
|
|
206
197
|
|
|
207
198
|
if (doctorStatus !== 0) {
|
|
208
199
|
stdout(
|
|
209
|
-
'\n[
|
|
200
|
+
'\n[Final Checks] ❌ Doctor check failed. Resolve the remedies above and\n' +
|
|
210
201
|
'then re-run: mandrel init\n',
|
|
211
202
|
);
|
|
212
|
-
return {
|
|
203
|
+
return { scaffoldResult, doctorStatus, ok: false };
|
|
213
204
|
}
|
|
214
205
|
|
|
215
|
-
// --- Phase
|
|
206
|
+
// --- Phase 3: Handoff to /plan -------------------------------------------
|
|
216
207
|
stdout(PLAN_HANDOFF_TEXT);
|
|
217
|
-
return {
|
|
208
|
+
return { scaffoldResult, doctorStatus, ok: true };
|
|
218
209
|
}
|
|
@@ -26,6 +26,28 @@ export const INTEGRATION_INCLUDE = [
|
|
|
26
26
|
|
|
27
27
|
const matchesIntegration = picomatch(INTEGRATION_INCLUDE, { dot: true });
|
|
28
28
|
|
|
29
|
+
/**
|
|
30
|
+
* Repo-relative roots the tier walker scans for test files (names ending in
|
|
31
|
+
* `.test.js`).
|
|
32
|
+
*
|
|
33
|
+
* `tests` holds the framework's suite tree; `lib` holds the published CLI
|
|
34
|
+
* (under `lib/cli` and `lib/migrations`) whose tests are colocated in
|
|
35
|
+
* `__tests__` directories per the unit-tier convention in
|
|
36
|
+
* `rules/testing-standards.md`. Without `lib` here, both the quick /
|
|
37
|
+
* integration walk and the full-tier glob set miss the colocated CLI tests,
|
|
38
|
+
* leaving that coverage dark in `npm test`. The matching full-tier globs
|
|
39
|
+
* live in `FULL_TIER_GLOBS`.
|
|
40
|
+
*/
|
|
41
|
+
const TEST_WALK_ROOTS = ['tests', 'lib'];
|
|
42
|
+
|
|
43
|
+
/**
|
|
44
|
+
* Glob targets for the `full` tier — one per walk root in `TEST_WALK_ROOTS`.
|
|
45
|
+
* The `tests` glob is a flat recursive sweep; the `lib` glob is scoped to
|
|
46
|
+
* `__tests__` subtrees so it only matches colocated tests, never the shipped
|
|
47
|
+
* source modules themselves.
|
|
48
|
+
*/
|
|
49
|
+
const FULL_TIER_GLOBS = ['tests/**/*.test.js', 'lib/**/__tests__/**/*.test.js'];
|
|
50
|
+
|
|
29
51
|
/**
|
|
30
52
|
* @param {string} dir
|
|
31
53
|
* @param {string} prefix
|
|
@@ -56,13 +78,11 @@ function walkTestFiles(dir, prefix, fsLike) {
|
|
|
56
78
|
* @returns {string[]}
|
|
57
79
|
*/
|
|
58
80
|
export function listTestFilesForTier(tier, repoRoot, fsLike = fs) {
|
|
59
|
-
const all =
|
|
60
|
-
path.join(repoRoot,
|
|
61
|
-
'tests',
|
|
62
|
-
fsLike,
|
|
81
|
+
const all = TEST_WALK_ROOTS.flatMap((root) =>
|
|
82
|
+
walkTestFiles(path.join(repoRoot, root), root, fsLike),
|
|
63
83
|
).sort();
|
|
64
84
|
if (tier === 'full') {
|
|
65
|
-
return [
|
|
85
|
+
return [...FULL_TIER_GLOBS];
|
|
66
86
|
}
|
|
67
87
|
const integration = all.filter((file) => matchesIntegration(file));
|
|
68
88
|
if (tier === 'integration') {
|
|
@@ -77,7 +77,7 @@ export function composeStoryBody({
|
|
|
77
77
|
}) {
|
|
78
78
|
const head = typeof body === 'string' ? body : '';
|
|
79
79
|
const lines = ['---', `parent: #${parentId}`];
|
|
80
|
-
if (epicId !== undefined && epicId !== null
|
|
80
|
+
if (epicId !== undefined && epicId !== null) {
|
|
81
81
|
lines.push(`Epic: #${epicId}`);
|
|
82
82
|
}
|
|
83
83
|
if (dependencies.length > 0) {
|
|
@@ -192,7 +192,18 @@ Each Agent call:
|
|
|
192
192
|
1. Names the Story ID and instructs the child to invoke
|
|
193
193
|
[`helpers/single-story-deliver`](single-story-deliver.md)
|
|
194
194
|
for that Story.
|
|
195
|
-
2. States the **return contract** (see § 2c)
|
|
195
|
+
2. States the **return contract** (see § 2c) and the **no-park rule**: the
|
|
196
|
+
child MUST drive the close → CI-watch → merge-confirm → `agent::done`
|
|
197
|
+
sequence to a terminal state *within its own turn* and end **only** by
|
|
198
|
+
returning the § 2c JSON object. The auto-merge wait is an
|
|
199
|
+
internally-blocking step (`gh pr checks --watch` blocks the turn), **not**
|
|
200
|
+
a reason to suspend and hand back. A child that ends its turn with
|
|
201
|
+
free-form prose and an unconfirmed merge (e.g. "I'll wait for the
|
|
202
|
+
background watch task…") has violated the contract — the wave loop cannot
|
|
203
|
+
advance, and the Story strands at `agent::closing` (the Story #1553 /
|
|
204
|
+
PR #1554 failure mode). There is no "pending" return status: the child
|
|
205
|
+
returns `done` (merge confirmed), `blocked` (transitioned + friction
|
|
206
|
+
posted), or `failed`.
|
|
196
207
|
3. Reminds the child of the **non-interactive contract**: no clarifying
|
|
197
208
|
questions — if stuck, transition to `agent::blocked`, post a
|
|
198
209
|
`friction` comment, and exit non-zero.
|
|
@@ -209,7 +220,8 @@ Agent call has returned a result (success, blocked, or failed).
|
|
|
209
220
|
|
|
210
221
|
### 2c. Per-Story return contract
|
|
211
222
|
|
|
212
|
-
Each child
|
|
223
|
+
Each child ends its turn by returning **exactly one** JSON object — never
|
|
224
|
+
free-form prose:
|
|
213
225
|
|
|
214
226
|
```json
|
|
215
227
|
{
|
|
@@ -223,6 +235,16 @@ Each child returns:
|
|
|
223
235
|
}
|
|
224
236
|
```
|
|
225
237
|
|
|
238
|
+
The status enum is **closed** — `done`, `blocked`, or `failed`. There is no
|
|
239
|
+
"pending" / "waiting" status, because the close-phase auto-merge wait is
|
|
240
|
+
**not** a returnable suspension: the child blocks on `gh pr checks --watch`
|
|
241
|
+
*inside its own turn*, confirms the merge, flips `agent::done`, and only then
|
|
242
|
+
returns `status: "done"`. A child that returns prose instead — parking on the
|
|
243
|
+
CI wait with an unconfirmed merge — breaks the wave loop's ability to advance
|
|
244
|
+
and leaves the Story at `agent::closing` (Story #1553 / PR #1554). The
|
|
245
|
+
single-homed restatement of this no-park rule from the child's own perspective
|
|
246
|
+
is [`single-story-deliver.md` § Step 7](single-story-deliver.md#return-contract).
|
|
247
|
+
|
|
226
248
|
### 2d. Wave outcome handling
|
|
227
249
|
|
|
228
250
|
After every Story in a wave returns:
|
|
@@ -336,6 +336,26 @@ coverage rounding, platform-conditional branches, and timing-sensitive
|
|
|
336
336
|
tests routinely drift between the two. The agent owns the green-CI
|
|
337
337
|
outcome, not just the push.
|
|
338
338
|
|
|
339
|
+
> **The auto-merge wait is an internally-blocking step, not a reason to end
|
|
340
|
+
> your turn.** This is the single most important contract of this workflow,
|
|
341
|
+
> and the seam where a worker most often misbehaves: it delivers up to arming
|
|
342
|
+
> auto-merge, then ends its turn with **free-form prose** — e.g. "I'll wait
|
|
343
|
+
> for the background watch task to complete" or "the next event will be its
|
|
344
|
+
> completion notification" — leaving the merge unconfirmed and the Story
|
|
345
|
+
> stranded at `agent::closing` (observed on Story #1553 / PR #1554). **Do not
|
|
346
|
+
> do this.** `gh pr checks <prNumber> --watch` *blocks the current turn* until
|
|
347
|
+
> CI resolves — that is the mechanism by which you wait. You MUST keep your
|
|
348
|
+
> turn alive across the wait: watch → (fix + push + re-watch on red) → confirm
|
|
349
|
+
> the merge (Step 5) → flip `agent::done` → run the post-merge steps → and
|
|
350
|
+
> only then return the terminal JSON status contract (Step 4 of
|
|
351
|
+
> [`deliver-stories.md` § 2c](deliver-stories.md), mirrored in
|
|
352
|
+
> [§ Return contract](#return-contract) for the standalone caller). The CI
|
|
353
|
+
> wait NEVER terminates your turn; **only** a confirmed-`MERGED` PR (→
|
|
354
|
+
> `status: "done"`), an `agent::blocked` transition (→ `status: "blocked"`),
|
|
355
|
+
> or an unrecoverable failure (→ `status: "failed"`) does. Ending your turn
|
|
356
|
+
> with prose and an unconfirmed merge is a contract violation — it is the very
|
|
357
|
+
> bug this workflow exists to prevent.
|
|
358
|
+
|
|
339
359
|
After `single-story-close.js` succeeds, enter the watch + fix loop:
|
|
340
360
|
|
|
341
361
|
```bash
|
|
@@ -348,7 +368,9 @@ When the watch exits:
|
|
|
348
368
|
still at `agent::closing` with its issue OPEN at this point (Step 3
|
|
349
369
|
deferred the `agent::done` flip). The `Closes #<id>` footer closes the
|
|
350
370
|
Story issue when the merge lands; Step 5 confirms the merge and Step 5.5
|
|
351
|
-
flips the Story to `agent::done`. Proceed to Step 5
|
|
371
|
+
flips the Story to `agent::done`. **Proceed to Step 5 within the same
|
|
372
|
+
turn** — do not end your turn here. Green CI is the *start* of the
|
|
373
|
+
merge-confirm sequence, not a terminal state (see Step 7's no-park rule).
|
|
352
374
|
- **Any check ✗** — diagnose, fix, and push a new commit on
|
|
353
375
|
`story-<storyId>`, then re-watch. Auto-merge stays enabled across
|
|
354
376
|
retries; no need to re-arm it. The Story stays at `agent::closing`
|
|
@@ -582,6 +604,67 @@ cleanup.
|
|
|
582
604
|
|
|
583
605
|
---
|
|
584
606
|
|
|
607
|
+
## Step 7 — Return contract (**required when dispatched as a sub-agent**) {#return-contract}
|
|
608
|
+
|
|
609
|
+
When this workflow runs as a per-Story sub-agent (dispatched by `/deliver`
|
|
610
|
+
via [`deliver-stories.md` § 2a/2c](deliver-stories.md)), the **only**
|
|
611
|
+
acceptable way to end your turn is to **return a single terminal JSON status
|
|
612
|
+
object** — never free-form prose:
|
|
613
|
+
|
|
614
|
+
```json
|
|
615
|
+
{
|
|
616
|
+
"storyId": <number>,
|
|
617
|
+
"status": "done" | "blocked" | "failed",
|
|
618
|
+
"phase": "init|implementing|closing|blocked|done",
|
|
619
|
+
"branchDeleted": <boolean>,
|
|
620
|
+
"blockerCommentId": <string|null>,
|
|
621
|
+
"detail": "<one-liner: what changed + what was verified, e.g. PR #N merged>",
|
|
622
|
+
"renderedBody": "<terminal Story body>"
|
|
623
|
+
}
|
|
624
|
+
```
|
|
625
|
+
|
|
626
|
+
This is the same envelope [`deliver-stories.md` § 2c](deliver-stories.md)
|
|
627
|
+
mandates; this section is its single-homed restatement for the standalone
|
|
628
|
+
worker so the contract is self-contained when this workflow is the entry
|
|
629
|
+
point.
|
|
630
|
+
|
|
631
|
+
**The auto-merge wait does not produce a fourth status.** There is no
|
|
632
|
+
"pending" or "waiting" terminal — the CI/auto-merge wait is handled
|
|
633
|
+
*internally* by blocking on `gh pr checks --watch` (Step 4) and confirming
|
|
634
|
+
the merge (Step 5). You return **only** when you have reached a genuinely
|
|
635
|
+
terminal state:
|
|
636
|
+
|
|
637
|
+
- **`status: "done"`** — the PR is confirmed `state: "MERGED"` (Step 5),
|
|
638
|
+
the Story carries `agent::done`, and Steps 5.5 / 6 have run. `phase: "done"`,
|
|
639
|
+
`branchDeleted: true`.
|
|
640
|
+
- **`status: "blocked"`** — you transitioned the Story to `agent::blocked`
|
|
641
|
+
and posted a `friction` comment (acceptance self-eval block in Step 1a, a
|
|
642
|
+
base-sync conflict, or an operator-blocking CI failure / Anti-Thrashing
|
|
643
|
+
stop in Step 4). `phase: "blocked"`, `blockerCommentId` set.
|
|
644
|
+
- **`status: "failed"`** — an unrecoverable failure outside the blocked
|
|
645
|
+
protocol. `phase` reflects where it died.
|
|
646
|
+
|
|
647
|
+
A turn that ends with prose ("I'll wait for the watch task…", "the next event
|
|
648
|
+
will be its completion notification…") and an **unconfirmed merge** is a
|
|
649
|
+
**contract violation** (the Story #1553 / PR #1554 failure mode): the parent
|
|
650
|
+
wave loop cannot distinguish "still working" from "done but silent", and the
|
|
651
|
+
Story strands at `agent::closing`. If you genuinely cannot confirm the merge,
|
|
652
|
+
that is a `blocked` or `failed` outcome with the JSON contract above — not a
|
|
653
|
+
prose hand-off.
|
|
654
|
+
|
|
655
|
+
> **Handoff discipline — report state, not process.** Populate the envelope
|
|
656
|
+
> with essential terminal state only (mirroring the fields
|
|
657
|
+
> `single-story-close.js` / `story-phase.js` already emit). Do not narrate the
|
|
658
|
+
> steps you took, and do not prescribe how the next stage should work. Prose
|
|
659
|
+
> process commentary only bloats the hydrated prompt
|
|
660
|
+
> (`delivery.maxTokenBudget` elision). When run **interactively** (no parent
|
|
661
|
+
> aggregator), this JSON envelope is optional — relay terminal state to the
|
|
662
|
+
> operator in prose instead — but the **no-park rule still holds**: never end
|
|
663
|
+
> an interactive turn with an unconfirmed merge either; block on the watch,
|
|
664
|
+
> confirm, and report the merged outcome.
|
|
665
|
+
|
|
666
|
+
---
|
|
667
|
+
|
|
585
668
|
## Idempotence
|
|
586
669
|
|
|
587
670
|
- `single-story-init.js` re-prints the same `workCwd` without recreating
|
|
@@ -239,24 +239,61 @@ and optionally route or promote it — with the operator deciding each write.
|
|
|
239
239
|
`regression-of-closed`. Stamp the `fingerprintFooter(sha)` marker into any
|
|
240
240
|
Issue body so future runs dedup against it.
|
|
241
241
|
|
|
242
|
-
3. **
|
|
242
|
+
3. **Promote `file`-dispositioned findings through `/plan`** (never a raw
|
|
243
|
+
GitHub Issue) via
|
|
243
244
|
[`promote-finding.js`](../scripts/lib/findings/promote-finding.js), which
|
|
244
|
-
clusters, routes, and files through the same ports
|
|
245
|
-
promotion
|
|
245
|
+
clusters, sizes, routes, and files through the same ports `/qa-explore` and
|
|
246
|
+
`/audit-to-stories` consume — never hand-roll the promotion, the clustering,
|
|
247
|
+
or the sizing:
|
|
246
248
|
|
|
247
249
|
```js
|
|
248
250
|
import { promoteFindings } from '../scripts/lib/findings/promote-finding.js';
|
|
249
|
-
const promotions = await promoteFindings(ledgerItems, {
|
|
251
|
+
const { promotions } = await promoteFindings(ledgerItems, {
|
|
252
|
+
searchIssues, // GitHub provider, open + closed
|
|
253
|
+
createStory, // tight cluster (≤2 surfaces): render seed → /plan --from-notes
|
|
254
|
+
createEpic, // broad cluster (>2 surfaces): render seed → /plan --idea
|
|
255
|
+
});
|
|
250
256
|
```
|
|
251
257
|
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
258
|
+
- **Sizing is delegated, not decided in prose.** `promoteFindings` runs
|
|
259
|
+
`clusterLedgerItems` + `targetForCluster`: a cluster spanning **≤2**
|
|
260
|
+
distinct coverage surfaces routes to `createStory`; **>2** routes to
|
|
261
|
+
`createEpic`. Do not re-cluster, re-size, or re-dedup in the workflow —
|
|
262
|
+
[`route-finding.js`](../scripts/lib/findings/route-finding.js) /
|
|
263
|
+
[`promote-finding.js`](../scripts/lib/findings/promote-finding.js) are the
|
|
264
|
+
single implementation.
|
|
265
|
+
- **`createStory` (`/plan --from-notes`)** — render a **redacted**
|
|
266
|
+
`--from-notes` seed from the cluster (reuse the `/audit-to-stories`
|
|
267
|
+
Phase 5b notes shape; redaction already ran in Phase 2), **stamp the
|
|
268
|
+
cluster's `fingerprintFooter(sha)` verbatim into the seed body**, then
|
|
269
|
+
chain `/plan --from-notes <seed>`. The footer must survive into the issue
|
|
270
|
+
body the Story create path writes — it round-trips through
|
|
271
|
+
`story-plan.js --body <file> --dry-run` unchanged (asserted by the
|
|
272
|
+
deterministic round-trip test under `tests/`) so a later `routeFinding`
|
|
273
|
+
dedups the same finding instead of re-filing it.
|
|
274
|
+
- **`createEpic` (`/plan --idea`)** — carry the cluster's
|
|
275
|
+
`fingerprintFooter(sha)` into the `/plan --idea` seed, then chain
|
|
276
|
+
`/plan --idea <seed>`. **Known limitation (not solved here):**
|
|
277
|
+
per-child-Story fingerprint propagation through full Epic decomposition is
|
|
278
|
+
*not* guaranteed — the fingerprint is carried in the Epic seed only; the
|
|
279
|
+
child Stories `/plan` spawns from that seed are not individually
|
|
280
|
+
footer-stamped.
|
|
281
|
+
- **A `file` disposition never opens a raw GitHub Issue.** Every `file`
|
|
282
|
+
finding flows through `promoteFindings` → `/plan`; only `defer` (carry
|
|
283
|
+
forward as backlog) and `dismiss` (non-actionable) skip the `/plan`
|
|
284
|
+
handoff.
|
|
285
|
+
|
|
286
|
+
4. **Gate:** any ledger append, seed write, `/plan` invocation, ticket-filing,
|
|
287
|
+
or label mutation is a write — confirm **each one** with the operator before
|
|
288
|
+
it happens. The plan→deliver hard stop is preserved: each `/plan` chain
|
|
289
|
+
pauses at its own HITL gates and never auto-delivers. Redaction has already
|
|
290
|
+
run, so nothing unredacted reaches disk or GitHub.
|
|
255
291
|
|
|
256
292
|
After recording, summarize: the finding recorded, its coverage verdict and
|
|
257
293
|
`missingTest`, any route/promotion decision
|
|
258
|
-
(`new`/`update-existing`/`duplicate`/`regression-of-closed`)
|
|
259
|
-
|
|
294
|
+
(`new`/`update-existing`/`duplicate`/`regression-of-closed`) and whether it was
|
|
295
|
+
promoted to a Story (`/plan --from-notes`) or Epic (`/plan --idea`), and the
|
|
296
|
+
rolling backlog a resumed session will pick up.
|
|
260
297
|
|
|
261
298
|
---
|
|
262
299
|
|
|
@@ -291,3 +328,24 @@ backlog a resumed session will pick up.
|
|
|
291
328
|
promotion ([`promote-finding.js`](../scripts/lib/findings/promote-finding.js)),
|
|
292
329
|
and session resolution ([`qa-session.js`](../scripts/lib/qa/qa-session.js))
|
|
293
330
|
are deterministic — never re-derive them in prose.
|
|
331
|
+
- **Promote through `/plan`, never a raw Issue.** A `file`-dispositioned
|
|
332
|
+
finding is promoted via `promoteFindings`, which chains into
|
|
333
|
+
[`/plan`](plan.md) (`--from-notes` for a tight cluster, `--idea` for a broad
|
|
334
|
+
one) — mirroring [`/audit-to-stories`](audit-to-stories.md). `/qa-assist`
|
|
335
|
+
never opens a bare GitHub Issue for a `file` finding. The cluster's
|
|
336
|
+
`fingerprintFooter(sha)` is stamped verbatim into the seed so a future
|
|
337
|
+
`routeFinding` dedups it.
|
|
338
|
+
|
|
339
|
+
## See also
|
|
340
|
+
|
|
341
|
+
- [`/plan`](plan.md) — the planning pipeline `/qa-assist` chains into when an
|
|
342
|
+
operator dispositions a finding `file` (`--from-notes` for a Story, `--idea`
|
|
343
|
+
for an Epic). The plan→deliver hard stop is preserved across the handoff.
|
|
344
|
+
- [`/qa-explore`](qa-explore.md) — the agent-led sibling that drives a named
|
|
345
|
+
surface and triages through the same `/plan` handoff.
|
|
346
|
+
- [`/audit-to-stories`](audit-to-stories.md) — the precedent for the
|
|
347
|
+
findings → `/plan` handoff and the shared fingerprint-footer dedup contract.
|
|
348
|
+
- [`promote-finding.js`](../scripts/lib/findings/promote-finding.js) /
|
|
349
|
+
[`route-finding.js`](../scripts/lib/findings/route-finding.js) — the shared
|
|
350
|
+
cluster/size/promote and dedup/route/fingerprint-footer helpers. There is no
|
|
351
|
+
second clustering, sizing, or dedup implementation.
|