pan-wizard 3.5.2 → 3.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98) hide show
  1. package/README.md +28 -9
  2. package/agents/pan-executor.md +18 -0
  3. package/agents/pan-experiment-runner.md +126 -0
  4. package/agents/pan-phase-researcher.md +16 -0
  5. package/agents/pan-plan-checker.md +80 -0
  6. package/agents/pan-planner.md +19 -0
  7. package/agents/pan-reviewer.md +2 -0
  8. package/agents/pan-verifier.md +41 -0
  9. package/bin/install-lib.cjs +55 -0
  10. package/bin/install.js +71 -22
  11. package/commands/pan/debug.md +1 -1
  12. package/commands/pan/experiment.md +219 -0
  13. package/commands/pan/health.md +1 -1
  14. package/commands/pan/learn.md +15 -1
  15. package/commands/pan/links.md +102 -0
  16. package/commands/pan/optimize.md +13 -0
  17. package/commands/pan/patches.md +10 -1
  18. package/commands/pan/phase-tests.md +1 -4
  19. package/commands/pan/todo-add.md +1 -1
  20. package/commands/pan/todo-check.md +1 -1
  21. package/hooks/dist/pan-cost-logger.js +54 -4
  22. package/hooks/dist/pan-trace-logger.js +72 -3
  23. package/package.json +67 -66
  24. package/pan-wizard-core/bin/lib/codebase.cjs +2 -0
  25. package/pan-wizard-core/bin/lib/commands.cjs +8 -0
  26. package/pan-wizard-core/bin/lib/config.cjs +13 -2
  27. package/pan-wizard-core/bin/lib/context-budget.cjs +73 -0
  28. package/pan-wizard-core/bin/lib/core.cjs +13 -0
  29. package/pan-wizard-core/bin/lib/doc-lint/frontmatter.js +270 -0
  30. package/pan-wizard-core/bin/lib/doc-lint/reporter.js +45 -0
  31. package/pan-wizard-core/bin/lib/doc-lint/schema.js +202 -0
  32. package/pan-wizard-core/bin/lib/doc-lint/validate.js +190 -0
  33. package/pan-wizard-core/bin/lib/doc-lint/walk.js +135 -0
  34. package/pan-wizard-core/bin/lib/doc-lint.cjs +287 -0
  35. package/pan-wizard-core/bin/lib/experiment.cjs +502 -0
  36. package/pan-wizard-core/bin/lib/learn-index.cjs +235 -0
  37. package/pan-wizard-core/bin/lib/learn-lint.cjs +292 -0
  38. package/pan-wizard-core/bin/lib/links.cjs +549 -0
  39. package/pan-wizard-core/bin/lib/optimize.cjs +474 -1
  40. package/pan-wizard-core/bin/lib/runner.cjs +473 -0
  41. package/pan-wizard-core/bin/lib/verify.cjs +23 -0
  42. package/pan-wizard-core/bin/pan-tools.cjs +247 -3
  43. package/pan-wizard-core/learnings/README.md +70 -0
  44. package/pan-wizard-core/learnings/index.json +540 -0
  45. package/pan-wizard-core/learnings/internal/.gitkeep +2 -0
  46. package/pan-wizard-core/learnings/internal/experiment-runner.md +81 -0
  47. package/pan-wizard-core/learnings/internal/external-research.md +93 -0
  48. package/pan-wizard-core/learnings/internal/loop-design.md +33 -0
  49. package/pan-wizard-core/learnings/internal/pan-dev-bugs.md +181 -0
  50. package/pan-wizard-core/learnings/universal/.gitkeep +2 -0
  51. package/pan-wizard-core/learnings/universal/atomic-state.md +21 -0
  52. package/pan-wizard-core/learnings/universal/binary-io.md +21 -0
  53. package/pan-wizard-core/learnings/universal/comment-syntax.md +21 -0
  54. package/pan-wizard-core/learnings/universal/composition.md +33 -0
  55. package/pan-wizard-core/learnings/universal/concurrency.md +33 -0
  56. package/pan-wizard-core/learnings/universal/dag-scheduler.md +33 -0
  57. package/pan-wizard-core/learnings/universal/data-driven-design.md +21 -0
  58. package/pan-wizard-core/learnings/universal/design-process.md +21 -0
  59. package/pan-wizard-core/learnings/universal/empirical-spike.md +21 -0
  60. package/pan-wizard-core/learnings/universal/error-handling.md +23 -0
  61. package/pan-wizard-core/learnings/universal/error-paths.md +21 -0
  62. package/pan-wizard-core/learnings/universal/glob-semantics.md +21 -0
  63. package/pan-wizard-core/learnings/universal/idempotency.md +21 -0
  64. package/pan-wizard-core/learnings/universal/invariants.md +21 -0
  65. package/pan-wizard-core/learnings/universal/io-patterns.md +21 -0
  66. package/pan-wizard-core/learnings/universal/numeric-edge-cases.md +21 -0
  67. package/pan-wizard-core/learnings/universal/output-conventions.md +21 -0
  68. package/pan-wizard-core/learnings/universal/parser-design.md +21 -0
  69. package/pan-wizard-core/learnings/universal/phase-locking.md +21 -0
  70. package/pan-wizard-core/learnings/universal/pipe-friendly-cli.md +21 -0
  71. package/pan-wizard-core/learnings/universal/schema-design.md +21 -0
  72. package/pan-wizard-core/learnings/universal/secret-handling.md +21 -0
  73. package/pan-wizard-core/learnings/universal/streaming-io.md +21 -0
  74. package/pan-wizard-core/learnings/universal/test-patterns.md +57 -0
  75. package/pan-wizard-core/learnings/universal/test-strategy.md +33 -0
  76. package/pan-wizard-core/learnings/universal/unicode.md +21 -0
  77. package/pan-wizard-core/learnings/universal/vendor-pattern.md +21 -0
  78. package/pan-wizard-core/references/guardrails.md +58 -0
  79. package/pan-wizard-core/references/handoff-decisions.md +156 -0
  80. package/pan-wizard-core/references/schemas/pan-command.schema.yml +39 -0
  81. package/pan-wizard-core/references/verification-patterns.md +31 -0
  82. package/pan-wizard-core/templates/config.json +2 -1
  83. package/pan-wizard-core/templates/idea.md +52 -0
  84. package/pan-wizard-core/templates/summary-complex.md +14 -5
  85. package/pan-wizard-core/templates/summary-minimal.md +6 -0
  86. package/pan-wizard-core/templates/summary-standard.md +14 -3
  87. package/pan-wizard-core/workflows/discuss-phase.md +108 -1
  88. package/pan-wizard-core/workflows/exec-phase.md +37 -1
  89. package/pan-wizard-core/workflows/execute-plan.md +14 -0
  90. package/pan-wizard-core/workflows/health.md +23 -0
  91. package/pan-wizard-core/workflows/new-project.md +65 -81
  92. package/pan-wizard-core/workflows/plan-phase.md +58 -0
  93. package/pan-wizard-core/workflows/transition.md +102 -7
  94. package/pan-wizard-core/workflows/verify-phase.md +14 -0
  95. package/scripts/build-hooks.js +7 -1
  96. package/scripts/generate-skills-docs.py +10 -8
  97. package/scripts/git-hooks/pre-commit +40 -0
  98. package/scripts/release-check.js +184 -0
@@ -0,0 +1,473 @@
1
+ 'use strict';
2
+ // @pan: ADR-0026
3
+ /**
4
+ * runner.cjs — Self-improvement loop W2: external agent runner.
5
+ *
6
+ * Spec: docs/specs/self_improvement_loop_featureai.md §3.2
7
+ *
8
+ * Spawns an external AI coding session (Claude/Codex/Gemini/OpenCode) against
9
+ * an experiment folder, observes progress via run-state.json, enforces timeout
10
+ * + circuit breaker. The external instance runs autonomously; this runner
11
+ * observes only — it does NOT inject prompts mid-flight.
12
+ *
13
+ * Exports:
14
+ * - runExperiment(slug, opts) — spawn + observe + return result
15
+ * - tailExperimentState(slug, opts) — read run-state.json snapshot
16
+ * - stopExperiment(slug, opts) — graceful halt of a running experiment
17
+ * - RUNTIME_RUNNERS — adapter map (per-runtime headless invocation)
18
+ */
19
+
20
+ const fs = require('fs');
21
+ const path = require('path');
22
+ const { spawnSync } = require('child_process');
23
+ const { getExperimentManifest, PAN_EXPERIMENTS_ROOT_DEFAULT } = require('./experiment.cjs');
24
+
25
+ // ── Runtime adapter map ─────────────────────────────────────────────────────
26
+
27
+ /**
28
+ * Each adapter knows how to invoke its runtime headlessly with a prompt.
29
+ * `bin` is the binary name (PATH lookup at spawn time).
30
+ * `buildArgs(prompt, opts)` returns argv to pass after the bin; `opts.captureMetrics` is read by the claude adapter only, other adapters ignore `opts`.
31
+ * `shell: 'win32'` opts the adapter into shell-based spawn ON WINDOWS ONLY —
32
+ * needed for CLI tools that ship as .cmd shims (npx/npm-installed binaries
33
+ * like claude/codex/gemini/opencode) which Node's spawnSync cannot resolve
34
+ * without a shell.
35
+ *
36
+ * Runtime overrides (--runtime-override / opts.runtimeOverride) do NOT inherit
37
+ * shell: 'win32' — they default to direct spawn, which suits test mocks like
38
+ * `node -e '...'` that are resolvable directly. P-102 fix (v3.7.1).
39
+ *
40
+ * GitHub Copilot CLI has no documented headless prompt mode, so it's null.
41
+ */
42
+ // P-1302 fix (v3.7.2): autonomous claude/gemini runs default to non-interactive
43
+ // permissions. Without these flags, the CLI prompts for tool approval, which
44
+ // can't be answered in headless mode and exits 1 silently. Surfaced by the
45
+ // first real autonomous loop run (panloop experiment). The runner's purpose IS
46
+ // autonomous execution — defaulting to interactive permission prompts contradicts
47
+ // the design.
48
+ //
49
+ // Safety: the flags trust the prompt's tool choices. Acceptable because the
50
+ // runner only spawns inside isolated experiment folders (PAN_SOURCE_ROOT-guarded
51
+ // by experiment.cjs) — blast radius is bounded to the experiment dir.
52
+ // P-1603 (v3.7.5): when `opts.captureMetrics` is true the runner switches
53
+ // claude into `--output-format json` so the trailing usage envelope can be
54
+ // parsed for cost/token metrics. Other runtimes are unchanged — token
55
+ // metering for codex/gemini/opencode is deferred (no equivalent flag).
56
const RUNTIME_RUNNERS = Object.freeze({
  // claude: headless prompt flag plus non-interactive permissions; the JSON
  // output format is appended only when the caller asks for metric capture.
  claude: {
    bin: 'claude',
    buildArgs(prompt, opts) {
      const metricFlags = opts && opts.captureMetrics ? ['--output-format', 'json'] : [];
      return ['-p', '--dangerously-skip-permissions', ...metricFlags, prompt];
    },
    shell: 'win32',
  },
  // The remaining adapters ignore the second (opts) argument.
  codex: {
    bin: 'codex',
    buildArgs(prompt) {
      return ['exec', prompt];
    },
    shell: 'win32',
  },
  gemini: {
    bin: 'gemini',
    buildArgs(prompt) {
      return ['-p', '--yolo', prompt];
    },
    shell: 'win32',
  },
  opencode: {
    bin: 'opencode',
    buildArgs(prompt) {
      return [prompt];
    },
    shell: 'win32',
  },
  // null marks a known runtime that the runner cannot drive headlessly.
  copilot: null,
});
72
+
73
+ // ── Stop reasons (enum-ish) ─────────────────────────────────────────────────
74
+
75
const STOP_REASONS = Object.freeze({
  // Exit code 0 and the run was accepted as finished.
  SUCCESS: 'success',
  // Spawn failure or a non-zero exit.
  ERROR: 'error',
  // The child ran past the configured deadline.
  TIMEOUT: 'timeout',
  // Declared for the circuit-breaker path; nothing in the visible part of this
  // file assigns it yet.
  CIRCUIT_BREAKER: 'circuit_breaker',
  // Recorded by stopExperiment().
  MANUAL: 'manual',
  // P-1502 (v3.7.4): exit 0 but workflow didn't reach milestone-completion.
  INCOMPLETE: 'incomplete',
});
83
+
84
+ // P-EXP-004 (2026-05-02): bumped from 30 min to 60 min — 30 min cut off real
85
+ // 3-plan phases mid-execution (whoolog Phase 1 first run hit this).
86
const DEFAULT_TIMEOUT_MS = 3600 * 1000; // 3600 s = 60 min, expressed in milliseconds
87
+
88
+ // ── Helpers ─────────────────────────────────────────────────────────────────
89
+
90
/**
 * Build the location of an experiment's run-state file.
 *
 * @param {string} experimentPath - experiment folder
 * @returns {string} `<experimentPath>/.planning/run-state.json`
 */
function getRunStatePath(experimentPath) {
  const planningDir = path.join(experimentPath, '.planning');
  return path.join(planningDir, 'run-state.json');
}
93
+
94
/**
 * P-1502 helper: read `<experimentPath>/.planning/state.md` and extract the
 * value of its first `status:` line.
 *
 * @param {string} experimentPath - experiment folder
 * @returns {string|null} the first whitespace-delimited token after `status:`,
 *   or null when state.md cannot be read, has no `status:` line, or the
 *   `status:` line carries no value.
 */
function readMilestoneStatus(experimentPath) {
  const statePath = path.join(experimentPath, '.planning', 'state.md');
  let text;
  try {
    text = fs.readFileSync(statePath, 'utf-8');
  } catch {
    // Missing or unreadable state.md is reported as "no status".
    return null;
  }
  // Only spaces/tabs may separate the key from its value. `\s*` would also
  // consume the line break, so an empty `status:` line would wrongly yield
  // the first token of the NEXT line (e.g. "phase:") as the status.
  const m = text.match(/^status:[ \t]*(\S+)/m);
  return m ? m[1] : null;
}
106
+
107
/**
 * P-1603 (v3.7.5): extract the trailing JSON envelope that a
 * `claude -p --output-format json` session prints at the end of stdout
 * (`{result, total_cost_usd, num_turns, session_id, usage: {...}}`).
 *
 * The text is scanned from the last line upwards. Every line that begins with
 * `{` is treated as a possible start of the envelope: everything from that
 * line to the end of the (right-trimmed) output is JSON-parsed, and the first
 * parse that yields an object carrying `total_cost_usd` or `usage` wins.
 *
 * @param {*} stdout - captured stdout of the child process
 * @returns {object|null} the parsed envelope, or null when stdout is not a
 *   non-empty string, does not end in `}`, or holds no matching object.
 */
function parseClaudeJsonEnvelope(stdout) {
  if (typeof stdout !== 'string' || stdout === '') return null;

  const body = stdout.trimEnd();
  if (body.charAt(body.length - 1) !== '}') return null;

  const rows = body.split(/\r?\n/);
  for (let start = rows.length - 1; start >= 0; start -= 1) {
    // Candidate starts are lines whose first character is an opening brace.
    if (rows[start].trimEnd().charAt(0) !== '{') continue;

    let parsed;
    try {
      parsed = JSON.parse(rows.slice(start).join('\n'));
    } catch {
      // Not valid JSON from here to the end; try an earlier candidate line.
      continue;
    }

    const isObject = parsed !== null && typeof parsed === 'object';
    if (isObject && (parsed.total_cost_usd != null || parsed.usage)) {
      return parsed;
    }
  }
  return null;
}
134
+
135
/**
 * Persist `state` as 2-space-indented JSON at the experiment's run-state path.
 *
 * Best-effort by design: serialization and write failures are swallowed so a
 * persistence problem never fails the runner itself.
 *
 * @param {string} experimentPath - experiment folder
 * @param {object} state - run-state object to serialize
 * @returns {void}
 */
function writeRunState(experimentPath, state) {
  const target = getRunStatePath(experimentPath);
  try {
    const serialized = JSON.stringify(state, null, 2);
    fs.writeFileSync(target, serialized);
  } catch {
    // best-effort; runner does not fail on persistence errors
  }
}
143
+
144
/**
 * Load and parse the experiment's run-state file.
 *
 * @param {string} experimentPath - experiment folder
 * @returns {object|null} the parsed run state, or null when the file is
 *   missing, unreadable, or not valid JSON.
 */
function readRunState(experimentPath) {
  const source = getRunStatePath(experimentPath);
  let raw;
  try {
    raw = fs.readFileSync(source, 'utf-8');
    return JSON.parse(raw);
  } catch {
    // A missing file and every other read/parse failure all map to "no state".
    return null;
  }
}
153
+
154
/**
 * Append a timestamped event record to `state.events`, creating the array
 * when it is absent. Mutates `state` in place.
 *
 * @param {object} state - run-state object
 * @param {string} type - event type label
 * @param {*} [details] - free-form details; any falsy value is stored as null
 * @returns {void}
 */
function appendEvent(state, type, details) {
  if (!state.events) {
    state.events = [];
  }
  const entry = {
    ts: new Date().toISOString(),
    type,
    details: details ? details : null,
  };
  state.events.push(entry);
}
162
+
163
+ // ── runExperiment ───────────────────────────────────────────────────────────
164
+
165
/**
 * Quote one argv entry for a cmd.exe command line (private helper).
 *
 * Under `shell: true` Node joins the args with spaces and does not quote
 * them, so cmd.exe re-splits or interprets anything special. An entry is
 * wrapped in double quotes when it is empty (it would otherwise vanish),
 * contains whitespace, contains a double quote, or contains one of the
 * cmd.exe operators `& | < > ^ ( )`. Embedded double quotes are doubled.
 *
 * @param {*} arg - argv entry (stringified before inspection)
 * @returns {string} the entry, quoted when required
 */
function quoteCmdArg(arg) {
  const text = String(arg);
  const needsQuoting = text === '' || /[\s"&|<>^()]/.test(text);
  return needsQuoting ? `"${text.replace(/"/g, '""')}"` : text;
}

/**
 * Spawn the external runtime and wait for it to finish (or be aborted).
 *
 * The spawn is synchronous (spawnSync), so this call blocks until the child
 * exits or the timeout kills it. The child's pid is only known once spawnSync
 * has returned, so run-state.json carries `pid: null` while the run is in
 * flight.
 *
 * @param {string} slug - experiment id
 * @param {object} opts
 * @param {string} [opts.root] - experiment root (default PAN_EXPERIMENTS_ROOT_DEFAULT)
 * @param {string} [opts.prompt] - prompt passed to the external runtime; default
 *   is `/pan:new-project --auto @.planning/idea.md`
 * @param {number} [opts.timeoutMs] - hard timeout in milliseconds; a missing or
 *   falsy value selects DEFAULT_TIMEOUT_MS (60 min)
 * @param {object} [opts.runtimeOverride] - { bin, buildArgs } to bypass the manifest's
 *   runtime adapter (used by tests); also skips the milestone check on exit 0
 * @param {function} [opts.onProgress] - callback invoked once per captured
 *   stream after the child exits, with `{ stream, text }`
 * @param {boolean} [opts.captureMetrics] - when true, claude is invoked with
 *   --output-format json so the trailing usage envelope can be parsed and
 *   stored under runState.metrics (P-1603, v3.7.5). Other runtimes ignore.
 * @returns {object} on a completed spawn:
 *   `{ experiment_id, status, stop_reason, exit_code, elapsed_ms, started_at, ended_at }`;
 *   when spawnSync itself throws: `{ error, status, stop_reason, elapsed_ms }`;
 *   on a precondition failure (manifest, folder, unsupported runtime): `{ error }`.
 */
function runExperiment(slug, opts = {}) {
  const root = opts.root || PAN_EXPERIMENTS_ROOT_DEFAULT;
  const manifest = getExperimentManifest(slug, { root });
  if (manifest.error) return { error: manifest.error };

  const expPath = path.join(root, slug);
  if (!fs.existsSync(expPath)) {
    return { error: `experiment folder missing: ${expPath}` };
  }

  // Adapter selection: an explicit override wins, otherwise the manifest's
  // runtime is looked up in the adapter map (null/undefined = unsupported).
  let adapter = opts.runtimeOverride;
  if (!adapter) {
    const runtime = manifest.runtime;
    adapter = RUNTIME_RUNNERS[runtime];
    if (adapter == null) {
      const known = Object.keys(RUNTIME_RUNNERS).filter((r) => RUNTIME_RUNNERS[r]).join(', ');
      return {
        error: `runtime "${runtime}" is not supported by the experiment runner (known: ${known})`,
      };
    }
  }

  const prompt = opts.prompt || '/pan:new-project --auto @.planning/idea.md';
  const timeoutMs = opts.timeoutMs || DEFAULT_TIMEOUT_MS;
  const onProgress = typeof opts.onProgress === 'function' ? opts.onProgress : null;

  const startedAt = new Date().toISOString();
  const startTime = Date.now();

  // Initialize run-state.json
  const runState = {
    experiment_id: slug,
    status: 'running',
    started_at: startedAt,
    ended_at: null,
    pid: null,
    exit_code: null,
    stop_reason: null,
    elapsed_ms: null,
    events: [],
  };
  appendEvent(runState, 'started', `runtime=${manifest.runtime}, prompt=${prompt}`);
  writeRunState(expPath, runState);

  // Synchronous spawn with native timeout: spawnSync's `timeout` option kills
  // the child once it runs past the deadline.
  //
  // Streaming progress is deferred to W3 (async/Promise variant) — stdout and
  // stderr are captured after exit and handed to onProgress in one call each.
  //
  // P-102 fix (v3.7.1): on Windows, CLI tools that ship as .cmd shims cannot
  // be spawned with shell:false. Adapters opt into shell-based spawn via
  // `shell: 'win32'`. Runtime overrides do NOT inherit it, so `node -e '...'`
  // works without shell-based arg mangling.
  const useShell = adapter.shell === 'win32' && process.platform === 'win32';

  // buildArgs may accept opts (claude uses it for --output-format json metric
  // capture); adapters that ignore the second argument work unchanged.
  const captureMetrics = Boolean(opts.captureMetrics);
  const rawArgs = adapter.buildArgs(prompt, { captureMetrics });

  // P-1304 fix (v3.7.2): under shell:true the args are joined unquoted, so the
  // multi-word prompt was re-split by cmd.exe. Quoting is applied only on the
  // shell path; direct spawns receive argv untouched.
  const spawnArgs = useShell ? rawArgs.map((arg) => quoteCmdArg(arg)) : rawArgs;

  // P-1501-r2 fix (v3.7.4): stdin is inherited so the spawned `claude -p` sees
  // a TTY when the runner is started from a terminal and keeps its autonomous
  // tool-use loop going instead of exiting after the first response.
  let result;
  try {
    result = spawnSync(adapter.bin, spawnArgs, {
      cwd: expPath,
      stdio: ['inherit', 'pipe', 'pipe'],
      shell: useShell,
      timeout: timeoutMs,
      encoding: 'utf-8',
    });
  } catch (err) {
    runState.status = 'failed';
    runState.stop_reason = STOP_REASONS.ERROR;
    runState.ended_at = new Date().toISOString();
    runState.elapsed_ms = Date.now() - startTime;
    appendEvent(runState, 'spawn_failed', err.message);
    writeRunState(expPath, runState);
    return {
      error: `failed to spawn ${adapter.bin}: ${err.message}`,
      status: 'failed',
      stop_reason: STOP_REASONS.ERROR,
      elapsed_ms: runState.elapsed_ms,
    };
  }

  runState.pid = result.pid || null;

  // Emit captured output if a progress handler is set
  if (onProgress) {
    if (result.stdout) onProgress({ stream: 'stdout', text: result.stdout });
    if (result.stderr) onProgress({ stream: 'stderr', text: result.stderr });
  }

  // P-1603 (v3.7.5): when captureMetrics was requested, parse the trailing
  // claude --output-format json envelope from stdout and persist metrics into
  // run-state.json so downstream analysis can attribute real cost and tokens.
  if (captureMetrics && result.stdout) {
    const envelope = parseClaudeJsonEnvelope(result.stdout);
    if (envelope) {
      runState.metrics = {
        total_cost_usd: envelope.total_cost_usd ?? null,
        num_turns: envelope.num_turns ?? null,
        session_id: envelope.session_id ?? null,
        input_tokens: envelope.usage?.input_tokens ?? null,
        output_tokens: envelope.usage?.output_tokens ?? null,
        cache_creation_input_tokens: envelope.usage?.cache_creation_input_tokens ?? null,
        cache_read_input_tokens: envelope.usage?.cache_read_input_tokens ?? null,
      };
      appendEvent(runState, 'metrics_captured', `cost=$${envelope.total_cost_usd ?? '?'}, turns=${envelope.num_turns ?? '?'}`);
    } else {
      appendEvent(runState, 'metrics_unavailable', 'no JSON envelope in stdout');
    }
  }

  const endedAt = new Date().toISOString();
  const elapsedMs = Date.now() - startTime;

  runState.ended_at = endedAt;
  runState.elapsed_ms = elapsedMs;
  runState.exit_code = result.status;

  // Timeout detection. spawnSync reports its own timeout as an error with code
  // ETIMEDOUT. A bare SIGTERM (or a null exit status) only counts as a timeout
  // when the deadline has actually been reached — otherwise a child that was
  // terminated externally early in the run would be mislabelled as "aborted
  // after <timeout>ms".
  const reachedDeadline = elapsedMs >= timeoutMs - 50;
  const timedOut = Boolean(
    (result.error && result.error.code === 'ETIMEDOUT') ||
    (reachedDeadline && (result.signal === 'SIGTERM' || result.status === null)),
  );

  if (timedOut) {
    runState.status = 'failed';
    runState.stop_reason = STOP_REASONS.TIMEOUT;
    appendEvent(runState, 'timeout', `aborted after ${timeoutMs}ms`);
  } else if (result.error) {
    runState.status = 'failed';
    runState.stop_reason = STOP_REASONS.ERROR;
    appendEvent(runState, 'spawn_error', result.error.message);
  } else if (result.status === 0) {
    // P-1502 fix (v3.7.4): exit_code=0 alone is too coarse. state.md is read
    // to verify the workflow actually reached milestone-completion; a clean
    // exit without it is marked 'incomplete'.
    //
    // The check is skipped when runtimeOverride is set (tests/dev mocks do
    // not write state.md).
    if (opts.runtimeOverride) {
      runState.status = 'done';
      runState.stop_reason = STOP_REASONS.SUCCESS;
      appendEvent(runState, 'completed', 'exit_code=0 (runtime override; milestone check skipped)');
    } else {
      const milestone = readMilestoneStatus(expPath);
      if (milestone === 'completed') {
        runState.status = 'done';
        runState.stop_reason = STOP_REASONS.SUCCESS;
        appendEvent(runState, 'completed', 'exit_code=0, milestone=completed');
      } else {
        runState.status = 'incomplete';
        runState.stop_reason = STOP_REASONS.INCOMPLETE;
        appendEvent(runState, 'incomplete', `exit_code=0 but milestone status=${milestone || 'unknown'}`);
      }
    }
  } else {
    runState.status = 'failed';
    runState.stop_reason = STOP_REASONS.ERROR;
    // Record the terminating signal when there is one; the exit code is null
    // in that case and would otherwise be the only clue.
    const exitDetails = result.signal
      ? `exit_code=${result.status}, signal=${result.signal}`
      : `exit_code=${result.status}`;
    appendEvent(runState, 'completed', exitDetails);
  }

  writeRunState(expPath, runState);

  return {
    experiment_id: slug,
    status: runState.status,
    stop_reason: runState.stop_reason,
    exit_code: result.status,
    elapsed_ms: elapsedMs,
    started_at: startedAt,
    ended_at: endedAt,
  };
}
390
+
391
+ // ── tailExperimentState ─────────────────────────────────────────────────────
392
+
393
/**
 * Return a snapshot of an experiment's run-state.json.
 * One-shot read — no streaming. (W3 may add a poll-loop variant.)
 *
 * @param {string} slug - experiment id
 * @param {object} [opts]
 * @param {string} [opts.root] - experiment root (default PAN_EXPERIMENTS_ROOT_DEFAULT)
 * @returns {object} the parsed run state, or `{ error }` when the manifest
 *   lookup fails or no readable run state exists.
 */
function tailExperimentState(slug, opts = {}) {
  const experimentsRoot = opts.root || PAN_EXPERIMENTS_ROOT_DEFAULT;

  const lookup = getExperimentManifest(slug, { root: experimentsRoot });
  if (lookup.error) {
    return { error: lookup.error };
  }

  const snapshot = readRunState(path.join(experimentsRoot, slug));
  return snapshot
    ? snapshot
    : { error: `experiment "${slug}" has no run state (not started yet)` };
}
409
+
410
+ // ── stopExperiment ──────────────────────────────────────────────────────────
411
+
412
/**
 * Stop a running experiment.
 *
 * If run-state.json shows status=running with a recorded pid, SIGTERM is sent
 * to that pid and the run state is finalized as a manual stop. There is no
 * grace period and no SIGKILL escalation: a child that ignores SIGTERM keeps
 * running.
 *
 * If the experiment has already finished, its current state is returned
 * unchanged (not an error).
 *
 * @param {string} slug - experiment id
 * @param {object} [opts]
 * @param {string} [opts.root] - experiment root (default PAN_EXPERIMENTS_ROOT_DEFAULT)
 * @returns {object} the (possibly updated) run state, or `{ error }` when the
 *   manifest lookup fails, there is no run state, the pid is missing or not a
 *   positive integer, or the process exists but may not be signalled.
 */
function stopExperiment(slug, opts = {}) {
  const root = opts.root || PAN_EXPERIMENTS_ROOT_DEFAULT;
  const manifest = getExperimentManifest(slug, { root });
  if (manifest.error) return { error: manifest.error };

  const expPath = path.join(root, slug);
  const state = readRunState(expPath);
  if (!state) {
    return { error: `experiment "${slug}" has no active run` };
  }

  if (state.status !== 'running') {
    // Already finished — return current state, not an error
    return state;
  }

  if (!state.pid) {
    return { error: `experiment "${slug}" has no recorded pid` };
  }

  // The pid comes from a JSON file on disk. Anything but a positive integer
  // must never reach process.kill: zero and negative values address process
  // groups rather than a single process.
  if (!Number.isInteger(state.pid) || state.pid <= 0) {
    return { error: `experiment "${slug}" has an invalid recorded pid: ${state.pid}` };
  }

  let eventType = 'stopped';
  let eventDetails = 'SIGTERM sent';
  try {
    process.kill(state.pid, 'SIGTERM');
  } catch (err) {
    if (err && err.code === 'EPERM') {
      // The process exists but cannot be signalled by this user, so the run
      // is still alive; do not record it as stopped.
      return { error: `not permitted to signal pid ${state.pid} of experiment "${slug}"` };
    }
    // Any other failure is treated as "process already gone".
    eventType = 'stop_no_pid';
    eventDetails = `pid ${state.pid} already gone`;
  }

  // Finalize the run state as a manual stop.
  state.status = 'failed';
  state.stop_reason = STOP_REASONS.MANUAL;
  state.ended_at = new Date().toISOString();
  // Fill in elapsed_ms when the run never recorded one and started_at parses.
  const startedMs = Date.parse(state.started_at);
  if (state.elapsed_ms == null && Number.isFinite(startedMs)) {
    state.elapsed_ms = Math.max(0, Date.now() - startedMs);
  }
  appendEvent(state, eventType, eventDetails);
  writeRunState(expPath, state);

  return state;
}
463
+
464
+ // ── Exports ─────────────────────────────────────────────────────────────────
465
+
466
+ module.exports = {
467
+ runExperiment,
468
+ tailExperimentState,
469
+ stopExperiment,
470
+ RUNTIME_RUNNERS,
471
+ STOP_REASONS,
472
+ DEFAULT_TIMEOUT_MS,
473
+ };
@@ -1215,6 +1215,26 @@ function cmdValidateHealth(cwd, options, raw) {
1215
1215
  }
1216
1216
  }
1217
1217
 
1218
+ // Check 12 (optional): doc-code link graph (ADR-0027)
1219
+ let linkGraphResult;
1220
+ if (options.links) {
1221
+ const links = require('./links.cjs');
1222
+ const r = links.validateAll(cwd);
1223
+ linkGraphResult = {
1224
+ status: r.summary.status,
1225
+ errors: r.summary.errors,
1226
+ warnings: r.summary.warnings,
1227
+ doc_files_scanned: r.summary.doc_files_scanned,
1228
+ source_files_scanned: r.summary.source_files_scanned,
1229
+ anchors_found: r.summary.anchors_found,
1230
+ forward_links_found: r.summary.forward_links_found,
1231
+ backlink_contracts_checked: r.summary.backlink_contracts_checked,
1232
+ };
1233
+ if (r.summary.errors > 0) {
1234
+ addIssue('warning', 'LINKS_ERR', `Link graph has ${r.summary.errors} errors (broken refs or uncovered backlink contracts)`, 'Run pan-tools links validate for details');
1235
+ }
1236
+ }
1237
+
1218
1238
  const result = {
1219
1239
  status,
1220
1240
  errors,
@@ -1230,6 +1250,9 @@ function cmdValidateHealth(cwd, options, raw) {
1230
1250
  if (options.drift) {
1231
1251
  result.drift_status = driftResult;
1232
1252
  }
1253
+ if (options.links) {
1254
+ result.link_graph = linkGraphResult;
1255
+ }
1233
1256
 
1234
1257
  output(result, raw);
1235
1258
  }