pan-wizard 3.5.1 → 3.7.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -10
- package/agents/pan-executor.md +18 -0
- package/agents/pan-experiment-runner.md +126 -0
- package/agents/pan-phase-researcher.md +16 -0
- package/agents/pan-plan-checker.md +80 -0
- package/agents/pan-planner.md +19 -0
- package/agents/pan-reviewer.md +2 -0
- package/agents/pan-verifier.md +41 -0
- package/bin/install-lib.cjs +55 -0
- package/bin/install.js +71 -22
- package/commands/pan/debug.md +1 -1
- package/commands/pan/experiment.md +219 -0
- package/commands/pan/health.md +1 -1
- package/commands/pan/learn.md +15 -1
- package/commands/pan/optimize.md +13 -0
- package/commands/pan/patches.md +10 -1
- package/commands/pan/phase-tests.md +1 -4
- package/commands/pan/todo-add.md +1 -1
- package/commands/pan/todo-check.md +1 -1
- package/hooks/dist/pan-cost-logger.js +54 -4
- package/hooks/dist/pan-trace-logger.js +72 -3
- package/package.json +67 -66
- package/pan-wizard-core/bin/lib/commands.cjs +8 -0
- package/pan-wizard-core/bin/lib/config.cjs +13 -2
- package/pan-wizard-core/bin/lib/context-budget.cjs +73 -0
- package/pan-wizard-core/bin/lib/core.cjs +13 -0
- package/pan-wizard-core/bin/lib/doc-lint/frontmatter.js +270 -0
- package/pan-wizard-core/bin/lib/doc-lint/reporter.js +45 -0
- package/pan-wizard-core/bin/lib/doc-lint/schema.js +202 -0
- package/pan-wizard-core/bin/lib/doc-lint/validate.js +190 -0
- package/pan-wizard-core/bin/lib/doc-lint/walk.js +135 -0
- package/pan-wizard-core/bin/lib/doc-lint.cjs +287 -0
- package/pan-wizard-core/bin/lib/experiment.cjs +501 -0
- package/pan-wizard-core/bin/lib/learn-index.cjs +235 -0
- package/pan-wizard-core/bin/lib/learn-lint.cjs +292 -0
- package/pan-wizard-core/bin/lib/optimize.cjs +474 -1
- package/pan-wizard-core/bin/lib/runner.cjs +472 -0
- package/pan-wizard-core/bin/pan-tools.cjs +222 -2
- package/pan-wizard-core/learnings/README.md +70 -0
- package/pan-wizard-core/learnings/index.json +540 -0
- package/pan-wizard-core/learnings/internal/.gitkeep +2 -0
- package/pan-wizard-core/learnings/internal/experiment-runner.md +81 -0
- package/pan-wizard-core/learnings/internal/external-research.md +93 -0
- package/pan-wizard-core/learnings/internal/loop-design.md +33 -0
- package/pan-wizard-core/learnings/internal/pan-dev-bugs.md +181 -0
- package/pan-wizard-core/learnings/universal/.gitkeep +2 -0
- package/pan-wizard-core/learnings/universal/atomic-state.md +21 -0
- package/pan-wizard-core/learnings/universal/binary-io.md +21 -0
- package/pan-wizard-core/learnings/universal/comment-syntax.md +21 -0
- package/pan-wizard-core/learnings/universal/composition.md +33 -0
- package/pan-wizard-core/learnings/universal/concurrency.md +33 -0
- package/pan-wizard-core/learnings/universal/dag-scheduler.md +33 -0
- package/pan-wizard-core/learnings/universal/data-driven-design.md +21 -0
- package/pan-wizard-core/learnings/universal/design-process.md +21 -0
- package/pan-wizard-core/learnings/universal/empirical-spike.md +21 -0
- package/pan-wizard-core/learnings/universal/error-handling.md +23 -0
- package/pan-wizard-core/learnings/universal/error-paths.md +21 -0
- package/pan-wizard-core/learnings/universal/glob-semantics.md +21 -0
- package/pan-wizard-core/learnings/universal/idempotency.md +21 -0
- package/pan-wizard-core/learnings/universal/invariants.md +21 -0
- package/pan-wizard-core/learnings/universal/io-patterns.md +21 -0
- package/pan-wizard-core/learnings/universal/numeric-edge-cases.md +21 -0
- package/pan-wizard-core/learnings/universal/output-conventions.md +21 -0
- package/pan-wizard-core/learnings/universal/parser-design.md +21 -0
- package/pan-wizard-core/learnings/universal/phase-locking.md +21 -0
- package/pan-wizard-core/learnings/universal/pipe-friendly-cli.md +21 -0
- package/pan-wizard-core/learnings/universal/schema-design.md +21 -0
- package/pan-wizard-core/learnings/universal/secret-handling.md +21 -0
- package/pan-wizard-core/learnings/universal/streaming-io.md +21 -0
- package/pan-wizard-core/learnings/universal/test-patterns.md +57 -0
- package/pan-wizard-core/learnings/universal/test-strategy.md +33 -0
- package/pan-wizard-core/learnings/universal/unicode.md +21 -0
- package/pan-wizard-core/learnings/universal/vendor-pattern.md +21 -0
- package/pan-wizard-core/references/guardrails.md +58 -0
- package/pan-wizard-core/references/handoff-decisions.md +156 -0
- package/pan-wizard-core/references/schemas/pan-command.schema.yml +39 -0
- package/pan-wizard-core/references/verification-patterns.md +31 -0
- package/pan-wizard-core/templates/config.json +2 -1
- package/pan-wizard-core/templates/idea.md +52 -0
- package/pan-wizard-core/templates/summary-complex.md +14 -5
- package/pan-wizard-core/templates/summary-minimal.md +6 -0
- package/pan-wizard-core/templates/summary-standard.md +14 -3
- package/pan-wizard-core/workflows/discuss-phase.md +108 -1
- package/pan-wizard-core/workflows/exec-phase.md +37 -1
- package/pan-wizard-core/workflows/execute-plan.md +14 -0
- package/pan-wizard-core/workflows/health.md +23 -0
- package/pan-wizard-core/workflows/new-project.md +65 -81
- package/pan-wizard-core/workflows/plan-phase.md +58 -0
- package/pan-wizard-core/workflows/transition.md +102 -7
- package/pan-wizard-core/workflows/verify-phase.md +14 -0
- package/scripts/build-hooks.js +7 -1
- package/scripts/generate-skills-docs.py +10 -8
- package/scripts/release-check.js +184 -0
|
@@ -0,0 +1,472 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
/**
|
|
3
|
+
* runner.cjs — Self-improvement loop W2: external agent runner.
|
|
4
|
+
*
|
|
5
|
+
* Spec: docs/specs/self_improvement_loop_featureai.md §3.2
|
|
6
|
+
*
|
|
7
|
+
* Spawns an external AI coding session (Claude/Codex/Gemini/OpenCode) against
|
|
8
|
+
* an experiment folder, observes progress via run-state.json, enforces timeout
|
|
9
|
+
* + circuit breaker. The external instance runs autonomously; this runner
|
|
10
|
+
* observes only — it does NOT inject prompts mid-flight.
|
|
11
|
+
*
|
|
12
|
+
* Exports:
|
|
13
|
+
* - runExperiment(slug, opts) — spawn + observe + return result
|
|
14
|
+
* - tailExperimentState(slug, opts) — read run-state.json snapshot
|
|
15
|
+
* - stopExperiment(slug, opts) — graceful halt of a running experiment
|
|
16
|
+
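* - RUNTIME_RUNNERS — adapter map (per-runtime headless invocation)
* - STOP_REASONS — stop_reason values recorded in run-state.json
* - DEFAULT_TIMEOUT_MS — default hard timeout for runExperiment (60 min)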
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
const fs = require('fs');
|
|
20
|
+
const path = require('path');
|
|
21
|
+
const { spawnSync } = require('child_process');
|
|
22
|
+
const { getExperimentManifest, PAN_EXPERIMENTS_ROOT_DEFAULT } = require('./experiment.cjs');
|
|
23
|
+
|
|
24
|
+
// ── Runtime adapter map ─────────────────────────────────────────────────────
|
|
25
|
+
|
|
26
|
+
/**
|
|
27
|
+
* Each adapter knows how to invoke its runtime headlessly with a prompt.
|
|
28
|
+
* `bin` is the binary name (PATH lookup at spawn time).
|
|
29
|
+
* `buildArgs(prompt)` returns argv to pass after the bin.
|
|
30
|
+
* `shell: 'win32'` opts the adapter into shell-based spawn ON WINDOWS ONLY —
|
|
31
|
+
* needed for CLI tools that ship as .cmd shims (npx/npm-installed binaries
|
|
32
|
+
* like claude/codex/gemini/opencode) which Node's spawnSync cannot resolve
|
|
33
|
+
* without a shell.
|
|
34
|
+
*
|
|
35
|
+
* Runtime overrides (--runtime-override / opts.runtimeOverride) do NOT inherit
|
|
36
|
+
* shell: 'win32' — they default to direct spawn, which suits test mocks like
|
|
37
|
+
* `node -e '...'` that are resolvable directly. P-102 fix (v3.7.1).
|
|
38
|
+
*
|
|
39
|
+
* GitHub Copilot CLI has no documented headless prompt mode, so it's null.
|
|
40
|
+
*/
|
|
41
|
+
// P-1302 fix (v3.7.2): autonomous claude/gemini runs default to non-interactive
|
|
42
|
+
// permissions. Without these flags, the CLI prompts for tool approval, which
|
|
43
|
+
// can't be answered in headless mode and exits 1 silently. Surfaced by the
|
|
44
|
+
// first real autonomous loop run (panloop experiment). The runner's purpose IS
|
|
45
|
+
// autonomous execution — defaulting to interactive permission prompts contradicts
|
|
46
|
+
// the design.
|
|
47
|
+
//
|
|
48
|
+
// Safety: the flags trust the prompt's tool choices. Acceptable because the
|
|
49
|
+
// runner only spawns inside isolated experiment folders (PAN_SOURCE_ROOT-guarded
|
|
50
|
+
// by experiment.cjs) — blast radius is bounded to the experiment dir.
|
|
51
|
+
// P-1603 (v3.7.5): when `opts.captureMetrics` is true the runner switches
|
|
52
|
+
// claude into `--output-format json` so the trailing usage envelope can be
|
|
53
|
+
// parsed for cost/token metrics. Other runtimes are unchanged — token
|
|
54
|
+
// metering for codex/gemini/opencode is deferred (no equivalent flag).
|
|
55
|
+
// Adapter table keyed by runtime name. Each non-null entry carries the binary
// to launch, a `buildArgs(prompt, opts)` function producing its argv, and a
// `shell` marker ('win32' = use a shell-based spawn on Windows only).
// A `null` entry marks a runtime that is known but has no headless adapter.
const RUNTIME_RUNNERS = Object.freeze({
  claude: {
    bin: 'claude',
    // Always headless (`-p`) with permission prompts disabled; JSON output is
    // requested only when the caller asked for metric capture.
    buildArgs(prompt, opts) {
      const wantsMetrics = Boolean(opts && opts.captureMetrics);
      const metricFlags = wantsMetrics ? ['--output-format', 'json'] : [];
      return ['-p', '--dangerously-skip-permissions', ...metricFlags, prompt];
    },
    shell: 'win32',
  },
  codex: {
    bin: 'codex',
    buildArgs(prompt) {
      return ['exec', prompt];
    },
    shell: 'win32',
  },
  gemini: {
    bin: 'gemini',
    buildArgs(prompt) {
      return ['-p', '--yolo', prompt];
    },
    shell: 'win32',
  },
  opencode: {
    bin: 'opencode',
    buildArgs(prompt) {
      return [prompt];
    },
    shell: 'win32',
  },
  copilot: null,
});
|
|
71
|
+
|
|
72
|
+
// ── Stop reasons (enum-ish) ─────────────────────────────────────────────────
|
|
73
|
+
|
|
74
|
+
// Values written to `stop_reason` in run-state.json and in runner results.
const STOP_REASONS = Object.freeze({
  // Child exited 0 and the run was accepted as finished.
  SUCCESS: 'success',
  // Spawn failure or non-zero exit.
  ERROR: 'error',
  // Child was aborted because it ran past the hard timeout.
  TIMEOUT: 'timeout',
  // Not assigned anywhere in this file.
  CIRCUIT_BREAKER: 'circuit_breaker',
  // Run was halted through stopExperiment.
  MANUAL: 'manual',
  // P-1502 (v3.7.4): exit 0 but workflow didn't reach milestone-completion.
  INCOMPLETE: 'incomplete',
});
|
|
82
|
+
|
|
83
|
+
// P-EXP-004 (2026-05-02): raised from 30 to 60 minutes. The 30-minute limit
// cut off real 3-plan phases mid-execution (whoolog Phase 1 first run hit it).
const DEFAULT_TIMEOUT_MS = 1000 * 60 * 60; // ms per second x seconds per minute x 60 min
|
|
86
|
+
|
|
87
|
+
// ── Helpers ─────────────────────────────────────────────────────────────────
|
|
88
|
+
|
|
89
|
+
/**
 * Location of the runner's state file inside an experiment folder.
 *
 * @param {string} experimentPath - experiment folder
 * @returns {string} `<experimentPath>/.planning/run-state.json`
 */
function getRunStatePath(experimentPath) {
  const segments = [experimentPath, '.planning', 'run-state.json'];
  return path.join(...segments);
}
|
|
92
|
+
|
|
93
|
+
/**
 * P-1502 helper: read `.planning/state.md` inside the experiment folder and
 * extract the value of the first line that starts with `status:`.
 *
 * The value must sit on the same line as the key. Only spaces and tabs are
 * skipped after the colon, so an empty `status:` line yields null instead of
 * picking up the first token of the following line.
 *
 * @param {string} experimentPath - experiment folder
 * @returns {string|null} the status token, or null when state.md is missing,
 *   unreadable, or has no non-empty `status:` line
 */
function readMilestoneStatus(experimentPath) {
  const statePath = path.join(experimentPath, '.planning', 'state.md');
  let text;
  try {
    text = fs.readFileSync(statePath, 'utf-8');
  } catch {
    // Missing or unreadable state.md is an expected condition, not an error.
    return null;
  }
  // `[ \t]*` rather than `\s*`: `\s` also matches line breaks and would let
  // the capture group spill onto the next line.
  const match = text.match(/^status:[ \t]*(\S+)/m);
  return match ? match[1] : null;
}
|
|
105
|
+
|
|
106
|
+
/**
 * P-1603 (v3.7.5): extract the trailing JSON envelope that
 * `claude -p --output-format json` prints at the end of a session, e.g.
 * `{result, total_cost_usd, num_turns, session_id, usage: {...}}`.
 *
 * Candidate start lines are tried from the bottom of the output upwards: any
 * line beginning with `{` is joined with everything after it and JSON-parsed.
 * The first candidate that parses to an object carrying `total_cost_usd` or
 * `usage` wins.
 *
 * @param {*} stdout - captured stdout of the child process
 * @returns {object|null} the parsed envelope, or null when stdout is not a
 *   non-empty string, does not end with `}`, or holds no matching object
 */
function parseClaudeJsonEnvelope(stdout) {
  if (typeof stdout !== 'string' || stdout.length === 0) return null;

  const text = stdout.trimEnd();
  if (text.charAt(text.length - 1) !== '}') return null;

  const rows = text.split(/\r?\n/);
  let start = rows.length;
  while (start-- > 0) {
    // Only lines with an opening brace in the first column can begin the envelope.
    if (rows[start][0] !== '{') continue;

    let candidate;
    try {
      candidate = JSON.parse(rows.slice(start).join('\n'));
    } catch {
      // Not valid JSON from this line onward; keep scanning upwards.
      continue;
    }

    const isObject = candidate !== null && typeof candidate === 'object';
    if (isObject && (candidate.total_cost_usd != null || Boolean(candidate.usage))) {
      return candidate;
    }
  }
  return null;
}
|
|
133
|
+
|
|
134
|
+
/**
 * Persist the run state as pretty-printed JSON to the experiment's
 * run-state.json. Best-effort: serialization and write failures are ignored
 * so that a persistence problem never fails the runner.
 *
 * @param {string} experimentPath - experiment folder
 * @param {object} state - run state to serialize
 * @returns {void}
 */
function writeRunState(experimentPath, state) {
  const target = getRunStatePath(experimentPath);
  try {
    const payload = JSON.stringify(state, null, 2);
    fs.writeFileSync(target, payload);
  } catch {
    // Deliberately ignored; see the function comment.
  }
}
|
|
142
|
+
|
|
143
|
+
/**
 * Load and parse the experiment's run-state.json.
 *
 * @param {string} experimentPath - experiment folder
 * @returns {*} the parsed JSON value, or null when the file is missing,
 *   unreadable, or not valid JSON (all failures are reported the same way)
 */
function readRunState(experimentPath) {
  const source = getRunStatePath(experimentPath);
  let raw;
  try {
    raw = fs.readFileSync(source, 'utf-8');
    return JSON.parse(raw);
  } catch {
    return null;
  }
}
|
|
152
|
+
|
|
153
|
+
/**
 * Append a timestamped entry to `state.events`, creating the array when the
 * state has none yet. Mutates `state` in place.
 *
 * @param {object} state - run state being updated
 * @param {string} type - event type label
 * @param {*} [details] - free-form detail; any falsy value is stored as null
 * @returns {void}
 */
function appendEvent(state, type, details) {
  const log = state.events || [];
  state.events = log;

  const entry = {
    ts: new Date().toISOString(),
    type,
    details: details || null,
  };
  log.push(entry);
}
|
|
161
|
+
|
|
162
|
+
// ── runExperiment ───────────────────────────────────────────────────────────
|
|
163
|
+
|
|
164
|
+
/**
 * Quote a single argv entry for a shell-based spawn on Windows.
 *
 * Under `shell: true` Node joins the args with spaces and does not quote
 * them, so cmd.exe re-splits anything containing whitespace and interprets
 * metacharacters such as `&` or `|` (P-1304). Entries that are empty or
 * contain whitespace, a double quote, or one of `& | < > ^ ( )` are wrapped
 * in double quotes, with embedded quotes doubled. Everything else is returned
 * untouched.
 *
 * @param {*} arg - argv entry as produced by an adapter's buildArgs
 * @returns {*} the quoted string, or `arg` itself when no quoting is needed
 */
function quoteWindowsShellArg(arg) {
  const text = String(arg);
  if (text !== '' && !/[\s"&|<>^()]/.test(text)) return arg;
  return `"${text.replace(/"/g, '""')}"`;
}

/**
 * Spawn the external runtime and wait for it to finish (or be aborted).
 *
 * The call is synchronous: it blocks until the child exits or the timeout
 * fires. run-state.json is written once before the spawn (status `running`,
 * pid null) and once after it with the final outcome.
 *
 * @param {string} slug - experiment id
 * @param {object} opts
 * @param {string} [opts.root] - experiment root (default PAN_EXPERIMENTS_ROOT_DEFAULT)
 * @param {string} [opts.prompt] - prompt passed to the external runtime; default
 *   is `/pan:new-project --auto @.planning/idea.md`
 * @param {number} [opts.timeoutMs] - hard timeout (default DEFAULT_TIMEOUT_MS, 60 min)
 * @param {number} [opts.maxBuffer] - cap in bytes on captured stdout/stderr
 *   (default 64 MiB); the child is terminated when it is exceeded
 * @param {object} [opts.runtimeOverride] - { bin, buildArgs } to bypass the manifest's
 *   runtime adapter (used by tests)
 * @param {function} [opts.onProgress] - callback invoked after the child has
 *   exited, once per non-empty stream, with `{ stream, text }` holding the full
 *   captured output (no real-time streaming)
 * @param {boolean} [opts.captureMetrics] - when true, claude is invoked with
 *   --output-format json so the trailing usage envelope can be parsed and
 *   stored under runState.metrics (P-1603, v3.7.5). Other runtimes ignore.
 * @returns {object} on a completed spawn:
 *   `{ experiment_id, status, stop_reason, exit_code, elapsed_ms, started_at, ended_at }`;
 *   on a precondition failure: `{ error }`; when spawnSync itself throws:
 *   `{ error, status, stop_reason, elapsed_ms }`
 */
function runExperiment(slug, opts = {}) {
  const root = opts.root || PAN_EXPERIMENTS_ROOT_DEFAULT;
  const manifest = getExperimentManifest(slug, { root });
  if (manifest.error) return { error: manifest.error };

  const expPath = path.join(root, slug);
  if (!fs.existsSync(expPath)) {
    return { error: `experiment folder missing: ${expPath}` };
  }

  // Adapter selection
  let adapter = opts.runtimeOverride;
  if (!adapter) {
    const runtime = manifest.runtime;
    // Own-property lookup only, so a runtime named e.g. "constructor" cannot
    // resolve to something inherited from Object.prototype.
    const isKnownRuntime = Object.prototype.hasOwnProperty.call(RUNTIME_RUNNERS, runtime);
    adapter = isKnownRuntime ? RUNTIME_RUNNERS[runtime] : null;
    if (adapter == null) {
      return {
        error: `runtime "${runtime}" is not supported by the experiment runner ` +
          `(known: ${Object.keys(RUNTIME_RUNNERS).filter(r => RUNTIME_RUNNERS[r]).join(', ')})`,
      };
    }
  }

  const prompt = opts.prompt || '/pan:new-project --auto @.planning/idea.md';
  const timeoutMs = opts.timeoutMs || DEFAULT_TIMEOUT_MS;
  const onProgress = typeof opts.onProgress === 'function' ? opts.onProgress : null;
  // spawnSync's default maxBuffer is 1 MiB; a long autonomous session can
  // exceed that, which terminates the child with ENOBUFS. Use a generous cap.
  const maxBuffer = opts.maxBuffer ?? 64 * 1024 * 1024;

  const startedAt = new Date().toISOString();
  const startTime = Date.now();

  // Initialize run-state.json
  const runState = {
    experiment_id: slug,
    status: 'running',
    started_at: startedAt,
    ended_at: null,
    pid: null,
    exit_code: null,
    stop_reason: null,
    elapsed_ms: null,
    events: [],
  };
  appendEvent(runState, 'started', `runtime=${manifest.runtime}, prompt=${prompt}`);
  writeRunState(expPath, runState);

  // P-102 fix (v3.7.1): on Windows, CLI tools that ship as .cmd shims cannot
  // be spawned with shell:false. Adapters opt into a shell-based spawn via
  // `shell: 'win32'`; runtime overrides (test mocks, ad-hoc dev) do not
  // inherit it, so `node -e '...'` works without shell-based arg mangling.
  const useShell = adapter.shell === 'win32' && process.platform === 'win32';

  // buildArgs may accept opts (claude uses it for --output-format json metric
  // capture); adapters that ignore the second argument work unchanged.
  const captureMetrics = Boolean(opts.captureMetrics);
  const rawArgs = adapter.buildArgs(prompt, { captureMetrics });
  // P-1304 fix (v3.7.2): quote args for cmd.exe when going through the shell.
  const spawnArgs = useShell ? rawArgs.map(quoteWindowsShellArg) : rawArgs;

  // P-1501-r2 fix (v3.7.4): inherit the parent's stdin so the spawned
  // `claude -p` sees a TTY (when the runner is invoked from a terminal) and
  // continues its autonomous tool-use loop instead of exiting after the first
  // response. Trade-off: the child reads from the same TTY as the parent.
  //
  // Synchronous spawn with native timeout; stdout/stderr are captured and
  // handed to onProgress after exit (real-time streaming is deferred to W3).
  let result;
  try {
    result = spawnSync(adapter.bin, spawnArgs, {
      cwd: expPath,
      stdio: ['inherit', 'pipe', 'pipe'],
      shell: useShell,
      timeout: timeoutMs,
      maxBuffer,
      encoding: 'utf-8',
    });
  } catch (err) {
    runState.status = 'failed';
    runState.stop_reason = STOP_REASONS.ERROR;
    runState.ended_at = new Date().toISOString();
    runState.elapsed_ms = Date.now() - startTime;
    appendEvent(runState, 'spawn_failed', err.message);
    writeRunState(expPath, runState);
    return {
      error: `failed to spawn ${adapter.bin}: ${err.message}`,
      status: 'failed',
      stop_reason: STOP_REASONS.ERROR,
      elapsed_ms: runState.elapsed_ms,
    };
  }

  // NOTE: the pid only becomes known once spawnSync has returned, i.e. after
  // the child has already exited; it is recorded for reference only.
  runState.pid = result.pid || null;

  // Emit captured output if a progress handler is set
  if (onProgress) {
    if (result.stdout) onProgress({ stream: 'stdout', text: result.stdout });
    if (result.stderr) onProgress({ stream: 'stderr', text: result.stderr });
  }

  // P-1603 (v3.7.5): when captureMetrics was requested, parse the trailing
  // claude --output-format json envelope from stdout and persist metrics into
  // run-state.json so downstream `/pan:learn` analysis can attribute real cost
  // and token usage instead of inferring from event counts.
  if (captureMetrics && result.stdout) {
    const envelope = parseClaudeJsonEnvelope(result.stdout);
    if (envelope) {
      runState.metrics = {
        total_cost_usd: envelope.total_cost_usd ?? null,
        num_turns: envelope.num_turns ?? null,
        session_id: envelope.session_id ?? null,
        input_tokens: envelope.usage?.input_tokens ?? null,
        output_tokens: envelope.usage?.output_tokens ?? null,
        cache_creation_input_tokens: envelope.usage?.cache_creation_input_tokens ?? null,
        cache_read_input_tokens: envelope.usage?.cache_read_input_tokens ?? null,
      };
      appendEvent(runState, 'metrics_captured', `cost=$${envelope.total_cost_usd ?? '?'}, turns=${envelope.num_turns ?? '?'}`);
    } else {
      appendEvent(runState, 'metrics_unavailable', 'no JSON envelope in stdout');
    }
  }

  const endedAt = new Date().toISOString();
  const elapsedMs = Date.now() - startTime;

  runState.ended_at = endedAt;
  runState.elapsed_ms = elapsedMs;
  runState.exit_code = result.status;

  // Detect timeout. spawnSync reports a fired timeout as an ETIMEDOUT error.
  // A bare SIGTERM is NOT treated as a timeout on its own, because the child
  // may have been terminated externally well before the deadline. As a
  // fallback, a child that died without an exit code at (or just before) the
  // deadline is also counted as timed out.
  const timedOut =
    (result.error && result.error.code === 'ETIMEDOUT') ||
    (result.status === null && elapsedMs >= timeoutMs - 50);

  if (timedOut) {
    runState.status = 'failed';
    runState.stop_reason = STOP_REASONS.TIMEOUT;
    appendEvent(runState, 'timeout', `aborted after ${timeoutMs}ms`);
  } else if (result.error) {
    runState.status = 'failed';
    runState.stop_reason = STOP_REASONS.ERROR;
    appendEvent(runState, 'spawn_error', result.error.message);
  } else if (result.status === 0) {
    // P-1502 fix (v3.7.4): exit_code=0 alone is too coarse. Read state.md
    // to verify the workflow actually reached milestone-completion. If it
    // exited cleanly but the project is stuck in 'planning' or 'in_progress',
    // mark as 'incomplete' so /pan:learn analysis can distinguish real
    // success from premature exits (P-1501 / P-1701 patterns).
    //
    // Skip the milestone check when runtimeOverride is set (tests/dev path
    // simulating with `node -e` mocks that don't write state.md). The check
    // is meaningful only for real production-runtime invocations.
    if (opts.runtimeOverride) {
      runState.status = 'done';
      runState.stop_reason = STOP_REASONS.SUCCESS;
      appendEvent(runState, 'completed', 'exit_code=0 (runtime override; milestone check skipped)');
    } else {
      const milestone = readMilestoneStatus(expPath);
      if (milestone === 'completed') {
        runState.status = 'done';
        runState.stop_reason = STOP_REASONS.SUCCESS;
        appendEvent(runState, 'completed', 'exit_code=0, milestone=completed');
      } else {
        runState.status = 'incomplete';
        runState.stop_reason = STOP_REASONS.INCOMPLETE;
        appendEvent(runState, 'incomplete', `exit_code=0 but milestone status=${milestone || 'unknown'}`);
      }
    }
  } else {
    // Non-zero exit, or death by a signal before the deadline (status null).
    runState.status = 'failed';
    runState.stop_reason = STOP_REASONS.ERROR;
    const signalNote = result.signal ? `, signal=${result.signal}` : '';
    appendEvent(runState, 'completed', `exit_code=${result.status}${signalNote}`);
  }

  writeRunState(expPath, runState);

  return {
    experiment_id: slug,
    status: runState.status,
    stop_reason: runState.stop_reason,
    exit_code: result.status,
    elapsed_ms: elapsedMs,
    started_at: startedAt,
    ended_at: endedAt,
  };
}
|
|
389
|
+
|
|
390
|
+
// ── tailExperimentState ─────────────────────────────────────────────────────
|
|
391
|
+
|
|
392
|
+
/**
 * Return the current contents of an experiment's run-state.json.
 * This is a one-shot snapshot; nothing is streamed or polled.
 *
 * @param {string} slug - experiment id
 * @param {object} [opts]
 * @param {string} [opts.root] - experiment root (default PAN_EXPERIMENTS_ROOT_DEFAULT)
 * @returns {object} the stored run state, or `{ error }` when the manifest
 *   lookup fails or no run state can be read
 */
function tailExperimentState(slug, opts = {}) {
  const experimentsRoot = opts.root || PAN_EXPERIMENTS_ROOT_DEFAULT;

  const { error: manifestError } = getExperimentManifest(slug, { root: experimentsRoot });
  if (manifestError) return { error: manifestError };

  const snapshot = readRunState(path.join(experimentsRoot, slug));
  return snapshot || { error: `experiment "${slug}" has no run state (not started yet)` };
}
|
|
408
|
+
|
|
409
|
+
// ── stopExperiment ──────────────────────────────────────────────────────────
|
|
410
|
+
|
|
411
|
+
/**
 * Stop a running experiment.
 *
 * If run-state.json shows status=running and holds a usable pid, SIGTERM is
 * sent to that pid and the state is rewritten as failed / stop_reason=manual.
 * There is no follow-up SIGKILL: this function sends one signal and returns
 * without waiting for the process to exit.
 *
 * If the experiment has already finished, its current state is returned
 * unchanged (no error).
 *
 * NOTE(review): runExperiment in this file records the pid only after
 * spawnSync has returned, in the same write that replaces status `running`.
 * A state file written by it therefore never has both status=running and a
 * pid, so for such runs this function ends at the "no recorded pid" error.
 *
 * @param {string} slug - experiment id
 * @param {object} [opts]
 * @param {string} [opts.root] - experiment root (default PAN_EXPERIMENTS_ROOT_DEFAULT)
 * @returns {object} the (possibly updated) run state, or `{ error }`
 */
function stopExperiment(slug, opts = {}) {
  const root = opts.root || PAN_EXPERIMENTS_ROOT_DEFAULT;
  const manifest = getExperimentManifest(slug, { root });
  if (manifest.error) return { error: manifest.error };

  const expPath = path.join(root, slug);
  const state = readRunState(expPath);
  if (!state) {
    return { error: `experiment "${slug}" has no active run` };
  }

  if (state.status !== 'running') {
    // Already finished — return current state, not an error
    return state;
  }

  // Only a positive integer addresses a single process; zero and negative
  // values address process groups, and the pid comes from a file on disk.
  if (!Number.isInteger(state.pid) || state.pid <= 0) {
    return { error: `experiment "${slug}" has no recorded pid` };
  }

  let eventType = 'stopped';
  let eventDetails = 'SIGTERM sent';
  try {
    process.kill(state.pid, 'SIGTERM');
  } catch (err) {
    if (err.code === 'EPERM') {
      // The process exists but may not be signalled by this user; recording
      // the run as stopped would be false.
      return { error: `not permitted to signal pid ${state.pid} of experiment "${slug}"` };
    }
    // Process likely already dead
    eventType = 'stop_no_pid';
    eventDetails = `pid ${state.pid} already gone`;
  }

  // Update state to reflect manual stop
  const endedAt = new Date();
  state.status = 'failed';
  state.stop_reason = STOP_REASONS.MANUAL;
  state.ended_at = endedAt.toISOString();
  const startedMs = Date.parse(state.started_at);
  if (Number.isFinite(startedMs)) {
    state.elapsed_ms = endedAt.getTime() - startedMs;
  }
  appendEvent(state, eventType, eventDetails);
  writeRunState(expPath, state);

  return state;
}
|
|
462
|
+
|
|
463
|
+
// ── Exports ─────────────────────────────────────────────────────────────────
|
|
464
|
+
|
|
465
|
+
module.exports = {
|
|
466
|
+
runExperiment,
|
|
467
|
+
tailExperimentState,
|
|
468
|
+
stopExperiment,
|
|
469
|
+
RUNTIME_RUNNERS,
|
|
470
|
+
STOP_REASONS,
|
|
471
|
+
DEFAULT_TIMEOUT_MS,
|
|
472
|
+
};
|