@lannguyensi/harness 0.8.1 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +42 -0
- package/README.md +31 -3
- package/dist/cli/index.js +89 -0
- package/dist/cli/index.js.map +1 -1
- package/dist/cli/init/detect.d.ts +38 -0
- package/dist/cli/init/detect.js +111 -0
- package/dist/cli/init/detect.js.map +1 -0
- package/dist/cli/init/index.js +1 -1
- package/dist/cli/init/index.js.map +1 -1
- package/dist/cli/init/interactive.d.ts +30 -0
- package/dist/cli/init/interactive.js +178 -0
- package/dist/cli/init/interactive.js.map +1 -0
- package/dist/cli/init/profiles.d.ts +2 -0
- package/dist/cli/init/profiles.js +163 -0
- package/dist/cli/init/profiles.js.map +1 -0
- package/dist/cli/init/templates.d.ts +1 -1
- package/dist/cli/init/templates.js +11 -1
- package/dist/cli/init/templates.js.map +1 -1
- package/dist/cli/pack/hook-pre-tool-use.js +14 -1
- package/dist/cli/pack/hook-pre-tool-use.js.map +1 -1
- package/dist/cli/smoke/assertions.d.ts +39 -0
- package/dist/cli/smoke/assertions.js +163 -0
- package/dist/cli/smoke/assertions.js.map +1 -0
- package/dist/cli/smoke/index.d.ts +59 -0
- package/dist/cli/smoke/index.js +183 -0
- package/dist/cli/smoke/index.js.map +1 -0
- package/dist/cli/smoke/runner.d.ts +55 -0
- package/dist/cli/smoke/runner.js +134 -0
- package/dist/cli/smoke/runner.js.map +1 -0
- package/dist/cli/smoke/stream-parser.d.ts +65 -0
- package/dist/cli/smoke/stream-parser.js +115 -0
- package/dist/cli/smoke/stream-parser.js.map +1 -0
- package/dist/policies/ledger-client.js +15 -3
- package/dist/policies/ledger-client.js.map +1 -1
- package/dist/schema/requires.js +17 -0
- package/dist/schema/requires.js.map +1 -1
- package/package.json +2 -1
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
// Phase 7 follow-up: `harness smoke` assertion engine.
|
|
2
|
+
//
|
|
3
|
+
// Operators pass any combination of --expect-hook / --expect-no-hook /
|
|
4
|
+
// --expect-exit / --expect-decision; this module turns each one into a
|
|
5
|
+
// pass/fail check against the parsed StreamSummary and returns a
|
|
6
|
+
// one-line diff per failure. The smoke CLI exits non-zero iff at least
|
|
7
|
+
// one failure is reported (EX_FAIL), so forensic stream + stderr files
|
|
8
|
+
// are always written before any assertion is evaluated.
|
|
9
|
+
/**
 * Tell whether a parsed stream hook entry corresponds to an
 * operator-supplied hook target.
 *
 * The entry carries `hookName` and `hookEvent` as separate fields, and the
 * operator may have typed either one, so the target is accepted when it is
 * strictly equal to either field.
 *
 * @param pair   Stream hook entry exposing `hookName` and `hookEvent`.
 * @param target Value passed via --expect-hook / --expect-no-hook.
 * @returns `true` when either field equals `target`, otherwise `false`.
 */
function hookMatches(pair, target) {
    if (pair.hookName === target) {
        return true;
    }
    return pair.hookEvent === target;
}
|
|
21
|
+
/**
 * Render the observed hook entries as a single comma-separated trace for
 * failure messages.
 *
 * Each entry becomes `<id>:<tag>`: the id is the first non-empty of
 * `hookName`, `hookEvent`, or "(unnamed)"; the tag is "[no response]" when
 * `response` is exactly `null`, otherwise `outcome` (or "?" if nullish).
 *
 * @param hooks Array of stream hook entries.
 * @returns The trace, or a fixed placeholder when `hooks` is empty.
 */
function summariseHooks(hooks) {
    if (hooks.length === 0) {
        return "(no hook events observed)";
    }
    const describe = (hook) => {
        const label = hook.hookName || hook.hookEvent || "(unnamed)";
        let status;
        if (hook.response === null) {
            status = "[no response]";
        }
        else {
            status = hook.outcome ?? "?";
        }
        return `${label}:${status}`;
    };
    return hooks.map((hook) => describe(hook)).join(", ");
}
|
|
31
|
+
// Deny-envelope markers. Whitespace around the colon is tolerated so that a
// pretty-printed or differently serialised envelope is still recognised;
// the compact form (`"decision":"block"`) keeps matching as before.
const DENY_DECISION_PATTERN = /"decision"\s*:\s*"block"/;
const DENY_PERMISSION_PATTERN = /"permissionDecision"\s*:\s*"deny"/;
/**
 * Classify the policy decision emitted by `harness policy intercept`.
 *
 * Contract being detected:
 *   - `deny`: some hook's stdout carries both `decision:"block"` and
 *     `permissionDecision:"deny"`.
 *   - `warn`: no deny was seen and some hook's stderr contains the
 *     `warn-degraded` diagnostic.
 *   - `allow`: neither of the above, but at least one hook whose
 *     `hookEvent` or `hookName` is "PreToolUse" fired.
 *   - `null`: no PreToolUse hook in the stream, so the assertion is N/A
 *     and the caller reports it as a miss.
 *
 * Detection order is deny, then warn, then allow: a deny anywhere in the
 * stream wins regardless of where it appears.
 *
 * Known false positive: a manifest can wire non-policy PreToolUse hooks
 * (audit/logging shims). If only such a hook fired, the result is still
 * `allow`. Operators chasing a policy bypass should pair
 * `--expect-decision deny` with a prompt known to trigger the policy's
 * tool matcher, so a missing fire fails loudly.
 *
 * @param hooks Array of stream hook entries (`hookName`, `hookEvent`,
 *              `stdout`, `stderr`).
 * @returns "deny" | "warn" | "allow" | null
 */
export function classifyDecision(hooks) {
    // A hook entry with a missing or non-string stream is treated as
    // having produced no output instead of throwing.
    const asText = (value) => (typeof value === "string" ? value : "");
    for (const h of hooks) {
        const out = asText(h.stdout);
        if (DENY_DECISION_PATTERN.test(out) && DENY_PERMISSION_PATTERN.test(out)) {
            return "deny";
        }
    }
    for (const h of hooks) {
        if (asText(h.stderr).includes("warn-degraded")) {
            return "warn";
        }
    }
    // Heuristic: any PreToolUse hook is potentially policy-driven. With no
    // PreToolUse hook at all, the decision is not applicable.
    const sawPreToolUse = hooks.some((h) => h.hookEvent === "PreToolUse" || h.hookName === "PreToolUse");
    return sawPreToolUse ? "allow" : null;
}
|
|
74
|
+
/**
 * Check every operator-supplied expectation against the parsed stream
 * summary and collect one failure record per miss.
 *
 * Checks run in a fixed order: expectHooks, expectNoHooks, expectExit,
 * expectDecision. Absent expectations are skipped.
 *
 * `claude -p` does not put a numeric exit code in the stream; it emits
 * `result.is_error`. So `expectExit === 0` requires `is_error` to be false
 * and any other value requires it to be true. A summary with no usable
 * `result.is_error` is reported as a failure.
 *
 * @param summary      Parsed stream summary (`hooks`, `result`).
 * @param expectations Optional `expectHooks`, `expectNoHooks`,
 *                     `expectExit`, `expectDecision`.
 * @returns Array of `{ kind, expected, actual, detail }`; empty on green.
 */
export function evaluateExpectations(summary, expectations) {
    const failures = [];
    const report = (kind, expected, actual, detail) => {
        failures.push({ kind, expected, actual, detail });
    };
    // Hooks that must have fired at least once.
    for (const target of expectations.expectHooks ?? []) {
        if (summary.hooks.some((h) => hookMatches(h, target))) {
            continue;
        }
        report("expect-hook", `hook "${target}" fires at least once`, summariseHooks(summary.hooks), `--expect-hook="${target}" was not observed in the stream. ` +
            `Observed hooks: ${summariseHooks(summary.hooks)}`);
    }
    // Hooks that must never have fired.
    for (const target of expectations.expectNoHooks ?? []) {
        const hitCount = summary.hooks.filter((h) => hookMatches(h, target)).length;
        if (hitCount === 0) {
            continue;
        }
        report("expect-no-hook", `hook "${target}" does NOT fire`, `hook "${target}" fired ${hitCount} time(s)`, `--expect-no-hook="${target}" was observed in the stream. ` +
            `Full hook trace: ${summariseHooks(summary.hooks)}`);
    }
    // Exit expectation, mapped onto the terminal result's is_error flag.
    const wantedExit = expectations.expectExit;
    if (wantedExit !== undefined) {
        const expectsError = wantedExit !== 0;
        const reportedError = summary.result?.is_error ?? null;
        if (reportedError === null) {
            report("expect-exit", `terminal result.is_error=${expectsError}`, "(no terminal result event observed)", `--expect-exit=${wantedExit} could not be evaluated: the stream ended ` +
                "without a terminal `result` event. claude likely crashed or was killed mid-run.");
        }
        else if (reportedError !== expectsError) {
            report("expect-exit", `is_error=${expectsError} (--expect-exit=${wantedExit})`, `is_error=${reportedError}`, `--expect-exit=${wantedExit} expected is_error=${expectsError} but the terminal ` +
                `result event reports is_error=${reportedError}.`);
        }
    }
    // Policy decision expectation.
    const wantedDecision = expectations.expectDecision;
    if (wantedDecision !== undefined) {
        const observed = classifyDecision(summary.hooks);
        if (observed === null) {
            report("expect-decision", `policy decision = ${wantedDecision}`, "no PreToolUse hook fired", `--expect-decision=${wantedDecision} requires at least one ` +
                "PreToolUse hook in the stream; none observed.");
        }
        else if (observed !== wantedDecision) {
            report("expect-decision", `policy decision = ${wantedDecision}`, `policy decision = ${observed}`, `--expect-decision=${wantedDecision} but the last observable ` +
                `policy decision was ${observed}.`);
        }
    }
    return failures;
}
|
|
153
|
+
/**
 * Render assertion failures as a newline-terminated, human-readable block:
 * one header line with the failure count followed by one bullet per
 * failure showing its kind, expected value and actual value.
 *
 * @param failures Array of `{ kind, expected, actual }` records.
 * @returns The formatted block, or "" when there are no failures.
 */
export function formatFailures(failures) {
    if (failures.length === 0) {
        return "";
    }
    let report = `harness smoke: ${failures.length} assertion(s) failed:`;
    for (const failure of failures) {
        report += `\n - [${failure.kind}] expected ${failure.expected}; got ${failure.actual}`;
    }
    return `${report}\n`;
}
|
|
163
|
+
//# sourceMappingURL=assertions.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"assertions.js","sourceRoot":"","sources":["../../../src/cli/smoke/assertions.ts"],"names":[],"mappings":"AAAA,uDAAuD;AACvD,EAAE;AACF,uEAAuE;AACvE,uEAAuE;AACvE,iEAAiE;AACjE,uEAAuE;AACvE,uEAAuE;AACvE,wDAAwD;AAwBxD;;;;;;;;GAQG;AACH,SAAS,WAAW,CAAC,IAAc,EAAE,MAAc;IACjD,OAAO,IAAI,CAAC,QAAQ,KAAK,MAAM,IAAI,IAAI,CAAC,SAAS,KAAK,MAAM,CAAC;AAC/D,CAAC;AAED,SAAS,cAAc,CAAC,KAAiB;IACvC,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,2BAA2B,CAAC;IAC3D,MAAM,MAAM,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE;QAC7B,MAAM,EAAE,GAAG,CAAC,CAAC,QAAQ,IAAI,CAAC,CAAC,SAAS,IAAI,WAAW,CAAC;QACpD,MAAM,GAAG,GAAG,CAAC,CAAC,QAAQ,KAAK,IAAI,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,IAAI,GAAG,CAAC;QACrE,OAAO,GAAG,EAAE,IAAI,GAAG,EAAE,CAAC;IACxB,CAAC,CAAC,CAAC;IACH,OAAO,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC3B,CAAC;AAED;;;;;;;;;;;;;;;;;GAiBG;AACH,MAAM,UAAU,gBAAgB,CAC9B,KAAiB;IAEjB,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;QACtB,IACE,CAAC,CAAC,MAAM,CAAC,QAAQ,CAAC,oBAAoB,CAAC;YACvC,CAAC,CAAC,MAAM,CAAC,QAAQ,CAAC,6BAA6B,CAAC,EAChD,CAAC;YACD,OAAO,MAAM,CAAC;QAChB,CAAC;IACH,CAAC;IACD,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;QACtB,IAAI,CAAC,CAAC,MAAM,CAAC,QAAQ,CAAC,eAAe,CAAC,EAAE,CAAC;YACvC,OAAO,MAAM,CAAC;QAChB,CAAC;IACH,CAAC;IACD,kEAAkE;IAClE,yDAAyD;IACzD,EAAE;IACF,kEAAkE;IAClE,oEAAoE;IACpE,mEAAmE;IACnE,+DAA+D;IAC/D,iEAAiE;IACjE,oEAAoE;IACpE,4CAA4C;IAC5C,MAAM,aAAa,GAAG,KAAK,CAAC,IAAI,CAC9B,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,KAAK,YAAY,IAAI,CAAC,CAAC,QAAQ,KAAK,YAAY,CACnE,CAAC;IACF,OAAO,aAAa,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC;AACxC,CAAC;AAED,MAAM,UAAU,oBAAoB,CAClC,OAAsB,EACtB,YAA+B;IAE/B,MAAM,QAAQ,GAAuB,EAAE,CAAC;IAExC,KAAK,MAAM,MAAM,IAAI,YAAY,CAAC,WAAW,IAAI,EAAE,EAAE,CAAC;QACpD,MAAM,IAAI,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,WAAW,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC,CAAC;QAC/D,IAAI,CAAC,IAAI,EAAE,CAAC;YACV,QAAQ,CAAC,IAAI,CAAC;gBACZ,IAAI,EAAE,aAAa;gBACnB,QAAQ,EAAE,SAAS,MAAM,uBAAuB;gBAChD,MAAM,EAAE,cAAc,CAAC,OAAO,CAAC,KAAK,CAAC;gBACrC,MAAM,EACJ,kBAAkB,MAAM,oCAAoC;oBAC5D,mBAAmB,cA
Ac,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE;aACrD,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,KAAK,MAAM,MAAM,IAAI,YAAY,CAAC,aAAa,IAAI,EAAE,EAAE,CAAC;QACtD,MAAM,IAAI,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,WAAW,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC,CAAC;QAC/D,IAAI,IAAI,EAAE,CAAC;YACT,QAAQ,CAAC,IAAI,CAAC;gBACZ,IAAI,EAAE,gBAAgB;gBACtB,QAAQ,EAAE,SAAS,MAAM,iBAAiB;gBAC1C,MAAM,EAAE,SAAS,MAAM,WAAW,OAAO,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,WAAW,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC,CAAC,MAAM,UAAU;gBACtG,MAAM,EACJ,qBAAqB,MAAM,gCAAgC;oBAC3D,oBAAoB,cAAc,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE;aACtD,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,IAAI,YAAY,CAAC,UAAU,KAAK,SAAS,EAAE,CAAC;QAC1C,MAAM,IAAI,GAAG,YAAY,CAAC,UAAU,CAAC;QACrC,+DAA+D;QAC/D,+CAA+C;QAC/C,gDAAgD;QAChD,+CAA+C;QAC/C,oEAAoE;QACpE,mEAAmE;QACnE,iDAAiD;QACjD,MAAM,OAAO,GAAG,OAAO,CAAC,MAAM,EAAE,QAAQ,IAAI,IAAI,CAAC;QACjD,MAAM,UAAU,GAAG,IAAI,KAAK,CAAC,CAAC;QAC9B,IAAI,OAAO,KAAK,IAAI,EAAE,CAAC;YACrB,QAAQ,CAAC,IAAI,CAAC;gBACZ,IAAI,EAAE,aAAa;gBACnB,QAAQ,EAAE,4BAA4B,UAAU,EAAE;gBAClD,MAAM,EAAE,qCAAqC;gBAC7C,MAAM,EACJ,iBAAiB,IAAI,4CAA4C;oBACjE,iFAAiF;aACpF,CAAC,CAAC;QACL,CAAC;aAAM,IAAI,OAAO,KAAK,UAAU,EAAE,CAAC;YAClC,QAAQ,CAAC,IAAI,CAAC;gBACZ,IAAI,EAAE,aAAa;gBACnB,QAAQ,EAAE,YAAY,UAAU,mBAAmB,IAAI,GAAG;gBAC1D,MAAM,EAAE,YAAY,OAAO,EAAE;gBAC7B,MAAM,EACJ,iBAAiB,IAAI,sBAAsB,UAAU,oBAAoB;oBACzE,iCAAiC,OAAO,GAAG;aAC9C,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,IAAI,YAAY,CAAC,cAAc,KAAK,SAAS,EAAE,CAAC;QAC9C,MAAM,QAAQ,GAAG,gBAAgB,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;QACjD,IAAI,QAAQ,KAAK,IAAI,EAAE,CAAC;YACtB,QAAQ,CAAC,IAAI,CAAC;gBACZ,IAAI,EAAE,iBAAiB;gBACvB,QAAQ,EAAE,qBAAqB,YAAY,CAAC,cAAc,EAAE;gBAC5D,MAAM,EAAE,0BAA0B;gBAClC,MAAM,EACJ,qBAAqB,YAAY,CAAC,cAAc,yBAAyB;oBACzE,+CAA+C;aAClD,CAAC,CAAC;QACL,CAAC;aAAM,IAAI,QAAQ,KAAK,YAAY,CAAC,cAAc,EAAE,CAAC;YACpD,QAAQ,CAAC,IAAI,CAAC;gBACZ,IAAI,EAAE,iBAAiB;gBACvB,QAAQ,EAAE,qBAAqB,YAAY,CAAC,cAAc,EAAE;gBAC5D,MAAM,EAAE,qBAAqB,QAAQ,EAAE;gBACvC,MAAM,EACJ,qBAAqB,YAAY,CAAC,cAAc,2BAA2B;oBAC3E,uBAAuB,QAAQ,GAAG;aACrC,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,OA
AO,QAAQ,CAAC;AAClB,CAAC;AAED,MAAM,UAAU,cAAc,CAAC,QAA4B;IACzD,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IACrC,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,KAAK,CAAC,IAAI,CAAC,kBAAkB,QAAQ,CAAC,MAAM,uBAAuB,CAAC,CAAC;IACrE,KAAK,MAAM,CAAC,IAAI,QAAQ,EAAE,CAAC;QACzB,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,IAAI,cAAc,CAAC,CAAC,QAAQ,SAAS,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC;IACxE,CAAC;IACD,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC;AACjC,CAAC"}
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
import { apply } from "../apply/index.js";
|
|
2
|
+
import { type AssertionFailure, type SmokeExpectations } from "./assertions.js";
|
|
3
|
+
import { type RunClaudeOptions } from "./runner.js";
|
|
4
|
+
import { type StreamSummary } from "./stream-parser.js";
|
|
5
|
+
/** Inputs accepted by `runSmoke`. */
export interface SmokeOptions {
    /** Path to harness.yaml; forwarded to the apply step when set. */
    configPath?: string;
    /** Project selector for per-project overrides; forwarded to the apply step when set. */
    project?: string;
    /** Prompt handed to `claude -p`. Must be non-blank. */
    prompt: string;
    /** Directory that receives stream.jsonl, stderr.log and the rendered settings.json. */
    outputDir: string;
    /** Session id for the spawned run; a fresh uuid is generated when omitted. */
    sessionId?: string;
    /** claude binary to use; falls back to $CLAUDE_BIN, then "claude" on PATH. */
    claudeBin?: string;
    /** Wall-clock budget in milliseconds; 60 s when omitted. */
    timeoutMs?: number;
    /** Assertions to evaluate against the captured stream. */
    expectations?: SmokeExpectations;
    /** Spawn implementation injected by tests. */
    spawn?: RunClaudeOptions["spawn"];
    /** Test seam replacing the manifest-apply step; the real `apply` is used when omitted. */
    applyImpl?: typeof apply;
    /** Working directory for the spawned claude. Defaults to `outputDir`. */
    spawnCwd?: string;
    /** Writers for stdout/stderr output (defaults to process.stdout / stderr). */
    stdout?: (s: string) => void;
    stderr?: (s: string) => void;
}
|
|
32
|
+
/** Outcome of one `runSmoke` invocation. */
export interface SmokeResult {
    /** EX_OK when green, EX_FAIL on an assertion miss, EX_UNAVAILABLE when claude is missing. */
    exitCode: number;
    /** Directory holding the forensic artefacts of this run. */
    outputDir: string;
    /** Location of the settings.json rendered for this run. */
    settingsPath: string;
    /** Location of the captured stream file. */
    streamPath: string;
    /** Location of the captured stderr file. */
    stderrPath: string;
    /** Summary parsed from the captured stream. */
    summary: StreamSummary;
    /** Exit code of the spawned claude process (number, or null). */
    claudeExitCode: number | null;
    /** Whether the spawned claude hit the wall-clock timeout. */
    claudeTimedOut: boolean;
    /** Wall-clock duration of the spawn, in milliseconds. */
    durationMs: number;
    /** Assertion failures; empty when the run is green. */
    failures: AssertionFailure[];
    /** The argv claude was spawned with, echoed for dogfood README capture. */
    claudeArgv: string[];
}
|
|
51
|
+
/**
 * Turn a CLI value into a list of tokens. The value may be a single token
 * or several separated by commas; each token is trimmed and empty tokens
 * are dropped. commander's `--expect-hook <name>` takes one value per
 * flag, so accepting a comma-separated string is the ergonomic shortcut
 * for the common case.
 */
export declare function splitCommaList(input: string): string[];
|
|
57
|
+
/**
 * Run one headless smoke pass: render settings from the manifest into
 * `outputDir`, spawn claude with the prompt, parse the captured stream and
 * evaluate the supplied expectations. Resolves with the full result;
 * usage errors and an unavailable claude binary are reported by throwing.
 */
export declare function runSmoke(opts: SmokeOptions): Promise<SmokeResult>;
|
|
58
|
+
/**
 * Render a `SmokeResult` as a newline-terminated, human-readable report:
 * a PASS/FAIL headline, the artefact paths, key stream facts and, when
 * present, the formatted assertion failures.
 */
export declare function formatSmokeReport(result: SmokeResult): string;
|
|
59
|
+
export { type SmokeExpectations, type ExpectDecision } from "./assertions.js";
|
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
// Phase 7 follow-up: `harness smoke`, the built-in headless dogfood verb.
|
|
2
|
+
//
|
|
3
|
+
// Owns the headless `claude -p` loop that used to live as a hand-rolled
|
|
4
|
+
// shell recipe under `dogfood/phase5/run-smoke.sh`. Reuses the apply
|
|
5
|
+
// machinery to render a temp settings.json from the manifest, spawns
|
|
6
|
+
// claude with the canonical stream-json flags, and runs the operator-
|
|
7
|
+
// supplied --expect-* assertions against the captured stream.
|
|
8
|
+
//
|
|
9
|
+
// Stream + stderr are written to <output-dir>/ on every run, including
|
|
10
|
+
// assertion failures and timeouts, so a CI green-or-red signal always
|
|
11
|
+
// comes with a forensic trail.
|
|
12
|
+
import { randomUUID } from "node:crypto";
|
|
13
|
+
import * as fs from "node:fs";
|
|
14
|
+
import * as path from "node:path";
|
|
15
|
+
import { apply, SETTINGS_BASENAME } from "../apply/index.js";
|
|
16
|
+
import { EX_FAIL, EX_UNAVAILABLE, EX_USAGE, HarnessExitError } from "../exit-codes.js";
|
|
17
|
+
import { evaluateExpectations, formatFailures, } from "./assertions.js";
|
|
18
|
+
import { runClaude } from "./runner.js";
|
|
19
|
+
import { parseStreamJsonl } from "./stream-parser.js";
|
|
20
|
+
/**
 * Pick the claude binary to spawn. Precedence: the explicit
 * `opts.claudeBin`, then the CLAUDE_BIN environment variable, then the
 * bare name "claude" (left to PATH lookup). Empty values are skipped.
 *
 * @param opts Smoke options; only `claudeBin` is read.
 * @returns Path or command name of the claude binary.
 */
function resolveClaudeBin(opts) {
    return opts.claudeBin || process.env.CLAUDE_BIN || "claude";
}
|
|
27
|
+
/**
 * Verify up front that the claude binary can be found, so a missing
 * binary surfaces as EX_UNAVAILABLE with a readable message.
 *
 * This is an inline `which`-style probe. `spawn()` cannot be trusted to
 * report it cleanly: ENOENT surfaces asynchronously and the
 * operator-facing output on that path is a Node stack trace.
 *
 * Resolution rule: a `bin` that is absolute or contains a path separator
 * is treated as a path and checked directly. Only a bare command name is
 * searched on PATH. A relative path such as `tools/claude` must not be
 * joined onto PATH entries, because the OS does not search PATH for a
 * command that contains a separator.
 *
 * @param bin Path or bare command name of the claude binary.
 * @throws HarnessExitError (EX_UNAVAILABLE) when the binary is not found.
 */
function ensureClaudeAvailable(bin) {
    const isPathLike = path.isAbsolute(bin) || bin.includes("/") || bin.includes(path.sep);
    if (isPathLike) {
        if (!fs.existsSync(bin)) {
            throw new HarnessExitError(`harness smoke: claude binary not found at ${bin}`, EX_UNAVAILABLE);
        }
        return;
    }
    const pathEntries = (process.env.PATH ?? "").split(path.delimiter);
    for (const dir of pathEntries) {
        // Empty PATH entries are skipped so they are not probed as the cwd.
        if (!dir)
            continue;
        if (fs.existsSync(path.join(dir, bin)))
            return;
    }
    throw new HarnessExitError(`harness smoke: ${bin} not found on PATH (set --claude-bin or CLAUDE_BIN env)`, EX_UNAVAILABLE);
}
|
|
48
|
+
/**
 * Validate an --expect-decision value.
 *
 * @param s Candidate value.
 * @returns `true` only for "allow", "deny" or "warn".
 */
function isExpectDecision(s) {
    switch (s) {
        case "allow":
        case "deny":
        case "warn":
            return true;
        default:
            return false;
    }
}
|
|
51
|
+
/**
 * Turn a CLI value into a list of tokens. The value may be one token or
 * several separated by commas. Each token is trimmed and empty tokens are
 * dropped, so `"a, b,,c "` yields `["a", "b", "c"]`.
 *
 * commander's `--expect-hook <name>` takes one value per flag; accepting a
 * comma-separated string is the ergonomic shortcut for the common case.
 *
 * @param input Raw flag value.
 * @returns Non-empty, trimmed tokens in input order.
 */
export function splitCommaList(input) {
    const tokens = [];
    for (const piece of input.split(",")) {
        const trimmed = piece.trim();
        if (trimmed.length > 0) {
            tokens.push(trimmed);
        }
    }
    return tokens;
}
|
|
62
|
+
/**
 * Run one headless smoke pass.
 *
 * Steps, in order:
 *   1. Validate options (prompt, output dir, --expect-decision value).
 *   2. Resolve the claude binary and verify it can be found.
 *   3. Render settings into `<outputDir>/<SETTINGS_BASENAME>` via the apply
 *      step, refusing to continue when apply reports a refusal outcome.
 *   4. Spawn claude through `runClaude` and parse the captured stream.
 *   5. Evaluate the expectations, then add implicit failures for a timeout
 *      or a crash, but only when no failure has been recorded yet.
 *
 * @param opts Smoke options (see SmokeOptions).
 * @returns The run result; `exitCode` is 0 when there are no failures,
 *          otherwise EX_FAIL.
 * @throws HarnessExitError with EX_USAGE on invalid options, EX_UNAVAILABLE
 *         when claude is missing or the spawn fails, EX_FAIL when apply
 *         refuses.
 */
export async function runSmoke(opts) {
    if (!opts.prompt || !opts.prompt.trim()) {
        throw new HarnessExitError("harness smoke: --prompt is required", EX_USAGE);
    }
    if (!opts.outputDir) {
        throw new HarnessExitError("harness smoke: --output-dir is required", EX_USAGE);
    }
    if (opts.expectations?.expectDecision !== undefined) {
        if (!isExpectDecision(opts.expectations.expectDecision)) {
            throw new HarnessExitError(`harness smoke: --expect-decision must be one of allow|deny|warn`, EX_USAGE);
        }
    }
    const claudeBin = resolveClaudeBin(opts);
    ensureClaudeAvailable(claudeBin);
    fs.mkdirSync(opts.outputDir, { recursive: true });
    const settingsPath = path.join(opts.outputDir, SETTINGS_BASENAME);
    const applyImpl = opts.applyImpl ?? apply;
    const applyOpts = {
        target: settingsPath,
        force: true,
    };
    if (opts.configPath)
        applyOpts.configPath = opts.configPath;
    if (opts.project)
        applyOpts.project = opts.project;
    const applyResult = await applyImpl(applyOpts);
    // `apply` can return a refusal outcome without throwing. Without this
    // guard a stale generated/ dir or an unresolved --target conflict
    // silently lets smoke run claude against the OLD settings, which then
    // looks green for the wrong reason. Fail loud instead.
    const REFUSAL_OUTCOMES = new Set([
        "drift-refuse",
        "lock-drift-refuse",
        "target-exists-refuse",
    ]);
    if (REFUSAL_OUTCOMES.has(applyResult.outcome)) {
        throw new HarnessExitError(`harness smoke: apply refused with outcome="${applyResult.outcome}"; resolve drift before re-running`, EX_FAIL);
    }
    const sessionId = opts.sessionId ?? randomUUID();
    const timeoutMs = opts.timeoutMs ?? 60_000;
    const runOpts = {
        claudeBin,
        prompt: opts.prompt,
        settingsPath,
        sessionId,
        outputDir: opts.outputDir,
        timeoutMs,
    };
    if (opts.spawn)
        runOpts.spawn = opts.spawn;
    if (opts.spawnCwd !== undefined)
        runOpts.cwd = opts.spawnCwd;
    let runResult;
    try {
        runResult = await runClaude(runOpts);
    }
    catch (err) {
        // A rejection is not guaranteed to be an Error instance. Reading
        // `.message` blindly would print "undefined" for a thrown
        // string/object, or throw a TypeError for null/undefined and mask
        // the EX_UNAVAILABLE exit.
        const reason = err instanceof Error ? err.message : String(err);
        throw new HarnessExitError(`harness smoke: claude spawn failed: ${reason}`, EX_UNAVAILABLE);
    }
    const summary = parseStreamJsonl(runResult.streamText);
    const failures = evaluateExpectations(summary, opts.expectations ?? {});
    // Even if the user passed no expectations, surface a timeout or a
    // claude crash as a failure so green-or-red is unambiguous.
    if (failures.length === 0 && runResult.timedOut) {
        failures.push({
            kind: "expect-exit",
            expected: "claude completes before timeout",
            actual: `claude killed after ${timeoutMs}ms`,
            detail: `harness smoke: claude exceeded the ${timeoutMs}ms budget and was SIGTERM'd. Stream may be truncated.`,
        });
    }
    // Claude crashed before emitting a terminal result event AND exited
    // non-zero. Without an --expect-exit assertion this would silently
    // pass; treat it as an implicit miss so the operator never sees
    // green on a broken-pipe / ENOENT-after-spawn / abort-during-init.
    if (failures.length === 0 &&
        !runResult.timedOut &&
        runResult.exitCode !== null &&
        runResult.exitCode !== 0 &&
        summary.result === null) {
        failures.push({
            kind: "expect-exit",
            expected: "claude emits a terminal result event",
            actual: `claude exited ${runResult.exitCode} without a terminal result event`,
            detail: `harness smoke: claude exited ${runResult.exitCode} and the stream carries ` +
                "no terminal `result` event. Treating as implicit failure; check stderr.log for forensics.",
        });
    }
    const exitCode = failures.length === 0 ? 0 : EX_FAIL;
    return {
        exitCode,
        outputDir: opts.outputDir,
        settingsPath,
        streamPath: runResult.streamPath,
        stderrPath: runResult.stderrPath,
        summary,
        claudeExitCode: runResult.exitCode,
        claudeTimedOut: runResult.timedOut,
        durationMs: runResult.durationMs,
        failures,
        claudeArgv: runResult.argv,
    };
}
|
|
165
|
+
/**
 * Render a smoke result as a newline-terminated, human-readable report.
 *
 * The headline is PASS when `result.failures` is empty and FAIL otherwise,
 * followed by the artefact paths and key stream facts. A timeout line is
 * added when `claudeTimedOut` is set, and the formatted failure list is
 * appended after a blank line when there are failures.
 *
 * @param result Result returned by `runSmoke`.
 * @returns The report text.
 */
export function formatSmokeReport(result) {
    const verdict = result.failures.length === 0 ? "PASS" : "FAIL";
    const lines = [
        `harness smoke: ${verdict} (${result.durationMs}ms)`,
        ` output-dir: ${result.outputDir}`,
        ` stream: ${result.streamPath}`,
        ` stderr: ${result.stderrPath}`,
        ` session_id: ${result.summary.init?.session_id ?? "(no init event)"}`,
        ` hooks fired: ${result.summary.hooks.length}`,
        ` result.is_error: ${result.summary.result?.is_error ?? "(no result event)"}`,
    ];
    if (result.claudeTimedOut) {
        lines.push(` TIMED OUT after ${result.durationMs}ms`);
    }
    if (result.failures.length > 0) {
        lines.push("", formatFailures(result.failures).trimEnd());
    }
    return `${lines.join("\n")}\n`;
}
|
|
183
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/cli/smoke/index.ts"],"names":[],"mappings":"AAAA,0EAA0E;AAC1E,EAAE;AACF,wEAAwE;AACxE,qEAAqE;AACrE,qEAAqE;AACrE,sEAAsE;AACtE,8DAA8D;AAC9D,EAAE;AACF,uEAAuE;AACvE,sEAAsE;AACtE,+BAA+B;AAE/B,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AACzC,OAAO,KAAK,EAAE,MAAM,SAAS,CAAC;AAC9B,OAAO,KAAK,IAAI,MAAM,WAAW,CAAC;AAClC,OAAO,EAAE,KAAK,EAAE,iBAAiB,EAAE,MAAM,mBAAmB,CAAC;AAC7D,OAAO,EAAE,OAAO,EAAE,cAAc,EAAE,QAAQ,EAAE,gBAAgB,EAAE,MAAM,kBAAkB,CAAC;AACvF,OAAO,EACL,oBAAoB,EACpB,cAAc,GAIf,MAAM,iBAAiB,CAAC;AACzB,OAAO,EAAE,SAAS,EAA+C,MAAM,aAAa,CAAC;AACrF,OAAO,EAAE,gBAAgB,EAAsB,MAAM,oBAAoB,CAAC;AAkD1E,SAAS,gBAAgB,CAAC,IAAkB;IAC1C,IAAI,IAAI,CAAC,SAAS;QAAE,OAAO,IAAI,CAAC,SAAS,CAAC;IAC1C,IAAI,OAAO,CAAC,GAAG,CAAC,UAAU;QAAE,OAAO,OAAO,CAAC,GAAG,CAAC,UAAU,CAAC;IAC1D,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED,SAAS,qBAAqB,CAAC,GAAW;IACxC,sEAAsE;IACtE,oEAAoE;IACpE,oEAAoE;IACpE,4BAA4B;IAC5B,IAAI,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,GAAG,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,GAAG,CAAC,UAAU,CAAC,KAAK,CAAC,EAAE,CAAC;QAC1E,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;YACxB,MAAM,IAAI,gBAAgB,CACxB,6CAA6C,GAAG,EAAE,EAClD,cAAc,CACf,CAAC;QACJ,CAAC;QACD,OAAO;IACT,CAAC;IACD,MAAM,WAAW,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,IAAI,EAAE,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IACnE,KAAK,MAAM,GAAG,IAAI,WAAW,EAAE,CAAC;QAC9B,IAAI,CAAC,GAAG;YAAE,SAAS;QACnB,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC;QACtC,IAAI,EAAE,CAAC,UAAU,CAAC,SAAS,CAAC;YAAE,OAAO;IACvC,CAAC;IACD,MAAM,IAAI,gBAAgB,CACxB,kBAAkB,GAAG,yDAAyD,EAC9E,cAAc,CACf,CAAC;AACJ,CAAC;AAED,SAAS,gBAAgB,CAAC,CAAS;IACjC,OAAO,CAAC,KAAK,OAAO,IAAI,CAAC,KAAK,MAAM,IAAI,CAAC,KAAK,MAAM,CAAC;AACvD,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,cAAc,CAAC,KAAa;IAC1C,OAAO,KAAK;SACT,KAAK,CAAC,GAAG,CAAC;SACV,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;SACpB,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;AACjC,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,QAAQ,CAAC,IAAkB;IAC/C,IAAI,CAAC,IAAI,CAAC,MAAM,IAAI,CAAC,IAAI
,CAAC,MAAM,CAAC,IAAI,EAAE,EAAE,CAAC;QACxC,MAAM,IAAI,gBAAgB,CAAC,qCAAqC,EAAE,QAAQ,CAAC,CAAC;IAC9E,CAAC;IACD,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC;QACpB,MAAM,IAAI,gBAAgB,CACxB,yCAAyC,EACzC,QAAQ,CACT,CAAC;IACJ,CAAC;IAED,IAAI,IAAI,CAAC,YAAY,EAAE,cAAc,KAAK,SAAS,EAAE,CAAC;QACpD,IAAI,CAAC,gBAAgB,CAAC,IAAI,CAAC,YAAY,CAAC,cAAc,CAAC,EAAE,CAAC;YACxD,MAAM,IAAI,gBAAgB,CACxB,iEAAiE,EACjE,QAAQ,CACT,CAAC;QACJ,CAAC;IACH,CAAC;IAED,MAAM,SAAS,GAAG,gBAAgB,CAAC,IAAI,CAAC,CAAC;IACzC,qBAAqB,CAAC,SAAS,CAAC,CAAC;IAEjC,EAAE,CAAC,SAAS,CAAC,IAAI,CAAC,SAAS,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAClD,MAAM,YAAY,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,iBAAiB,CAAC,CAAC;IAElE,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,IAAI,KAAK,CAAC;IAC1C,MAAM,SAAS,GAAgC;QAC7C,MAAM,EAAE,YAAY;QACpB,KAAK,EAAE,IAAI;KACZ,CAAC;IACF,IAAI,IAAI,CAAC,UAAU;QAAE,SAAS,CAAC,UAAU,GAAG,IAAI,CAAC,UAAU,CAAC;IAC5D,IAAI,IAAI,CAAC,OAAO;QAAE,SAAS,CAAC,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC;IACnD,MAAM,WAAW,GAAG,MAAM,SAAS,CAAC,SAAS,CAAC,CAAC;IAC/C,sEAAsE;IACtE,kEAAkE;IAClE,sEAAsE;IACtE,uDAAuD;IACvD,MAAM,gBAAgB,GAAG,IAAI,GAAG,CAAC;QAC/B,cAAc;QACd,mBAAmB;QACnB,sBAAsB;KACvB,CAAC,CAAC;IACH,IAAI,gBAAgB,CAAC,GAAG,CAAC,WAAW,CAAC,OAAO,CAAC,EAAE,CAAC;QAC9C,MAAM,IAAI,gBAAgB,CACxB,8CAA8C,WAAW,CAAC,OAAO,oCAAoC,EACrG,OAAO,CACR,CAAC;IACJ,CAAC;IAED,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,IAAI,UAAU,EAAE,CAAC;IACjD,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,IAAI,MAAM,CAAC;IAE3C,MAAM,OAAO,GAAqB;QAChC,SAAS;QACT,MAAM,EAAE,IAAI,CAAC,MAAM;QACnB,YAAY;QACZ,SAAS;QACT,SAAS,EAAE,IAAI,CAAC,SAAS;QACzB,SAAS;KACV,CAAC;IACF,IAAI,IAAI,CAAC,KAAK;QAAE,OAAO,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC;IAC3C,IAAI,IAAI,CAAC,QAAQ,KAAK,SAAS;QAAE,OAAO,CAAC,GAAG,GAAG,IAAI,CAAC,QAAQ,CAAC;IAE7D,IAAI,SAA0B,CAAC;IAC/B,IAAI,CAAC;QACH,SAAS,GAAG,MAAM,SAAS,CAAC,OAAO,CAAC,CAAC;IACvC,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,MAAM,IAAI,gBAAgB,CACxB,uCAAwC,GAAa,CAAC,OAAO,EAAE,EAC/D,cAAc,CACf,CAAC;IACJ,CAAC;IAED,MAAM,OAAO,GAAG,gBAAgB,CAAC,SAAS,CAAC,UAAU,CAAC,CAAC;IACvD,MAAM,QAAQ,GAAG,oBAAoB,CAAC,OAAO,EAAE,IAAI,CAAC,YAAY,IAAI,EAAE,CAAC,CAAC;IAExE,kEAA
kE;IAClE,4DAA4D;IAC5D,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,IAAI,SAAS,CAAC,QAAQ,EAAE,CAAC;QAChD,QAAQ,CAAC,IAAI,CAAC;YACZ,IAAI,EAAE,aAAa;YACnB,QAAQ,EAAE,iCAAiC;YAC3C,MAAM,EAAE,uBAAuB,SAAS,IAAI;YAC5C,MAAM,EAAE,sCAAsC,SAAS,uDAAuD;SAC/G,CAAC,CAAC;IACL,CAAC;IACD,oEAAoE;IACpE,mEAAmE;IACnE,gEAAgE;IAChE,mEAAmE;IACnE,IACE,QAAQ,CAAC,MAAM,KAAK,CAAC;QACrB,CAAC,SAAS,CAAC,QAAQ;QACnB,SAAS,CAAC,QAAQ,KAAK,IAAI;QAC3B,SAAS,CAAC,QAAQ,KAAK,CAAC;QACxB,OAAO,CAAC,MAAM,KAAK,IAAI,EACvB,CAAC;QACD,QAAQ,CAAC,IAAI,CAAC;YACZ,IAAI,EAAE,aAAa;YACnB,QAAQ,EAAE,sCAAsC;YAChD,MAAM,EAAE,iBAAiB,SAAS,CAAC,QAAQ,kCAAkC;YAC7E,MAAM,EACJ,gCAAgC,SAAS,CAAC,QAAQ,0BAA0B;gBAC5E,2FAA2F;SAC9F,CAAC,CAAC;IACL,CAAC;IAED,MAAM,QAAQ,GAAG,QAAQ,CAAC,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC;IACrD,OAAO;QACL,QAAQ;QACR,SAAS,EAAE,IAAI,CAAC,SAAS;QACzB,YAAY;QACZ,UAAU,EAAE,SAAS,CAAC,UAAU;QAChC,UAAU,EAAE,SAAS,CAAC,UAAU;QAChC,OAAO;QACP,cAAc,EAAE,SAAS,CAAC,QAAQ;QAClC,cAAc,EAAE,SAAS,CAAC,QAAQ;QAClC,UAAU,EAAE,SAAS,CAAC,UAAU;QAChC,QAAQ;QACR,UAAU,EAAE,SAAS,CAAC,IAAI;KAC3B,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,iBAAiB,CAAC,MAAmB;IACnD,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,KAAK,CAAC,IAAI,CAAC,kBAAkB,MAAM,CAAC,QAAQ,CAAC,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,KAAK,MAAM,CAAC,UAAU,KAAK,CAAC,CAAC;IACxG,KAAK,CAAC,IAAI,CAAC,mBAAmB,MAAM,CAAC,SAAS,EAAE,CAAC,CAAC;IAClD,KAAK,CAAC,IAAI,CAAC,mBAAmB,MAAM,CAAC,UAAU,EAAE,CAAC,CAAC;IACnD,KAAK,CAAC,IAAI,CAAC,mBAAmB,MAAM,CAAC,UAAU,EAAE,CAAC,CAAC;IACnD,KAAK,CAAC,IAAI,CAAC,mBAAmB,MAAM,CAAC,OAAO,CAAC,IAAI,EAAE,UAAU,IAAI,iBAAiB,EAAE,CAAC,CAAC;IACtF,KAAK,CAAC,IAAI,CAAC,mBAAmB,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC,CAAC;IAC7D,KAAK,CAAC,IAAI,CAAC,sBAAsB,MAAM,CAAC,OAAO,CAAC,MAAM,EAAE,QAAQ,IAAI,mBAAmB,EAAE,CAAC,CAAC;IAC3F,IAAI,MAAM,CAAC,cAAc,EAAE,CAAC;QAC1B,KAAK,CAAC,IAAI,CAAC,qBAAqB,MAAM,CAAC,UAAU,IAAI,CAAC,CAAC;IACzD,CAAC;IACD,IAAI,MAAM,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC/B,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACf,KAAK,CAAC,IAAI,CAAC,cAAc,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC;IACxD,C
AAC;IACD,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC;AACjC,CAAC"}
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
import { type ChildProcessWithoutNullStreams } from "node:child_process";
|
|
2
|
+
/**
 * Inputs for a single headless `claude -p` run driven by `runClaude`.
 */
export interface RunClaudeOptions {
    /** Claude executable to spawn: an absolute path, or a bare name resolved via PATH. */
    claudeBin: string;
    /** Text passed as the argument of `-p`. */
    prompt: string;
    /** Path handed to `--settings` (the settings.json produced by applying the manifest). */
    settingsPath: string;
    /** Value handed to `--session-id`. */
    sessionId: string;
    /** Working directory for the child. When omitted, no `cwd` is passed to spawn, so the parent's is used. */
    cwd?: string;
    /** Directory that receives the forensic `stream.jsonl` and `stderr.log` files; created if missing. */
    outputDir: string;
    /**
     * Wall-clock budget in milliseconds. When it elapses the child is sent
     * SIGTERM (followed by SIGKILL after a short grace period) and the run
     * resolves with `timedOut: true` once the child has closed.
     */
    timeoutMs: number;
    /**
     * Additional environment variables layered over `process.env`.
     * `HARNESS_POLICY_VERBOSE=1` is applied before this map, so a caller
     * that explicitly passes `HARNESS_POLICY_VERBOSE=0` here overrides it.
     * (Verbose is the default because `--expect-decision warn` reads the
     * stderr diagnostic.)
     */
    env?: Record<string, string>;
    /**
     * Spawn implementation, injectable for tests. Falls back to
     * `node:child_process.spawn`. It receives the same command, argv and
     * options that would be used for the real claude binary.
     */
    spawn?: (command: string, args: string[], options: {
        cwd?: string;
        env: NodeJS.ProcessEnv;
    }) => ChildProcessWithoutNullStreams;
}
|
|
33
|
+
/**
 * Outcome of a `runClaude` invocation.
 */
export interface RunClaudeResult {
    /**
     * Exit code reported by the child process. `null` when the child was
     * terminated by a signal or when spawning it raised an `error` event.
     */
    exitCode: number | null;
    /** Signal that terminated the child, or `null` if there was none. */
    signal: NodeJS.Signals | null;
    /** `true` when the wall-clock timeout fired and the child was killed. */
    timedOut: boolean;
    /** Path of the file that received the child's stdout (`stream.jsonl`). */
    streamPath: string;
    /** Path of the file that received the child's stderr (`stderr.log`). */
    stderrPath: string;
    /** In-memory copy of the child's stdout (the same data is persisted at `streamPath`). */
    streamText: string;
    /** In-memory copy of the child's stderr (the same data is persisted at `stderrPath`). */
    stderrText: string;
    /** Argument vector that was passed to claude; useful for debugging and README capture. */
    argv: string[];
    /** Elapsed wall-clock time of the run, in milliseconds. */
    durationMs: number;
}
|
|
50
|
+
/**
 * Build the argument vector passed to the claude binary: `-p <prompt>`,
 * `--session-id <sessionId>`, `--settings <settingsPath>`, followed by the
 * fixed headless flags.
 */
export declare function buildClaudeArgv(opts: {
    prompt: string;
    settingsPath: string;
    sessionId: string;
}): string[];
|
|
55
|
+
/**
 * Spawn claude with the given options, capture stdout/stderr to memory and
 * to files under `opts.outputDir`, and resolve once the child has closed
 * (or has been killed after `opts.timeoutMs`).
 */
export declare function runClaude(opts: RunClaudeOptions): Promise<RunClaudeResult>;
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
// Phase 7 follow-up: claude -p driver for `harness smoke`.
|
|
2
|
+
//
|
|
3
|
+
// Spawns `claude -p` with the canonical headless-dogfood flags, tees
|
|
4
|
+
// stdout to <output-dir>/stream.jsonl and stderr to
|
|
5
|
+
// <output-dir>/stderr.log so forensic files exist even when the run
|
|
6
|
+
// crashes or hits the timeout. Returns the captured streams + the
|
|
7
|
+
// claude exit code so the caller can run assertions.
|
|
8
|
+
import { spawn } from "node:child_process";
|
|
9
|
+
import * as fs from "node:fs";
|
|
10
|
+
import * as path from "node:path";
|
|
11
|
+
// Fixed flags appended to every `claude -p` invocation made by the smoke
// runner. Grouped as flag/value pairs for readability and flattened into the
// plain argv list that spawn expects.
const CLAUDE_FLAGS = [
    ["--output-format", "stream-json"],
    ["--include-hook-events"],
    ["--verbose"],
    ["--permission-mode", "bypassPermissions"],
].flat();
|
|
19
|
+
/**
 * Assemble the argument vector for a headless claude run.
 *
 * @param {{ prompt: string, sessionId: string, settingsPath: string }} opts
 *   Values for `-p`, `--session-id` and `--settings` respectively.
 * @returns {string[]} The per-run arguments followed by the shared CLAUDE_FLAGS.
 */
export function buildClaudeArgv(opts) {
    const { prompt, sessionId, settingsPath } = opts;
    const promptArgs = ["-p", prompt];
    const sessionArgs = ["--session-id", sessionId];
    const settingsArgs = ["--settings", settingsPath];
    return [...promptArgs, ...sessionArgs, ...settingsArgs, ...CLAUDE_FLAGS];
}
|
|
30
|
+
/**
 * Run `claude -p` once and capture everything it prints.
 *
 * stdout is teed to `<outputDir>/stream.jsonl` and stderr to
 * `<outputDir>/stderr.log` while the child runs, so forensic files exist
 * even if the run crashes or hits the timeout. The same data is returned
 * as text.
 *
 * @param {object} opts
 * @param {string} opts.claudeBin - Executable to spawn.
 * @param {string} opts.prompt - Prompt passed to `-p`.
 * @param {string} opts.settingsPath - Path passed to `--settings`.
 * @param {string} opts.sessionId - Value passed to `--session-id`.
 * @param {string} [opts.cwd] - Working directory for the child; only forwarded when defined.
 * @param {string} opts.outputDir - Directory for the capture files; created recursively.
 * @param {number} opts.timeoutMs - Wall-clock budget before the child is killed.
 * @param {Record<string, string>} [opts.env] - Extra env, applied after the built-in defaults.
 * @param {Function} [opts.spawn] - Spawn replacement (tests); defaults to `child_process.spawn`.
 * @returns {Promise<object>} `{ exitCode, signal, timedOut, streamPath, stderrPath,
 *   streamText, stderrText, argv, durationMs }`. `exitCode` and `signal` are both
 *   `null` when the child emitted `error` (e.g. the spawn itself failed).
 */
export async function runClaude(opts) {
    fs.mkdirSync(opts.outputDir, { recursive: true });
    const streamPath = path.join(opts.outputDir, "stream.jsonl");
    const stderrPath = path.join(opts.outputDir, "stderr.log");
    const streamWriter = fs.createWriteStream(streamPath);
    const stderrWriter = fs.createWriteStream(stderrPath);
    const argv = buildClaudeArgv(opts);
    const spawnFn = opts.spawn ?? spawn;
    const env = {
        ...process.env,
        // The verbose diagnostic block is how `--expect-decision warn`
        // becomes observable from the stream's hook_response.stderr field.
        // It sits before `opts.env` so an explicit caller value wins.
        HARNESS_POLICY_VERBOSE: "1",
        ...(opts.env ?? {}),
    };
    const child = spawnFn(opts.claudeBin, argv, {
        ...(opts.cwd !== undefined && { cwd: opts.cwd }),
        env,
    });
    // Raw chunks are collected and decoded once at the end. Decoding each
    // chunk on its own would turn a multi-byte UTF-8 character that straddles
    // a chunk boundary into U+FFFD replacement characters, making the
    // in-memory text disagree with the bytes written to disk.
    const stdoutChunks = [];
    const stderrChunks = [];
    const toBuffer = (chunk) => (Buffer.isBuffer(chunk) ? chunk : Buffer.from(String(chunk), "utf8"));
    let timedOut = false;
    const start = Date.now();
    child.stdout.on("data", (chunk) => {
        stdoutChunks.push(toBuffer(chunk));
        streamWriter.write(chunk);
    });
    child.stderr.on("data", (chunk) => {
        stderrChunks.push(toBuffer(chunk));
        stderrWriter.write(chunk);
    });
    // Both timers are kept in scope so the close listener can clear them
    // when claude exits before the budget; otherwise every fast run would
    // leave a pair of pending (unref'd) timeouts behind in a long-lived
    // parent process.
    let outerTimer = null;
    let killTimer = null;
    const clearTimers = () => {
        if (outerTimer) {
            clearTimeout(outerTimer);
            outerTimer = null;
        }
        if (killTimer) {
            clearTimeout(killTimer);
            killTimer = null;
        }
    };
    const exitPromise = new Promise((resolve) => {
        child.once("close", (code, signal) => {
            clearTimers();
            resolve({ code, signal });
        });
        // An `error` event (for instance a failed spawn) is reported as a
        // null exit code with no signal rather than as a rejection.
        child.once("error", () => {
            clearTimers();
            resolve({ code: null, signal: null });
        });
    });
    const timeoutPromise = new Promise((resolve) => {
        outerTimer = setTimeout(() => {
            timedOut = true;
            try {
                child.kill("SIGTERM");
            }
            catch {
                /* already gone */
            }
            // SIGKILL escalation after a short grace period so a wedged
            // claude does not hang the runner past `timeoutMs + epsilon`.
            killTimer = setTimeout(() => {
                try {
                    child.kill("SIGKILL");
                }
                catch {
                    /* already gone */
                }
            }, 2000);
            killTimer.unref();
            // After a timeout the run still resolves with the child's real
            // exit information, once the kill has taken effect.
            exitPromise.then(resolve);
        }, opts.timeoutMs);
        outerTimer.unref();
    });
    const { code, signal } = await Promise.race([exitPromise, timeoutPromise]);
    clearTimers();
    // Flush writers before returning so a caller that re-reads the files
    // sees the same bytes the in-memory text holds.
    await new Promise((resolve) => streamWriter.end(resolve));
    await new Promise((resolve) => stderrWriter.end(resolve));
    return {
        exitCode: code,
        signal,
        timedOut,
        streamPath,
        stderrPath,
        streamText: Buffer.concat(stdoutChunks).toString("utf8"),
        stderrText: Buffer.concat(stderrChunks).toString("utf8"),
        argv,
        durationMs: Date.now() - start,
    };
}
|
|
134
|
+
//# sourceMappingURL=runner.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"runner.js","sourceRoot":"","sources":["../../../src/cli/smoke/runner.ts"],"names":[],"mappings":"AAAA,2DAA2D;AAC3D,EAAE;AACF,qEAAqE;AACrE,oDAAoD;AACpD,oEAAoE;AACpE,kEAAkE;AAClE,qDAAqD;AAErD,OAAO,EAAuC,KAAK,EAAE,MAAM,oBAAoB,CAAC;AAChF,OAAO,KAAK,EAAE,MAAM,SAAS,CAAC;AAC9B,OAAO,KAAK,IAAI,MAAM,WAAW,CAAC;AAqDlC,MAAM,YAAY,GAAG;IACnB,iBAAiB;IACjB,aAAa;IACb,uBAAuB;IACvB,WAAW;IACX,mBAAmB;IACnB,mBAAmB;CACpB,CAAC;AAEF,MAAM,UAAU,eAAe,CAAC,IAI/B;IACC,OAAO;QACL,IAAI;QACJ,IAAI,CAAC,MAAM;QACX,cAAc;QACd,IAAI,CAAC,SAAS;QACd,YAAY;QACZ,IAAI,CAAC,YAAY;QACjB,GAAG,YAAY;KAChB,CAAC;AACJ,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,SAAS,CAC7B,IAAsB;IAEtB,EAAE,CAAC,SAAS,CAAC,IAAI,CAAC,SAAS,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAClD,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,cAAc,CAAC,CAAC;IAC7D,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,YAAY,CAAC,CAAC;IAC3D,MAAM,YAAY,GAAG,EAAE,CAAC,iBAAiB,CAAC,UAAU,CAAC,CAAC;IACtD,MAAM,YAAY,GAAG,EAAE,CAAC,iBAAiB,CAAC,UAAU,CAAC,CAAC;IAEtD,MAAM,IAAI,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC;IACnC,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,IAAI,KAAK,CAAC;IACpC,MAAM,GAAG,GAAsB;QAC7B,GAAG,OAAO,CAAC,GAAG;QACd,0EAA0E;QAC1E,mEAAmE;QACnE,sBAAsB,EAAE,GAAG;QAC3B,GAAG,CAAC,IAAI,CAAC,GAAG,IAAI,EAAE,CAAC;KACpB,CAAC;IAEF,MAAM,KAAK,GAAG,OAAO,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,EAAE;QAC1C,GAAG,CAAC,IAAI,CAAC,GAAG,KAAK,SAAS,IAAI,EAAE,GAAG,EAAE,IAAI,CAAC,GAAG,EAAE,CAAC;QAChD,GAAG;KACJ,CAAC,CAAC;IAEH,IAAI,UAAU,GAAG,EAAE,CAAC;IACpB,IAAI,UAAU,GAAG,EAAE,CAAC;IACpB,IAAI,QAAQ,GAAG,KAAK,CAAC;IACrB,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAEzB,KAAK,CAAC,MAAM,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC,KAAa,EAAE,EAAE;QACxC,MAAM,IAAI,GAAG,KAAK,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;QACpC,UAAU,IAAI,IAAI,CAAC;QACnB,YAAY,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;IAC5B,CAAC,CAAC,CAAC;IACH,KAAK,CAAC,MAAM,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC,KAAa,EAAE,EAAE;QACxC,MAAM,IAAI,GAAG,KAAK,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;QACpC,UAAU,IAAI,IAAI,CAAC;QACnB,YAAY,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;IAC5B,CAAC,CAAC,CAAC;IAEH,oEAAoE;IACpE,qEAAqE;IACrE,qEAAq
E;IACrE,sEAAsE;IACtE,6CAA6C;IAC7C,IAAI,UAAU,GAA0B,IAAI,CAAC;IAC7C,IAAI,SAAS,GAA0B,IAAI,CAAC;IAC5C,MAAM,WAAW,GAAG,GAAS,EAAE;QAC7B,IAAI,UAAU,EAAE,CAAC;YACf,YAAY,CAAC,UAAU,CAAC,CAAC;YACzB,UAAU,GAAG,IAAI,CAAC;QACpB,CAAC;QACD,IAAI,SAAS,EAAE,CAAC;YACd,YAAY,CAAC,SAAS,CAAC,CAAC;YACxB,SAAS,GAAG,IAAI,CAAC;QACnB,CAAC;IACH,CAAC,CAAC;IAEF,MAAM,WAAW,GAAG,IAAI,OAAO,CAC7B,CAAC,OAAO,EAAE,EAAE;QACV,KAAK,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC,IAAI,EAAE,MAAM,EAAE,EAAE;YACnC,WAAW,EAAE,CAAC;YACd,OAAO,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,CAAC,CAAC;QAC5B,CAAC,CAAC,CAAC;QACH,KAAK,CAAC,IAAI,CAAC,OAAO,EAAE,GAAG,EAAE;YACvB,WAAW,EAAE,CAAC;YACd,OAAO,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC;QACxC,CAAC,CAAC,CAAC;IACL,CAAC,CACF,CAAC;IAEF,MAAM,cAAc,GAAG,IAAI,OAAO,CAChC,CAAC,OAAO,EAAE,EAAE;QACV,UAAU,GAAG,UAAU,CAAC,GAAG,EAAE;YAC3B,QAAQ,GAAG,IAAI,CAAC;YAChB,IAAI,CAAC;gBACH,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YACxB,CAAC;YAAC,MAAM,CAAC;gBACP,kBAAkB;YACpB,CAAC;YACD,4DAA4D;YAC5D,8DAA8D;YAC9D,SAAS,GAAG,UAAU,CAAC,GAAG,EAAE;gBAC1B,IAAI,CAAC;oBACH,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;gBACxB,CAAC;gBAAC,MAAM,CAAC;oBACP,kBAAkB;gBACpB,CAAC;YACH,CAAC,EAAE,IAAI,CAAC,CAAC;YACT,SAAS,CAAC,KAAK,EAAE,CAAC;YAClB,+DAA+D;YAC/D,8DAA8D;YAC9D,WAAW,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QAC5B,CAAC,EAAE,IAAI,CAAC,SAAS,CAAC,CAAC;QACnB,UAAU,CAAC,KAAK,EAAE,CAAC;IACrB,CAAC,CACF,CAAC;IAEF,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,GAAG,MAAM,OAAO,CAAC,IAAI,CAAC,CAAC,WAAW,EAAE,cAAc,CAAC,CAAC,CAAC;IAC3E,WAAW,EAAE,CAAC;IAEd,qEAAqE;IACrE,gDAAgD;IAChD,MAAM,IAAI,OAAO,CAAO,CAAC,OAAO,EAAE,EAAE,CAAC,YAAY,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC;IAChE,MAAM,IAAI,OAAO,CAAO,CAAC,OAAO,EAAE,EAAE,CAAC,YAAY,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC;IAEhE,OAAO;QACL,QAAQ,EAAE,IAAI;QACd,MAAM;QACN,QAAQ;QACR,UAAU;QACV,UAAU;QACV,UAAU;QACV,UAAU;QACV,IAAI;QACJ,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK;KAC/B,CAAC;AACJ,CAAC"}
|