@crown-dev-studios/review-council 0.2.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -13
- package/SKILL.md +1 -1
- package/dist/orchestrate-review-council.js +250 -184
- package/dist/render-review-html.js +32 -4
- package/dist/stage-runtime.js +40 -0
- package/package.json +1 -1
- package/references/cli-integration.md +15 -18
- package/references/output-contract.md +58 -11
- package/dist/interaction-queue.js +0 -50
package/README.md
CHANGED
|
@@ -14,9 +14,9 @@ The orchestrator handles the happy path and common failure modes:
|
|
|
14
14
|
|
|
15
15
|
- Stage timeouts with two-phase kill (SIGTERM then SIGKILL) prevent hung runs
|
|
16
16
|
- Automatic retry with exponential backoff handles transient failures
|
|
17
|
-
-
|
|
17
|
+
- Every stage emits a JSONL stdout event stream for diagnosis
|
|
18
18
|
- Partial reviewer failure still allows the judge to run on available data
|
|
19
|
-
- Failed stages surface stderr excerpts
|
|
19
|
+
- Failed stages surface stderr excerpts, validation errors, warnings, and stream log paths in the HTML report
|
|
20
20
|
|
|
21
21
|
## Requirements
|
|
22
22
|
|
|
@@ -66,18 +66,13 @@ Main outputs:
|
|
|
66
66
|
--retries <n> Max retries per stage on failure (default: 2)
|
|
67
67
|
```
|
|
68
68
|
|
|
69
|
-
###
|
|
69
|
+
### Execution Contract
|
|
70
70
|
|
|
71
|
-
|
|
72
|
-
--claude-command <command> Override default Claude reviewer command
|
|
73
|
-
--codex-command <command> Override default Codex reviewer command
|
|
74
|
-
--judge-command <command> Override default judge command
|
|
75
|
-
--allow-missing-sentinel Treat exit code 0 as success without done.json
|
|
76
|
-
```
|
|
71
|
+
There are no command override or sentinel bypass flags. Review Council uses canonical built-in execution metadata for Claude, Codex, and the judge, and every executable stage still requires exit code `0` plus its expected artifacts plus `done.json`.
|
|
77
72
|
|
|
78
73
|
## Operational Rules
|
|
79
74
|
|
|
80
|
-
-
|
|
75
|
+
- Built-in reviewer commands are non-interactive and emit JSONL to stdout.
|
|
81
76
|
- Keep reviewer artifacts inside the run directory.
|
|
82
77
|
- Selected skills are passed into reviewer prompts as additional review lenses for the run; the orchestrator does not inline local `SKILL.md` contents.
|
|
83
78
|
- Do not create authoritative files in `todos/` during raw review.
|
|
@@ -90,9 +85,9 @@ If a run fails or stalls, inspect:
|
|
|
90
85
|
- `<run>/claude/status.json`
|
|
91
86
|
- `<run>/codex/status.json`
|
|
92
87
|
- `<run>/judge/status.json`
|
|
93
|
-
- each stage's `
|
|
88
|
+
- each stage's `stream.jsonl` and `stderr.log`
|
|
94
89
|
|
|
95
|
-
The `status.json` for each stage includes `exit_code`, `timed_out`, `attempts`, `
|
|
90
|
+
The `status.json` for each stage includes `exit_code`, `timed_out`, `attempts`, `missing_artifacts`, and `validation_errors`. Stages additionally record stream artifact paths, `last_activity_at`, `last_event_type`, `stream_event_count`, `stream_parse_errors`, and optional warnings.
|
|
96
91
|
|
|
97
92
|
If a stage exits `0` but does not write `done.json`, the stage is incomplete and the run should be treated as failed.
|
|
98
93
|
|
|
@@ -135,4 +130,3 @@ Update [`package.json`](package.json) `repository` / `homepage` / `bugs` if the
|
|
|
135
130
|
- [references/output-contract.md](references/output-contract.md)
|
|
136
131
|
- [src/orchestrate-review-council.ts](src/orchestrate-review-council.ts)
|
|
137
132
|
- [src/render-review-html.ts](src/render-review-html.ts)
|
|
138
|
-
- [src/interaction-queue.ts](src/interaction-queue.ts)
|
package/SKILL.md
CHANGED
|
@@ -135,7 +135,7 @@ Add `docs/reviews/` to `.gitignore` to keep review artifacts out of version cont
|
|
|
135
135
|
- Do not create files in `todos/` — the judge recommends todos and Review Council derives `follow-ups.md`, but neither creates authoritative todo files.
|
|
136
136
|
- Skills are passed to each model reviewer as additional review lenses for the run, not inlined prompt bodies.
|
|
137
137
|
- Model reviewers (Claude, Codex) run as CLI processes via the TS orchestrator.
|
|
138
|
-
- Interactive prompts from reviewer CLIs are detected and relayed; prefer explicit non-interactive mode (`claude --dangerously-skip-permissions -p`, `codex exec --
|
|
138
|
+
- Interactive prompts from reviewer CLIs are detected and relayed; prefer explicit non-interactive mode (`claude --dangerously-skip-permissions -p`, `codex exec --dangerously-bypass-approvals-and-sandbox`) for reliability.
|
|
139
139
|
|
|
140
140
|
## Supporting Files
|
|
141
141
|
|
|
@@ -4,12 +4,9 @@ import { basename, resolve } from "node:path";
|
|
|
4
4
|
import { finished } from "node:stream/promises";
|
|
5
5
|
import { fileURLToPath } from "node:url";
|
|
6
6
|
import { parseArgs } from "node:util";
|
|
7
|
-
import { close as closeInteractionQueue, enqueue } from "./interaction-queue.js";
|
|
8
7
|
import { buildReviewPaths, createRunId, normalizeReviewTarget, } from "./review-session.js";
|
|
9
8
|
import { renderRunDir, writeFollowUpsMarkdown } from "./render-review-html.js";
|
|
10
|
-
|
|
11
|
-
const PROMPT_SILENCE_MS = 3000;
|
|
12
|
-
const PROMPT_CHECK_INTERVAL_MS = 2000;
|
|
9
|
+
import { createStageExecution } from "./stage-runtime.js";
|
|
13
10
|
const DEFAULT_TIMEOUT_MS = 300000;
|
|
14
11
|
const DEFAULT_MAX_RETRIES = 2;
|
|
15
12
|
const REVIEW_PROFILE_TEMPLATES = {
|
|
@@ -18,9 +15,6 @@ const REVIEW_PROFILE_TEMPLATES = {
|
|
|
18
15
|
const JUDGE_PROFILE_TEMPLATES = {
|
|
19
16
|
default: "judge.md",
|
|
20
17
|
};
|
|
21
|
-
const DEFAULT_CLAUDE_COMMAND = 'claude --dangerously-skip-permissions -p "$(cat $CLAUDE_DIR/claude-review-export.md)"';
|
|
22
|
-
const DEFAULT_CODEX_COMMAND = 'codex exec --full-auto "$(cat $CODEX_DIR/codex-review-export.md)"';
|
|
23
|
-
const DEFAULT_JUDGE_COMMAND = 'codex exec --full-auto "$(cat $JUDGE_DIR/judge.md)"';
|
|
24
18
|
function nowIso() {
|
|
25
19
|
return new Date().toISOString();
|
|
26
20
|
}
|
|
@@ -122,53 +116,84 @@ function resolvePromptSelection(packageDir, kind, profileId, overridePath) {
|
|
|
122
116
|
profileId,
|
|
123
117
|
};
|
|
124
118
|
}
|
|
125
|
-
function cleanupStageFiles(
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
"status.json",
|
|
130
|
-
"done.json",
|
|
131
|
-
...
|
|
132
|
-
])
|
|
133
|
-
|
|
119
|
+
function cleanupStageFiles(stage) {
|
|
120
|
+
const artifactPaths = new Set([
|
|
121
|
+
stage.execution.artifacts.streamLog,
|
|
122
|
+
stage.execution.artifacts.stderrLog,
|
|
123
|
+
resolve(stage.stageDir, "status.json"),
|
|
124
|
+
resolve(stage.stageDir, "done.json"),
|
|
125
|
+
...stage.requiredArtifacts.map((artifactName) => resolve(stage.stageDir, artifactName)),
|
|
126
|
+
]);
|
|
127
|
+
for (const artifactPath of artifactPaths) {
|
|
128
|
+
rmSync(artifactPath, { force: true, recursive: false });
|
|
129
|
+
}
|
|
130
|
+
}
|
|
131
|
+
function extractEventType(value) {
|
|
132
|
+
if (typeof value !== "object" || value === null || Array.isArray(value)) {
|
|
133
|
+
return undefined;
|
|
134
134
|
}
|
|
135
|
+
const type = value.type;
|
|
136
|
+
return typeof type === "string" && type.length > 0 ? type : undefined;
|
|
135
137
|
}
|
|
136
|
-
async function runStageOnce(name, command, stageDir, workdir, timeoutMs, commandEnv) {
|
|
137
|
-
const
|
|
138
|
-
const
|
|
138
|
+
async function runStageOnce(stage, workdir, timeoutMs, commandEnv) {
|
|
139
|
+
const { execution } = stage;
|
|
140
|
+
const streamPath = execution.artifacts.streamLog;
|
|
141
|
+
const stderrPath = execution.artifacts.stderrLog;
|
|
139
142
|
const startedAt = nowIso();
|
|
140
|
-
const
|
|
143
|
+
const streamFile = createWriteStream(streamPath);
|
|
141
144
|
const stderrFile = createWriteStream(stderrPath);
|
|
142
|
-
const child = spawn("/bin/sh", ["-c", command], {
|
|
145
|
+
const child = spawn("/bin/sh", ["-c", execution.command], {
|
|
143
146
|
cwd: workdir,
|
|
144
147
|
stdio: ["pipe", "pipe", "pipe"],
|
|
145
148
|
env: { ...process.env, ...commandEnv },
|
|
146
149
|
});
|
|
147
|
-
child.
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
let
|
|
151
|
-
let
|
|
150
|
+
child.stdin.end();
|
|
151
|
+
const warnings = [];
|
|
152
|
+
let lastActivityAt;
|
|
153
|
+
let lastEventType;
|
|
154
|
+
let streamBuffer = "";
|
|
155
|
+
let streamEventCount = 0;
|
|
156
|
+
let streamParseErrors = 0;
|
|
157
|
+
const recordActivity = (eventType) => {
|
|
158
|
+
lastActivityAt = nowIso();
|
|
159
|
+
if (eventType) {
|
|
160
|
+
lastEventType = eventType;
|
|
161
|
+
}
|
|
162
|
+
};
|
|
163
|
+
const processStructuredLine = (line) => {
|
|
164
|
+
const trimmed = line.trim();
|
|
165
|
+
if (!trimmed) {
|
|
166
|
+
return;
|
|
167
|
+
}
|
|
168
|
+
recordActivity(lastEventType);
|
|
169
|
+
try {
|
|
170
|
+
const parsed = JSON.parse(trimmed);
|
|
171
|
+
streamEventCount += 1;
|
|
172
|
+
lastEventType = extractEventType(parsed) ?? "stream-event";
|
|
173
|
+
}
|
|
174
|
+
catch {
|
|
175
|
+
streamParseErrors += 1;
|
|
176
|
+
lastEventType = "stream-parse-error";
|
|
177
|
+
warnings.push(`Failed to parse ${stage.name} stream output line ${streamParseErrors}.`);
|
|
178
|
+
}
|
|
179
|
+
};
|
|
152
180
|
child.stdout.on("data", (chunk) => {
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
const
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
enqueue({
|
|
165
|
-
stage: name,
|
|
166
|
-
prompt: promptText,
|
|
167
|
-
stdinPipe: child.stdin,
|
|
168
|
-
resolve: () => { },
|
|
169
|
-
});
|
|
181
|
+
streamFile.write(chunk);
|
|
182
|
+
const text = chunk.toString();
|
|
183
|
+
streamBuffer += text;
|
|
184
|
+
while (true) {
|
|
185
|
+
const newlineIndex = streamBuffer.indexOf("\n");
|
|
186
|
+
if (newlineIndex === -1) {
|
|
187
|
+
break;
|
|
188
|
+
}
|
|
189
|
+
const line = streamBuffer.slice(0, newlineIndex);
|
|
190
|
+
streamBuffer = streamBuffer.slice(newlineIndex + 1);
|
|
191
|
+
processStructuredLine(line);
|
|
170
192
|
}
|
|
171
|
-
}
|
|
193
|
+
});
|
|
194
|
+
child.stderr.on("data", (chunk) => {
|
|
195
|
+
stderrFile.write(chunk);
|
|
196
|
+
});
|
|
172
197
|
let timedOut = false;
|
|
173
198
|
let killTimer = null;
|
|
174
199
|
const timeoutTimer = setTimeout(() => {
|
|
@@ -190,17 +215,13 @@ async function runStageOnce(name, command, stageDir, workdir, timeoutMs, command
|
|
|
190
215
|
clearTimeout(timeoutTimer);
|
|
191
216
|
if (killTimer)
|
|
192
217
|
clearTimeout(killTimer);
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
child.stdin.end();
|
|
218
|
+
if (streamBuffer.trim().length > 0) {
|
|
219
|
+
processStructuredLine(streamBuffer);
|
|
196
220
|
}
|
|
197
|
-
|
|
198
|
-
// The pipe may already be closed.
|
|
199
|
-
}
|
|
200
|
-
stdoutFile.end();
|
|
221
|
+
streamFile.end();
|
|
201
222
|
stderrFile.end();
|
|
202
223
|
await Promise.all([
|
|
203
|
-
finished(
|
|
224
|
+
finished(streamFile),
|
|
204
225
|
finished(stderrFile),
|
|
205
226
|
]);
|
|
206
227
|
return {
|
|
@@ -208,16 +229,18 @@ async function runStageOnce(name, command, stageDir, workdir, timeoutMs, command
|
|
|
208
229
|
timedOut,
|
|
209
230
|
startedAt,
|
|
210
231
|
finishedAt: nowIso(),
|
|
211
|
-
|
|
232
|
+
streamPath,
|
|
212
233
|
stderrPath,
|
|
234
|
+
lastActivityAt,
|
|
235
|
+
lastEventType,
|
|
236
|
+
streamEventCount,
|
|
237
|
+
streamParseErrors,
|
|
238
|
+
warnings,
|
|
213
239
|
};
|
|
214
240
|
}
|
|
215
|
-
export function evaluateStageArtifacts(stage, attempt, requireSentinel) {
|
|
241
|
+
export function evaluateStageArtifacts(stage, attempt) {
|
|
216
242
|
const artifactPresence = {};
|
|
217
|
-
const requiredArtifacts = [...stage.requiredArtifacts];
|
|
218
|
-
if (requireSentinel) {
|
|
219
|
-
requiredArtifacts.push("done.json");
|
|
220
|
-
}
|
|
243
|
+
const requiredArtifacts = [...stage.requiredArtifacts, "done.json"];
|
|
221
244
|
for (const artifactName of requiredArtifacts) {
|
|
222
245
|
artifactPresence[artifactName] = existsSync(resolve(stage.stageDir, artifactName));
|
|
223
246
|
}
|
|
@@ -228,6 +251,7 @@ export function evaluateStageArtifacts(stage, attempt, requireSentinel) {
|
|
|
228
251
|
failureReason: "timeout",
|
|
229
252
|
artifactPresence,
|
|
230
253
|
missingArtifacts,
|
|
254
|
+
validationErrors: [],
|
|
231
255
|
};
|
|
232
256
|
}
|
|
233
257
|
if (attempt.exitCode !== 0) {
|
|
@@ -236,6 +260,7 @@ export function evaluateStageArtifacts(stage, attempt, requireSentinel) {
|
|
|
236
260
|
failureReason: "process_failed",
|
|
237
261
|
artifactPresence,
|
|
238
262
|
missingArtifacts,
|
|
263
|
+
validationErrors: [],
|
|
239
264
|
};
|
|
240
265
|
}
|
|
241
266
|
if (missingArtifacts.length > 0) {
|
|
@@ -244,21 +269,24 @@ export function evaluateStageArtifacts(stage, attempt, requireSentinel) {
|
|
|
244
269
|
failureReason: "missing_artifacts",
|
|
245
270
|
artifactPresence,
|
|
246
271
|
missingArtifacts,
|
|
272
|
+
validationErrors: [],
|
|
247
273
|
};
|
|
248
274
|
}
|
|
249
|
-
const
|
|
250
|
-
if (
|
|
275
|
+
const validationErrors = validateJsonArtifact(resolve(stage.stageDir, stage.jsonArtifactName), stage.jsonArtifactName);
|
|
276
|
+
if (validationErrors.length > 0) {
|
|
251
277
|
return {
|
|
252
278
|
success: false,
|
|
253
279
|
failureReason: "invalid_artifacts",
|
|
254
280
|
artifactPresence,
|
|
255
281
|
missingArtifacts,
|
|
282
|
+
validationErrors,
|
|
256
283
|
};
|
|
257
284
|
}
|
|
258
285
|
return {
|
|
259
286
|
success: true,
|
|
260
287
|
artifactPresence,
|
|
261
288
|
missingArtifacts,
|
|
289
|
+
validationErrors: [],
|
|
262
290
|
};
|
|
263
291
|
}
|
|
264
292
|
function validateJsonArtifact(artifactPath, artifactName) {
|
|
@@ -267,43 +295,57 @@ function validateJsonArtifact(artifactPath, artifactName) {
|
|
|
267
295
|
parsed = JSON.parse(readFileSync(artifactPath, "utf8"));
|
|
268
296
|
}
|
|
269
297
|
catch {
|
|
270
|
-
return
|
|
298
|
+
return [{ path: "$", message: "File is not valid JSON." }];
|
|
271
299
|
}
|
|
272
300
|
if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
|
|
273
|
-
return
|
|
301
|
+
return [{ path: "$", message: "Expected a JSON object." }];
|
|
274
302
|
}
|
|
275
303
|
if (artifactName === "findings.json") {
|
|
276
|
-
return Array.isArray(parsed.findings)
|
|
304
|
+
return Array.isArray(parsed.findings)
|
|
305
|
+
? []
|
|
306
|
+
: [{ path: "findings", message: "Expected findings to be an array." }];
|
|
277
307
|
}
|
|
278
308
|
const verdict = parsed;
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
309
|
+
const errors = [];
|
|
310
|
+
if (typeof verdict.overall_verdict !== "string") {
|
|
311
|
+
errors.push({ path: "overall_verdict", message: "Expected overall_verdict to be a string." });
|
|
312
|
+
}
|
|
313
|
+
if (!Array.isArray(verdict.confirmed_findings)) {
|
|
314
|
+
errors.push({ path: "confirmed_findings", message: "Expected confirmed_findings to be an array." });
|
|
315
|
+
}
|
|
316
|
+
if (!Array.isArray(verdict.contested_findings)) {
|
|
317
|
+
errors.push({ path: "contested_findings", message: "Expected contested_findings to be an array." });
|
|
318
|
+
}
|
|
319
|
+
if (!Array.isArray(verdict.rejected_findings)) {
|
|
320
|
+
errors.push({ path: "rejected_findings", message: "Expected rejected_findings to be an array." });
|
|
321
|
+
}
|
|
322
|
+
return errors;
|
|
283
323
|
}
|
|
284
|
-
function writeStageStatus(statusPath, stage,
|
|
324
|
+
function writeStageStatus(statusPath, stage, attempt, evaluation, attempts) {
|
|
285
325
|
writeFileSync(statusPath, `${JSON.stringify({
|
|
286
326
|
stage: stage.name,
|
|
287
|
-
|
|
327
|
+
command_id: stage.execution.commandId,
|
|
328
|
+
command: stage.execution.command,
|
|
288
329
|
started_at: attempt.startedAt,
|
|
289
330
|
finished_at: attempt.finishedAt,
|
|
290
331
|
exit_code: attempt.exitCode,
|
|
291
|
-
require_sentinel: requireSentinel,
|
|
292
|
-
done_file_present: evaluation.artifactPresence["done.json"] ?? false,
|
|
293
332
|
success: evaluation.success,
|
|
294
333
|
timed_out: attempt.timedOut,
|
|
295
334
|
attempts,
|
|
296
|
-
|
|
297
|
-
stdout_log: attempt.stdoutPath,
|
|
335
|
+
stream_log: attempt.streamPath,
|
|
298
336
|
stderr_log: attempt.stderrPath,
|
|
337
|
+
last_activity_at: attempt.lastActivityAt,
|
|
338
|
+
last_event_type: attempt.lastEventType,
|
|
339
|
+
stream_event_count: attempt.streamEventCount,
|
|
340
|
+
stream_parse_errors: attempt.streamParseErrors,
|
|
341
|
+
artifact_presence: evaluation.artifactPresence,
|
|
342
|
+
missing_artifacts: evaluation.missingArtifacts,
|
|
343
|
+
validation_errors: evaluation.validationErrors,
|
|
344
|
+
...(attempt.warnings.length > 0 ? { warnings: attempt.warnings } : {}),
|
|
299
345
|
}, null, 2)}\n`);
|
|
300
346
|
}
|
|
301
|
-
async function runStage(stage, workdir, requireSentinel, timeoutMs, maxRetries, commandEnv) {
|
|
302
|
-
if (!stage.command) {
|
|
303
|
-
return null;
|
|
304
|
-
}
|
|
347
|
+
async function runStage(stage, workdir, timeoutMs, maxRetries, commandEnv) {
|
|
305
348
|
const statusPath = resolve(stage.stageDir, "status.json");
|
|
306
|
-
const command = stage.command;
|
|
307
349
|
let attempts = 0;
|
|
308
350
|
let lastAttempt = null;
|
|
309
351
|
let lastEvaluation = null;
|
|
@@ -314,10 +356,10 @@ async function runStage(stage, workdir, requireSentinel, timeoutMs, maxRetries,
|
|
|
314
356
|
await new Promise((resolveDelay) => setTimeout(resolveDelay, delayMs));
|
|
315
357
|
}
|
|
316
358
|
attempts = attemptIndex + 1;
|
|
317
|
-
cleanupStageFiles(stage
|
|
318
|
-
lastAttempt = await runStageOnce(stage
|
|
319
|
-
lastEvaluation = evaluateStageArtifacts(stage, lastAttempt
|
|
320
|
-
writeStageStatus(statusPath, stage,
|
|
359
|
+
cleanupStageFiles(stage);
|
|
360
|
+
lastAttempt = await runStageOnce(stage, workdir, timeoutMs, commandEnv);
|
|
361
|
+
lastEvaluation = evaluateStageArtifacts(stage, lastAttempt);
|
|
362
|
+
writeStageStatus(statusPath, stage, lastAttempt, lastEvaluation, attempts);
|
|
321
363
|
if (lastEvaluation.success || lastAttempt.timedOut) {
|
|
322
364
|
break;
|
|
323
365
|
}
|
|
@@ -334,80 +376,89 @@ async function runStage(stage, workdir, requireSentinel, timeoutMs, maxRetries,
|
|
|
334
376
|
failure_reason: lastEvaluation.failureReason,
|
|
335
377
|
missing_artifacts: lastEvaluation.missingArtifacts,
|
|
336
378
|
artifact_presence: lastEvaluation.artifactPresence,
|
|
379
|
+
validation_errors: lastEvaluation.validationErrors,
|
|
337
380
|
};
|
|
338
381
|
}
|
|
339
382
|
export function parseCliOptions(args) {
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
383
|
+
let values;
|
|
384
|
+
const getString = (value) => typeof value === "string" ? value : undefined;
|
|
385
|
+
const getBoolean = (value) => value === true;
|
|
386
|
+
try {
|
|
387
|
+
({ values } = parseArgs({
|
|
388
|
+
args,
|
|
389
|
+
allowPositionals: false,
|
|
390
|
+
options: {
|
|
391
|
+
target: { type: "string" },
|
|
392
|
+
"run-dir": { type: "string" },
|
|
393
|
+
"review-profile": { type: "string" },
|
|
394
|
+
"judge-profile": { type: "string" },
|
|
395
|
+
"claude-prompt-template": { type: "string" },
|
|
396
|
+
"codex-prompt-template": { type: "string" },
|
|
397
|
+
"judge-prompt-template": { type: "string" },
|
|
398
|
+
"skill-paths": { type: "string" },
|
|
399
|
+
"no-claude": { type: "boolean" },
|
|
400
|
+
"no-codex": { type: "boolean" },
|
|
401
|
+
"skip-judge": { type: "boolean" },
|
|
402
|
+
"skip-html": { type: "boolean" },
|
|
403
|
+
"open-html": { type: "boolean" },
|
|
404
|
+
timeout: { type: "string" },
|
|
405
|
+
retries: { type: "string" },
|
|
406
|
+
help: { type: "boolean", short: "h" },
|
|
407
|
+
},
|
|
408
|
+
}));
|
|
409
|
+
}
|
|
410
|
+
catch (error) {
|
|
411
|
+
console.error(error instanceof Error ? error.message : String(error));
|
|
412
|
+
process.exitCode = 1;
|
|
413
|
+
return null;
|
|
414
|
+
}
|
|
366
415
|
if (values.help) {
|
|
367
416
|
printHelp();
|
|
368
417
|
return null;
|
|
369
418
|
}
|
|
370
|
-
|
|
419
|
+
const target = getString(values.target);
|
|
420
|
+
if (!target) {
|
|
371
421
|
console.error("Error: --target is required.");
|
|
372
422
|
printHelp();
|
|
373
423
|
process.exitCode = 1;
|
|
374
424
|
return null;
|
|
375
425
|
}
|
|
376
|
-
const
|
|
426
|
+
const timeoutValue = getString(values.timeout);
|
|
427
|
+
const timeoutMs = timeoutValue ? parseInt(timeoutValue, 10) : DEFAULT_TIMEOUT_MS;
|
|
377
428
|
if (Number.isNaN(timeoutMs) || timeoutMs <= 0) {
|
|
378
|
-
console.error(`Invalid --timeout: "${
|
|
429
|
+
console.error(`Invalid --timeout: "${timeoutValue}". Must be a positive integer (ms).`);
|
|
379
430
|
process.exitCode = 1;
|
|
380
431
|
return null;
|
|
381
432
|
}
|
|
382
|
-
const
|
|
433
|
+
const retriesValue = getString(values.retries);
|
|
434
|
+
const maxRetries = retriesValue ? parseInt(retriesValue, 10) : DEFAULT_MAX_RETRIES;
|
|
383
435
|
if (Number.isNaN(maxRetries) || maxRetries < 0) {
|
|
384
|
-
console.error(`Invalid --retries: "${
|
|
436
|
+
console.error(`Invalid --retries: "${retriesValue}". Must be a non-negative integer.`);
|
|
385
437
|
process.exitCode = 1;
|
|
386
438
|
return null;
|
|
387
439
|
}
|
|
388
|
-
const noClaude = values["no-claude"]
|
|
389
|
-
const noCodex = values["no-codex"]
|
|
440
|
+
const noClaude = getBoolean(values["no-claude"]);
|
|
441
|
+
const noCodex = getBoolean(values["no-codex"]);
|
|
390
442
|
if (noClaude && noCodex) {
|
|
391
443
|
console.error("Cannot use both --no-claude and --no-codex. At least one model reviewer is required.");
|
|
392
444
|
process.exitCode = 1;
|
|
393
445
|
return null;
|
|
394
446
|
}
|
|
447
|
+
const skillPathsValue = getString(values["skill-paths"]);
|
|
395
448
|
return {
|
|
396
|
-
target
|
|
397
|
-
runDir: values["run-dir"],
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
openHtml: values["open-html"] ?? false,
|
|
410
|
-
skillPaths: values["skill-paths"] ? normalizeSkillPaths(values["skill-paths"]) : undefined,
|
|
449
|
+
target,
|
|
450
|
+
runDir: getString(values["run-dir"]),
|
|
451
|
+
enableClaude: !noClaude,
|
|
452
|
+
enableCodex: !noCodex,
|
|
453
|
+
reviewProfileId: getString(values["review-profile"]) ?? "default",
|
|
454
|
+
judgeProfileId: getString(values["judge-profile"]) ?? "default",
|
|
455
|
+
claudePromptTemplate: getString(values["claude-prompt-template"]),
|
|
456
|
+
codexPromptTemplate: getString(values["codex-prompt-template"]),
|
|
457
|
+
judgePromptTemplate: getString(values["judge-prompt-template"]),
|
|
458
|
+
skipJudge: getBoolean(values["skip-judge"]),
|
|
459
|
+
skipHtml: getBoolean(values["skip-html"]),
|
|
460
|
+
openHtml: getBoolean(values["open-html"]),
|
|
461
|
+
skillPaths: skillPathsValue ? normalizeSkillPaths(skillPathsValue) : undefined,
|
|
411
462
|
timeoutMs,
|
|
412
463
|
maxRetries,
|
|
413
464
|
};
|
|
@@ -421,14 +472,14 @@ function resolvePromptSelections(packageDir, options, judgeEnabled) {
|
|
|
421
472
|
: null,
|
|
422
473
|
};
|
|
423
474
|
}
|
|
424
|
-
function createStageDefinitions(paths, promptSelections, commands, reviewTarget, reviewSchemaPath, judgeSchemaPath, skillReferences) {
|
|
425
|
-
const reviewerStages = [
|
|
426
|
-
|
|
475
|
+
function createStageDefinitions(paths, promptSelections, enabledStages, reviewTarget, reviewSchemaPath, judgeSchemaPath, skillReferences) {
|
|
476
|
+
const reviewerStages = [];
|
|
477
|
+
if (enabledStages.claude) {
|
|
478
|
+
reviewerStages.push({
|
|
427
479
|
name: "claude",
|
|
428
|
-
displayName: "Claude",
|
|
429
|
-
command: commands.claude,
|
|
430
480
|
stageDir: paths.claudeDir,
|
|
431
481
|
promptOutputName: "claude-review-export.md",
|
|
482
|
+
execution: createStageExecution("claude", paths.claudeDir, "claude-review-export.md"),
|
|
432
483
|
promptTemplatePath: promptSelections.claude.templatePath,
|
|
433
484
|
promptTemplateSource: promptSelections.claude.source,
|
|
434
485
|
requiredArtifacts: ["report.md", "findings.json"],
|
|
@@ -443,13 +494,14 @@ function createStageDefinitions(paths, promptSelections, commands, reviewTarget,
|
|
|
443
494
|
REVIEWER_NAME_LOWER: "claude",
|
|
444
495
|
SKILL_REFERENCES: skillReferences,
|
|
445
496
|
},
|
|
446
|
-
}
|
|
447
|
-
|
|
497
|
+
});
|
|
498
|
+
}
|
|
499
|
+
if (enabledStages.codex) {
|
|
500
|
+
reviewerStages.push({
|
|
448
501
|
name: "codex",
|
|
449
|
-
displayName: "Codex",
|
|
450
|
-
command: commands.codex,
|
|
451
502
|
stageDir: paths.codexDir,
|
|
452
503
|
promptOutputName: "codex-review-export.md",
|
|
504
|
+
execution: createStageExecution("codex", paths.codexDir, "codex-review-export.md"),
|
|
453
505
|
promptTemplatePath: promptSelections.codex.templatePath,
|
|
454
506
|
promptTemplateSource: promptSelections.codex.source,
|
|
455
507
|
requiredArtifacts: ["report.md", "findings.json"],
|
|
@@ -464,19 +516,18 @@ function createStageDefinitions(paths, promptSelections, commands, reviewTarget,
|
|
|
464
516
|
REVIEWER_NAME_LOWER: "codex",
|
|
465
517
|
SKILL_REFERENCES: skillReferences,
|
|
466
518
|
},
|
|
467
|
-
}
|
|
468
|
-
|
|
469
|
-
if (!promptSelections.judge) {
|
|
519
|
+
});
|
|
520
|
+
}
|
|
521
|
+
if (!enabledStages.judge || !promptSelections.judge) {
|
|
470
522
|
return reviewerStages;
|
|
471
523
|
}
|
|
472
524
|
return [
|
|
473
525
|
...reviewerStages,
|
|
474
526
|
{
|
|
475
527
|
name: "judge",
|
|
476
|
-
displayName: "Judge",
|
|
477
|
-
command: commands.judge,
|
|
478
528
|
stageDir: paths.judgeDir,
|
|
479
529
|
promptOutputName: "judge.md",
|
|
530
|
+
execution: createStageExecution("judge", paths.judgeDir, "judge.md"),
|
|
480
531
|
promptTemplatePath: promptSelections.judge.templatePath,
|
|
481
532
|
promptTemplateSource: promptSelections.judge.source,
|
|
482
533
|
requiredArtifacts: ["summary.md", "verdict.json"],
|
|
@@ -491,8 +542,24 @@ function createStageDefinitions(paths, promptSelections, commands, reviewTarget,
|
|
|
491
542
|
},
|
|
492
543
|
];
|
|
493
544
|
}
|
|
545
|
+
function serializeExecutionMetadata(stage) {
|
|
546
|
+
if (!stage) {
|
|
547
|
+
return null;
|
|
548
|
+
}
|
|
549
|
+
return {
|
|
550
|
+
stage_dir: stage.stageDir,
|
|
551
|
+
prompt_output_name: stage.promptOutputName,
|
|
552
|
+
command_id: stage.execution.commandId,
|
|
553
|
+
command: stage.execution.command,
|
|
554
|
+
artifacts: {
|
|
555
|
+
stream_log: stage.execution.artifacts.streamLog,
|
|
556
|
+
stderr_log: stage.execution.artifacts.stderrLog,
|
|
557
|
+
},
|
|
558
|
+
};
|
|
559
|
+
}
|
|
494
560
|
function writeRunMetadata(preparedRun) {
|
|
495
|
-
const { options, cwd, packageDir, reviewTarget, runId, judgeEnabled, paths, promptSelections, } = preparedRun;
|
|
561
|
+
const { options, cwd, packageDir, reviewTarget, runId, judgeEnabled, paths, promptSelections, stageDefinitions, } = preparedRun;
|
|
562
|
+
const stageIndex = new Map(stageDefinitions.map((stage) => [stage.name, stage]));
|
|
496
563
|
writeFileSync(resolve(paths.runDir, "run.json"), `${JSON.stringify({
|
|
497
564
|
run_id: runId,
|
|
498
565
|
review_target: reviewTarget,
|
|
@@ -517,10 +584,11 @@ function writeRunMetadata(preparedRun) {
|
|
|
517
584
|
source: promptSelections.judge?.source ?? null,
|
|
518
585
|
},
|
|
519
586
|
},
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
587
|
+
judge_enabled: judgeEnabled,
|
|
588
|
+
stage_executions: {
|
|
589
|
+
claude: serializeExecutionMetadata(stageIndex.get("claude")),
|
|
590
|
+
codex: serializeExecutionMetadata(stageIndex.get("codex")),
|
|
591
|
+
judge: serializeExecutionMetadata(stageIndex.get("judge")),
|
|
524
592
|
},
|
|
525
593
|
}, null, 2)}\n`);
|
|
526
594
|
}
|
|
@@ -531,7 +599,7 @@ function prepareRun(options) {
|
|
|
531
599
|
const reviewTarget = normalizeReviewTarget(options.target);
|
|
532
600
|
const runId = createRunId();
|
|
533
601
|
const paths = buildReviewPaths(cwd, runId, options.runDir);
|
|
534
|
-
const judgeEnabled = !options.skipJudge
|
|
602
|
+
const judgeEnabled = !options.skipJudge;
|
|
535
603
|
let promptSelections;
|
|
536
604
|
try {
|
|
537
605
|
promptSelections = resolvePromptSelections(packageDir, options, judgeEnabled);
|
|
@@ -557,33 +625,37 @@ function prepareRun(options) {
|
|
|
557
625
|
REVIEW_SCHEMA: reviewSchemaPath,
|
|
558
626
|
JUDGE_SCHEMA: judgeSchemaPath,
|
|
559
627
|
};
|
|
560
|
-
const rawCommands = {
|
|
561
|
-
claude: options.claudeCommand,
|
|
562
|
-
codex: options.codexCommand,
|
|
563
|
-
judge: judgeEnabled ? options.judgeCommand : undefined,
|
|
564
|
-
};
|
|
565
628
|
const skillReferences = buildSkillReferencesSection(options.skillPaths ?? []);
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
runId,
|
|
576
|
-
judgeEnabled,
|
|
577
|
-
requireSentinel: !options.allowMissingSentinel,
|
|
578
|
-
paths,
|
|
579
|
-
promptSelections,
|
|
580
|
-
commandEnv,
|
|
581
|
-
stageDefinitions,
|
|
582
|
-
});
|
|
583
|
-
for (const command of Object.values(rawCommands)) {
|
|
584
|
-
if (command) {
|
|
585
|
-
assertBinaryExists(command, cwd);
|
|
629
|
+
let stageDefinitions;
|
|
630
|
+
try {
|
|
631
|
+
stageDefinitions = createStageDefinitions(paths, promptSelections, {
|
|
632
|
+
claude: options.enableClaude,
|
|
633
|
+
codex: options.enableCodex,
|
|
634
|
+
judge: judgeEnabled,
|
|
635
|
+
}, reviewTarget, reviewSchemaPath, judgeSchemaPath, skillReferences);
|
|
636
|
+
for (const stage of stageDefinitions) {
|
|
637
|
+
renderTemplate(stage.promptTemplatePath, stage.stageVars, resolve(stage.stageDir, stage.promptOutputName));
|
|
586
638
|
}
|
|
639
|
+
writeRunMetadata({
|
|
640
|
+
options,
|
|
641
|
+
cwd,
|
|
642
|
+
packageDir,
|
|
643
|
+
reviewTarget,
|
|
644
|
+
runId,
|
|
645
|
+
judgeEnabled,
|
|
646
|
+
paths,
|
|
647
|
+
promptSelections,
|
|
648
|
+
commandEnv,
|
|
649
|
+
stageDefinitions,
|
|
650
|
+
});
|
|
651
|
+
for (const stage of stageDefinitions) {
|
|
652
|
+
assertBinaryExists(stage.execution.command, cwd);
|
|
653
|
+
}
|
|
654
|
+
}
|
|
655
|
+
catch (error) {
|
|
656
|
+
console.error(error instanceof Error ? error.message : String(error));
|
|
657
|
+
process.exitCode = 1;
|
|
658
|
+
return null;
|
|
587
659
|
}
|
|
588
660
|
return {
|
|
589
661
|
options,
|
|
@@ -592,7 +664,6 @@ function prepareRun(options) {
|
|
|
592
664
|
reviewTarget,
|
|
593
665
|
runId,
|
|
594
666
|
judgeEnabled,
|
|
595
|
-
requireSentinel: !options.allowMissingSentinel,
|
|
596
667
|
paths,
|
|
597
668
|
promptSelections,
|
|
598
669
|
commandEnv,
|
|
@@ -601,9 +672,8 @@ function prepareRun(options) {
|
|
|
601
672
|
}
|
|
602
673
|
async function runReviewerStages(preparedRun) {
|
|
603
674
|
const reviewerStages = preparedRun.stageDefinitions.filter((s) => s.name !== "judge");
|
|
604
|
-
const results = await Promise.all(reviewerStages.map((stage) => runStage(stage, preparedRun.cwd, preparedRun.
|
|
605
|
-
|
|
606
|
-
const reviewerResults = results.filter((result) => result !== null);
|
|
675
|
+
const results = await Promise.all(reviewerStages.map((stage) => runStage(stage, preparedRun.cwd, preparedRun.options.timeoutMs, preparedRun.options.maxRetries, preparedRun.commandEnv)));
|
|
676
|
+
const reviewerResults = results;
|
|
607
677
|
const successfulReviewerResults = reviewerResults.filter((result) => result.success);
|
|
608
678
|
return {
|
|
609
679
|
reviewerResults,
|
|
@@ -628,7 +698,7 @@ async function runJudgeStage(preparedRun, reviewerExecution) {
|
|
|
628
698
|
if (!judgeStage) {
|
|
629
699
|
return null;
|
|
630
700
|
}
|
|
631
|
-
return runStage(judgeStage, preparedRun.cwd, preparedRun.
|
|
701
|
+
return runStage(judgeStage, preparedRun.cwd, preparedRun.options.timeoutMs, preparedRun.options.maxRetries, preparedRun.commandEnv);
|
|
632
702
|
}
|
|
633
703
|
function finalizeRun(preparedRun, reviewerExecution, judgeResult) {
|
|
634
704
|
if (judgeResult?.success === true) {
|
|
@@ -678,10 +748,6 @@ options:
|
|
|
678
748
|
--claude-prompt-template <path> Override Claude reviewer prompt template
|
|
679
749
|
--codex-prompt-template <path> Override Codex reviewer prompt template
|
|
680
750
|
--judge-prompt-template <path> Override judge prompt template
|
|
681
|
-
--claude-command <command> Shell command to launch Claude reviewer
|
|
682
|
-
--codex-command <command> Shell command to launch Codex reviewer
|
|
683
|
-
--judge-command <command> Shell command to launch the judge stage
|
|
684
|
-
--allow-missing-sentinel Treat exit code 0 as success without done.json
|
|
685
751
|
--skip-judge Skip the judge stage
|
|
686
752
|
--skip-html Skip HTML rendering
|
|
687
753
|
--open-html Open index.html after rendering (macOS)
|
|
@@ -70,6 +70,12 @@ function stageStatusRow(name, status) {
|
|
|
70
70
|
if (Array.isArray(status.missing_artifacts) && status.missing_artifacts.length > 0) {
|
|
71
71
|
details.push(`${status.missing_artifacts.length} missing artifact(s)`);
|
|
72
72
|
}
|
|
73
|
+
if (typeof status.stream_parse_errors === "number" && status.stream_parse_errors > 0) {
|
|
74
|
+
details.push(`${status.stream_parse_errors} stream parse error(s)`);
|
|
75
|
+
}
|
|
76
|
+
if (Array.isArray(status.warnings) && status.warnings.length > 0) {
|
|
77
|
+
details.push(`${status.warnings.length} warning(s)`);
|
|
78
|
+
}
|
|
73
79
|
}
|
|
74
80
|
const detailSpan = details.length > 0
|
|
75
81
|
? `<span class="status-details">${htmlEscape(details.join(" · "))}</span>`
|
|
@@ -106,6 +112,15 @@ function buildDiagnostics(runDir, statuses) {
|
|
|
106
112
|
}
|
|
107
113
|
parts.push("</ul>");
|
|
108
114
|
}
|
|
115
|
+
if (Array.isArray(status.warnings) && status.warnings.length > 0) {
|
|
116
|
+
parts.push("<p><strong>Warnings:</strong></p><ul>");
|
|
117
|
+
for (const warning of status.warnings) {
|
|
118
|
+
if (typeof warning !== "string")
|
|
119
|
+
continue;
|
|
120
|
+
parts.push(`<li>${htmlEscape(warning)}</li>`);
|
|
121
|
+
}
|
|
122
|
+
parts.push("</ul>");
|
|
123
|
+
}
|
|
109
124
|
if (Array.isArray(status.missing_artifacts) && status.missing_artifacts.length > 0) {
|
|
110
125
|
const missingArtifacts = status.missing_artifacts;
|
|
111
126
|
parts.push("<p><strong>Missing artifacts:</strong></p><ul>");
|
|
@@ -114,18 +129,31 @@ function buildDiagnostics(runDir, statuses) {
|
|
|
114
129
|
}
|
|
115
130
|
parts.push("</ul>");
|
|
116
131
|
}
|
|
132
|
+
const lastActivityAt = typeof status.last_activity_at === "string" ? status.last_activity_at : "";
|
|
133
|
+
const lastEventType = typeof status.last_event_type === "string" ? status.last_event_type : "";
|
|
134
|
+
const streamEventCount = typeof status.stream_event_count === "number" ? status.stream_event_count : null;
|
|
135
|
+
const streamParseErrors = typeof status.stream_parse_errors === "number" ? status.stream_parse_errors : null;
|
|
136
|
+
const executionSummary = [
|
|
137
|
+
streamEventCount !== null ? `stream events: ${streamEventCount}` : "",
|
|
138
|
+
streamParseErrors !== null ? `stream parse errors: ${streamParseErrors}` : "",
|
|
139
|
+
lastActivityAt ? `last activity: ${lastActivityAt}` : "",
|
|
140
|
+
lastEventType ? `last event: ${lastEventType}` : "",
|
|
141
|
+
].filter(Boolean);
|
|
142
|
+
if (executionSummary.length > 0) {
|
|
143
|
+
parts.push(`<p><strong>Execution:</strong> ${htmlEscape(executionSummary.join(" · "))}</p>`);
|
|
144
|
+
}
|
|
117
145
|
const stageDir = resolve(runDir, stage);
|
|
118
146
|
const excerpt = stderrExcerpt(stageDir);
|
|
119
147
|
if (excerpt) {
|
|
120
148
|
parts.push("<p><strong>stderr (last 20 lines):</strong></p>");
|
|
121
149
|
parts.push(`<pre class="stderr-excerpt">${htmlEscape(excerpt)}</pre>`);
|
|
122
150
|
}
|
|
123
|
-
const
|
|
151
|
+
const streamLog = typeof status.stream_log === "string" ? status.stream_log : "";
|
|
124
152
|
const stderrLog = typeof status.stderr_log === "string" ? status.stderr_log : "";
|
|
125
|
-
if (
|
|
153
|
+
if (streamLog || stderrLog) {
|
|
126
154
|
parts.push('<div class="log-paths">');
|
|
127
|
-
if (
|
|
128
|
-
parts.push(`<code>${htmlEscape(
|
|
155
|
+
if (streamLog)
|
|
156
|
+
parts.push(`<code>${htmlEscape(streamLog)}</code>`);
|
|
129
157
|
if (stderrLog)
|
|
130
158
|
parts.push(`<code>${htmlEscape(stderrLog)}</code>`);
|
|
131
159
|
parts.push("</div>");
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
import { resolve } from "node:path";
|
|
2
|
+
/**
 * Wrap a string in single quotes so it can be embedded in a shell command.
 *
 * Each embedded single quote is emitted as `'\''`: close the quoted run,
 * add a backslash-escaped quote, then reopen the quoted run.
 *
 * @param {string} value - Text to quote.
 * @returns {string} `value` surrounded by single quotes with inner quotes escaped.
 */
function shellQuote(value) {
    const escaped = value.split("'").join("'\\''");
    return `'${escaped}'`;
}
|
|
5
|
+
/**
 * Build the shell command string used to launch a Codex stage.
 *
 * The prompt file's contents are supplied as the final argument through a
 * `$(cat ...)` command substitution; the path is shell-quoted first.
 *
 * @param {string} promptPath - Path of the rendered prompt file.
 * @returns {string} The `codex exec` command string.
 */
function buildCodexCommand(promptPath) {
    const quotedPromptPath = shellQuote(promptPath);
    return `codex exec --json --dangerously-bypass-approvals-and-sandbox "$(cat ${quotedPromptPath})"`;
}
|
|
8
|
+
/**
 * Describe how the Claude reviewer stage is executed.
 *
 * @param {string} stageDir - Directory holding the stage's files.
 * @param {string} promptOutputName - File name of the rendered prompt inside `stageDir`.
 * @returns {{commandId: string, command: string, artifacts: {streamLog: string, stderrLog: string}}}
 *   The command identifier, the shell command string, and the resolved
 *   `stream.jsonl` / `stderr.log` paths for the stage.
 */
function createClaudeExecution(stageDir, promptOutputName) {
    const quotedPromptPath = shellQuote(resolve(stageDir, promptOutputName));
    const artifacts = {
        streamLog: resolve(stageDir, "stream.jsonl"),
        stderrLog: resolve(stageDir, "stderr.log"),
    };
    return {
        commandId: "claude-review",
        command: `claude --dangerously-skip-permissions --verbose --output-format stream-json --include-partial-messages -p "$(cat ${quotedPromptPath})"`,
        artifacts,
    };
}
|
|
21
|
+
/**
 * Describe how a Codex-backed stage is executed.
 *
 * @param {string} stageDir - Directory holding the stage's files.
 * @param {string} promptOutputName - File name of the rendered prompt inside `stageDir`.
 * @param {string} commandId - Identifier recorded for this execution.
 * @returns {{commandId: string, command: string, artifacts: {streamLog: string, stderrLog: string}}}
 *   The command identifier, the shell command string, and the resolved
 *   `stream.jsonl` / `stderr.log` paths for the stage.
 */
function createCodexExecution(stageDir, promptOutputName, commandId) {
    const inStageDir = (fileName) => resolve(stageDir, fileName);
    const command = buildCodexCommand(inStageDir(promptOutputName));
    const streamLog = inStageDir("stream.jsonl");
    const stderrLog = inStageDir("stderr.log");
    return { commandId, command, artifacts: { streamLog, stderrLog } };
}
|
|
32
|
+
/**
 * Select the execution description for a stage by name.
 *
 * `"claude"` uses the Claude execution and `"judge"` uses the Codex execution
 * with the `codex-judge` id; every other stage name falls through to the
 * Codex execution with the `codex-review` id.
 *
 * @param {string} stageName - Name of the stage to execute.
 * @param {string} stageDir - Directory holding the stage's files.
 * @param {string} promptOutputName - File name of the rendered prompt inside `stageDir`.
 * @returns {{commandId: string, command: string, artifacts: {streamLog: string, stderrLog: string}}}
 *   The execution description produced by the matching factory.
 */
export function createStageExecution(stageName, stageDir, promptOutputName) {
    switch (stageName) {
        case "claude":
            return createClaudeExecution(stageDir, promptOutputName);
        case "judge":
            return createCodexExecution(stageDir, promptOutputName, "codex-judge");
        default:
            return createCodexExecution(stageDir, promptOutputName, "codex-review");
    }
}
|
package/package.json
CHANGED
|
@@ -1,10 +1,8 @@
|
|
|
1
1
|
# CLI Integration
|
|
2
2
|
|
|
3
|
-
The orchestrator
|
|
3
|
+
The orchestrator uses canonical built-in stage commands and execution metadata. Callers can choose which stages run and which prompt templates render, but command choice and sentinel enforcement are no longer user-overridable API surfaces.
|
|
4
4
|
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
Available environment variables in commands:
|
|
5
|
+
Available environment variables in built-in commands:
|
|
8
6
|
|
|
9
7
|
- `$CWD`
|
|
10
8
|
- `$SKILL_DIR`
|
|
@@ -24,20 +22,18 @@ The orchestrator renders these prompt files before launching any stage:
|
|
|
24
22
|
|
|
25
23
|
When invoking from the project being reviewed, run `npx @crown-dev-studios/review-council` so `process.cwd()` stays anchored to the project and output lands in `docs/reviews/`.
|
|
26
24
|
|
|
27
|
-
##
|
|
25
|
+
## Built-In Commands
|
|
28
26
|
|
|
29
|
-
The orchestrator provides
|
|
27
|
+
The orchestrator provides canonical stage commands for Claude, Codex, and the judge. No command flags are needed for the common case:
|
|
30
28
|
|
|
31
29
|
```bash
|
|
32
30
|
npx @crown-dev-studios/review-council --target "staged changes" --open-html
|
|
33
31
|
```
|
|
34
32
|
|
|
35
33
|
Built-in defaults:
|
|
36
|
-
- **Claude:** `claude --dangerously-skip-permissions -p "$(cat $CLAUDE_DIR/claude-review-export.md)"`
|
|
37
|
-
- **Codex:** `codex exec --
|
|
38
|
-
- **Judge:** `codex exec --
|
|
39
|
-
|
|
40
|
-
Use `--claude-command`, `--codex-command`, or `--judge-command` to override any default.
|
|
34
|
+
- **Claude:** `claude --dangerously-skip-permissions --verbose --output-format stream-json --include-partial-messages -p "$(cat "$CLAUDE_DIR/claude-review-export.md")"`
|
|
35
|
+
- **Codex:** `codex exec --json --dangerously-bypass-approvals-and-sandbox "$(cat "$CODEX_DIR/codex-review-export.md")"`
|
|
36
|
+
- **Judge:** `codex exec --json --dangerously-bypass-approvals-and-sandbox "$(cat "$JUDGE_DIR/judge.md")"`
|
|
41
37
|
|
|
42
38
|
Use `--no-claude` or `--no-codex` to skip a model reviewer entirely.
|
|
43
39
|
|
|
@@ -91,17 +87,19 @@ Timed-out stages are not retried.
|
|
|
91
87
|
|
|
92
88
|
### Retries
|
|
93
89
|
|
|
94
|
-
`--retries <n>` (default: 2) retries a stage up to N times on non-zero exit. Delay between retries uses exponential backoff: `2000 * 2^(attempt-1)` ms (2s, 4s, 8s...). The final `status.json` records
|
|
90
|
+
`--retries <n>` (default: 2) retries a stage up to `n` times on non-zero exit. Delay between retries uses exponential backoff: `2000 * 2^(attempt-1)` ms (2s, 4s, 8s...). The final `status.json` records the total `attempts` count.
|
|
95
91
|
|
|
96
92
|
Retries are skipped for timeouts (not transient).
|
|
97
93
|
|
|
98
|
-
###
|
|
94
|
+
### JSONL Streams
|
|
99
95
|
|
|
100
|
-
|
|
96
|
+
All built-in stages emit JSONL events on stdout:
|
|
101
97
|
|
|
102
|
-
|
|
98
|
+
- Claude via `--output-format stream-json`
|
|
99
|
+
- Codex reviewer via `codex exec --json`
|
|
100
|
+
- Codex judge via `codex exec --json`
|
|
103
101
|
|
|
104
|
-
|
|
102
|
+
The orchestrator records that stdout directly to `stream.jsonl` for each stage and derives `last_activity_at`, `last_event_type`, `stream_event_count`, and `stream_parse_errors` from that one stream.
|
|
105
103
|
|
|
106
104
|
### Partial Judge Execution
|
|
107
105
|
|
|
@@ -116,5 +114,4 @@ The orchestrator waits for:
|
|
|
116
114
|
- judge exit code `0`
|
|
117
115
|
- judge `done.json`
|
|
118
116
|
|
|
119
|
-
If a process exits `0` but omits `done.json`, the stage is treated as incomplete.
|
|
120
|
-
|
|
117
|
+
If a process exits `0` but omits `done.json`, the stage is treated as incomplete. There is no sentinel bypass mode.
|
|
@@ -13,21 +13,21 @@ docs/reviews/<run-id>/
|
|
|
13
13
|
findings.json
|
|
14
14
|
done.json
|
|
15
15
|
status.json
|
|
16
|
-
|
|
16
|
+
stream.jsonl
|
|
17
17
|
stderr.log
|
|
18
18
|
codex/
|
|
19
19
|
report.md
|
|
20
20
|
findings.json
|
|
21
21
|
done.json
|
|
22
22
|
status.json
|
|
23
|
-
|
|
23
|
+
stream.jsonl
|
|
24
24
|
stderr.log
|
|
25
25
|
judge/
|
|
26
26
|
summary.md
|
|
27
27
|
verdict.json
|
|
28
28
|
done.json
|
|
29
29
|
status.json
|
|
30
|
-
|
|
30
|
+
stream.jsonl
|
|
31
31
|
stderr.log
|
|
32
32
|
```
|
|
33
33
|
|
|
@@ -40,6 +40,7 @@ Each model reviewer (Claude, Codex) writes:
|
|
|
40
40
|
- `report.md`: human-readable review
|
|
41
41
|
- `findings.json`: structured findings matching `schemas/review-findings.schema.json`
|
|
42
42
|
- `done.json`: sentinel file confirming the agent finished writing artifacts
|
|
43
|
+
- `stream.jsonl`: raw JSONL stdout event stream for the stage
|
|
43
44
|
|
|
44
45
|
`done.json` shape:
|
|
45
46
|
|
|
@@ -69,18 +70,27 @@ The orchestrator writes `status.json` per stage with these fields:
|
|
|
69
70
|
```json
|
|
70
71
|
{
|
|
71
72
|
"stage": "claude",
|
|
72
|
-
"
|
|
73
|
+
"command_id": "claude-review",
|
|
74
|
+
"command": "claude --dangerously-skip-permissions --verbose --output-format stream-json --include-partial-messages -p ...",
|
|
73
75
|
"started_at": "2026-03-07T18:25:00Z",
|
|
74
76
|
"finished_at": "2026-03-07T18:30:00Z",
|
|
75
77
|
"exit_code": 0,
|
|
76
|
-
"require_sentinel": true,
|
|
77
|
-
"done_file_present": true,
|
|
78
78
|
"success": true,
|
|
79
79
|
"timed_out": false,
|
|
80
80
|
"attempts": 1,
|
|
81
|
-
"
|
|
82
|
-
"
|
|
83
|
-
"
|
|
81
|
+
"stream_log": "/path/to/stream.jsonl",
|
|
82
|
+
"stderr_log": "/path/to/stderr.log",
|
|
83
|
+
"last_activity_at": "2026-03-07T18:29:59Z",
|
|
84
|
+
"last_event_type": "stop",
|
|
85
|
+
"stream_event_count": 42,
|
|
86
|
+
"stream_parse_errors": 0,
|
|
87
|
+
"artifact_presence": {
|
|
88
|
+
"report.md": true,
|
|
89
|
+
"findings.json": true,
|
|
90
|
+
"done.json": true
|
|
91
|
+
},
|
|
92
|
+
"missing_artifacts": [],
|
|
93
|
+
"validation_errors": []
|
|
84
94
|
}
|
|
85
95
|
```
|
|
86
96
|
|
|
@@ -102,8 +112,45 @@ Key fields:
|
|
|
102
112
|
| `exit_code` | number | Process exit code. `124` on timeout. |
|
|
103
113
|
| `timed_out` | boolean | Whether the stage was killed due to timeout. |
|
|
104
114
|
| `attempts` | number | Total attempts (1 = no retries). |
|
|
105
|
-
| `
|
|
106
|
-
| `
|
|
115
|
+
| `stream_log` | string | JSONL stdout event stream for the stage. |
|
|
116
|
+
| `last_activity_at` | string? | Last observed stream activity timestamp. |
|
|
117
|
+
| `last_event_type` | string? | Last observed stream event type. |
|
|
118
|
+
| `stream_event_count` | number | Parsed stdout event count for the attempt. |
|
|
119
|
+
| `stream_parse_errors` | number | Number of stdout lines that failed JSON parsing. |
|
|
120
|
+
| `missing_artifacts` | array | Required artifacts absent for the final attempt. |
|
|
121
|
+
| `validation_errors` | array | Lightweight output validation errors for malformed structured artifacts. |
|
|
122
|
+
| `warnings` | array? | Non-authoritative observability warnings, such as stream parse issues. |
|
|
123
|
+
|
|
124
|
+
## Run Metadata
|
|
125
|
+
|
|
126
|
+
`run.json` records the rendered prompt sources plus canonical execution metadata for each executable stage:
|
|
127
|
+
|
|
128
|
+
```json
|
|
129
|
+
{
|
|
130
|
+
"run_id": "20260330-12345678",
|
|
131
|
+
"review_target": "staged changes",
|
|
132
|
+
"stage_executions": {
|
|
133
|
+
"claude": {
|
|
134
|
+
"command_id": "claude-review",
|
|
135
|
+
"artifacts": {
|
|
136
|
+
"stream_log": "/path/to/stream.jsonl"
|
|
137
|
+
}
|
|
138
|
+
},
|
|
139
|
+
"codex": {
|
|
140
|
+
"command_id": "codex-review",
|
|
141
|
+
"artifacts": {
|
|
142
|
+
"stream_log": "/path/to/stream.jsonl"
|
|
143
|
+
}
|
|
144
|
+
},
|
|
145
|
+
"judge": {
|
|
146
|
+
"command_id": "codex-judge",
|
|
147
|
+
"artifacts": {
|
|
148
|
+
"stream_log": "/path/to/stream.jsonl"
|
|
149
|
+
}
|
|
150
|
+
}
|
|
151
|
+
}
|
|
152
|
+
}
|
|
153
|
+
```
|
|
107
154
|
|
|
108
155
|
## Bundle Output
|
|
109
156
|
|
|
@@ -1,50 +0,0 @@
|
|
|
1
|
-
import { createInterface } from "node:readline";
|
|
2
|
-
const queue = [];
|
|
3
|
-
let processing = false;
|
|
4
|
-
let readlineInterface = null;
|
|
5
|
-
function getReadline() {
|
|
6
|
-
if (!readlineInterface) {
|
|
7
|
-
readlineInterface = createInterface({ input: process.stdin, output: process.stdout });
|
|
8
|
-
}
|
|
9
|
-
return readlineInterface;
|
|
10
|
-
}
|
|
11
|
-
function processNext() {
|
|
12
|
-
if (queue.length === 0) {
|
|
13
|
-
processing = false;
|
|
14
|
-
return;
|
|
15
|
-
}
|
|
16
|
-
processing = true;
|
|
17
|
-
const request = queue.shift();
|
|
18
|
-
if (!request) {
|
|
19
|
-
processing = false;
|
|
20
|
-
return;
|
|
21
|
-
}
|
|
22
|
-
const reader = getReadline();
|
|
23
|
-
process.stderr.write(`\n[${request.stage}] needs your input:\n${request.prompt}\n`);
|
|
24
|
-
reader.question("", (answer) => {
|
|
25
|
-
try {
|
|
26
|
-
request.stdinPipe.write(`${answer}\n`);
|
|
27
|
-
}
|
|
28
|
-
catch {
|
|
29
|
-
// The child process may exit before the response is written.
|
|
30
|
-
}
|
|
31
|
-
request.resolve();
|
|
32
|
-
processNext();
|
|
33
|
-
});
|
|
34
|
-
}
|
|
35
|
-
export function enqueue(request) {
|
|
36
|
-
queue.push(request);
|
|
37
|
-
if (!processing) {
|
|
38
|
-
processNext();
|
|
39
|
-
}
|
|
40
|
-
}
|
|
41
|
-
export function close() {
|
|
42
|
-
if (readlineInterface) {
|
|
43
|
-
readlineInterface.close();
|
|
44
|
-
readlineInterface = null;
|
|
45
|
-
}
|
|
46
|
-
processing = false;
|
|
47
|
-
for (const request of queue.splice(0)) {
|
|
48
|
-
request.resolve();
|
|
49
|
-
}
|
|
50
|
-
}
|