@nimiplatform/nimi-coding 0.2.1 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,647 @@
1
+ import { spawn } from "node:child_process";
2
+ import { mkdir, readFile, writeFile } from "node:fs/promises";
3
+ import path from "node:path";
4
+
5
+ import {
6
+ appendRunEvent,
7
+ artifactPath,
8
+ artifactRef,
9
+ chunkRef,
10
+ ensureIsoTimestamp,
11
+ inputError,
12
+ loadChunk,
13
+ loadPlan,
14
+ packetRef,
15
+ resolveInsideProject,
16
+ safeSweepId,
17
+ withAuditSweepMutationLock,
18
+ writeYamlRef,
19
+ } from "./common.mjs";
20
+ import { buildAuditorPacket, reviewAuditSweepChunk, updatePlanChunk } from "./chunks.mjs";
21
+ import { extractCodexAuditorEvidenceFile, P0P1_RULE_CHECK_IDS } from "./codex-auditor-evidence.mjs";
22
+ import { ingestAuditSweepChunk } from "./ingest.mjs";
23
+ import { budgetBlockForChunk } from "./risk-budget.mjs";
24
+ import { validateAuditSweepArtifacts } from "./validators.mjs";
25
+
26
/** Auditor id used for the dispatch and the extracted evidence when the caller passes none. */
const CLAUDE_AUDITOR_DEFAULT = "claude_semantic_auditor";
/** Time budget for a single Claude run (10 minutes), used when no positive integer timeout is given. */
const DEFAULT_CLAUDE_TIMEOUT_MS = 10 * 60 * 1000;
/** Delay between the SIGTERM sent on timeout and the follow-up SIGKILL. */
const CLAUDE_TIMEOUT_KILL_GRACE_MS = 3000;
/** File-name suffix of the normalized raw Claude output artifact. */
const CLAUDE_RAW_SUFFIX = ".claude-raw.json";
/** File-name suffix of the evidence candidate extracted from the raw output. */
const CLAUDE_EVIDENCE_SUFFIX = ".claude-evidence.json";
/**
 * Tools passed to the Claude CLI via --allowedTools.
 * Frozen so that no other code in this module can widen the allow-list at runtime.
 */
const CLAUDE_READONLY_ALLOWED_TOOLS = Object.freeze(["Read", "Grep", "Glob"]);
32
+
33
/**
 * Builds the artifact ref of a Claude output file for one chunk.
 *
 * @param {string} sweepId - Sweep the chunk belongs to.
 * @param {string} chunkId - Chunk whose output is referenced.
 * @param {string} suffix - Text appended to the chunk id to form the file name.
 * @returns {*} Whatever `artifactRef` returns for the "evidence_refs" / "claude-output" location.
 */
function claudeOutputRef(sweepId, chunkId, suffix) {
  const fileName = `${chunkId}${suffix}`;
  return artifactRef("evidence_refs", sweepId, "claude-output", fileName);
}
36
+
37
/**
 * Turns a timestamp into a token made only of ASCII letters, digits and single dashes.
 *
 * Every run of other characters becomes one "-", and no dash is left at either end.
 *
 * @param {string} timestamp - Timestamp text (e.g. an ISO-8601 string).
 * @returns {string} The dash-separated alphanumeric token.
 */
function claudeRunToken(timestamp) {
  // Splitting on separator runs leaves empty pieces only at the ends; dropping them
  // is the same as trimming leading/trailing dashes after a replace.
  const alphanumericParts = timestamp.split(/[^0-9A-Za-z]+/).filter((part) => part !== "");
  return alphanumericParts.join("-");
}
40
+
41
/**
 * Expresses an absolute path relative to the project root, using "/" separators.
 *
 * @param {string} projectRoot - Directory the result is relative to.
 * @param {string} absolutePath - Path to express relative to `projectRoot`.
 * @returns {string} Relative path with every backslash replaced by a forward slash.
 */
function projectRefForPath(projectRoot, absolutePath) {
  const relativePath = path.relative(projectRoot, absolutePath);
  return relativePath.split("\\").join("/");
}
44
+
45
/**
 * Assembles the instruction prompt sent to the Claude auditor on stdin.
 *
 * The prompt is a fixed list of instruction lines joined with "\n"; only the auditor id,
 * the packet/session/transcript refs and the P0/P1 rule check ids are interpolated.
 *
 * @param {object} args
 * @param {object} args.packet - Auditor packet; `auditor`, `sweep_id` and `chunk_id` are read.
 * @param {string} args.auditorPacketRef - Ref of the packet file the auditor is told to read.
 * @param {string} args.rawRef - Ref quoted as auditor.provenance.transcript_ref.
 * @param {string} args.sessionRef - Ref quoted as auditor.provenance.session_ref.
 * @returns {string} The complete prompt text.
 */
function claudePrompt({ packet, auditorPacketRef, rawRef, sessionRef }) {
  // JSON-quoted values that the auditor must echo back verbatim.
  const quotedAuditorId = JSON.stringify(packet.auditor);
  const quotedPacketRef = JSON.stringify(packetRef(packet.sweep_id, packet.chunk_id));
  const quotedSessionRef = JSON.stringify(sessionRef);
  const quotedTranscriptRef = JSON.stringify(rawRef);
  const ruleCheckIds = P0P1_RULE_CHECK_IDS.join(", ");

  const promptLines = [
    "OUTPUT FORMAT (HARD REQUIREMENT, READ FIRST):",
    "Your reply MUST be exactly one JSON object. The first character of your reply MUST be `{` and the last character MUST be `}`. No prose, no apology, no markdown fences, no \"Audit complete\" summary, no commentary. Even when no findings are emitted, you MUST still emit the full JSON object (with findings: [] and the required negative_reasoning fields). A reply that is not a single JSON object will be rejected and the chunk will be marked failed.",
    "",
    "You are the Claude semantic auditor for a nimicoding sweep audit chunk.",
    "Run in read-only, audit-only mode. Do not edit files. Do not implement product fixes.",
    `Read the auditor packet from ${auditorPacketRef} and inspect the chunk authority refs and implementation evidence semantically.`,
    "Do not rely on this prompt as the chunk inventory; the packet file is the source for files, authority_refs, selected_implementation_refs, audit_depth, retrieval_prepass, and the raw semantic output contract.",
    "Scripts may not generate findings or no-findings; your conclusions must come from your own inspection.",
    "The packet is compact: evidence_inventory/selected_implementation_refs is the manager-selected implementation slice, not the full manager-owned inventory.",
    "Do not ask for, reconstruct, or echo the omitted full evidence_inventory. audit-claude will mechanically fill coverage.files, coverage.authority_refs, and full coverage.evidence_files from manager-owned chunk state.",
    "You only author semantic audit content: authority_outcomes reasoning/status, inspected_implementation_refs, P0/P1 rule checks, p0p1_negative_reasoning when applicable, and findings.",
    "For each authority outcome, set authority_ref to the packet authority_ref and put inspected implementation refs in inspected_implementation_refs or implementation_evidence_refs.",
    "Every implementation ref you cite must be an exact file ref from packet.selected_implementation_refs / packet.evidence_inventory.",
    "Never put AGENTS.md, README.md, spec files, authority refs, methodology docs, or governance docs in inspected_implementation_refs, implementation_evidence_refs, coverage.p0p1_evidence_refs, findings[].implementation_refs, or coverage.p0p1_rule_checks[].implementation_refs; even if packet.selected_implementation_refs includes them, treat them as context only.",
    "If only context/governance/authority documents are available after that exclusion, use status=\"not_applicable\" for P0/P1 rule checks and explain the lack of implementation surface in negative_reasoning.",
    "If a governance or authority document influenced reasoning, mention it only in negative_reasoning/description text, not in any implementation_refs array.",
    "Use packet.audit_depth to size your inspection: deep means inspect the selected slice thoroughly, normal means focused semantic inspection, shallow means audit generated/table/index invariants from the selected slice without expanding the omitted inventory.",
    "Return exactly one JSON object and nothing else. Do not wrap it in markdown.",
    "The JSON object must have exactly these top-level fields: chunk_id, auditor, coverage, findings.",
    `Set auditor.id to ${quotedAuditorId}.`,
    `Set auditor.mode to "claude_semantic_audit".`,
    `Set auditor.methodology_ref to "package://@nimiplatform/nimi-coding/methodology/audit-sweep-p0p1-recall.yaml".`,
    "Put P0/P1 rule checks only at coverage.p0p1_rule_checks.",
    `Set auditor.provenance.kind to "semantic_audit".`,
    `Set auditor.provenance.packet_ref to ${quotedPacketRef}.`,
    `Set auditor.provenance.session_ref to ${quotedSessionRef}.`,
    `Set auditor.provenance.transcript_ref to ${quotedTranscriptRef}.`,
    "coverage.authority_outcomes must contain one outcome per authority_ref.",
    `coverage.p0p1_rule_checks must contain exactly these ids and no aliases: ${ruleCheckIds}.`,
    "Each coverage.authority_outcomes[] object must include negative_reasoning when no critical/high finding is emitted for the chunk.",
    "Each coverage.p0p1_rule_checks[] object must include id, status, implementation_refs, and negative_reasoning.",
    "Use status=\"checked\" when implementation evidence was inspected; checked rules must cite at least one in-scope implementation ref.",
    "Use status=\"not_applicable\" only when the rule truly has no implementation surface, and explain that in negative_reasoning.",
    "When the packet evidence_inventory is empty and no critical/high finding is emitted, include coverage.p0p1_implementation_not_applicable_reason with the chunk-specific reason implementation refs are not applicable.",
    "When findings is an empty array, you MUST include coverage.p0p1_negative_reasoning (string) explaining why no critical/high finding was emitted across all priority defect classes. Omitting this field will reject the audit.",
    "Output MUST be exactly one JSON object. Do not prepend prose. Do not wrap in ```json fences. Do not append commentary. The first character MUST be `{` and the last character MUST be `}`.",
    "Do not use priority defect class aliases such as authority_boundary_bypass, security_or_permission_bypass, destructive_action_without_gate, package_boundary_violation, or unadmitted_truth_or_evidence_source as rule check ids.",
    "Do not emit coverage.files, coverage.authority_refs, or coverage.evidence_files; those fields are manager-owned and will be populated from the packet.",
    "Do not emit authority_outcomes[].evidence_refs; it is manager-owned and will be built from authority_ref plus inspected implementation refs.",
    "Every finding must include severity, category, impact, title, description, and location.file. Set severity to critical or high for P0/P1 findings. Set finding.category to one of the exact P0/P1 rule ids when the finding maps to a P0/P1 rule; do not use rule_id as the primary finding category field.",
    "Set findings[].location.file to an exact packet.selected_implementation_refs file for implementation findings. For authority-only findings with no implementation surface, set findings[].location.file to the in-scope authority_ref that contains the defect.",
    "authority_outcomes[].status is an audit-process enum only: audited, blocked, or not_applicable.",
    "Use status=audited when the authority/evidence was inspected, even if you discovered violations.",
    "When an authority outcome uses status=blocked or status=not_applicable, include reason with the chunk-specific blocker or not-applicable explanation.",
    "Do not use compliance verdicts such as violated, pass, fail, compliant, or non_compliant in authority_outcomes[].status; put violations in findings.",
    "For no-finding chunks, include chunk-specific inspected implementation refs, P0/P1 rule checks, and negative reasoning.",
  ];
  return promptLines.join("\n");
}
95
+
96
/**
 * Removes a surrounding markdown code fence from a text, when there is one.
 *
 * The input is returned unchanged (untrimmed) unless its trimmed form starts with a
 * fence marker followed somewhere by a newline. Otherwise the result is everything
 * after the opening fence line, cut at the last fence marker if one is present.
 *
 * @param {string} text - Text that may be wrapped in a code fence.
 * @returns {string} The fenced content, or `text` itself when no fence is detected.
 */
function stripCodeFence(text) {
  const FENCE = "```";
  const body = text.trim();
  // -1 covers both "not fenced" and "fence line never ends".
  const openingLineEnd = body.startsWith(FENCE) ? body.indexOf("\n") : -1;
  if (openingLineEnd === -1) {
    return text;
  }
  const afterOpeningLine = body.slice(openingLineEnd + 1);
  const closingFenceAt = afterOpeningLine.lastIndexOf(FENCE);
  return closingFenceAt === -1 ? afterOpeningLine : afterOpeningLine.slice(0, closingFenceAt);
}
112
+
113
/**
 * Finds the first brace-balanced `{ ... }` span in a text.
 *
 * The text first goes through `stripCodeFence`. Scanning starts at the first "{" and
 * counts brace depth, ignoring braces inside double-quoted strings (with backslash escapes).
 * The span is not validated as JSON.
 *
 * @param {string} rawText - Text that may contain an object literal.
 * @returns {string|null} The balanced span, or null when there is no "{" or it never closes.
 */
function extractFirstJsonObject(rawText) {
  const source = stripCodeFence(rawText);
  const openAt = source.indexOf("{");
  if (openAt === -1) {
    return null;
  }

  let nesting = 0;
  let insideString = false;
  let skipNext = false;
  let cursor = openAt;
  while (cursor < source.length) {
    const ch = source[cursor];
    cursor += 1; // cursor now points just past `ch`

    if (insideString) {
      if (skipNext) {
        skipNext = false; // the character after a backslash never ends the string
      } else if (ch === "\\") {
        skipNext = true;
      } else if (ch === "\"") {
        insideString = false;
      }
      continue;
    }

    switch (ch) {
      case "\"":
        insideString = true;
        break;
      case "{":
        nesting += 1;
        break;
      case "}":
        nesting -= 1;
        if (nesting === 0) {
          return source.slice(openAt, cursor);
        }
        break;
      default:
        break;
    }
  }
  return null;
}
149
+
150
/**
 * Reduces Claude CLI stdout to the text stored as the raw output artifact.
 *
 * - Empty or nullish input yields "".
 * - Input that parses as JSON with type "result": a truthy object `structured_output` is
 *   returned pretty-printed with a trailing newline; otherwise a non-blank string `result`
 *   is normalized recursively.
 * - Any other parseable JSON is returned trimmed.
 * - Unparseable input yields its first brace-balanced span when one exists (not validated
 *   as JSON), otherwise the trimmed input.
 *
 * @param {string|null|undefined} stdout - Raw text captured from the CLI or a replay file.
 * @returns {string} Normalized raw output.
 */
function normalizeClaudeRawOutput(stdout) {
  const text = (stdout ?? "").trim();
  if (text === "") {
    return text;
  }

  let envelope;
  let isJson = true;
  try {
    envelope = JSON.parse(text);
  } catch {
    isJson = false;
  }

  if (isJson) {
    if (envelope?.type === "result") {
      const structured = envelope.structured_output;
      if (structured && typeof structured === "object") {
        return `${JSON.stringify(structured, null, 2)}\n`;
      }
      const inner = envelope.result;
      if (typeof inner === "string" && inner.trim()) {
        return normalizeClaudeRawOutput(inner);
      }
    }
    return text;
  }

  // Not JSON as a whole: fall back to the first balanced object span, if any.
  return extractFirstJsonObject(text) ?? text;
}
178
+
179
/**
 * Sends a signal to a child process, preferring its whole process group.
 *
 * Off Windows, when the child has a pid, the signal goes to the negated pid via
 * `process.kill`. If that is skipped or throws, `child.kill(signal)` is tried instead.
 * Errors from either attempt are swallowed.
 *
 * @param {{ pid?: number, kill: Function }} child - Child process handle.
 * @param {string|number} signal - Signal to deliver.
 * @returns {void}
 */
function terminateProcess(child, signal) {
  let groupSignalled = false;
  try {
    const isPosix = process.platform !== "win32";
    if (isPosix && child.pid) {
      process.kill(-child.pid, signal);
      groupSignalled = true;
    }
  } catch {
    // Group signalling failed; try the child directly below.
  }
  if (groupSignalled) {
    return;
  }
  try {
    child.kill(signal);
  } catch {
    // The process may already be gone.
  }
}
194
+
195
/**
 * JSON Schema (serialized) passed to the Claude CLI via --json-schema.
 * It requires exactly four top-level fields and rejects any others.
 */
const CLAUDE_AUDIT_OUTPUT_SCHEMA = (() => {
  // [field name, JSON type] in the order they appear in the schema.
  const topLevelFields = [
    ["chunk_id", "string"],
    ["auditor", "object"],
    ["coverage", "object"],
    ["findings", "array"],
  ];
  const schema = {
    type: "object",
    properties: Object.fromEntries(topLevelFields.map(([field, type]) => [field, { type }])),
    required: topLevelFields.map(([field]) => field),
    additionalProperties: false,
  };
  return JSON.stringify(schema);
})();
206
+
207
/**
 * Runs the Claude CLI once in print mode, feeds it the prompt on stdin and captures its output.
 *
 * The CLI is started with JSON output, the read-only tool allow-list, an explicit
 * disallow-list (Bash, Edit, Write, NotebookEdit), no session persistence, the project
 * root as an added directory and the audit output JSON schema. Off Windows the child is
 * detached so that a timeout can signal its whole process group.
 *
 * When the child closes, the normalized stdout is written to `rawOutputPath` (best effort).
 * The returned promise never rejects; failures are reported through the result object.
 *
 * @param {object} args
 * @param {string} args.projectRoot - Working directory and --add-dir value.
 * @param {string} args.claudeBin - Executable to spawn.
 * @param {string} args.rawOutputPath - File that receives the normalized stdout.
 * @param {string} args.prompt - Prompt written to the child's stdin.
 * @param {number} [args.timeoutMs] - Positive integer timeout; anything else falls back to
 *   DEFAULT_CLAUDE_TIMEOUT_MS.
 * @returns {Promise<{ok: boolean, exitCode: number|null, signal?: string|null, timedOut: boolean,
 *   timeoutMs: number, stdout: string, stderr: string}>} `ok` is true only for exit code 0
 *   without a timeout.
 */
function runClaudeExec({ projectRoot, claudeBin, rawOutputPath, prompt, timeoutMs }) {
  return new Promise((resolve) => {
    const boundedTimeoutMs = Number.isInteger(timeoutMs) && timeoutMs > 0 ? timeoutMs : DEFAULT_CLAUDE_TIMEOUT_MS;
    let timedOut = false;
    let settled = false;
    let timeoutTimer = null;
    let killTimer = null;
    let stdout = "";
    let stderr = "";

    // Marks the run as finished exactly once and stops both timers.
    const claimSettlement = () => {
      if (settled) {
        return false;
      }
      settled = true;
      clearTimeout(timeoutTimer);
      clearTimeout(killTimer);
      return true;
    };

    let child;
    try {
      child = spawn(claudeBin, [
        "-p",
        "--output-format", "json",
        "--permission-mode", "bypassPermissions",
        "--allowedTools", CLAUDE_READONLY_ALLOWED_TOOLS.join(","),
        "--disallowedTools", "Bash,Edit,Write,NotebookEdit",
        "--no-session-persistence",
        "--add-dir", projectRoot,
        "--json-schema", CLAUDE_AUDIT_OUTPUT_SCHEMA,
      ], {
        cwd: projectRoot,
        stdio: ["pipe", "pipe", "pipe"],
        detached: process.platform !== "win32",
      });
    } catch (error) {
      // spawn() throws synchronously on invalid arguments; report it like any other
      // launch failure instead of rejecting a promise that callers never catch.
      resolve({ ok: false, exitCode: 1, timedOut, timeoutMs: boundedTimeoutMs, stdout, stderr: `${error.message}` });
      return;
    }

    timeoutTimer = setTimeout(() => {
      timedOut = true;
      terminateProcess(child, "SIGTERM");
      // Escalate if the child ignores SIGTERM.
      killTimer = setTimeout(() => terminateProcess(child, "SIGKILL"), CLAUDE_TIMEOUT_KILL_GRACE_MS);
    }, boundedTimeoutMs);

    // Decode at the stream level so multi-byte UTF-8 characters split across chunk
    // boundaries are reassembled instead of being corrupted by per-chunk toString().
    child.stdout.setEncoding("utf8");
    child.stderr.setEncoding("utf8");
    child.stdout.on("data", (chunk) => {
      stdout += chunk;
    });
    child.stderr.on("data", (chunk) => {
      stderr += chunk;
    });

    // A child that exits before reading the prompt makes the stdin write fail (EPIPE).
    // Without a listener that would surface as an unhandled 'error' event; the run's
    // outcome is still reported by the 'error'/'close' handlers below.
    child.stdin.on("error", (error) => {
      stderr += `\nclaude stdin write failed: ${error.message}`;
    });

    child.on("error", (error) => {
      if (!claimSettlement()) {
        return;
      }
      resolve({ ok: false, exitCode: 1, timedOut, timeoutMs: boundedTimeoutMs, stdout, stderr: `${stderr}${error.message}` });
    });

    child.on("close", async (exitCode, signal) => {
      if (!claimSettlement()) {
        return;
      }
      try {
        await writeFile(rawOutputPath, normalizeClaudeRawOutput(stdout));
      } catch {
        // best effort; downstream extraction will report missing file.
      }
      resolve({ ok: exitCode === 0 && !timedOut, exitCode, signal, timedOut, timeoutMs: boundedTimeoutMs, stdout, stderr });
    });

    child.stdin.end(prompt);
  });
}
272
+
273
/**
 * Moves a chunk into the "dispatched" state for a Claude audit and writes its auditor packet.
 *
 * Runs under the sweep mutation lock. Refuses (via `inputError`) when the plan or chunk
 * cannot be loaded, when the chunk is skipped, or when `budgetBlockForChunk` reports a block
 * for a chunk that is not frozen. On success it writes the packet, the updated chunk and the
 * updated plan, then appends a "chunk_claude_audit_prepared" run event.
 *
 * @param {string} projectRoot - Project root the sweep artifacts live under.
 * @param {object} options - Reads `sweepId`, `chunkId`, `dispatchedAt` and optional `auditor`.
 * @returns {Promise<object>} `{ ok: true, chunk, packet, packetRef, chunkRef, runLedgerRef }`
 *   on success, otherwise the `inputError` result.
 */
async function prepareClaudeAuditPacket(projectRoot, options) {
  const { sweepId, chunkId, dispatchedAt } = options;
  const EXECUTION_OWNER = "nimicoding_claude_auditor_path";

  const prepareUnderLock = async () => {
    const planResult = await loadPlan(projectRoot, sweepId);
    if (!planResult.ok) {
      return inputError(planResult.error);
    }
    const chunkResult = await loadChunk(projectRoot, sweepId, chunkId);
    if (!chunkResult.ok) {
      return inputError(chunkResult.error);
    }
    const { plan } = planResult;
    const { chunk } = chunkResult;

    if (chunk.state === "skipped") {
      return inputError("nimicoding sweep audit refused: skipped chunks cannot be audited through Claude.\n");
    }
    const budgetBlock = budgetBlockForChunk(plan, chunk);
    if (budgetBlock && chunk.state !== "frozen") {
      return inputError(`nimicoding sweep audit refused: ${budgetBlock}; build or admit remediation bundles before continuing discovery.\n`);
    }

    const auditor = options.auditor ?? CLAUDE_AUDITOR_DEFAULT;
    const dispatch = {
      auditor,
      criteria: chunk.criteria,
      files: chunk.files,
      authority_refs: chunk.authority_refs ?? chunk.files,
      host_authority_projection_refs: chunk.host_authority_projection_refs ?? [],
      evidence_roots: chunk.evidence_roots ?? [],
      admitted_evidence_roots: chunk.admitted_evidence_roots ?? [],
      evidence_inventory: chunk.evidence_inventory ?? [],
      evidence_inventory_status: chunk.evidence_inventory_status ?? null,
      evidence_inventory_empty_reason: chunk.evidence_inventory_empty_reason ?? null,
      execution_owner: EXECUTION_OWNER,
    };

    const packet = buildAuditorPacket(sweepId, chunk, auditor, dispatchedAt, plan, { projectRoot });
    Object.assign(packet, {
      execution_owner: EXECUTION_OWNER,
      raw_output_contract: {
        raw_output_is_transcript_ref: true,
        raw_output_must_be_exact_json: true,
        schema_drift_rejected_fail_closed: true,
        scripts_may_only_extract_schema_conformant_evidence: true,
      },
    });

    // A fresh dispatch clears every result of an earlier attempt.
    const resetLifecycle = {
      ...chunk.lifecycle,
      dispatched_at: dispatchedAt,
      ingested_at: null,
      reviewed_at: null,
      frozen_at: null,
      failed_at: null,
      skipped_at: null,
    };
    const updatedChunk = {
      ...chunk,
      state: "dispatched",
      lifecycle: resetLifecycle,
      dispatch,
      evidence_ref: null,
      finding_count: 0,
      audit_validity: null,
      review: null,
      failure: null,
      updated_at: dispatchedAt,
    };
    const planChunkPatch = {
      state: "dispatched",
      evidence_ref: null,
      finding_count: 0,
      audit_validity: null,
      failure: null,
    };
    const updatedPlan = {
      ...updatePlanChunk(plan, chunkId, planChunkPatch),
      updated_at: dispatchedAt,
    };

    const auditorPacketRef = packetRef(sweepId, chunkId);
    await writeYamlRef(projectRoot, auditorPacketRef, packet);
    await writeYamlRef(projectRoot, chunkResult.chunkRef, updatedChunk);
    await writeYamlRef(projectRoot, planResult.planRef, updatedPlan);

    const runLedgerRef = await appendRunEvent(projectRoot, sweepId, {
      event_type: "chunk_claude_audit_prepared",
      chunk_id: chunkId,
      chunk_ref: chunkRef(sweepId, chunkId),
      packet_ref: auditorPacketRef,
      auditor,
    });

    return {
      ok: true,
      chunk: updatedChunk,
      packet,
      packetRef: auditorPacketRef,
      chunkRef: chunkResult.chunkRef,
      runLedgerRef,
    };
  };

  return withAuditSweepMutationLock(projectRoot, sweepId, "chunk claude audit prepare", prepareUnderLock);
}
364
+
365
/**
 * Records a failed Claude audit on the chunk, the plan and the run ledger.
 *
 * Runs under the sweep mutation lock. The chunk and its plan entry are set to "failed"
 * with a failure record, and a "chunk_failed" run event is appended.
 *
 * @param {string} projectRoot - Project root the sweep artifacts live under.
 * @param {object} options - Reads `sweepId`, `chunkId`, `reason`, `failedAt`, `packetRef`,
 *   `transcriptRef` and `phase`.
 * @returns {Promise<object>} `{ ok: true, state: "failed", chunkRef, runLedgerRef }`, or the
 *   `inputError` result when the plan or chunk cannot be loaded.
 */
async function markClaudeAuditFailed(projectRoot, options) {
  const {
    sweepId,
    chunkId,
    reason,
    failedAt,
    phase,
    transcriptRef,
    packetRef: failedPacketRef, // renamed locally to keep the imported packetRef() unshadowed
  } = options;

  const recordFailure = async () => {
    const planResult = await loadPlan(projectRoot, sweepId);
    if (!planResult.ok) {
      return inputError(planResult.error);
    }
    const chunkResult = await loadChunk(projectRoot, sweepId, chunkId);
    if (!chunkResult.ok) {
      return inputError(chunkResult.error);
    }
    const { chunk, chunkRef: failedChunkRef } = chunkResult;

    const failure = {
      reason,
      failed_at: failedAt,
      packet_ref: failedPacketRef,
      transcript_ref: transcriptRef,
      phase,
    };
    const failedChunk = {
      ...chunk,
      state: "failed",
      lifecycle: { ...chunk.lifecycle, failed_at: failedAt, skipped_at: null },
      failure,
      updated_at: failedAt,
    };
    const failedPlan = {
      ...updatePlanChunk(planResult.plan, chunkId, { state: "failed", failure }),
      updated_at: failedAt,
    };

    await writeYamlRef(projectRoot, failedChunkRef, failedChunk);
    await writeYamlRef(projectRoot, planResult.planRef, failedPlan);

    const runLedgerRef = await appendRunEvent(projectRoot, sweepId, {
      event_type: "chunk_failed",
      chunk_id: chunkId,
      chunk_ref: failedChunkRef,
      packet_ref: failedPacketRef,
      transcript_ref: transcriptRef,
      summary: reason,
      phase,
    });

    return { ok: true, state: "failed", chunkRef: failedChunkRef, runLedgerRef };
  };

  return withAuditSweepMutationLock(projectRoot, sweepId, "chunk claude audit fail", recordFailure);
}
418
+
419
/**
 * Runs the full Claude audit path for one sweep chunk.
 *
 * Steps, in order:
 *   1. validate the ids and the three timestamps;
 *   2. prepare the chunk and auditor packet (chunk becomes "dispatched");
 *   3. obtain raw output, either by replaying `options.fromRawOutput` or by running the CLI;
 *   4. extract an evidence candidate from the raw output;
 *   5. ingest the evidence, then review it with verdict "pass";
 *   6. validate sweep artifacts with scope "chunks" and fail on failures tied to this chunk.
 *
 * Any failure after step 2 marks the chunk failed (with the phase that failed) and returns
 * an `inputError` result.
 *
 * @param {string} projectRoot - Project root the sweep artifacts live under.
 * @param {object} options - Reads `sweepId`, `chunkId`, `dispatchedAt`, `verifiedAt`,
 *   `reviewedAt`, and optionally `auditor`, `fromRawOutput`, `claudeBin`, `timeoutMs`,
 *   `reviewer`, `summary`.
 * @returns {Promise<object>} On success `{ ok: true, exitCode: 0, state: "frozen", ... }` with
 *   the packet, transcript, evidence and review refs; otherwise the refusal result.
 */
export async function runClaudeAuditSweepChunk(projectRoot, options) {
  const sweepId = safeSweepId(options.sweepId);
  if (!sweepId || typeof options.chunkId !== "string") {
    return inputError("nimicoding sweep audit refused: --sweep-id and --chunk-id are required.\n");
  }
  const { chunkId } = options;

  const timestampFlags = [
    [options.dispatchedAt, "--dispatched-at"],
    [options.verifiedAt, "--verified-at"],
    [options.reviewedAt, "--reviewed-at"],
  ];
  for (const [value, flag] of timestampFlags) {
    const timestampError = ensureIsoTimestamp(value, flag);
    if (timestampError) {
      return timestampError;
    }
  }

  const prepare = await prepareClaudeAuditPacket(projectRoot, { ...options, sweepId });
  if (!prepare.ok) {
    return prepare;
  }

  const auditorId = options.auditor ?? CLAUDE_AUDITOR_DEFAULT;
  // Output files are keyed by the dispatch timestamp so reruns do not overwrite each other.
  const outputSuffix = `.${claudeRunToken(options.dispatchedAt)}`;
  const rawRef = claudeOutputRef(sweepId, chunkId, `${outputSuffix}${CLAUDE_RAW_SUFFIX}`);
  const evidenceCandidateRef = claudeOutputRef(sweepId, chunkId, `${outputSuffix}${CLAUDE_EVIDENCE_SUFFIX}`);
  const rawOutputPath = artifactPath(projectRoot, rawRef);
  let sessionRef = `claude-exec:${sweepId}:${chunkId}:${options.dispatchedAt}`;

  // Ref fields shared by every run event emitted below (kept in this key order).
  const eventRefs = {
    chunk_id: chunkId,
    chunk_ref: prepare.chunkRef,
    packet_ref: prepare.packetRef,
    transcript_ref: rawRef,
  };
  // Marks the chunk failed for the given phase; the refs are identical for every phase.
  const recordFailure = (phase, failedAt, reason) => markClaudeAuditFailed(projectRoot, {
    sweepId,
    chunkId,
    failedAt,
    packetRef: prepare.packetRef,
    transcriptRef: rawRef,
    phase,
    reason,
  });

  if (options.fromRawOutput) {
    const replaySource = resolveInsideProject(projectRoot, options.fromRawOutput, "--from-raw-output");
    if (!replaySource.ok) {
      await recordFailure("raw_output_replay", options.verifiedAt, replaySource.error.trim());
      return inputError(replaySource.error);
    }
    try {
      const replayText = await readFile(replaySource.absolutePath, "utf8");
      await mkdir(path.dirname(rawOutputPath), { recursive: true });
      await writeFile(rawOutputPath, normalizeClaudeRawOutput(replayText));
      sessionRef = `claude-replay:${sweepId}:${chunkId}:${options.dispatchedAt}:${projectRefForPath(projectRoot, replaySource.absolutePath)}`;
    } catch (error) {
      const reason = `Claude replay raw output could not be read or normalized: ${error.message}`;
      await recordFailure("raw_output_replay", options.verifiedAt, reason);
      return inputError(`nimicoding sweep audit refused: ${reason}\n`);
    }
  } else {
    await mkdir(path.dirname(rawOutputPath), { recursive: true });
    const runResult = await runClaudeExec({
      projectRoot,
      claudeBin: options.claudeBin ?? "claude",
      rawOutputPath,
      prompt: claudePrompt({
        packet: prepare.packet,
        auditorPacketRef: prepare.packetRef,
        rawRef,
        sessionRef,
      }),
      timeoutMs: options.timeoutMs,
    });
    if (!runResult.ok) {
      const failureReason = runResult.timedOut
        ? `Claude auditor execution timed out after ${runResult.timeoutMs}ms.`
        : `Claude auditor execution failed with exit code ${runResult.exitCode ?? "unknown"}.`;
      await recordFailure("claude_execution", options.verifiedAt, failureReason);
      await appendRunEvent(projectRoot, sweepId, {
        event_type: "chunk_claude_audit_failed",
        ...eventRefs,
        exit_code: runResult.exitCode,
        timed_out: runResult.timedOut,
        timeout_ms: runResult.timeoutMs,
        stderr_tail: runResult.stderr.slice(-2000),
      });
      return inputError(`nimicoding sweep audit refused: ${failureReason}\n`);
    }
  }

  const extracted = await extractCodexAuditorEvidenceFile(projectRoot, {
    rawOutputPath,
    evidenceRef: evidenceCandidateRef,
    chunk: prepare.chunk,
    packetRef: prepare.packetRef,
    sessionRef,
    transcriptRef: rawRef,
    auditorId,
    auditorMode: "claude_semantic_audit",
  });
  if (!extracted.ok) {
    await recordFailure(
      "auditor_output_validation",
      options.verifiedAt,
      `Claude auditor output rejected: ${extracted.error}.`,
    );
    await appendRunEvent(projectRoot, sweepId, {
      event_type: "chunk_claude_auditor_output_rejected",
      ...eventRefs,
      reason: extracted.error,
    });
    return inputError(`nimicoding sweep audit refused: Claude auditor output rejected for ${chunkId}: ${extracted.error}.\n`);
  }

  await appendRunEvent(projectRoot, sweepId, {
    event_type: "chunk_claude_auditor_output_accepted",
    ...eventRefs,
    evidence_candidate_ref: evidenceCandidateRef,
    audit_validity: extracted.auditValidity,
  });

  const ingest = await ingestAuditSweepChunk(projectRoot, {
    sweepId,
    chunkId,
    fromPath: evidenceCandidateRef,
    verifiedAt: options.verifiedAt,
  });
  if (!ingest.ok) {
    const ingestError = ingest.error ?? "unknown ingest failure";
    await recordFailure(
      "chunk_ingest",
      options.verifiedAt,
      `Claude auditor evidence ingest rejected: ${ingestError}.`,
    );
    return inputError(`nimicoding sweep audit refused: Claude auditor evidence ingest rejected for ${chunkId}: ${ingestError}.\n`);
  }

  const review = await reviewAuditSweepChunk(projectRoot, {
    sweepId,
    chunkId,
    verdict: "pass",
    reviewedAt: options.reviewedAt,
    reviewer: options.reviewer ?? "nimicoding_claude_auditor_path",
    summary: options.summary ?? `Claude semantic audit accepted from ${rawRef}.`,
  });
  if (!review.ok) {
    const reviewError = review.error ?? "unknown review failure";
    await recordFailure(
      "chunk_review",
      options.reviewedAt,
      `Claude auditor evidence review rejected: ${reviewError}.`,
    );
    return inputError(`nimicoding sweep audit refused: Claude auditor evidence review rejected for ${chunkId}: ${reviewError}.\n`);
  }

  const validation = await validateAuditSweepArtifacts(projectRoot, {
    sweepId,
    scope: "chunks",
  });
  // Only failures whose id mentions this chunk count; other chunks' failures are ignored.
  // NOTE(review): this is a substring match, so a chunk id that is a prefix of another
  // chunk id would also pick up that chunk's failures — confirm against the check id format.
  // NOTE(review): a validation result without a `checks` array is treated as "no failures";
  // confirm the validator cannot fail that way.
  const chunkScopedFailures = (validation.checks ?? []).filter(
    (entry) => !entry.ok && (entry.id ?? "").includes(chunkId),
  );
  if (chunkScopedFailures.length > 0) {
    const failureSummary = chunkScopedFailures.map((entry) => `${entry.id}: ${entry.reason}`).join("; ");
    await recordFailure(
      "post_chunk_validation",
      options.reviewedAt,
      `Post-Claude chunk validation failed: ${failureSummary}`,
    );
    return inputError(`nimicoding sweep audit refused: post-Claude chunk validation failed for ${chunkId}: ${failureSummary}.\n`);
  }

  return {
    ok: true,
    exitCode: 0,
    sweepId,
    chunkId,
    state: "frozen",
    packetRef: prepare.packetRef,
    transcriptRef: rawRef,
    extractedEvidenceRef: evidenceCandidateRef,
    evidenceRef: ingest.evidenceRef,
    findingsRef: ingest.findingsRef,
    findingCount: ingest.findingCount,
    addedCount: ingest.addedCount,
    duplicateCount: ingest.duplicateCount,
    reviewRef: review.runLedgerRef,
    validationScope: "chunks",
  };
}