auditor-lambda 0.3.3 → 0.3.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. package/README.md +6 -1
  2. package/audit-code-wrapper-lib.mjs +87 -7
  3. package/dist/cli.js +517 -91
  4. package/dist/extractors/graph.d.ts +5 -1
  5. package/dist/extractors/graph.js +223 -3
  6. package/dist/extractors/pathPatterns.d.ts +3 -2
  7. package/dist/extractors/pathPatterns.js +97 -24
  8. package/dist/io/artifacts.d.ts +5 -0
  9. package/dist/io/artifacts.js +2 -0
  10. package/dist/orchestrator/advance.js +1 -1
  11. package/dist/orchestrator/dependencyMap.js +18 -0
  12. package/dist/orchestrator/fileAnchors.d.ts +32 -0
  13. package/dist/orchestrator/fileAnchors.js +217 -0
  14. package/dist/orchestrator/internalExecutors.d.ts +1 -1
  15. package/dist/orchestrator/internalExecutors.js +120 -33
  16. package/dist/orchestrator/reviewPackets.d.ts +14 -0
  17. package/dist/orchestrator/reviewPackets.js +310 -0
  18. package/dist/orchestrator/selectiveDeepening.d.ts +14 -0
  19. package/dist/orchestrator/selectiveDeepening.js +392 -0
  20. package/dist/orchestrator/state.js +6 -1
  21. package/dist/orchestrator/taskBuilder.d.ts +16 -0
  22. package/dist/orchestrator/taskBuilder.js +68 -11
  23. package/dist/prompts/renderWorkerPrompt.js +2 -1
  24. package/dist/providers/claudeCodeProvider.js +3 -1
  25. package/dist/providers/index.js +2 -1
  26. package/dist/supervisor/operatorHandoff.js +22 -11
  27. package/dist/types/graph.d.ts +1 -0
  28. package/dist/types/reviewPlanning.d.ts +41 -0
  29. package/dist/types/reviewPlanning.js +1 -0
  30. package/dist/types/sessionConfig.d.ts +1 -0
  31. package/dist/validation/artifacts.js +13 -0
  32. package/dist/validation/auditResults.js +50 -2
  33. package/dist/validation/sessionConfig.js +5 -0
  34. package/docs/agent-integrations.md +4 -1
  35. package/docs/bootstrap-install.md +3 -0
  36. package/docs/contract.md +3 -0
  37. package/docs/dispatch-implementation-plan.md +220 -489
  38. package/docs/next-steps.md +13 -8
  39. package/docs/product-direction.md +5 -3
  40. package/docs/run-flow.md +25 -30
  41. package/docs/session-config.md +15 -4
  42. package/docs/supervisor.md +5 -3
  43. package/docs/workflow-refactor-brief.md +114 -176
  44. package/package.json +1 -1
  45. package/schemas/finding.schema.json +1 -15
  46. package/schemas/graph_bundle.schema.json +16 -0
  47. package/skills/audit-code/audit-code.prompt.md +11 -6
package/dist/cli.js CHANGED
@@ -1,6 +1,8 @@
1
1
  import { access, mkdir, readFile, readdir, rename, writeFile } from "node:fs/promises";
2
2
  import { createReadStream } from "node:fs";
3
- import { basename, dirname, join, resolve } from "node:path";
3
+ import { Buffer } from "node:buffer";
4
+ import { createHash } from "node:crypto";
5
+ import { basename, dirname, isAbsolute, join, relative, resolve } from "node:path";
4
6
  import { fileURLToPath } from "node:url";
5
7
  import { buildRepoManifest } from "./extractors/fileInventory.js";
6
8
  import { buildFileDisposition } from "./extractors/disposition.js";
@@ -11,7 +13,7 @@ import { buildFlowCoverage } from "./orchestrator/flowCoverage.js";
11
13
  import { buildRuntimeValidationTasks, } from "./orchestrator/runtimeValidation.js";
12
14
  import { initializeCoverageFromPlan } from "./orchestrator/planning.js";
13
15
  import { loadArtifactBundle, writeCoreArtifacts, promoteFinalAuditReport, } from "./io/artifacts.js";
14
- import { readJsonFile, writeJsonFile } from "./io/json.js";
16
+ import { isFileMissingError, readJsonFile, writeJsonFile } from "./io/json.js";
15
17
  import { validateArtifactBundle } from "./validation/artifacts.js";
16
18
  import { validateAuditResults, formatAuditResultIssues, } from "./validation/auditResults.js";
17
19
  import { prefixValidationIssues } from "./validation/basic.js";
@@ -26,16 +28,19 @@ import { buildAuditCodeHandoff, writeAuditCodeHandoffArtifacts, } from "./superv
26
28
  import { getSessionConfigPath, loadSessionConfig, readSessionConfigFile, } from "./supervisor/sessionConfig.js";
27
29
  import { clearDispatchFiles, buildRunId, ensureSupervisorDirs, getRunPaths, writeDispatchBatchFiles, writeWorkerTaskFiles, } from "./io/runArtifacts.js";
28
30
  import { renderWorkerPrompt } from "./prompts/renderWorkerPrompt.js";
31
+ import { buildReviewPackets, orderTasksForPacketReview, } from "./orchestrator/reviewPackets.js";
32
+ import { buildFileAnchorSummary, } from "./orchestrator/fileAnchors.js";
29
33
  import { LOCAL_SUBPROCESS_PROVIDER_NAME } from "./providers/constants.js";
30
34
  import { runAuditCodeMcpServer } from "./mcp/server.js";
31
35
  const packageRoot = resolve(dirname(fileURLToPath(import.meta.url)), "..");
32
36
  const ADVANCE_AUDIT_CONTRACT_VERSION = "audit-code/v1alpha1";
33
37
  const WORKER_RESULT_CONTRACT_VERSION = "audit-code-worker-result/v1alpha1";
38
+ const LARGE_FILE_PACKET_TARGET_LINES = 2500;
34
39
  const DIRECT_CLI_DEFAULTS = {
35
40
  rootDir: ".",
36
41
  artifactsDir: ".artifacts",
37
42
  maxRuns: 1000,
38
- agentBatchSize: 1,
43
+ agentBatchSize: 6,
39
44
  parallelWorkers: 1,
40
45
  timeoutMs: 30 * 60 * 1000, // 30 minutes
41
46
  uiMode: "headless",
@@ -64,6 +69,45 @@ function getFlag(argv, name, fallback) {
64
69
/**
 * Report whether a bare flag token is present on the command line.
 * @param {string[]} argv - Raw argument vector.
 * @param {string} name - Flag token to look for, e.g. "--verbose".
 * @returns {boolean} True when some argument equals the flag exactly.
 */
function hasFlag(argv, name) {
    const present = argv.includes(name);
    return present;
}
72
/**
 * Encode text as base64url so it can travel safely as a single CLI argument.
 * @param {string} value - Text to encode; interpreted as UTF-8.
 * @returns {string} The base64url form of the text.
 */
function toBase64Url(value) {
    const bytes = Buffer.from(value, "utf8");
    return bytes.toString("base64url");
}
75
/**
 * Decode a base64url token back into the UTF-8 text it was built from.
 * @param {string} value - base64url-encoded token.
 * @returns {string} The decoded text.
 */
function fromBase64Url(value) {
    const bytes = Buffer.from(value, "base64url");
    return bytes.toString("utf8");
}
78
/**
 * Derive a short, stable identifier suffix from a value.
 * @param {string} value - Input to hash.
 * @returns {string} The first 12 hex characters of the SHA-256 digest.
 */
function digestId(value) {
    const hash = createHash("sha256");
    hash.update(value);
    const hex = hash.digest("hex");
    return hex.slice(0, 12);
}
81
/**
 * Turn an arbitrary identifier into a filesystem-friendly file-name stem.
 *
 * Runs of characters outside [a-zA-Z0-9_-] collapse to one underscore,
 * leading/trailing underscores are stripped, and the result is capped at
 * 80 characters.
 * @param {string} value - Identifier to sanitize.
 * @returns {string} The sanitized stem, or "artifact" when nothing survives.
 */
function safeArtifactStem(value) {
    const collapsed = value.replace(/[^a-zA-Z0-9_-]+/g, "_");
    const trimmed = collapsed.replace(/^_+|_+$/g, "");
    const stem = trimmed.slice(0, 80);
    if (stem.length > 0) {
        return stem;
    }
    return "artifact";
}
88
/**
 * Build an artifact file name from an identifier: a readable sanitized stem,
 * a digest of the full identifier, and the requested extension.
 * @param {string} value - Identifier the artifact belongs to.
 * @param {string} extension - Extension without the leading dot.
 * @returns {string} File name of the form "<stem>_<digest>.<extension>".
 */
function artifactNameForId(value, extension) {
    const stem = safeArtifactStem(value);
    const digest = digestId(value);
    return `${stem}_${digest}.${extension}`;
}
91
/**
 * Locate the JSON result file assigned to a task.
 * @param {string} taskResultsDir - Directory holding per-task results.
 * @param {string} taskId - Task identifier.
 * @returns {string} Path of the task's ".json" result file.
 */
function taskResultPath(taskResultsDir, taskId) {
    const fileName = artifactNameForId(taskId, "json");
    return join(taskResultsDir, fileName);
}
94
/**
 * Locate the prompt file written for a review packet.
 * @param {string} taskResultsDir - Directory holding per-task results.
 * @param {string} packetId - Packet identifier.
 * @returns {string} Path of the packet's ".prompt.md" file.
 */
function packetPromptPath(taskResultsDir, packetId) {
    const fileName = artifactNameForId(packetId, "prompt.md");
    return join(taskResultsDir, fileName);
}
97
/**
 * Read everything piped to stdin as UTF-8 text.
 * @returns {Promise<string>} The piped text, or "" when stdin is a TTY
 *   (nothing was piped in).
 */
async function readStdinText() {
    const { stdin } = process;
    if (stdin.isTTY) {
        return "";
    }
    return await new Promise((resolveInput, reject) => {
        const pieces = [];
        stdin.setEncoding("utf8");
        stdin.on("data", (chunk) => {
            pieces.push(chunk);
        });
        stdin.on("end", () => resolveInput(pieces.join("")));
        stdin.on("error", reject);
    });
}
67
111
/**
 * Read a path-valued flag and resolve it to an absolute path.
 * @param {string[]} argv - Raw argument vector.
 * @param {string} name - Flag name to look up.
 * @param {string} fallback - Value handed to getFlag as its fallback.
 * @returns {string} Absolute form of the flag value.
 */
function resolveFlagPath(argv, name, fallback) {
    const flagValue = getFlag(argv, name, fallback);
    return resolve(flagValue);
}
@@ -287,7 +331,12 @@ async function detectProjectRoot(root) {
287
331
  }
288
332
  function buildPendingAuditTasks(bundle) {
289
333
  const completedTaskIds = new Set((bundle.audit_results ?? []).map((result) => result.task_id));
290
- return (bundle.audit_tasks ?? []).filter((task) => task.status !== "complete" && !completedTaskIds.has(task.task_id));
334
+ const pendingTasks = (bundle.audit_tasks ?? []).filter((task) => task.status !== "complete" && !completedTaskIds.has(task.task_id));
335
+ const lineIndex = Object.fromEntries(pendingTasks.flatMap((task) => Object.entries(task.file_line_counts ?? {})));
336
+ return orderTasksForPacketReview(pendingTasks, {
337
+ graphBundle: bundle.graph_bundle,
338
+ lineIndex,
339
+ });
291
340
  }
292
341
  async function addFileLineCountHints(root, tasks) {
293
342
  const lineIndex = await buildLineIndexForPaths(root, tasks.flatMap((task) => task.file_paths));
@@ -1398,6 +1447,60 @@ async function cmdWorkerRun(argv) {
1398
1447
  process.exitCode = 1;
1399
1448
  }
1400
1449
  }
1450
// File name of the per-run map from dispatched tasks to their result paths.
const DISPATCH_RESULT_MAP_FILENAME = "dispatch-result-map.json";
/**
 * Locate the dispatch result map inside a run directory.
 * @param {string} runDir - Directory of a single run.
 * @returns {string} Path of the run's dispatch result map file.
 */
function dispatchResultMapPath(runDir) {
    const mapPath = join(runDir, DISPATCH_RESULT_MAP_FILENAME);
    return mapPath;
}
1454
/**
 * Resolve an argument that may be passed raw or base64url-encoded.
 * The raw flag wins when both are present.
 * @param {string[]} argv - Raw argument vector.
 * @param {string} rawFlag - Name of the plain-text flag.
 * @param {string} b64Flag - Name of the base64url-encoded flag.
 * @returns {string|undefined} The resolved value, or undefined when neither
 *   flag yields one.
 */
function resolveRunScopedArg(argv, rawFlag, b64Flag) {
    const plain = getFlag(argv, rawFlag);
    const encoded = getFlag(argv, b64Flag);
    if (plain !== null && plain !== undefined) {
        return plain;
    }
    if (!encoded) {
        return undefined;
    }
    return fromBase64Url(encoded);
}
1459
/**
 * Load the dispatch result map for a run.
 * @param {string} runDir - Directory of a single run.
 * @returns {Promise<object|null>} The parsed map, or null when the file is
 *   missing. Any other read/parse failure is rethrown.
 */
async function loadDispatchResultMap(runDir) {
    try {
        const resultMap = await readJsonFile(dispatchResultMapPath(runDir));
        return resultMap;
    }
    catch (error) {
        // A missing file is an expected state; everything else is a real failure.
        if (isFileMissingError(error)) {
            return null;
        }
        throw error;
    }
}
1470
/**
 * Index result-map entries by task id.
 * When several entries share a task_id the last one wins, so callers can
 * compare the map size with the entry count to detect duplicates.
 * @param {Array<{task_id: string}>} entries - Result-map entries.
 * @returns {Map<string, object>} Entries keyed by task_id.
 */
function entriesByTaskId(entries) {
    return entries.reduce((index, entry) => index.set(entry.task_id, entry), new Map());
}
1473
/**
 * Decide whether a packet is a single file that exceeds the large-file
 * line target.
 * @param {{file_paths: string[], total_lines: number}} packet - Review packet.
 * @returns {boolean} True when the packet has exactly one file and more than
 *   LARGE_FILE_PACKET_TARGET_LINES total lines.
 */
function isIsolatedLargeFilePacket(packet) {
    if (packet.file_paths.length !== 1) {
        return false;
    }
    return packet.total_lines > LARGE_FILE_PACKET_TARGET_LINES;
}
1477
/**
 * Resolve a path against the repository root and refuse anything that lands
 * outside of it.
 *
 * @param {string} root - Repository root directory.
 * @param {string} path - Path to resolve, normally repo-relative.
 * @returns {string} Absolute path located inside the root.
 * @throws {Error} When the resolved path escapes the root.
 */
function withinRoot(root, path) {
    const rootPath = resolve(root);
    const absolutePath = resolve(rootPath, path);
    const relativePath = relative(rootPath, absolutePath);
    // "../" or "..\" depending on platform, derived from the imported join
    // so no extra path import is needed.
    const parentPrefix = join("..", "x").slice(0, -1);
    // Only a leading ".." *segment* leaves the root. A plain startsWith("..")
    // would also reject legitimate in-root names such as "..hidden/file.js".
    const climbsOut = relativePath === ".." || relativePath.startsWith(parentPrefix);
    // relative() yields an absolute path when no relative route exists
    // (e.g. a different drive on Windows); that is outside the root too.
    if (climbsOut || isAbsolute(relativePath)) {
        throw new Error(`Path '${path}' escapes repository root '${rootPath}'.`);
    }
    return absolutePath;
}
1486
/**
 * Render the "Large File Review Mode" prompt section for a packet that has a
 * mechanical anchor summary.
 * @param {object} summary - Anchor summary; reads path, anchors, counts and
 *   omitted_anchor_count.
 * @param {string} anchorPath - Path of the written anchor file.
 * @returns {string[]} Prompt lines, ending with an empty line. At most the
 *   first 24 anchors are previewed.
 */
function renderAnchorPreview(summary, anchorPath) {
    const previewLines = [];
    for (const anchor of summary.anchors.slice(0, 24)) {
        let location = summary.path;
        if (anchor.line) {
            location = `${summary.path}:${anchor.line}`;
        }
        const suffix = anchor.detail ? ` - ${anchor.detail}` : "";
        previewLines.push(`- ${location} [${anchor.kind}] ${anchor.name}${suffix}`);
    }
    if (previewLines.length === 0) {
        previewLines.push("- no anchors extracted beyond file boundaries");
    }
    const { counts } = summary;
    const countsLine = `Anchor counts: symbols=${counts.symbols}, routes=${counts.routes}, keywords=${counts.keywords}, graph_edges=${counts.graph_edges}, analyzer_signals=${counts.analyzer_signals}, omitted=${summary.omitted_anchor_count}`;
    const lines = [
        "## Large File Review Mode",
        "This packet is intentionally isolated because it covers one large file.",
        "Use targeted reads/searches within this file, guided by the mechanical anchors.",
        "Do not read unrelated files unless a finding cannot be evidenced without a direct boundary check.",
        `Anchor file: ${anchorPath}`,
        countsLine,
        "Anchor preview:",
    ];
    lines.push(...previewLines, "");
    return lines;
}
1401
1504
  async function cmdPrepareDispatch(argv) {
1402
1505
  const runId = getFlag(argv, "--run-id");
1403
1506
  if (!runId)
@@ -1407,95 +1510,337 @@ async function cmdPrepareDispatch(argv) {
1407
1510
  const tasksPath = join(runDir, "pending-audit-tasks.json");
1408
1511
  const taskResultsDir = join(runDir, "task-results");
1409
1512
  const dispatchPlanPath = join(runDir, "dispatch-plan.json");
1513
+ const explicitRoot = getFlag(argv, "--root") ? getRootDir(argv) : undefined;
1514
+ let reviewRoot = explicitRoot;
1515
+ try {
1516
+ const workerTask = await readJsonFile(join(runDir, "task.json"));
1517
+ reviewRoot ??= workerTask.repo_root;
1518
+ }
1519
+ catch (error) {
1520
+ if (!isFileMissingError(error)) {
1521
+ throw error;
1522
+ }
1523
+ }
1410
1524
  const tasks = await readJsonFile(tasksPath);
1525
+ const bundle = await loadArtifactBundle(artifactsDir);
1411
1526
  const lensDefsPath = join(packageRoot, "dispatch", "lens-definitions.json");
1412
1527
  const lensDefs = await readJsonFile(lensDefsPath);
1413
1528
  await mkdir(taskResultsDir, { recursive: true });
1529
+ const lineIndex = Object.fromEntries(tasks.flatMap((task) => Object.entries(task.file_line_counts ?? {})));
1530
+ const orderedTasks = orderTasksForPacketReview(tasks, {
1531
+ graphBundle: bundle.graph_bundle,
1532
+ lineIndex,
1533
+ });
1534
+ const packets = buildReviewPackets(orderedTasks, {
1535
+ graphBundle: bundle.graph_bundle,
1536
+ lineIndex,
1537
+ });
1538
+ const tasksById = new Map(orderedTasks.map((task) => [task.task_id, task]));
1539
+ const resultPathByTaskId = new Map(orderedTasks.map((task) => [
1540
+ task.task_id,
1541
+ taskResultPath(taskResultsDir, task.task_id),
1542
+ ]));
1543
+ const resultPathSet = new Set(resultPathByTaskId.values());
1544
+ if (resultPathSet.size !== resultPathByTaskId.size) {
1545
+ throw new Error("prepare-dispatch generated duplicate result paths; task ids must be uniquely addressable.");
1546
+ }
1414
1547
  const plan = [];
1415
- let largestTask = null;
1548
+ const resultMapEntries = [];
1549
+ let largestPacketId = null;
1416
1550
  let largestLines = 0;
1417
- for (const task of tasks) {
1418
- const sanitized = task.task_id.replace(/[^a-zA-Z0-9_-]/g, "_");
1419
- const outputPath = join(taskResultsDir, `${sanitized}.json`);
1420
- const promptPath = join(taskResultsDir, `${sanitized}.prompt.md`);
1421
- const lensDef = lensDefs[task.lens];
1422
- if (!lensDef) {
1423
- process.stderr.write(`Warning: no lens definition for '${task.lens}' (task ${task.task_id})\n`);
1551
+ let largestEstimatedTokens = 0;
1552
+ const warnings = [];
1553
+ for (const packet of packets) {
1554
+ const promptPath = packetPromptPath(taskResultsDir, packet.packet_id);
1555
+ const packetTasks = packet.task_ids
1556
+ .map((taskId) => tasksById.get(taskId))
1557
+ .filter((task) => task !== undefined);
1558
+ if (packet.total_lines > largestLines) {
1559
+ largestLines = packet.total_lines;
1560
+ largestEstimatedTokens = packet.estimated_tokens;
1561
+ largestPacketId = packet.packet_id;
1424
1562
  }
1425
- const totalLines = Object.values(task.file_line_counts ?? {}).reduce((a, b) => a + b, 0);
1426
- if (totalLines > largestLines) {
1427
- largestLines = totalLines;
1428
- largestTask = task.task_id;
1563
+ const largeFileMode = isIsolatedLargeFilePacket(packet);
1564
+ if (packet.total_lines > LARGE_FILE_PACKET_TARGET_LINES && !largeFileMode) {
1565
+ warnings.push({
1566
+ code: "large_packet",
1567
+ message: `large packet ${packet.packet_id} (~${packet.total_lines} lines) may hit quota limits`,
1568
+ });
1429
1569
  }
1430
- if (totalLines > 1500) {
1431
- process.stderr.write(`Warning: large task ${task.task_id} (~${totalLines} lines) may hit quota limits\n`);
1570
+ for (const task of packetTasks) {
1571
+ if (!lensDefs[task.lens]) {
1572
+ warnings.push({
1573
+ code: "missing_lens_definition",
1574
+ message: `no lens definition for '${task.lens}' (task ${task.task_id})`,
1575
+ });
1576
+ }
1432
1577
  }
1433
- const fileList = task.file_paths.map(p => {
1434
- const lines = task.file_line_counts?.[p] ?? 0;
1435
- return `- ${p} (${lines} lines)`;
1578
+ const fileList = packet.file_paths.map((path) => {
1579
+ const lines = packet.file_line_counts[path] ?? 0;
1580
+ return `- ${path} (${lines} lines)`;
1436
1581
  }).join("\n");
1582
+ let anchorPath = null;
1583
+ let anchorSummary = null;
1584
+ if (largeFileMode) {
1585
+ const filePath = packet.file_paths[0];
1586
+ if (!reviewRoot) {
1587
+ warnings.push({
1588
+ code: "large_file_anchor_unavailable",
1589
+ message: `large single-file packet ${packet.packet_id} has no repo root available for anchor extraction`,
1590
+ });
1591
+ }
1592
+ else {
1593
+ try {
1594
+ const totalLines = packet.file_line_counts[filePath] ?? packet.total_lines;
1595
+ const content = await readFile(withinRoot(reviewRoot, filePath), "utf8");
1596
+ anchorSummary = buildFileAnchorSummary({
1597
+ path: filePath,
1598
+ content,
1599
+ totalLines,
1600
+ graphBundle: bundle.graph_bundle,
1601
+ externalAnalyzerResults: bundle.external_analyzer_results,
1602
+ });
1603
+ anchorPath = join(taskResultsDir, artifactNameForId(packet.packet_id, "anchors.json"));
1604
+ await writeJsonFile(anchorPath, anchorSummary);
1605
+ }
1606
+ catch (error) {
1607
+ warnings.push({
1608
+ code: "large_file_anchor_failed",
1609
+ message: `large single-file packet ${packet.packet_id} could not be anchored mechanically: ` +
1610
+ (error instanceof Error ? error.message : String(error)),
1611
+ });
1612
+ }
1613
+ }
1614
+ }
1615
+ const largeFileSection = anchorSummary && anchorPath
1616
+ ? renderAnchorPreview(anchorSummary, anchorPath)
1617
+ : largeFileMode
1618
+ ? [
1619
+ "## Large File Review Mode",
1620
+ "This packet is intentionally isolated because it covers one large file.",
1621
+ "Use targeted reads/searches within this file only.",
1622
+ "No mechanical anchor file was available, so rely on targeted symbol and keyword searches before reading broad ranges.",
1623
+ "",
1624
+ ]
1625
+ : [];
1626
+ const taskSections = packetTasks.flatMap((task) => {
1627
+ const lensDef = lensDefs[task.lens];
1628
+ return [
1629
+ `### ${task.task_id}`,
1630
+ `unit_id: ${task.unit_id}`,
1631
+ `pass_id: ${task.pass_id}`,
1632
+ `lens: ${task.lens}`,
1633
+ `rationale: ${task.rationale}`,
1634
+ "",
1635
+ `Lens guidance: ${lensDef?.description ?? task.lens}`,
1636
+ `Do NOT report: ${lensDef?.do_not_report ?? "N/A"}`,
1637
+ "",
1638
+ ];
1639
+ });
1640
+ const submitCommand = `"${process.execPath}" "${join(packageRoot, "audit-code.mjs")}" submit-packet ` +
1641
+ `--run-id-b64 ${toBase64Url(runId)} ` +
1642
+ `--packet-id-b64 ${toBase64Url(packet.packet_id)} ` +
1643
+ `--artifacts-dir-b64 ${toBase64Url(artifactsDir)}`;
1644
+ for (const task of packetTasks) {
1645
+ resultMapEntries.push({
1646
+ packet_id: packet.packet_id,
1647
+ task_id: task.task_id,
1648
+ result_path: resultPathByTaskId.get(task.task_id),
1649
+ });
1650
+ }
1437
1651
  const prompt = [
1438
- "You are a code auditor. Review the files below under the specified lens.",
1652
+ "You are a code auditor. Review this packet once, then submit exactly one result per listed task.",
1439
1653
  "",
1440
- "## Task",
1441
- `task_id: ${task.task_id}`,
1442
- `unit_id: ${task.unit_id}`,
1443
- `pass_id: ${task.pass_id}`,
1444
- `lens: ${task.lens}`,
1654
+ "## Packet",
1655
+ `packet_id: ${packet.packet_id}`,
1656
+ `task_count: ${packet.task_ids.length}`,
1657
+ `lenses: ${packet.lenses.join(", ")}`,
1658
+ `estimated_tokens: ${packet.estimated_tokens}`,
1445
1659
  "",
1446
1660
  "## Files to read",
1447
- "Use your Read tool. Paths are repo-relative from the current working directory.",
1661
+ largeFileMode
1662
+ ? "Use targeted Read/Grep calls. Paths are repo-relative from the current working directory."
1663
+ : "Use your Read tool. Paths are repo-relative from the current working directory.",
1448
1664
  fileList,
1449
1665
  "",
1450
- `## Lens: ${task.lens}`,
1451
- lensDef?.description ?? task.lens,
1452
- "",
1453
- `Do NOT report: ${lensDef?.do_not_report ?? "N/A"}`,
1454
- "",
1666
+ ...largeFileSection,
1667
+ "## Tasks",
1668
+ ...taskSections,
1455
1669
  "## Output",
1456
- `Write a single JSON object to: ${outputPath}`,
1457
- "Write only this assigned task's AuditResult object. Do not edit source files,",
1670
+ "Do not write files directly. Do not use a Write tool, create temp files, edit source files,",
1458
1671
  "remediate findings, create extra task results, or run unrelated audits.",
1672
+ "Produce one JSON array containing exactly one AuditResult object for each listed task.",
1459
1673
  "",
1460
- "Required fields:",
1461
- " task_id copy from task metadata above",
1462
- " unit_id copy from task metadata above",
1463
- " pass_id copy from task metadata above",
1464
- " lens copy from task metadata above",
1465
- " file_coverage [{path, total_lines}] one entry per file; use the line counts listed above",
1466
- " findings [] or array of finding objects (see below)",
1674
+ "Required AuditResult fields:",
1675
+ " task_id copy from the task metadata",
1676
+ " unit_id copy from the task metadata",
1677
+ " pass_id copy from the task metadata",
1678
+ " lens copy from the task metadata",
1679
+ " file_coverage [{path, total_lines}] - one entry per assigned file; use the line counts listed above",
1680
+ " findings [] or array of finding objects",
1467
1681
  "",
1468
1682
  "Each finding object:",
1469
1683
  " id unique ID, e.g. \"COR-001\"",
1470
1684
  " title short title",
1471
- " category correctness|architecture|maintainability|security|reliability|performance|data_integrity|tests|operability|config_deployment",
1685
+ " category specific finding category, such as missing-validation or command-execution",
1472
1686
  " severity critical|high|medium|low|info",
1473
1687
  " confidence high|medium|low",
1474
- ` lens "${task.lens}" — must match task lens exactly`,
1475
- " summary 12 sentence description",
1476
- " affected_files [{path, line_start?, line_end?, symbol?}] objects, not strings; min 1 entry",
1477
- " evidence [\"path/to/file.ts:42 description of what you see there\"] min 1 entry",
1688
+ " lens must match the task lens exactly",
1689
+ " summary 1-2 sentence description",
1690
+ " affected_files [{path, line_start?, line_end?, symbol?}] - objects, not strings; min 1 entry",
1691
+ " evidence [\"path/to/file.ts:42 - description of what you see there\"] - min 1 entry",
1478
1692
  "",
1479
1693
  "Constraints:",
1480
- "1. line_end must not exceed the file's actual line count (use counts listed above)",
1481
- "2. affected_files entries are OBJECTS with a \"path\" key NOT plain strings",
1482
- "3. Only reference files from the list above",
1483
- "4. findings: [] is correct when you find nothing genuine",
1694
+ "1. line_end must not exceed the file's actual line count.",
1695
+ "2. affected_files entries are objects with a path key, not plain strings.",
1696
+ "3. Only reference files from the packet unless a finding genuinely crosses a boundary.",
1697
+ "4. findings: [] is correct when you find nothing genuine.",
1698
+ "",
1699
+ "## Submit",
1700
+ "Pipe the JSON array on stdin to this command:",
1701
+ ` ${submitCommand}`,
1484
1702
  "",
1485
- "## Validate",
1486
- "After writing your result, run:",
1487
- ` "${process.execPath}" "${join(packageRoot, "audit-code.mjs")}" validate-result --run-id ${runId} --task-id ${task.task_id} --artifacts-dir "${artifactsDir}"`,
1703
+ "The command validates and writes the packet-owned result files. Exit 0 means accepted.",
1704
+ "Non-zero: read the errors, fix the JSON, and run the same submit command again. Retry up to 3 times.",
1488
1705
  "",
1489
- "Exit 0 means valid. Non-zero: read the errors, fix your JSON, rewrite the file, run again. Retry up to 3 times.",
1706
+ "## Final response",
1707
+ `After the submit command succeeds, reply exactly: valid: ${packet.packet_id}, findings=<total finding count>`,
1490
1708
  ].join("\n");
1491
1709
  await writeFile(promptPath, prompt, "utf8");
1492
- const description = `Audit ${task.unit_id} (${task.file_paths.length} file(s), ~${totalLines} lines) — ${task.lens} lens`;
1493
- plan.push({ task_id: task.task_id, description, output_path: outputPath, prompt_path: promptPath });
1710
+ plan.push({
1711
+ packet_id: packet.packet_id,
1712
+ description: `Audit ${packet.file_paths.length} file(s), ${packet.task_ids.length} task(s), ${packet.lenses.length} lens(es) (~${packet.total_lines} lines)` +
1713
+ (largeFileMode ? " [isolated large-file mode]" : ""),
1714
+ prompt_path: promptPath,
1715
+ });
1494
1716
  }
1495
1717
  await writeJsonFile(dispatchPlanPath, plan);
1496
- console.log(`Wrote dispatch-plan.json — ${plan.length} tasks ready for dispatch`);
1497
- if (largestTask)
1498
- console.log(`Largest task: ${largestTask} (~${largestLines} lines)`);
1718
+ await writeJsonFile(dispatchResultMapPath(runDir), {
1719
+ contract_version: "audit-code-dispatch-results/v1alpha1",
1720
+ run_id: runId,
1721
+ entries: resultMapEntries,
1722
+ });
1723
+ const warningsPath = warnings.length > 0
1724
+ ? join(runDir, "dispatch-warnings.json")
1725
+ : null;
1726
+ if (warningsPath) {
1727
+ await writeJsonFile(warningsPath, warnings);
1728
+ }
1729
+ console.log(JSON.stringify({
1730
+ run_id: runId,
1731
+ dispatch_plan_path: dispatchPlanPath,
1732
+ packet_count: plan.length,
1733
+ task_count: orderedTasks.length,
1734
+ largest_packet: largestPacketId
1735
+ ? {
1736
+ packet_id: largestPacketId,
1737
+ total_lines: largestLines,
1738
+ estimated_tokens: largestEstimatedTokens,
1739
+ }
1740
+ : null,
1741
+ warning_count: warnings.length,
1742
+ dispatch_warnings_path: warningsPath,
1743
+ }, null, 2));
1744
+ }
1745
/**
 * CLI command `submit-packet`: validate the AuditResult[] payload for one
 * dispatched packet and write each result to its backend-assigned path.
 *
 * Inputs come from flags (--run-id/--run-id-b64, --packet-id/--packet-id-b64,
 * --artifacts-dir-b64 or the regular artifacts-dir flag) and the payload from
 * --results-b64 or stdin. On success a JSON summary is printed to stdout.
 *
 * @param {string[]} argv - Raw argument vector.
 * @returns {Promise<void>}
 * @throws {Error} When required flags are missing, the dispatch result map is
 *   missing or inconsistent, the payload is empty / not valid JSON / not an
 *   array, or validation rejects the results. Nothing is written in any of
 *   these cases.
 */
async function cmdSubmitPacket(argv) {
    const runId = resolveRunScopedArg(argv, "--run-id", "--run-id-b64");
    const packetId = resolveRunScopedArg(argv, "--packet-id", "--packet-id-b64");
    const artifactsDirB64 = getFlag(argv, "--artifacts-dir-b64");
    const artifactsDir = artifactsDirB64
        ? resolve(fromBase64Url(artifactsDirB64))
        : getArtifactsDir(argv);
    if (!runId || !packetId) {
        throw new Error("submit-packet requires --run-id and --packet-id (or --run-id-b64/--packet-id-b64)");
    }
    const runDir = join(artifactsDir, "runs", runId);
    const tasksPath = join(runDir, "pending-audit-tasks.json");
    const resultMap = await loadDispatchResultMap(runDir);
    if (!resultMap) {
        throw new Error(`No ${DISPATCH_RESULT_MAP_FILENAME} found for run ${runId}; run prepare-dispatch first.`);
    }
    // Restrict everything below to the entries owned by this packet.
    const packetEntries = resultMap.entries.filter((entry) => entry.packet_id === packetId);
    if (packetEntries.length === 0) {
        throw new Error(`Unknown packet_id '${packetId}' for run ${runId}.`);
    }
    const entryByTaskId = entriesByTaskId(packetEntries);
    if (entryByTaskId.size !== packetEntries.length) {
        throw new Error(`Dispatch result map has duplicate task entries for packet '${packetId}'.`);
    }
    const allTasks = await readJsonFile(tasksPath);
    const taskById = new Map(allTasks.map((task) => [task.task_id, task]));
    const tasks = packetEntries.map((entry) => taskById.get(entry.task_id));
    const missingTask = packetEntries.find((entry, index) => !tasks[index]);
    if (missingTask) {
        throw new Error(`Dispatch result map references unknown task '${missingTask.task_id}'.`);
    }
    const expectedTaskIds = new Set(tasks.map((task) => task.task_id));
    const lineIndex = Object.fromEntries(tasks.flatMap((task) => Object.entries(task.file_line_counts ?? {})));
    // The payload may arrive inline (base64url) or piped on stdin.
    const encodedResults = getFlag(argv, "--results-b64");
    const raw = encodedResults ? fromBase64Url(encodedResults) : await readStdinText();
    if (raw.trim().length === 0) {
        throw new Error("submit-packet requires an AuditResult[] JSON payload on stdin or --results-b64.");
    }
    let payload;
    try {
        payload = JSON.parse(raw);
    }
    catch (error) {
        throw new Error(`Invalid submit-packet JSON: ${error instanceof Error ? error.message : String(error)}`, { cause: error });
    }
    const resultErrors = [];
    const issues = validateAuditResults(payload, tasks, { lineIndex });
    const validationErrors = issues.filter((issue) => issue.severity === "error");
    const validationWarnings = issues.filter((issue) => issue.severity === "warning");
    if (validationWarnings.length > 0) {
        // Warnings are surfaced but do not block acceptance.
        process.stderr.write(`audit-results validation: ${validationWarnings.length} warning(s):\n` +
            formatAuditResultIssues(validationWarnings) +
            "\n");
    }
    if (validationErrors.length > 0) {
        resultErrors.push(formatAuditResultIssues(validationErrors));
    }
    if (Array.isArray(payload)) {
        // Packet-level bookkeeping: each assigned task must appear exactly
        // once and no foreign task ids may be submitted.
        const seen = new Set();
        for (const [index, result] of payload.entries()) {
            if (!result || typeof result !== "object" || Array.isArray(result)) {
                continue;
            }
            const taskId = result.task_id;
            if (typeof taskId !== "string" || taskId.trim().length === 0) {
                continue;
            }
            if (seen.has(taskId)) {
                resultErrors.push(`Duplicate audit result for assigned task '${taskId}'.`);
            }
            seen.add(taskId);
            if (!expectedTaskIds.has(taskId)) {
                resultErrors.push(`Result at index ${index} uses task_id '${taskId}', which is not assigned to packet '${packetId}'.`);
            }
        }
        for (const task of tasks) {
            if (!seen.has(task.task_id)) {
                resultErrors.push(`Missing audit result for assigned task '${task.task_id}'.`);
            }
        }
    }
    else if (validationErrors.length === 0) {
        // The write loop and finding count below iterate the payload, so a
        // non-array must be rejected here with a readable message instead of
        // failing later with "payload is not iterable".
        resultErrors.push("submit-packet payload must be a JSON array of AuditResult objects.");
    }
    if (resultErrors.length > 0) {
        throw new Error(`submit-packet rejected ${packetId}:\n${resultErrors.join("\n")}`);
    }
    // All checks passed: persist each result at its assigned path.
    for (const result of payload) {
        const entry = entryByTaskId.get(result.task_id);
        if (!entry) {
            throw new Error(`Internal error: no result path for accepted task '${result.task_id}'.`);
        }
        await writeJsonFile(entry.result_path, result);
    }
    const findingCount = payload.reduce((sum, result) => sum + result.findings.length, 0);
    console.log(JSON.stringify({
        run_id: runId,
        packet_id: packetId,
        accepted_count: payload.length,
        finding_count: findingCount,
    }, null, 2));
}
1500
1845
  async function cmdMergeAndIngest(argv) {
1501
1846
  const runId = getFlag(argv, "--run-id");
@@ -1507,12 +1852,21 @@ async function cmdMergeAndIngest(argv) {
1507
1852
  const auditResultsPath = join(runDir, "audit-results.json");
1508
1853
  const taskPath = join(runDir, "task.json");
1509
1854
  const tasksPath = join(runDir, "pending-audit-tasks.json");
1855
+ const workerTask = await readJsonFile(taskPath);
1856
+ const resultMap = await loadDispatchResultMap(runDir);
1857
+ if (!resultMap) {
1858
+ throw new Error(`No ${DISPATCH_RESULT_MAP_FILENAME} found for run ${runId}; run prepare-dispatch first.`);
1859
+ }
1510
1860
  let allTasks = [];
1511
1861
  try {
1512
1862
  allTasks = await readJsonFile(tasksPath);
1513
1863
  }
1514
1864
  catch { /* may not exist */ }
1515
- const taskMap = new Map(allTasks.map(t => [t.task_id, t]));
1865
+ const entryByTaskId = entriesByTaskId(resultMap.entries);
1866
+ if (entryByTaskId.size !== resultMap.entries.length) {
1867
+ throw new Error(`Dispatch result map for run ${runId} contains duplicate task entries.`);
1868
+ }
1869
+ const expectedPaths = new Set(resultMap.entries.map((entry) => resolve(entry.result_path)));
1516
1870
  let files;
1517
1871
  try {
1518
1872
  files = (await readdir(taskResultsDir)).filter(f => f.endsWith(".json")).sort();
@@ -1524,36 +1878,66 @@ async function cmdMergeAndIngest(argv) {
1524
1878
  const failing = [];
1525
1879
  const seenTaskIds = new Set();
1526
1880
  for (const filename of files) {
1527
- const filePath = join(taskResultsDir, filename);
1881
+ const filePath = resolve(join(taskResultsDir, filename));
1882
+ if (!expectedPaths.has(filePath)) {
1883
+ failing.push({
1884
+ task_id: filename,
1885
+ errors: ["Unexpected task result file; only backend-assigned result paths may be ingested."],
1886
+ });
1887
+ }
1888
+ }
1889
+ for (const task of allTasks) {
1890
+ const entry = entryByTaskId.get(task.task_id);
1891
+ if (!entry) {
1892
+ failing.push({
1893
+ task_id: task.task_id,
1894
+ errors: ["Missing dispatch result-map entry for assigned task."],
1895
+ });
1896
+ continue;
1897
+ }
1898
+ const filePath = entry.result_path;
1528
1899
  let obj;
1529
1900
  try {
1530
1901
  obj = JSON.parse(await readFile(filePath, "utf8"));
1531
1902
  }
1532
1903
  catch (e) {
1533
- failing.push({ task_id: filename, errors: [`Invalid JSON: ${e.message}`] });
1904
+ if (isFileMissingError(e)) {
1905
+ failing.push({
1906
+ task_id: task.task_id,
1907
+ errors: ["Missing audit result for assigned task."],
1908
+ });
1909
+ }
1910
+ else {
1911
+ failing.push({ task_id: task.task_id, errors: [`Invalid JSON: ${e.message}`] });
1912
+ }
1534
1913
  continue;
1535
1914
  }
1536
- const taskId = typeof obj.task_id === "string"
1537
- ? String(obj.task_id) : undefined;
1915
+ const record = obj && typeof obj === "object" && !Array.isArray(obj)
1916
+ ? obj
1917
+ : undefined;
1918
+ const taskId = typeof record?.task_id === "string"
1919
+ ? String(record.task_id) : undefined;
1920
+ const resultErrors = [];
1538
1921
  if (taskId) {
1539
- seenTaskIds.add(taskId);
1922
+ if (seenTaskIds.has(taskId)) {
1923
+ resultErrors.push(`Duplicate audit result for assigned task '${taskId}'.`);
1924
+ }
1925
+ else {
1926
+ seenTaskIds.add(taskId);
1927
+ }
1928
+ if (taskId !== task.task_id) {
1929
+ resultErrors.push(`Result file is assigned to '${task.task_id}' but contains task_id '${taskId}'.`);
1930
+ }
1540
1931
  }
1541
- const matchingTask = taskId ? taskMap.get(taskId) : undefined;
1542
- const issues = validateAuditResults([obj], matchingTask ? [matchingTask] : [], { lineIndex: matchingTask?.file_line_counts ?? {} });
1543
- const errors = issues.filter(i => i.severity === "error");
1544
- if (errors.length === 0) {
1932
+ const issues = validateAuditResults([obj], [task], { lineIndex: task.file_line_counts ?? {} });
1933
+ resultErrors.push(...issues
1934
+ .filter(i => i.severity === "error")
1935
+ .map(i => i.message));
1936
+ if (resultErrors.length === 0) {
1545
1937
  passing.push(obj);
1546
1938
  }
1547
1939
  else {
1548
- failing.push({ task_id: taskId ?? filename, errors: errors.map(i => i.message) });
1549
- }
1550
- }
1551
- for (const task of allTasks) {
1552
- if (!seenTaskIds.has(task.task_id)) {
1553
- failing.push({
1554
- task_id: task.task_id,
1555
- errors: ["Missing audit result for assigned task."],
1556
- });
1940
+ failing.push({ task_id: taskId ?? task.task_id, errors: resultErrors });
1557
1941
  }
1558
1942
  }
1559
1943
  await writeJsonFile(auditResultsPath, passing);
@@ -1562,19 +1946,58 @@ async function cmdMergeAndIngest(argv) {
1562
1946
  await writeJsonFile(failedTasksPath, failing);
1563
1947
  throw new Error(`${failing.length} assigned task result(s) were missing or invalid; blocked before ingestion. See ${failedTasksPath}`);
1564
1948
  }
1565
- process.stderr.write(`✓ ${passing.length}/${files.length} results merged → ${auditResultsPath}\n`);
1566
- // Ingest: run worker-run logic against the merged results file
1567
- await cmdWorkerRun([argv[0], argv[1], "worker-run", "--task", taskPath, "--artifacts-dir", artifactsDir]);
1949
+ const findingCount = passing.reduce((sum, result) => sum + result.findings.length, 0);
1950
+ const result = await runAuditStep({
1951
+ root: workerTask.repo_root,
1952
+ artifactsDir,
1953
+ preferredExecutor: "result_ingestion_executor",
1954
+ auditResultsPath,
1955
+ });
1956
+ const workerResult = buildWorkerResult({
1957
+ runId,
1958
+ obligationId: workerTask.obligation_id,
1959
+ status: result.progress_made ? "completed" : "no_progress",
1960
+ progressMade: result.progress_made,
1961
+ selectedExecutor: result.selected_executor,
1962
+ artifactsWritten: result.artifacts_written,
1963
+ summary: result.progress_summary,
1964
+ nextLikelyStep: result.next_likely_step,
1965
+ errors: [],
1966
+ });
1967
+ await writeJsonFile(workerTask.result_path, workerResult);
1968
+ console.log(JSON.stringify({
1969
+ run_id: runId,
1970
+ status: workerResult.status,
1971
+ accepted_count: passing.length,
1972
+ rejected_count: 0,
1973
+ finding_count: findingCount,
1974
+ audit_results_path: auditResultsPath,
1975
+ selected_executor: workerResult.selected_executor,
1976
+ progress_made: workerResult.progress_made,
1977
+ progress_summary: workerResult.summary,
1978
+ next_likely_step: workerResult.next_likely_step,
1979
+ }, null, 2));
1568
1980
  }
1569
1981
  async function cmdValidateResult(argv) {
1570
- const runId = getFlag(argv, "--run-id");
1571
- const taskId = getFlag(argv, "--task-id");
1572
- if (!runId || !taskId)
1573
- throw new Error("validate-result requires --run-id and --task-id");
1574
- const artifactsDir = getArtifactsDir(argv);
1575
- const sanitized = taskId.replace(/[^a-zA-Z0-9_-]/g, "_");
1576
- const resultPath = join(artifactsDir, "runs", runId, "task-results", `${sanitized}.json`);
1577
- const tasksPath = join(artifactsDir, "runs", runId, "pending-audit-tasks.json");
1982
+ const rawRunId = getFlag(argv, "--run-id");
1983
+ const runIdB64 = getFlag(argv, "--run-id-b64");
1984
+ const rawTaskId = getFlag(argv, "--task-id");
1985
+ const artifactsDirB64 = getFlag(argv, "--artifacts-dir-b64");
1986
+ const runId = rawRunId ?? (runIdB64 ? fromBase64Url(runIdB64) : undefined);
1987
+ const taskIdB64 = getFlag(argv, "--task-id-b64");
1988
+ const taskId = rawTaskId ?? (taskIdB64 ? fromBase64Url(taskIdB64) : undefined);
1989
+ const artifactsDir = artifactsDirB64
1990
+ ? resolve(fromBase64Url(artifactsDirB64))
1991
+ : getArtifactsDir(argv);
1992
+ if (!runId || !taskId) {
1993
+ throw new Error("validate-result requires --run-id and --task-id (or --run-id-b64/--task-id-b64)");
1994
+ }
1995
+ const runDir = join(artifactsDir, "runs", runId);
1996
+ const taskResultsDir = join(runDir, "task-results");
1997
+ const resultMap = await loadDispatchResultMap(runDir);
1998
+ const resultPath = resultMap?.entries.find((entry) => entry.task_id === taskId)?.result_path ??
1999
+ taskResultPath(taskResultsDir, taskId);
2000
+ const tasksPath = join(runDir, "pending-audit-tasks.json");
1578
2001
  let raw;
1579
2002
  try {
1580
2003
  raw = await readFile(resultPath, "utf8");
@@ -1866,12 +2289,15 @@ async function main(argv) {
1866
2289
  case "merge-and-ingest":
1867
2290
  await cmdMergeAndIngest(argv);
1868
2291
  return;
2292
+ case "submit-packet":
2293
+ await cmdSubmitPacket(argv);
2294
+ return;
1869
2295
  case "validate-result":
1870
2296
  await cmdValidateResult(argv);
1871
2297
  return;
1872
2298
  default:
1873
2299
  console.error(`Unknown command: ${command}`);
1874
- console.error("Available commands: sample-run, advance-audit, run-to-completion, worker-run, import-external-analyzer, intake, plan, ingest-results, explain-task, update-runtime-validation, validate, validate-results, requeue, synthesize, mcp, prepare-dispatch, merge-and-ingest, validate-result");
2300
+ console.error("Available commands: sample-run, advance-audit, run-to-completion, worker-run, import-external-analyzer, intake, plan, ingest-results, explain-task, update-runtime-validation, validate, validate-results, requeue, synthesize, mcp, prepare-dispatch, merge-and-ingest, submit-packet, validate-result");
1875
2301
  process.exitCode = 1;
1876
2302
  }
1877
2303
  }