auditor-lambda 0.3.1 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,40 +1,34 @@
1
- import { dirname, resolve, join } from "node:path";
2
- import { fileURLToPath } from "node:url";
1
+ import { resolve, join } from "node:path";
3
2
  import { readFileSync, writeFileSync, readdirSync, existsSync } from "node:fs";
4
3
  import { validateResult } from "./validate.mjs";
5
4
 
6
- const __filename = fileURLToPath(import.meta.url);
7
- const __dirname = dirname(__filename);
8
-
9
- // Parse --run-id
10
5
  const runIdIdx = process.argv.indexOf("--run-id");
11
6
  if (runIdIdx === -1 || !process.argv[runIdIdx + 1]) {
12
7
  console.error("Usage: node dispatch/merge-results.mjs --run-id <run_id> [--artifacts-dir <dir>]");
13
8
  process.exit(1);
14
9
  }
15
- const run_id = process.argv[runIdIdx + 1];
10
+ const runId = process.argv[runIdIdx + 1];
16
11
 
17
- // Parse --artifacts-dir (default: CWD/.audit-artifacts)
18
12
  const artifactsDirIdx = process.argv.indexOf("--artifacts-dir");
19
13
  const artifactsDir = artifactsDirIdx !== -1 && process.argv[artifactsDirIdx + 1]
20
14
  ? resolve(process.argv[artifactsDirIdx + 1])
21
15
  : join(process.cwd(), ".audit-artifacts");
22
16
 
23
- const taskResultsDir = join(artifactsDir, "runs", run_id, "task-results");
24
- const auditResultsPath = join(artifactsDir, "runs", run_id, "audit-results.json");
25
- const failedTasksPath = join(artifactsDir, "runs", run_id, "failed-tasks.json");
26
- const tasksPath = join(artifactsDir, "runs", run_id, "pending-audit-tasks.json");
17
+ const taskResultsDir = join(artifactsDir, "runs", runId, "task-results");
18
+ const auditResultsPath = join(artifactsDir, "runs", runId, "audit-results.json");
19
+ const failedTasksPath = join(artifactsDir, "runs", runId, "failed-tasks.json");
20
+ const tasksPath = join(artifactsDir, "runs", runId, "pending-audit-tasks.json");
27
21
 
28
- // Build fileLineCounts map
29
- const lineCounts = {};
22
+ // Build task map for validation context
23
+ const taskMap = {};
30
24
  if (existsSync(tasksPath)) {
31
25
  try {
32
26
  const tasks = JSON.parse(readFileSync(tasksPath, "utf8"));
33
27
  for (const task of tasks) {
34
- lineCounts[task.task_id] = task.file_line_counts;
28
+ taskMap[task.task_id] = task;
35
29
  }
36
30
  } catch {
37
- // proceed with empty map
31
+ // proceed without task context
38
32
  }
39
33
  }
40
34
 
@@ -59,8 +53,8 @@ for (const filename of files) {
59
53
  }
60
54
 
61
55
  const taskId = resultObj?.task_id;
62
- const fileLineCounts = (taskId && lineCounts[taskId]) ? lineCounts[taskId] : {};
63
- const { valid, errors } = validateResult(resultObj, fileLineCounts);
56
+ const task = (taskId && taskMap[taskId]) ? taskMap[taskId] : null;
57
+ const { valid, errors } = validateResult(resultObj, task);
64
58
 
65
59
  if (valid) {
66
60
  passing.push(resultObj);
@@ -1,38 +1,25 @@
1
- import { dirname, resolve, join } from "node:path";
2
- import { fileURLToPath } from "node:url";
1
+ import { resolve, join } from "node:path";
3
2
  import { readFileSync, existsSync } from "node:fs";
4
3
  import { validateResult } from "./validate.mjs";
5
4
 
6
- const __filename = fileURLToPath(import.meta.url);
7
- const __dirname = dirname(__filename);
8
-
9
- // Support both named flags and legacy positional args:
10
- // Named: --run-id <id> --task-id <id> [--artifacts-dir <dir>]
11
- // Positional (legacy): <run_id> <task_id>
12
- const runIdFlagIdx = process.argv.indexOf("--run-id");
13
- const taskIdFlagIdx = process.argv.indexOf("--task-id");
5
+ const runIdIdx = process.argv.indexOf("--run-id");
6
+ const taskIdIdx = process.argv.indexOf("--task-id");
14
7
  const artifactsDirIdx = process.argv.indexOf("--artifacts-dir");
15
8
 
16
- const run_id = runIdFlagIdx !== -1
17
- ? process.argv[runIdFlagIdx + 1]
18
- : process.argv[2];
19
-
20
- const task_id = taskIdFlagIdx !== -1
21
- ? process.argv[taskIdFlagIdx + 1]
22
- : process.argv[3];
9
+ const runId = runIdIdx !== -1 ? process.argv[runIdIdx + 1] : undefined;
10
+ const taskId = taskIdIdx !== -1 ? process.argv[taskIdIdx + 1] : undefined;
23
11
 
24
- if (!run_id || !task_id) {
12
+ if (!runId || !taskId) {
25
13
  console.error("Usage: node dispatch/validate-result.mjs --run-id <run_id> --task-id <task_id> [--artifacts-dir <dir>]");
26
14
  process.exit(1);
27
15
  }
28
16
 
29
- // Artifacts dir: explicit flag > CWD default
30
17
  const artifactsDir = artifactsDirIdx !== -1 && process.argv[artifactsDirIdx + 1]
31
18
  ? resolve(process.argv[artifactsDirIdx + 1])
32
19
  : join(process.cwd(), ".audit-artifacts");
33
20
 
34
- const sanitized = task_id.replace(/[^a-zA-Z0-9_-]/g, "_");
35
- const resultPath = join(artifactsDir, "runs", run_id, "task-results", sanitized + ".json");
21
+ const sanitized = taskId.replace(/[^a-zA-Z0-9_-]/g, "_");
22
+ const resultPath = join(artifactsDir, "runs", runId, "task-results", sanitized + ".json");
36
23
 
37
24
  if (!existsSync(resultPath)) {
38
25
  console.error(`File not found: ${resultPath}`);
@@ -47,25 +34,24 @@ try {
47
34
  process.exit(1);
48
35
  }
49
36
 
50
- const tasksPath = join(artifactsDir, "runs", run_id, "pending-audit-tasks.json");
51
- let fileLineCounts = {};
37
+ const tasksPath = join(artifactsDir, "runs", runId, "pending-audit-tasks.json");
38
+ let task = null;
52
39
  if (existsSync(tasksPath)) {
53
40
  try {
54
41
  const tasks = JSON.parse(readFileSync(tasksPath, "utf8"));
55
- const task = tasks.find(t => t.task_id === task_id);
56
- fileLineCounts = task?.file_line_counts ?? {};
42
+ task = tasks.find(t => t.task_id === taskId) ?? null;
57
43
  } catch {
58
- // use empty
44
+ // proceed without task context
59
45
  }
60
46
  }
61
47
 
62
- const { valid, errors } = validateResult(resultObj, fileLineCounts);
48
+ const { valid, errors } = validateResult(resultObj, task);
63
49
 
64
50
  if (valid) {
65
- console.log("✓ valid:", task_id);
51
+ console.log("✓ valid:", taskId);
66
52
  process.exit(0);
67
53
  } else {
68
- console.error("✗ invalid:", task_id);
54
+ console.error("✗ invalid:", taskId);
69
55
  console.error(JSON.stringify(errors, null, 2));
70
56
  process.exit(1);
71
57
  }
@@ -1,88 +1,16 @@
1
- import { dirname, resolve, join } from "node:path";
2
- import { fileURLToPath } from "node:url";
3
- import { readFileSync } from "node:fs";
4
- import Ajv2020 from "ajv/dist/2020.js";
5
-
6
- const __filename = fileURLToPath(import.meta.url);
7
- const __dirname = dirname(__filename);
8
- const PROJECT_ROOT = resolve(__dirname, "..");
9
- const SCHEMAS_DIR = join(PROJECT_ROOT, "schemas");
10
-
11
- function loadSchema(name) {
12
- return JSON.parse(readFileSync(join(SCHEMAS_DIR, name), "utf8"));
13
- }
14
-
15
- let _ajv = null;
16
- let _validateFn = null;
17
-
18
- function getValidator() {
19
- if (_validateFn) return _validateFn;
20
- _ajv = new Ajv2020({ strict: false, allErrors: true });
21
- _ajv.addSchema(loadSchema("finding.schema.json"));
22
- _validateFn = _ajv.compile(loadSchema("audit_result.schema.json"));
23
- return _validateFn;
24
- }
25
-
26
- function formatAjvError(e) {
27
- const path = e.instancePath || "(root)";
28
- return `${path}: ${e.message}${e.params ? " (" + JSON.stringify(e.params) + ")" : ""}`;
29
- }
1
+ import { validateAuditResults } from "../dist/validation/auditResults.js";
30
2
 
31
3
  /**
32
4
  * @param {object} resultObj — parsed JSON from a task-results file
33
- * @param {Record<string, number>} fileLineCounts — from the task's file_line_counts
5
+ * @param {object|null} task the matching AuditTask from pending-audit-tasks.json, or null
34
6
  * @returns {{ valid: boolean, errors: string[] }}
35
7
  */
36
- export function validateResult(resultObj, fileLineCounts) {
37
- const validate = getValidator();
38
- const schemaValid = validate(resultObj);
39
-
40
- if (!schemaValid) {
41
- return { valid: false, errors: validate.errors.map(formatAjvError) };
42
- }
43
-
44
- const errors = [];
45
-
46
- // Line range constraint
47
- for (const finding of resultObj.findings) {
48
- for (const entry of finding.affected_files) {
49
- if (entry.line_end !== undefined) {
50
- const coverage = resultObj.file_coverage.find(fc => fc.path === entry.path);
51
- if (!coverage) {
52
- errors.push(`affected_files path '${entry.path}' not in file_coverage`);
53
- } else if (entry.line_end > coverage.total_lines) {
54
- errors.push(
55
- `finding '${finding.id}': line_end ${entry.line_end} exceeds total_lines ${coverage.total_lines} for ${entry.path}`
56
- );
57
- }
58
- }
59
- }
60
- }
61
-
62
- // Lens consistency
63
- for (const finding of resultObj.findings) {
64
- if (finding.lens !== resultObj.lens) {
65
- errors.push(
66
- `finding '${finding.id}': lens '${finding.lens}' does not match task lens '${resultObj.lens}'`
67
- );
68
- }
69
- }
70
-
71
- // affected_files paths in scope
72
- const allowedPaths = new Set(resultObj.file_coverage.map(fc => fc.path));
73
- for (const finding of resultObj.findings) {
74
- for (const entry of finding.affected_files) {
75
- if (!allowedPaths.has(entry.path)) {
76
- errors.push(
77
- `finding '${finding.id}': affected path '${entry.path}' not in task file_coverage`
78
- );
79
- }
80
- }
81
- }
82
-
83
- if (errors.length > 0) {
84
- return { valid: false, errors };
85
- }
86
-
87
- return { valid: true, errors: [] };
8
+ export function validateResult(resultObj, task) {
9
+ const tasks = task ? [task] : [];
10
+ const lineIndex = task?.file_line_counts ?? {};
11
+ const issues = validateAuditResults([resultObj], tasks, { lineIndex });
12
+ const errors = issues
13
+ .filter(i => i.severity === "error")
14
+ .map(i => `${i.path}: ${i.message}`);
15
+ return { valid: errors.length === 0, errors };
88
16
  }
package/dist/cli.js CHANGED
@@ -1312,6 +1312,9 @@ async function cmdRunToCompletion(argv) {
1312
1312
  next_likely_step: state.status === "complete" ? null : decision.selected_obligation,
1313
1313
  providerName: provider.name,
1314
1314
  });
1315
+ if (state.status === "complete") {
1316
+ await promoteFinalAuditReport({ artifactsDir, repoRoot: root });
1317
+ }
1315
1318
  }
1316
1319
  async function cmdWorkerRun(argv) {
1317
1320
  const taskPath = getFlag(argv, "--task");
@@ -1451,6 +1454,8 @@ async function cmdPrepareDispatch(argv) {
1451
1454
  "",
1452
1455
  "## Output",
1453
1456
  `Write a single JSON object to: ${outputPath}`,
1457
+ "Write only this assigned task's AuditResult object. Do not edit source files,",
1458
+ "remediate findings, create extra task results, or run unrelated audits.",
1454
1459
  "",
1455
1460
  "Required fields:",
1456
1461
  " task_id copy from task metadata above",
@@ -1501,18 +1506,23 @@ async function cmdMergeAndIngest(argv) {
1501
1506
  const taskResultsDir = join(runDir, "task-results");
1502
1507
  const auditResultsPath = join(runDir, "audit-results.json");
1503
1508
  const taskPath = join(runDir, "task.json");
1504
- // Merge: collect all per-task result files (skip .prompt.md files)
1509
+ const tasksPath = join(runDir, "pending-audit-tasks.json");
1510
+ let allTasks = [];
1511
+ try {
1512
+ allTasks = await readJsonFile(tasksPath);
1513
+ }
1514
+ catch { /* may not exist */ }
1515
+ const taskMap = new Map(allTasks.map(t => [t.task_id, t]));
1505
1516
  let files;
1506
1517
  try {
1507
- files = (await readdir(taskResultsDir))
1508
- .filter(f => f.endsWith(".json"))
1509
- .sort();
1518
+ files = (await readdir(taskResultsDir)).filter(f => f.endsWith(".json")).sort();
1510
1519
  }
1511
1520
  catch {
1512
1521
  files = [];
1513
1522
  }
1514
1523
  const passing = [];
1515
1524
  const failing = [];
1525
+ const seenTaskIds = new Set();
1516
1526
  for (const filename of files) {
1517
1527
  const filePath = join(taskResultsDir, filename);
1518
1528
  let obj;
@@ -1523,30 +1533,39 @@ async function cmdMergeAndIngest(argv) {
1523
1533
  failing.push({ task_id: filename, errors: [`Invalid JSON: ${e.message}`] });
1524
1534
  continue;
1525
1535
  }
1526
- const r = obj;
1527
- const missing = ["task_id", "unit_id", "pass_id", "lens", "file_coverage", "findings"].filter(f => !(f in r));
1528
- if (missing.length > 0) {
1529
- failing.push({ task_id: String(r.task_id ?? filename), errors: [`Missing required fields: ${missing.join(", ")}`] });
1536
+ const taskId = typeof obj.task_id === "string"
1537
+ ? String(obj.task_id) : undefined;
1538
+ if (taskId) {
1539
+ seenTaskIds.add(taskId);
1530
1540
  }
1531
- else {
1541
+ const matchingTask = taskId ? taskMap.get(taskId) : undefined;
1542
+ const issues = validateAuditResults([obj], matchingTask ? [matchingTask] : [], { lineIndex: matchingTask?.file_line_counts ?? {} });
1543
+ const errors = issues.filter(i => i.severity === "error");
1544
+ if (errors.length === 0) {
1532
1545
  passing.push(obj);
1533
1546
  }
1547
+ else {
1548
+ failing.push({ task_id: taskId ?? filename, errors: errors.map(i => i.message) });
1549
+ }
1550
+ }
1551
+ for (const task of allTasks) {
1552
+ if (!seenTaskIds.has(task.task_id)) {
1553
+ failing.push({
1554
+ task_id: task.task_id,
1555
+ errors: ["Missing audit result for assigned task."],
1556
+ });
1557
+ }
1534
1558
  }
1535
1559
  await writeJsonFile(auditResultsPath, passing);
1536
1560
  if (failing.length > 0) {
1537
- await writeJsonFile(join(runDir, "failed-tasks.json"), failing);
1538
- process.stderr.write(`${failing.length} task(s) excluded — see ${join(runDir, "failed-tasks.json")}\n`);
1561
+ const failedTasksPath = join(runDir, "failed-tasks.json");
1562
+ await writeJsonFile(failedTasksPath, failing);
1563
+ throw new Error(`${failing.length} assigned task result(s) were missing or invalid; blocked before ingestion. See ${failedTasksPath}`);
1539
1564
  }
1540
1565
  process.stderr.write(`✓ ${passing.length}/${files.length} results merged → ${auditResultsPath}\n`);
1541
1566
  // Ingest: run worker-run logic against the merged results file
1542
1567
  await cmdWorkerRun([argv[0], argv[1], "worker-run", "--task", taskPath, "--artifacts-dir", artifactsDir]);
1543
1568
  }
1544
- const VALID_LENSES_SET = new Set([
1545
- "correctness", "architecture", "maintainability", "security", "reliability",
1546
- "performance", "data_integrity", "tests", "operability", "config_deployment",
1547
- ]);
1548
- const VALID_SEVERITIES_SET = new Set(["critical", "high", "medium", "low", "info"]);
1549
- const VALID_CONFIDENCES_SET = new Set(["high", "medium", "low"]);
1550
1569
  async function cmdValidateResult(argv) {
1551
1570
  const runId = getFlag(argv, "--run-id");
1552
1571
  const taskId = getFlag(argv, "--task-id");
@@ -1574,102 +1593,22 @@ async function cmdValidateResult(argv) {
1574
1593
  process.exitCode = 1;
1575
1594
  return;
1576
1595
  }
1577
- const errors = [];
1578
- // Required top-level fields
1579
- for (const field of ["task_id", "unit_id", "pass_id", "lens", "file_coverage", "findings"]) {
1580
- if (!(field in obj))
1581
- errors.push(`Missing required field: ${field}`);
1582
- }
1583
- if (errors.length > 0) {
1584
- console.error(`✗ invalid: ${taskId}`);
1585
- for (const e of errors)
1586
- console.error(` ${e}`);
1587
- process.exitCode = 1;
1588
- return;
1589
- }
1590
- // Lens
1591
- if (typeof obj.lens !== "string" || !VALID_LENSES_SET.has(obj.lens)) {
1592
- errors.push(`lens must be one of: ${[...VALID_LENSES_SET].join("|")}`);
1593
- }
1594
- // file_coverage
1595
- if (!Array.isArray(obj.file_coverage) || obj.file_coverage.length === 0) {
1596
- errors.push("file_coverage must be a non-empty array");
1597
- }
1598
- else {
1599
- for (const fc of obj.file_coverage) {
1600
- const entry = fc;
1601
- if (typeof entry.path !== "string")
1602
- errors.push(`file_coverage entry missing string 'path'`);
1603
- if (typeof entry.total_lines !== "number")
1604
- errors.push(`file_coverage entry missing numeric 'total_lines'`);
1605
- }
1606
- }
1607
- // findings
1608
- if (!Array.isArray(obj.findings)) {
1609
- errors.push("findings must be an array");
1610
- }
1611
- else {
1612
- for (const f of obj.findings) {
1613
- const finding = f;
1614
- for (const field of ["id", "title", "category", "severity", "confidence", "lens", "summary"]) {
1615
- if (typeof finding[field] !== "string")
1616
- errors.push(`finding missing string '${field}'`);
1617
- }
1618
- if (typeof finding.severity === "string" && !VALID_SEVERITIES_SET.has(finding.severity)) {
1619
- errors.push(`finding '${finding.id}': invalid severity '${finding.severity}'`);
1620
- }
1621
- if (typeof finding.confidence === "string" && !VALID_CONFIDENCES_SET.has(finding.confidence)) {
1622
- errors.push(`finding '${finding.id}': invalid confidence '${finding.confidence}'`);
1623
- }
1624
- if (!Array.isArray(finding.affected_files) || finding.affected_files.length === 0) {
1625
- errors.push(`finding '${finding.id}': affected_files must be a non-empty array`);
1626
- }
1627
- else {
1628
- for (const af of finding.affected_files) {
1629
- if (typeof af.path !== "string") {
1630
- errors.push(`finding '${finding.id}': affected_files entries must be objects with a 'path' key`);
1631
- }
1632
- }
1633
- }
1634
- if (!Array.isArray(finding.evidence) || finding.evidence.length === 0) {
1635
- errors.push(`finding '${finding.id}': evidence must be a non-empty array`);
1636
- }
1637
- if (typeof finding.lens === "string" && finding.lens !== obj.lens) {
1638
- errors.push(`finding '${finding.id}': lens '${finding.lens}' does not match task lens '${obj.lens}'`);
1639
- }
1640
- }
1641
- }
1642
- // Line range bounds (load from pending-audit-tasks.json if available)
1643
- let fileLineCounts = {};
1596
+ let allTasks = [];
1644
1597
  try {
1645
- const tasks = await readJsonFile(tasksPath);
1646
- const task = tasks.find(t => t.task_id === taskId);
1647
- fileLineCounts = task?.file_line_counts ?? {};
1648
- }
1649
- catch { /* ignore */ }
1650
- if (Array.isArray(obj.file_coverage) && Array.isArray(obj.findings)) {
1651
- const coverageMap = new Map(obj.file_coverage.map(fc => [fc.path, fc.total_lines]));
1652
- const allowedPaths = new Set(coverageMap.keys());
1653
- for (const f of obj.findings) {
1654
- for (const af of (f.affected_files ?? [])) {
1655
- const p = af.path;
1656
- if (!allowedPaths.has(p))
1657
- errors.push(`finding '${f.id}': path '${p}' not in file_coverage`);
1658
- if (typeof af.line_end === "number") {
1659
- const max = coverageMap.get(p) ?? fileLineCounts[p] ?? Infinity;
1660
- if (af.line_end > max)
1661
- errors.push(`finding '${f.id}': line_end ${af.line_end} exceeds total_lines ${max} for ${p}`);
1662
- }
1663
- }
1664
- }
1598
+ allTasks = await readJsonFile(tasksPath);
1665
1599
  }
1600
+ catch { /* may not exist */ }
1601
+ const matchingTasks = allTasks.filter(t => t.task_id === taskId);
1602
+ const lineIndex = matchingTasks[0]?.file_line_counts ?? {};
1603
+ const issues = validateAuditResults([obj], matchingTasks, { lineIndex });
1604
+ const errors = issues.filter(i => i.severity === "error");
1666
1605
  if (errors.length === 0) {
1667
1606
  console.log(`✓ valid: ${taskId}`);
1668
1607
  }
1669
1608
  else {
1670
1609
  console.error(`✗ invalid: ${taskId}`);
1671
1610
  for (const e of errors)
1672
- console.error(` ${e}`);
1611
+ console.error(` ${e.path}: ${e.message}`);
1673
1612
  process.exitCode = 1;
1674
1613
  }
1675
1614
  }
package/dist/io/json.js CHANGED
@@ -29,7 +29,7 @@ export async function readJsonFile(path) {
29
29
  if (isFileMissingError(error)) {
30
30
  throw error;
31
31
  }
32
- throw new Error(`Failed to read ${path}: ${errorMessage(error)}`);
32
+ throw ioError("read", path, error);
33
33
  }
34
34
  try {
35
35
  return JSON.parse(content);
@@ -82,7 +82,7 @@ export async function readNdjsonFile(path) {
82
82
  if (error instanceof Error && error.message.includes(path)) {
83
83
  throw error;
84
84
  }
85
- throw new Error(`Failed to read ${path}: ${errorMessage(error)}`);
85
+ throw ioError("read", path, error);
86
86
  }
87
87
  }
88
88
  export async function readOptionalJsonFile(path) {
@@ -128,7 +128,7 @@ export async function readOptionalTextFile(path) {
128
128
  if (isFileMissingError(error)) {
129
129
  return undefined;
130
130
  }
131
- throw new Error(`Failed to read ${path}: ${errorMessage(error)}`);
131
+ throw ioError("read", path, error);
132
132
  }
133
133
  }
134
134
  export async function writeTextFile(path, value) {
@@ -5,11 +5,13 @@ import { writeJsonFile } from "./json.js";
5
5
  const moduleDir = dirname(fileURLToPath(import.meta.url));
6
6
  const packageRoot = resolve(moduleDir, "..", "..");
7
7
  const auditResultSchemaPath = join(packageRoot, "schemas", "audit_result.schema.json");
8
+ const auditResultsSchemaPath = join(packageRoot, "schemas", "audit_results.schema.json");
8
9
  const findingSchemaPath = join(packageRoot, "schemas", "finding.schema.json");
9
10
  const CURRENT_TASK_FILENAME = "current-task.json";
10
11
  const CURRENT_PROMPT_FILENAME = "current-prompt.md";
11
12
  const CURRENT_TASKS_FILENAME = "current-tasks.json";
12
13
  const CURRENT_SCHEMA_FILENAME = "audit-result.schema.json";
14
+ const CURRENT_RESULTS_SCHEMA_FILENAME = "audit-results.schema.json";
13
15
  const CURRENT_FINDING_SCHEMA_FILENAME = "finding.schema.json";
14
16
  function pad(value, size = 2) {
15
17
  return String(value).padStart(size, "0");
@@ -58,6 +60,7 @@ export async function ensureSupervisorDirs(artifactsDir) {
58
60
  async function writeDispatchSchemaFiles(artifactsDir) {
59
61
  const dispatchDir = join(artifactsDir, "dispatch");
60
62
  await writeFile(join(dispatchDir, CURRENT_SCHEMA_FILENAME), await readFile(auditResultSchemaPath, "utf8"), "utf8");
63
+ await writeFile(join(dispatchDir, CURRENT_RESULTS_SCHEMA_FILENAME), await readFile(auditResultsSchemaPath, "utf8"), "utf8");
61
64
  await writeFile(join(dispatchDir, CURRENT_FINDING_SCHEMA_FILENAME), await readFile(findingSchemaPath, "utf8"), "utf8");
62
65
  }
63
66
  export async function writeWorkerTaskFiles(task, prompt, paths, artifactsDir, currentTasks, options = {}) {
@@ -119,6 +122,7 @@ export async function clearDispatchFiles(artifactsDir) {
119
122
  CURRENT_PROMPT_FILENAME,
120
123
  CURRENT_TASKS_FILENAME,
121
124
  CURRENT_SCHEMA_FILENAME,
125
+ CURRENT_RESULTS_SCHEMA_FILENAME,
122
126
  CURRENT_FINDING_SCHEMA_FILENAME,
123
127
  ];
124
128
  for (const name of targets) {
@@ -14,6 +14,7 @@ const packageRoot = resolve(moduleDir, "..", "..");
14
14
  const wrapperPath = join(packageRoot, "audit-code.mjs");
15
15
  const packageJsonPath = join(packageRoot, "package.json");
16
16
  const PROTOCOL_VERSION = "2025-06-18";
17
+ const MAX_CONTENT_LENGTH_BYTES = 10 * 1024 * 1024;
17
18
  function getFlag(argv, name) {
18
19
  const index = argv.indexOf(name);
19
20
  if (index < 0)
@@ -68,6 +69,22 @@ function failure(id, code, message, data) {
68
69
  },
69
70
  };
70
71
  }
72
+ function parseContentLength(headerBlock) {
73
+ const headers = headerBlock.split("\r\n");
74
+ const contentLengthHeader = headers.find((header) => header.toLowerCase().startsWith("content-length:"));
75
+ if (!contentLengthHeader) {
76
+ throw new Error("missing Content-Length");
77
+ }
78
+ const rawValue = contentLengthHeader.split(":")[1]?.trim();
79
+ const contentLength = Number(rawValue);
80
+ if (rawValue?.length === 0 ||
81
+ !Number.isInteger(contentLength) ||
82
+ contentLength < 0 ||
83
+ contentLength > MAX_CONTENT_LENGTH_BYTES) {
84
+ throw new Error("bad Content-Length");
85
+ }
86
+ return contentLength;
87
+ }
71
88
  async function runWrapperCommand(args, options) {
72
89
  return await new Promise((resolvePromise, rejectPromise) => {
73
90
  const child = spawn(process.execPath, [
@@ -419,21 +436,17 @@ export async function runAuditCodeMcpServer(argv) {
419
436
  if (separator < 0) {
420
437
  return;
421
438
  }
422
- const headerBlock = buffer.slice(0, separator).toString("utf8");
423
- const headers = headerBlock.split("\r\n");
424
- const contentLengthHeader = headers.find((header) => header.toLowerCase().startsWith("content-length:"));
425
- if (!contentLengthHeader) {
426
- buffer = Buffer.alloc(0);
427
- writeMessage(failure(null, -32700, "Invalid MCP framing: missing Content-Length."));
428
- return;
439
+ let contentLength;
440
+ try {
441
+ const headerBlock = buffer.slice(0, separator).toString("utf8");
442
+ contentLength = parseContentLength(headerBlock);
429
443
  }
430
- const contentLength = Number(contentLengthHeader.split(":")[1]?.trim());
431
- const frameLength = separator + 4 + contentLength;
432
- if (!Number.isFinite(contentLength) || contentLength < 0) {
444
+ catch (error) {
433
445
  buffer = Buffer.alloc(0);
434
- writeMessage(failure(null, -32700, "Invalid MCP framing: bad Content-Length."));
446
+ writeMessage(failure(null, -32700, `Invalid MCP framing: ${error instanceof Error ? error.message : String(error)}.`));
435
447
  return;
436
448
  }
449
+ const frameLength = separator + 4 + contentLength;
437
450
  if (buffer.length < frameLength) {
438
451
  return;
439
452
  }
@@ -10,9 +10,60 @@ const DEFAULT_LENS_ORDER = [
10
10
  "operability",
11
11
  "config_deployment",
12
12
  ];
13
+ const VALID_LENSES = new Set(DEFAULT_LENS_ORDER);
14
+ function isRecord(value) {
15
+ return value !== null && typeof value === "object";
16
+ }
17
+ function isLens(value) {
18
+ return typeof value === "string" && VALID_LENSES.has(value);
19
+ }
20
+ function assertStringArray(value, label) {
21
+ if (!Array.isArray(value) || value.some((item) => typeof item !== "string")) {
22
+ throw new TypeError(`${label} must be an array of strings.`);
23
+ }
24
+ }
25
+ function assertLensArray(value, label) {
26
+ if (!Array.isArray(value) || value.some((item) => !isLens(item))) {
27
+ throw new TypeError(`${label} must be an array of supported lenses.`);
28
+ }
29
+ }
30
+ function assertUnitManifest(value) {
31
+ if (!isRecord(value) || !Array.isArray(value.units)) {
32
+ throw new TypeError("buildAuditTasks requires unitManifest.units to be an array.");
33
+ }
34
+ value.units.forEach((unit, index) => {
35
+ const label = `unitManifest.units[${index}]`;
36
+ if (!isRecord(unit)) {
37
+ throw new TypeError(`${label} must be an object.`);
38
+ }
39
+ if (typeof unit.unit_id !== "string" || unit.unit_id.length === 0) {
40
+ throw new TypeError(`${label}.unit_id must be a non-empty string.`);
41
+ }
42
+ if (typeof unit.name !== "string" || unit.name.length === 0) {
43
+ throw new TypeError(`${label}.name must be a non-empty string.`);
44
+ }
45
+ assertStringArray(unit.files, `${label}.files`);
46
+ assertLensArray(unit.required_lenses, `${label}.required_lenses`);
47
+ });
48
+ }
49
+ function normalizedOptions(options) {
50
+ if (!isRecord(options)) {
51
+ throw new TypeError("buildAuditTasks options must be an object.");
52
+ }
53
+ if (options.pass_prefix !== undefined && typeof options.pass_prefix !== "string") {
54
+ throw new TypeError("buildAuditTasks options.pass_prefix must be a string.");
55
+ }
56
+ if (options.limit_lenses !== undefined) {
57
+ assertLensArray(options.limit_lenses, "buildAuditTasks options.limit_lenses");
58
+ }
59
+ return {
60
+ passPrefix: options.pass_prefix ?? "pass",
61
+ allowed: new Set(options.limit_lenses ?? DEFAULT_LENS_ORDER),
62
+ };
63
+ }
13
64
  export function buildAuditTasks(unitManifest, options = {}) {
14
- const allowed = new Set(options.limit_lenses ?? DEFAULT_LENS_ORDER);
15
- const passPrefix = options.pass_prefix ?? "pass";
65
+ assertUnitManifest(unitManifest);
66
+ const { allowed, passPrefix } = normalizedOptions(options);
16
67
  const tasks = [];
17
68
  for (const unit of unitManifest.units) {
18
69
  for (const lens of unit.required_lenses) {
@@ -7,11 +7,17 @@ export function renderWorkerPrompt(task) {
7
7
  if (task.preferred_executor === "agent" && task.audit_results_path) {
8
8
  const tasksPath = task.pending_audit_tasks_path ??
9
9
  `${task.artifacts_dir}/audit_tasks.json`;
10
+ const resultsSchemaPath = `${task.artifacts_dir}/dispatch/audit-results.schema.json`;
11
+ const singleResultSchemaPath = `${task.artifacts_dir}/dispatch/audit-result.schema.json`;
10
12
  const lines = [
11
13
  `Audit run: ${task.run_id}`,
12
14
  `Read: ${tasksPath}`,
13
- "For each task: read all file_paths in full, review under the specified lens,",
14
- "and emit one AuditResult with:",
15
+ `Array schema: ${resultsSchemaPath}`,
16
+ `Single-result schema: ${singleResultSchemaPath}`,
17
+ "Scope: review only the tasks listed in the Read file. Do not add tasks,",
18
+ "edit source files, remediate findings, run unrelated audits, or write result_path.",
19
+ "For each listed task: read all file_paths in full, review under the specified lens,",
20
+ "and emit exactly one AuditResult object with:",
15
21
  " task_id, unit_id, pass_id, lens (copy from task),",
16
22
  " file_coverage: [{path, total_lines}] — use file_line_counts[path] from the task for each file,",
17
23
  " findings: [] or array of finding objects.",
@@ -19,13 +25,13 @@ export function renderWorkerPrompt(task) {
19
25
  " affected_files [{path, line_start, line_end, symbol}] (objects, not strings; min 1 entry),",
20
26
  " evidence [strings] (min 1 entry).",
21
27
  "Constraint: line_end must not exceed total_lines for that file.",
22
- `Write all results as a JSON array to: ${task.audit_results_path}`,
28
+ `Write only the JSON array of AuditResult objects to: ${task.audit_results_path}`,
23
29
  ];
24
30
  if (usesDeferredWorkerCommand(task)) {
25
31
  lines.push("Deferred mode: write results, do not execute worker_command.");
26
32
  }
27
33
  else {
28
- lines.push("Then execute worker_command from task.json exactly.", `Command: ${commandArgv}`);
34
+ lines.push("After writing audit_results_path, execute worker_command from task.json exactly.", "The worker command ingests audit_results_path and writes result_path.", `Command: ${commandArgv}`);
29
35
  }
30
36
  return lines.join("\n");
31
37
  }
@@ -21,7 +21,7 @@ function commandExists(command) {
21
21
  return result.status === 0;
22
22
  }
23
23
  export function resolveFreshSessionProviderName(name, sessionConfig = {}, options = {}) {
24
- const requestedProvider = name ?? sessionConfig.provider ?? "auto";
24
+ const requestedProvider = name ?? sessionConfig.provider ?? "local-subprocess";
25
25
  if (requestedProvider !== "auto") {
26
26
  return requestedProvider;
27
27
  }
@@ -4,7 +4,7 @@ import { formatValidationIssues, } from "../validation/basic.js";
4
4
  import { validateSessionConfig } from "../validation/sessionConfig.js";
5
5
  import { writeJsonFile } from "../io/json.js";
6
6
  const SESSION_CONFIG_FILENAME = "session-config.json";
7
- const DEFAULT_SESSION_CONFIG = { provider: "auto" };
7
+ const DEFAULT_SESSION_CONFIG = { provider: "local-subprocess" };
8
8
  export function getSessionConfigPath(artifactsDir) {
9
9
  return join(artifactsDir, SESSION_CONFIG_FILENAME);
10
10
  }
@@ -156,7 +156,7 @@ export function validateConfiguredProviderEnvironment(sessionConfig, options = {
156
156
  const issues = [];
157
157
  const lookupCommand = options.commandExists ?? commandExists;
158
158
  const lookupPath = options.pathExists ?? configuredPathExists;
159
- const provider = sessionConfig.provider ?? "auto";
159
+ const provider = sessionConfig.provider ?? "local-subprocess";
160
160
  if (provider === "claude-code") {
161
161
  const command = sessionConfig.claude_code?.command ?? "claude";
162
162
  if (isBareExecutableName(command) && !lookupCommand(command)) {
@@ -120,13 +120,13 @@ Use the backend wrapper only when you intentionally need the repo-local fallback
120
120
 
121
121
  ## What the wrapper actually does
122
122
 
123
- `audit-code` is the stable backend entrypoint.
123
+ `audit-code` is the stable backend entrypoint behind the slash command.
124
124
 
125
125
  It:
126
126
 
127
127
  - defaults artifacts to `<repo-root>/.audit-artifacts`
128
128
  - persists audit continuity there
129
- - calls `run-to-completion` by default
129
+ - calls `run-to-completion` by default for deterministic work
130
130
  - creates fresh worker runs behind the scenes
131
131
  - returns a stable top-level JSON contract with `contract_version: "audit-code/v1alpha1"`
132
132
 
@@ -145,13 +145,15 @@ Inspect the returned JSON and continue invoking the same entrypoint until either
145
145
  Terminal interpretation:
146
146
 
147
147
  - `audit_state.status === "complete"` means the audit finished end to end.
148
- - `audit_state.status === "blocked"` means the wrapper exhausted automatic work and the remaining review still needs imported results or a provider-capable continuation path.
148
+ - `audit_state.status === "blocked"` means the wrapper exhausted deterministic
149
+ work and exposed scoped semantic-review task artifacts for the slash-command
150
+ orchestrator.
149
151
 
150
152
  Current implementation note:
151
153
 
152
154
  - the backend fallback still supports explicit provider bridges such as `claude-code`, `opencode`, `subprocess-template`, and `vscode-task`
153
155
  - those bridges are compatibility modes, not the intended default review owner
154
- - the intended long-term workflow is documented in [docs/workflow-refactor-brief.md](/C:/Code/auditor-lambda/docs/workflow-refactor-brief.md)
156
+ - the intended workflow is documented in [docs/workflow-refactor-brief.md](/C:/Code/auditor-lambda/docs/workflow-refactor-brief.md)
155
157
 
156
158
  When additional evidence exists, pass it into the same wrapper:
157
159
 
@@ -172,9 +174,15 @@ Use it when the current host cannot keep review inside the active conversation,
172
174
 
173
175
  Use when you want the supervisor to stay entirely local.
174
176
 
175
- This requires no external agent CLI. Deterministic executors run in-process during normal wrapper runs, and the supervisor only stops once the remaining work is genuinely semantic review.
177
+ This requires no external agent CLI. Deterministic executors run in-process
178
+ during normal wrapper runs, and the supervisor only stops once the remaining
179
+ work is genuinely semantic review.
176
180
 
177
- When that review boundary is reached, `local-subprocess` stops in a terminal blocked handoff instead of pretending more automatic progress is available. Use `--results <file>` for a single batch or `--batch-results <dir>` when the active conversation agent reviewed multiple task batches before ingestion.
181
+ When that review boundary is reached, `local-subprocess` stops in a terminal
182
+ blocked handoff instead of pretending more automatic progress is available.
183
+ The slash-command orchestrator should dispatch subagents from the handoff when
184
+ available; otherwise it should review exactly one task, write results, run the
185
+ provided worker command, and stop.
178
186
 
179
187
  This is the safest default backend when the repository is already available locally.
180
188
 
@@ -255,7 +263,8 @@ Highest-value follow-through:
255
263
 
256
264
  The product direction remains skill-first:
257
265
 
258
- - in conversation, use the active conversation model by default
266
+ - in conversation, keep orchestration in the active model and delegate semantic
267
+ review to bounded subagents when the host supports them
259
268
  - for backend CLI delegation, let the chosen provider own its own model-selection behavior unless explicitly configured otherwise
260
269
 
261
270
  ## Practical recommendation
@@ -265,7 +274,7 @@ For a polished operator experience today:
265
274
  1. treat `/audit-code` as the canonical user-facing contract
266
275
  2. use `audit-code install` first, and fall back to `audit-code prompt-path` only for hosts that still require manual prompt import
267
276
  3. use `audit-code` as the repo-local backend fallback
268
- 4. prefer `local-subprocess` unless you want interactive review to continue automatically through agent tasks
277
+ 4. prefer `local-subprocess` unless you explicitly want a backend provider bridge
269
278
  5. use `subprocess-template` only when integrating a non-native editor or launcher surface
270
279
 
271
280
  If you intentionally want the backend fallback to bridge semantic review into another process, re-run with an explicit `--provider` flag after configuring the matching section in `.audit-artifacts/session-config.json`.
@@ -88,7 +88,8 @@ How many provider-assisted review batches to launch in parallel when the selecte
88
88
 
89
89
  This setting only applies to explicit backend bridge launches.
90
90
 
91
- It should not be treated as the source of truth for in-conversation subagent parallelism, which belongs to the active conversation agent or host runtime.
91
+ It should not be treated as the source of truth for slash-command subagent
92
+ parallelism, which belongs to the host runtime.
92
93
 
93
94
  ## Auto provider mode
94
95
 
@@ -112,7 +113,10 @@ This keeps the current default stable while still allowing best-effort cross-edi
112
113
 
113
114
  No extra config is required.
114
115
 
115
- This mode covers deterministic audit execution locally and stops in a terminal blocked state once the remaining work requires imported audit results or an interactive provider.
116
+ This mode covers deterministic audit execution locally and stops in a terminal
117
+ blocked state once the remaining work requires semantic review. The
118
+ slash-command orchestrator should then dispatch bounded subagents when the host
119
+ supports them, or complete one assigned task and stop when it does not.
116
120
 
117
121
  When the active conversation agent reviews multiple task batches before ingestion, prefer `audit-code --batch-results <dir>` over ad hoc artifact edits.
118
122
 
@@ -6,8 +6,13 @@ The primary product contract is `/audit-code` in conversation.
6
6
 
7
7
  Everything here is fallback and implementation detail guidance for the repo-local backend surface.
8
8
 
9
- In the intended workflow, the active conversation agent owns semantic review.
10
- Provider adapters are compatibility bridges for backend fallback mode, not the default review owner.
9
+ In the intended workflow, the conversation agent owns orchestration and
10
+ ingestion control, but bounded subagents own semantic review whenever the host
11
+ can dispatch them. If subagents are unavailable, the conversation agent reviews
12
+ one assigned task and stops so `/audit-code` can be rerun from fresh context.
13
+
14
+ Provider adapters are compatibility bridges for backend fallback mode, not the
15
+ default review owner.
11
16
 
12
17
  ## Repo-local backend surface
13
18
 
@@ -59,7 +64,8 @@ audit-code --provider vscode-task
59
64
  ```
60
65
 
61
66
  Those `--provider` invocations are the explicit bridge handoff point.
62
- Without an explicit `--provider` flag, the backend keeps semantic review with the active conversation agent and uses `local-subprocess` only for deterministic worker steps.
67
+ Without an explicit `--provider` flag, the backend stops at the semantic-review
68
+ boundary and exposes scoped task artifacts for the slash-command orchestrator.
63
69
 
64
70
  ## Auto resolution rule
65
71
 
@@ -87,4 +93,6 @@ See:
87
93
  ## Note
88
94
 
89
95
  Provider adapters are backend integrations, not the primary product concept.
90
- Session config defines bridge settings, but the explicit `--provider` flag is what opts the backend into owning semantic-review dispatch.
96
+ Session config defines bridge settings. An explicit `provider: "auto"` or
97
+ `--provider` bridge selection is what opts the backend into provider-mediated
98
+ review dispatch.
@@ -16,13 +16,16 @@ That is not the intended workflow.
16
16
 
17
17
  The intended `/audit-code` workflow is:
18
18
 
19
- 1. The active conversation agent owns semantic review work.
20
- 2. Deterministic planning computes which files need which lenses.
21
- 3. Pending review is partitioned into non-overlapping review blocks, preferably grouped by lens.
22
- 4. One dispatched review task should correspond to one review block.
23
- 5. `agent_task_batch_size` should stay `1` by default.
24
- 6. If the active conversation agent can delegate to subagents in parallel, that fan-out belongs to the host agent runtime, not to the backend session config.
25
- 7. Backend provider adapters are fallback compatibility bridges only. They should not be the default review owner.
19
+ 1. The active conversation agent owns orchestration and ingestion control.
20
+ 2. Bounded subagents own semantic review work whenever the host supports them.
21
+ 3. If subagents are unavailable, the conversation agent completes one assigned
22
+ review task and stops so `/audit-code` can be rerun from fresh context.
23
+ 4. Deterministic planning computes which files need which lenses.
24
+ 5. Pending review is partitioned into non-overlapping review blocks, preferably grouped by lens.
25
+ 6. One dispatched review task should correspond to one review block.
26
+ 7. `agent_task_batch_size` should stay `1` by default.
27
+ 8. Subagent fan-out belongs to the host agent runtime, not to the backend session config.
28
+ 9. Backend provider adapters are fallback compatibility bridges only. They should not be the default review owner.
26
29
 
27
30
  ## Current implementation drift
28
31
 
@@ -97,15 +100,19 @@ That fan-out is consistent with the current unit-first planner, not with the int
97
100
 
98
101
  The next implementation pass should do the following.
99
102
 
100
- ### A. Make the active conversation agent the semantic-review owner
103
+ ### A. Make the slash-command orchestrator the review dispatcher
101
104
 
102
- The `agent` executor should represent review work owned by the current conversation or host agent session.
105
+ The `agent` executor should represent review work owned by the current
106
+ conversation or host agent session, with semantic review delegated to bounded
107
+ subagents whenever possible.
103
108
 
104
109
  Target behavior:
105
110
 
106
111
  - normal `/audit-code` usage does not require `provider: "claude-code"` or `provider: "opencode"`
107
112
  - session-config should not be the normal way to choose a second LLM for review
108
113
  - backend provider bridges remain available only for explicit fallback workflows
114
+ - when subagents are unavailable, one invocation performs at most one semantic
115
+ review task before stopping
109
116
 
110
117
  ### B. Plan review work at the file/lens level
111
118
 
@@ -150,6 +157,8 @@ Relevant files:
150
157
  The refactor should be treated as done only when all of the following are true.
151
158
 
152
159
  - Starting `/audit-code` in a conversation does not rely on an external `claude-code` or `opencode` subprocess to own semantic review.
160
+ - The slash-command orchestrator dispatches bounded subagents when available and
161
+ falls back to one semantic review task per invocation otherwise.
153
162
  - The backend fallback still supports deterministic stages and explicit compatibility bridges.
154
163
  - The default dispatch granularity for semantic review remains one review block per task.
155
164
  - Pending review tasks are planned as lens-aware, non-overlapping file blocks.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "auditor-lambda",
3
- "version": "0.3.1",
3
+ "version": "0.3.3",
4
4
  "private": false,
5
5
  "description": "Portable hybrid code-auditing framework for arbitrary repositories.",
6
6
  "type": "module",
@@ -38,7 +38,7 @@
38
38
  "start": "node dist/index.js",
39
39
  "audit-code": "node audit-code.mjs",
40
40
  "sample-run": "node dist/index.js sample-run",
41
- "dispatch:prepare": "node dispatch/prepare-dispatch.mjs",
41
+ "dispatch:prepare": "node audit-code.mjs prepare-dispatch",
42
42
  "dispatch:merge": "node dispatch/merge-results.mjs",
43
43
  "dispatch:validate": "node dispatch/validate-result.mjs"
44
44
  },
@@ -0,0 +1,10 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "audit_results.schema.json",
4
+ "title": "Audit Results",
5
+ "type": "array",
6
+ "minItems": 1,
7
+ "items": {
8
+ "$ref": "audit_result.schema.json"
9
+ }
10
+ }
@@ -17,8 +17,16 @@ Normal usage should:
17
17
  - avoid manual paths, provider flags, and model-selection arguments
18
18
  - advance the audit automatically until it completes or no further automatic progress is possible
19
19
 
20
- Semantic review should stay with the active conversation agent by default.
21
- If the host can delegate to subagents, that fan-out belongs to the host agent runtime rather than to repo-local backend provider settings.
20
+ Semantic review should be delegated to bounded subagents whenever the host can
21
+ dispatch them. The conversation orchestrator owns dispatch and ingestion control;
22
+ it should not perform broad review itself when subagents are available.
23
+
24
+ If the host cannot delegate to subagents, the conversation orchestrator may
25
+ complete exactly one assigned review task, ingest it through the provided backend
26
+ command, then stop so the user can rerun `/audit-code` from fresh context.
27
+
28
+ Subagent fan-out belongs to the host agent runtime rather than to repo-local
29
+ backend provider settings.
22
30
 
23
31
  Bounded steps are a backend implementation detail, not the intended user experience.
24
32
 
@@ -66,7 +74,8 @@ audit-code --single-step
66
74
  For repo-local backend usage:
67
75
 
68
76
  - omitted provider remains `local-subprocess`
69
- - `local-subprocess` should stop cleanly once only manual or provider-assisted review remains
77
+ - `local-subprocess` should stop cleanly once semantic review is needed and
78
+ expose scoped task artifacts for the slash-command orchestrator
70
79
  - `provider: "auto"` is the explicit opt-in best-effort routing mode
71
80
  - explicit provider names remain available when an operator wants a specific backend
72
81
 
@@ -1,22 +1,37 @@
1
1
  ---
2
- description: Autonomous local loop code auditing — steps the audit-code orchestrator and dispatches parallel subagents until the audit completes
2
+ description: Autonomous local loop code auditing - advances deterministic audit state, delegates bounded review tasks, and ingests validated results
3
3
  argument-hint: [target-dir]
4
- allowed-tools: [Read, Write, Edit, Bash, Glob, Grep, Agent]
4
+ allowed-tools: [Read, Write, Bash, Glob, Grep, Agent]
5
5
  ---
6
6
 
7
7
  # `/audit-code` Execution Directive
8
8
 
9
- **SYSTEM DIRECTIVE:** You are the autonomous audit orchestrator. Your job is to advance the state machine, dispatch parallel subagents for code review work, and loop until the audit is complete. Do not ask the user for confirmation between steps.
10
-
11
- ---
12
-
13
- ## The Loop
14
-
15
- Repeat Steps 1–5 until the audit status is `"complete"`.
16
-
17
- ---
18
-
19
- ### Step 1 — Advance the State Machine
9
+ You are the audit-code orchestrator for this conversation. The user-facing
10
+ surface is only `/audit-code`; do not ask the user to choose backend commands,
11
+ providers, models, paths, or batching strategy during normal operation.
12
+
13
+ Your job is to advance the deterministic state machine, delegate bounded
14
+ semantic review when the host supports subagents, and let the backend validate
15
+ and ingest results mechanically.
16
+
17
+ ## Core Guardrails
18
+
19
+ - Do not edit source files during semantic review. The deterministic
20
+ `auto_fixes_applied` executor may run formatter/remediation commands before
21
+ review; that is part of the backend workflow.
22
+ - Do not manually merge audit results, manually update coverage, or manually
23
+ edit audit state.
24
+ - Do not read result schemas or completed result payloads into context unless
25
+ a backend command fails and the error explicitly requires diagnosis.
26
+ - Do not inspect individual subagent result files after dispatch. Validation
27
+ and ingestion are backend responsibilities.
28
+ - Prefer subagent dispatch for semantic review whenever the host exposes an
29
+ Agent/subagent tool.
30
+ - If the host cannot dispatch subagents, complete exactly one assigned review
31
+ task, run the provided ingestion command, then stop. The user can run
32
+ `/audit-code` again to continue from fresh context.
33
+
34
+ ## Step 1 - Advance Deterministic State
20
35
 
21
36
  Run:
22
37
 
@@ -24,82 +39,97 @@ Run:
24
39
  audit-code
25
40
  ```
26
41
 
27
- _(Inside the `auditor-lambda` repo itself, use `node audit-code.mjs` instead.)_
42
+ Inside the `auditor-lambda` repository itself, use:
28
43
 
29
- Parse the JSON output. Check `audit_state.status`:
44
+ ```bash
45
+ node audit-code.mjs
46
+ ```
30
47
 
31
- | Status | Action |
32
- |--------|--------|
33
- | `"complete"` | Go to **Step 6** |
34
- | `"active"` | Deterministic progress was made — loop immediately back to Step 1 |
35
- | `"blocked"` | LLM work needed — continue to Step 2 |
48
+ Parse only the command JSON envelope needed for routing:
36
49
 
37
- ---
50
+ - `audit_state.status`
51
+ - `handoff.active_review_run.run_id`
52
+ - `handoff.artifacts_dir`
53
+ - `handoff.active_review_run.task_path`
54
+ - `handoff.active_review_run.prompt_path`
55
+ - `handoff.active_review_run.pending_audit_tasks_path`
56
+ - `handoff.active_review_run.audit_results_path`
57
+ - `handoff.active_review_run.worker_command`
38
58
 
39
- ### Step 2 — Extract the Task IDs
59
+ If status is `"active"`, deterministic progress was made. Run Step 1 again.
40
60
 
41
- Parse these fields directly from the Step 1 JSON output:
42
- - `run_id` — from `handoff.active_review_run.run_id`
43
- - `artifacts_dir` — from `handoff.artifacts_dir`
61
+ If status is `"complete"`, skip to Step 5.
44
62
 
45
- _(If `audit_state.blockers` contains a message that requires operator input rather than code review, stop and report the blocker verbatim to the user.)_
63
+ If status is `"blocked"` and the blocker is not semantic review, report the
64
+ blocker verbatim and stop.
46
65
 
47
- ---
66
+ If status is `"blocked"` for semantic review, continue to Step 2.
48
67
 
49
- ### Step 3 — Prepare the Dispatch Plan
68
+ ## Step 2 - Dispatch Review Work
50
69
 
51
- Run:
70
+ When the host supports subagents, prepare a dispatch plan:
52
71
 
53
72
  ```bash
54
73
  audit-code prepare-dispatch --run-id <run_id> --artifacts-dir <artifacts_dir>
55
74
  ```
56
75
 
57
- Read `<artifacts_dir>/runs/<run_id>/dispatch-plan.json`. It is a JSON array where each entry has:
58
- - `task_id` — task identifier
59
- - `description` — short label for the Agent call
60
- - `output_path` — where the subagent writes its result
61
- - `prompt_path` — path to the complete subagent instructions file
76
+ Read only `<artifacts_dir>/runs/<run_id>/dispatch-plan.json`.
62
77
 
63
- ---
78
+ In a single message, launch one Agent/subagent call per dispatch-plan entry:
64
79
 
65
- ### Step 4 — Dispatch All Subagents in Parallel
66
-
67
- **In a single message**, fire one `Agent` call per entry in `dispatch-plan.json`:
68
-
69
- ```
80
+ ```text
70
81
  Agent({ description: entry.description, prompt: "Read and follow the audit instructions in: " + entry.prompt_path })
71
82
  ```
72
83
 
73
- All calls must be sent simultaneously — never await one before firing the next. This is the critical performance constraint. Wait for all to complete before proceeding.
74
-
75
- Each subagent reads its instruction file, reviews the assigned code, writes a validated JSON result to `output_path`, and self-validates. You do not need to inspect individual subagent output.
76
-
77
- ---
84
+ All subagent calls should be launched together. Wait for them to finish.
78
85
 
79
- ### Step 5 — Merge and Ingest
86
+ Subagents own bounded semantic review. They must read only their prompt and
87
+ assigned files, write exactly the requested audit result JSON to `output_path`,
88
+ run the validation command in their prompt, retry up to 3 times if validation
89
+ fails, and stop. They must not edit source files, remediate findings, create
90
+ extra task results, run unrelated audits, or write the worker `result.json`
91
+ control envelope.
80
92
 
81
- Run:
93
+ Then run:
82
94
 
83
95
  ```bash
84
96
  audit-code merge-and-ingest --run-id <run_id> --artifacts-dir <artifacts_dir>
85
97
  ```
86
98
 
87
- Loop back to **Step 1**.
99
+ If `merge-and-ingest` exits non-zero, stop immediately and report the exact
100
+ error. Do not improvise manual merging or state edits.
88
101
 
89
- ---
102
+ Loop back to Step 1.
90
103
 
91
- ### Step 6 — Present Results
104
+ ## Step 3 - Single-Task Fallback
92
105
 
93
- When `audit_state.status` is `"complete"`, stop the loop. Do **not** run the orchestrator again.
106
+ Use this path only when the host cannot dispatch subagents.
94
107
 
95
- Read `audit-report.md` and present the completed audit to the user. Lead with the work blocks — they are the primary remediation handoff.
108
+ Read the current review prompt named by `handoff.active_review_run.prompt_path`
109
+ or `.audit-artifacts/dispatch/current-prompt.md`, plus the matching task file
110
+ needed to find `audit_results_path` and `worker_command`.
96
111
 
97
- ---
112
+ Complete exactly one assigned review task. If a batch file lists multiple tasks,
113
+ choose the first pending task only. Read only that task's assigned files. Write
114
+ one valid `AuditResult` object, wrapped in a JSON array, to `audit_results_path`.
115
+
116
+ Run the exact `worker_command` from the task file. Then stop and summarize that
117
+ one bounded step. Do not loop into another semantic review task in the same
118
+ conversation turn.
119
+
120
+ ## Step 4 - Backend Failure Handling
121
+
122
+ If `prepare-dispatch`, `merge-and-ingest`, or `worker_command` fails:
98
123
 
99
- ## Edge Cases
124
+ - stop immediately
125
+ - report the exact command and error output
126
+ - do not manually create prompts, split tasks, merge results, edit state, or
127
+ remediate application code
100
128
 
101
- **Large task warnings:** `prepare-dispatch` warns about tasks exceeding ~1500 lines. If a subagent hits a quota limit and fails to produce output, `merge-and-ingest` excludes it silently — those tasks remain pending and are picked up in the next loop iteration. No manual intervention needed.
129
+ Invalid or missing subagent output is a blocker. It should not be silently
130
+ merged or treated as automatic progress.
102
131
 
103
- **Failed validation:** Subagents self-validate and retry up to 3 times before finishing. `merge-and-ingest` excludes any results that still lack required fields and writes `failed-tasks.json`. Those tasks are requeued automatically in the next cycle.
132
+ ## Step 5 - Present Results
104
133
 
105
- **Command failures:** If `prepare-dispatch` or `merge-and-ingest` exits non-zero, **STOP immediately** and report the exact error output to the user. Do NOT improvise manual dispatch, manually split tasks, manually create directories, manually construct prompts, or manually merge results. These scripts are the canonical mechanism — operating without them produces incorrect output. Fix the underlying issue and re-run the failed command.
134
+ When `audit_state.status` is `"complete"`, do not run the orchestrator again.
135
+ Read `audit-report.md` and present the completed audit with work blocks first.
@@ -1,136 +0,0 @@
1
- import { dirname, resolve, join } from "node:path";
2
- import { fileURLToPath } from "node:url";
3
- import { readFileSync, writeFileSync, mkdirSync } from "node:fs";
4
-
5
- const __filename = fileURLToPath(import.meta.url);
6
- const __dirname = dirname(__filename);
7
- const PACKAGE_ROOT = resolve(__dirname, "..");
8
-
9
- // Parse --run-id
10
- const runIdIdx = process.argv.indexOf("--run-id");
11
- if (runIdIdx === -1 || !process.argv[runIdIdx + 1]) {
12
- console.error("Usage: node dispatch/prepare-dispatch.mjs --run-id <run_id> [--artifacts-dir <dir>]");
13
- process.exit(1);
14
- }
15
- const run_id = process.argv[runIdIdx + 1];
16
-
17
- // Parse --artifacts-dir (default: CWD/.audit-artifacts)
18
- const artifactsDirIdx = process.argv.indexOf("--artifacts-dir");
19
- const artifactsDir = artifactsDirIdx !== -1 && process.argv[artifactsDirIdx + 1]
20
- ? resolve(process.argv[artifactsDirIdx + 1])
21
- : join(process.cwd(), ".audit-artifacts");
22
-
23
- const runDir = join(artifactsDir, "runs", run_id);
24
- const tasksPath = join(runDir, "pending-audit-tasks.json");
25
- const taskResultsDir = join(runDir, "task-results");
26
- const dispatchPlanPath = join(runDir, "dispatch-plan.json");
27
-
28
- let tasks;
29
- try {
30
- tasks = JSON.parse(readFileSync(tasksPath, "utf8"));
31
- } catch (e) {
32
- console.error(`Cannot read ${tasksPath}: ${e.message}`);
33
- process.exit(1);
34
- }
35
-
36
- const lensDefinitions = JSON.parse(
37
- readFileSync(join(__dirname, "lens-definitions.json"), "utf8")
38
- );
39
-
40
- mkdirSync(taskResultsDir, { recursive: true });
41
-
42
- function buildPrompt(task, lensDef, outputPath, runId, artifactsDir) {
43
- const fileList = task.file_paths.map(p => {
44
- const lines = task.file_line_counts?.[p] ?? 0;
45
- return `- ${p} (${lines} lines)`;
46
- }).join("\n");
47
-
48
- return `You are a code auditor. Review the files below under the specified lens.
49
-
50
- ## Task
51
- task_id: ${task.task_id}
52
- unit_id: ${task.unit_id}
53
- pass_id: ${task.pass_id}
54
- lens: ${task.lens}
55
-
56
- ## Files to read
57
- Use your Read tool. Paths are repo-relative from the current working directory.
58
- ${fileList}
59
-
60
- ## Lens: ${task.lens}
61
- ${lensDef?.description ?? task.lens}
62
-
63
- Do NOT report: ${lensDef?.do_not_report ?? "N/A"}
64
-
65
- ## Output
66
- Write a single JSON object to: ${outputPath}
67
-
68
- Required fields:
69
- task_id copy from task metadata above
70
- unit_id copy from task metadata above
71
- pass_id copy from task metadata above
72
- lens copy from task metadata above
73
- file_coverage [{path, total_lines}] — one entry per file; use the line counts listed above
74
- findings [] or array of finding objects (see below)
75
-
76
- Each finding object (omit optional fields if not applicable):
77
- id unique ID, e.g. "SEC-001"
78
- title short title
79
- category correctness|architecture|maintainability|security|reliability|performance|data_integrity|tests|operability|config_deployment
80
- severity critical|high|medium|low|info
81
- confidence high|medium|low
82
- lens "${task.lens}" — must match task lens exactly
83
- summary 1–2 sentence description
84
- affected_files [{path, line_start?, line_end?, symbol?}] — objects, not strings; min 1 entry
85
- evidence ["path/to/file.ts:42 — description of what you see there"] — min 1 entry
86
-
87
- Constraints:
88
- 1. line_end must not exceed the file's actual line count (use the counts listed above)
89
- 2. affected_files entries are OBJECTS with a "path" key — NOT plain strings
90
- 3. Only reference files from the list above
91
- 4. findings: [] is correct when you find nothing genuine — do not invent findings
92
-
93
- ## Validate
94
- After writing your result, run:
95
- audit-code validate-result --run-id ${runId} --task-id ${task.task_id} --artifacts-dir ${artifactsDir}
96
-
97
- Exit 0 means valid. Non-zero: read the errors, fix your JSON, rewrite the file, run again. Retry up to 3 times.`;
98
- }
99
-
100
- const plan = [];
101
- let largestTask = null;
102
- let largestLines = 0;
103
-
104
- for (const task of tasks) {
105
- const sanitizedId = task.task_id.replace(/[^a-zA-Z0-9_-]/g, "_");
106
- const outputPath = join(taskResultsDir, sanitizedId + ".json");
107
- const promptPath = join(taskResultsDir, sanitizedId + ".prompt.md");
108
- const lensDef = lensDefinitions[task.lens];
109
-
110
- if (!lensDef) {
111
- process.stderr.write(`Warning: no lens definition for '${task.lens}' (task ${task.task_id})\n`);
112
- }
113
-
114
- const totalFileLines = Object.values(task.file_line_counts ?? {}).reduce((a, b) => a + b, 0);
115
-
116
- if (totalFileLines > largestLines) {
117
- largestLines = totalFileLines;
118
- largestTask = task.task_id;
119
- }
120
- if (totalFileLines > 1500) {
121
- process.stderr.write(`Warning: large task ${task.task_id} (~${totalFileLines} lines) may hit quota limits\n`);
122
- }
123
-
124
- const prompt = buildPrompt(task, lensDef, outputPath, run_id, artifactsDir);
125
- writeFileSync(promptPath, prompt, "utf8");
126
-
127
- const description = `Audit ${task.unit_id} (${task.file_paths.length} file(s), ~${totalFileLines} lines) — ${task.lens} lens`;
128
- plan.push({ task_id: task.task_id, description, output_path: outputPath, prompt_path: promptPath });
129
- }
130
-
131
- writeFileSync(dispatchPlanPath, JSON.stringify(plan, null, 2));
132
-
133
- console.log(`Wrote dispatch-plan.json — ${plan.length} tasks ready for dispatch`);
134
- if (largestTask) {
135
- console.log(`Largest task: ${largestTask} (~${largestLines} lines)`);
136
- }