auditor-lambda 0.2.15 → 0.2.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dispatch/lens-definitions.json +38 -0
- package/dispatch/merge-results.mjs +84 -0
- package/dispatch/prepare-dispatch.mjs +155 -0
- package/dispatch/validate-result.mjs +67 -0
- package/dispatch/validate.mjs +88 -0
- package/docs/dispatch-implementation-plan.md +553 -0
- package/package.json +7 -2
- package/scripts/postinstall.mjs +14 -6
- package/skills/audit-code/audit-code.prompt.md +80 -36
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
{
|
|
2
|
+
"correctness": {
|
|
3
|
+
"description": "Logic errors, incorrect algorithm implementations, off-by-one bugs, type mismatches, wrong return values, incorrect state transitions, missing null/undefined guards, misuse of APIs. Focus on code that does the wrong thing.",
|
|
4
|
+
"do_not_report": "Style issues, naming problems, missing tests, or findings that belong to other lenses."
|
|
5
|
+
},
|
|
6
|
+
"maintainability": {
|
|
7
|
+
"description": "Code that is hard to change safely: excessive function length, deep nesting, tight coupling between unrelated modules, poor naming, magic constants, duplicated logic, inconsistent abstractions, unclear public APIs.",
|
|
8
|
+
"do_not_report": "Correctness bugs, test gaps, or operational concerns."
|
|
9
|
+
},
|
|
10
|
+
"tests": {
|
|
11
|
+
"description": "Test coverage gaps for important paths, tests that assert incorrect behavior (pinning bugs as expected), fragile or non-deterministic tests, missing negative/edge-case tests, tests that silently pass on stale builds (e.g. importing compiled dist/ rather than source).",
|
|
12
|
+
"do_not_report": "Source code bugs — report only issues with the tests themselves."
|
|
13
|
+
},
|
|
14
|
+
"security": {
|
|
15
|
+
"description": "Injection vulnerabilities (SQL, shell, path traversal), authentication/authorization flaws, secret exposure, insecure deserialization, privilege escalation, unsafe use of eval or child processes with user input.",
|
|
16
|
+
"do_not_report": "Performance or correctness issues that are not security-relevant."
|
|
17
|
+
},
|
|
18
|
+
"reliability": {
|
|
19
|
+
"description": "Failure modes without recovery, missing timeouts, unhandled promise rejections, race conditions, resource leaks (file handles, sockets, timers), incorrect retry logic, cascading failure risks.",
|
|
20
|
+
"do_not_report": "Correctness bugs that do not affect reliability under failure conditions."
|
|
21
|
+
},
|
|
22
|
+
"performance": {
|
|
23
|
+
"description": "Algorithmic inefficiencies (O(n²) where O(n) is possible), unnecessary re-computation, missing caching, synchronous blocking in hot paths, excessive memory allocation.",
|
|
24
|
+
"do_not_report": "Correctness bugs unrelated to performance."
|
|
25
|
+
},
|
|
26
|
+
"data_integrity": {
|
|
27
|
+
"description": "Missing input validation at trust boundaries, schema violations, inconsistent field naming across related schemas, data loss scenarios, missing required fields, enum values that are present in some schemas but not others.",
|
|
28
|
+
"do_not_report": "UI or presentation issues; operational or deployment concerns."
|
|
29
|
+
},
|
|
30
|
+
"operability": {
|
|
31
|
+
"description": "Missing or low-quality log output, error messages that don't help operators diagnose problems, missing progress indicators for long operations, no elapsed-time reporting, lack of dry-run or preview modes for destructive operations.",
|
|
32
|
+
"do_not_report": "Correctness bugs or deployment configuration."
|
|
33
|
+
},
|
|
34
|
+
"config_deployment": {
|
|
35
|
+
"description": "CI/CD pipeline correctness (wrong triggers, missing branch filters, floating version pins), deployment safety (no gate before publish, missing rollback), insecure secret handling in configs, mutable action tags that should be pinned to commit SHAs.",
|
|
36
|
+
"do_not_report": "Runtime code issues; findings that belong to other lenses."
|
|
37
|
+
}
|
|
38
|
+
}
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
import { dirname, resolve, join } from "node:path";
|
|
2
|
+
import { fileURLToPath } from "node:url";
|
|
3
|
+
import { readFileSync, writeFileSync, readdirSync, existsSync } from "node:fs";
|
|
4
|
+
import { validateResult } from "./validate.mjs";
|
|
5
|
+
|
|
6
|
+
// Locate the package root: this script lives in <root>/dispatch/.
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
const PROJECT_ROOT = resolve(__dirname, "..");

// Parse --run-id <run_id>. The flag is mandatory and must be followed by a value.
const runIdIdx = process.argv.indexOf("--run-id");
if (runIdIdx === -1 || !process.argv[runIdIdx + 1]) {
  console.error("Usage: node dispatch/merge-results.mjs --run-id <run_id>");
  process.exit(1);
}
const run_id = process.argv[runIdIdx + 1];

// Every input and output of a run lives under .audit-artifacts/runs/<run_id>/.
const artifactsDir = join(PROJECT_ROOT, ".audit-artifacts");
const runDir = join(artifactsDir, "runs", run_id);
const taskResultsDir = join(runDir, "task-results");
const auditResultsPath = join(runDir, "audit-results.json");
const failedTasksPath = join(runDir, "failed-tasks.json");
const tasksPath = join(runDir, "pending-audit-tasks.json");

// task_id -> file_line_counts, taken from the pending task list.
// A Map is used instead of a plain object so that an unusual task id
// (e.g. "constructor") can never resolve to an inherited Object property.
const lineCounts = new Map();
if (existsSync(tasksPath)) {
  try {
    const tasks = JSON.parse(readFileSync(tasksPath, "utf8"));
    if (!Array.isArray(tasks)) {
      throw new TypeError("expected a JSON array of tasks");
    }
    for (const task of tasks) {
      lineCounts.set(task.task_id, task.file_line_counts);
    }
  } catch (err) {
    // Best effort: merging still works without the expected line counts,
    // but say so instead of silently proceeding with an empty map.
    console.warn(
      `Warning: could not load ${tasksPath} (${err.message}); continuing without expected line counts`
    );
  }
}

if (!existsSync(taskResultsDir)) {
  console.error(`task-results directory not found: ${taskResultsDir}`);
  process.exit(1);
}

// readdirSync order is platform-dependent; sort so audit-results.json and
// failed-tasks.json come out in the same order on every run.
const files = readdirSync(taskResultsDir)
  .filter((name) => name.endsWith(".json"))
  .sort();

const passing = [];
const failing = [];

for (const filename of files) {
  const filePath = join(taskResultsDir, filename);

  let resultObj;
  try {
    resultObj = JSON.parse(readFileSync(filePath, "utf8"));
  } catch (e) {
    // Unparseable files are reported under their file name, since no task_id can be read.
    failing.push({ task_id: filename, errors: [`Invalid JSON: ${e.message}`] });
    continue;
  }

  const taskId = resultObj?.task_id;
  const fileLineCounts = lineCounts.get(taskId) || {};
  const { valid, errors } = validateResult(resultObj, fileLineCounts);

  if (valid) {
    passing.push(resultObj);
  } else {
    failing.push({ task_id: taskId ?? filename, errors });
  }
}

// audit-results.json contains only the results that passed validation.
writeFileSync(auditResultsPath, JSON.stringify(passing, null, 2));

if (failing.length > 0) {
  writeFileSync(failedTasksPath, JSON.stringify(failing, null, 2));
  console.warn(`${failing.length} task(s) failed validation and were excluded:`);
  for (const f of failing) {
    // Only the first error is printed here; the full list is in failed-tasks.json.
    console.warn(` ✗ ${f.task_id}: ${f.errors[0]}`);
  }
}

const total = files.length;
console.log(`✓ ${passing.length}/${total} tasks valid → ${auditResultsPath}`);
if (failing.length > 0) {
  console.log(" Re-run those tasks in the next cycle.");
}

// Exit 0 even when some tasks failed: failures are recorded, not fatal.
process.exit(0);
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
import { dirname, resolve, join } from "node:path";
|
|
2
|
+
import { fileURLToPath } from "node:url";
|
|
3
|
+
import { readFileSync, writeFileSync, mkdirSync, existsSync } from "node:fs";
|
|
4
|
+
|
|
5
|
+
// Locate the package root: this script lives in <root>/dispatch/.
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
const PROJECT_ROOT = resolve(__dirname, "..");

// Parse --run-id <run_id>. The flag is mandatory and must be followed by a value.
const runIdIdx = process.argv.indexOf("--run-id");
if (runIdIdx === -1 || !process.argv[runIdIdx + 1]) {
  console.error("Usage: node dispatch/prepare-dispatch.mjs --run-id <run_id>");
  process.exit(1);
}
const run_id = process.argv[runIdIdx + 1];

const artifactsDir = join(PROJECT_ROOT, ".audit-artifacts");
const runDir = join(artifactsDir, "runs", run_id);
const tasksPath = join(runDir, "pending-audit-tasks.json");
const dispatchPlanPath = join(runDir, "dispatch-plan.json");

if (!existsSync(tasksPath)) {
  console.error(`File not found: ${tasksPath}`);
  process.exit(1);
}

/**
 * Read and parse a JSON file. On any read or parse failure, print a message
 * that names the file and exit with status 1, so the operator sees which
 * input is broken rather than a bare stack trace.
 *
 * @param {string} path - absolute path of the JSON file
 * @returns {any} the parsed JSON value
 */
function readJsonOrExit(path) {
  try {
    return JSON.parse(readFileSync(path, "utf8"));
  } catch (err) {
    console.error(`Failed to read JSON from ${path}: ${err.message}`);
    process.exit(1);
  }
}

const tasks = readJsonOrExit(tasksPath);
if (!Array.isArray(tasks)) {
  // The dispatch loop iterates over tasks; fail here with a readable message.
  console.error(`Expected a JSON array of tasks in ${tasksPath}`);
  process.exit(1);
}

// Lens descriptions live next to this script; the schemas live under <root>/schemas/.
const lensDefinitions = readJsonOrExit(join(__dirname, "lens-definitions.json"));
const auditResultSchema = readJsonOrExit(
  join(PROJECT_ROOT, "schemas", "audit_result.schema.json")
);
const findingSchema = readJsonOrExit(
  join(PROJECT_ROOT, "schemas", "finding.schema.json")
);
|
|
37
|
+
|
|
38
|
+
/**
 * Quote a value for the command line shown to the subagent.
 * Values made only of characters that need no quoting are returned unchanged;
 * anything else (for example an id containing a space) is wrapped in double
 * quotes. This does not neutralize `$` or backticks inside double quotes.
 *
 * @param {unknown} value
 * @returns {string}
 */
function shellArg(value) {
  const text = String(value);
  return /^[A-Za-z0-9_.:@/+=-]+$/.test(text) ? text : JSON.stringify(text);
}

/**
 * Build the empty result a subagent writes when it cannot produce a valid one.
 * A missing file_paths or file_line_counts yields an empty or partial
 * file_coverage instead of a TypeError.
 *
 * @param {object} task - one entry of pending-audit-tasks.json
 * @returns {object} result object with findings: []
 */
function buildFallbackResult(task) {
  const counts = task.file_line_counts ?? {};
  return {
    task_id: task.task_id,
    unit_id: task.unit_id,
    pass_id: task.pass_id,
    lens: task.lens,
    file_coverage: (task.file_paths ?? []).map((path) => ({ path, total_lines: counts[path] })),
    findings: [],
    notes: ["Validation failed after 3 attempts — empty result written as fallback."]
  };
}

/**
 * Assemble the complete prompt text for one audit subagent.
 *
 * The prompt embeds the task as JSON, the lens description, both schemas,
 * the hard constraints, the validation command and the fallback result.
 *
 * @param {object} task - task metadata (task_id, unit_id, pass_id, lens, file_paths, file_line_counts)
 * @param {{description: string, do_not_report: string}} lensDef - scope text for the task's lens
 * @param {object} auditResultSchema - parsed audit_result.schema.json
 * @param {object} findingSchema - parsed finding.schema.json
 * @param {string} outputPath - file the subagent must write its result to
 * @param {string} runId - run id passed to the validation command
 * @param {string} artifactsDir - currently unused; kept so existing callers keep working
 * @returns {string} the prompt
 */
function buildPrompt(task, lensDef, auditResultSchema, findingSchema, outputPath, runId, artifactsDir) {
  const fallback = buildFallbackResult(task);
  const validateCommand = `node dispatch/validate-result.mjs ${shellArg(runId)} ${shellArg(task.task_id)}`;

  return `You are a code auditor. Perform a bounded audit of the files listed below under the specified lens.

## Task metadata
${JSON.stringify(task, null, 2)}

## Files to read
Read each path in task.file_paths using your Read tool. The repo root is the current working directory — paths are repo-relative (e.g. "src/foo.ts").

file_line_counts gives the expected total line count for each file. Use those exact values for file_coverage[].total_lines in your result.

## Lens: ${task.lens}
${lensDef.description}

Do NOT report: ${lensDef.do_not_report}

## Output format
Write your result as a single JSON **object** (not an array) to this exact path:
${outputPath}

The result must conform to the following schema:

### audit_result.schema.json
${JSON.stringify(auditResultSchema, null, 2)}

### finding.schema.json
${JSON.stringify(findingSchema, null, 2)}

## Hard constraints (violations will fail validation)
1. NEVER set line_end higher than the file's actual line count.
Use file_line_counts as your reference. If in doubt, leave line_end omitted.
2. Every finding MUST have ALL required fields:
id, title, category, severity, confidence, lens, summary, affected_files, evidence
3. lens on every finding must be exactly "${task.lens}"
4. No fields outside the schema. Forbidden: "recommendation", "tags", "description" (use "summary").
5. evidence[] must contain at least one specific file:line reference.
Format: "path/to/file.ts:42 - brief description of what you see there"
6. affected_files[] entries are OBJECTS with a "path" key — NOT plain strings.
Example: {"path": "src/foo.ts", "line_start": 10, "line_end": 20, "symbol": "myFunc"}
7. Only reference file paths that appear in this task's file_paths.
8. findings: [] is correct when you genuinely find nothing. Do not invent findings.

## Validation step (required)
After writing your result, run:
${validateCommand}

- If it exits 0: you are done. Stop.
- If it exits non-zero: read the error output, fix the JSON, rewrite the file, run again.
- Repeat up to 3 times.

If you cannot produce a valid result after 3 attempts, write this fallback (substituting real values):
${JSON.stringify(fallback, null, 2)}

Then validate the fallback passes before finishing.`;
}
|
|
103
|
+
|
|
104
|
+
// Subagents write their results here; create it up front so their writes cannot fail on a missing directory.
mkdirSync(join(runDir, "task-results"), { recursive: true });

// Tasks covering more than this many source lines get a quota warning.
const LARGE_TASK_LINE_THRESHOLD = 1500;

const plan = [];
let largestTask = null;
let largestLines = 0;

// sanitized file stem -> task_id that first claimed it, used to detect
// two different task ids collapsing onto the same result file.
const claimedOutputs = new Map();

for (const task of tasks) {
  // Result files are named after the task id with every character outside [a-zA-Z0-9_-] replaced by "_".
  const sanitizedId = task.task_id.replace(/[^a-zA-Z0-9_-]/g, "_");
  const outputPath = join(runDir, "task-results", sanitizedId + ".json");

  const firstOwner = claimedOutputs.get(sanitizedId);
  if (firstOwner === undefined) {
    claimedOutputs.set(sanitizedId, task.task_id);
  } else {
    console.warn(
      `Warning: tasks '${firstOwner}' and '${task.task_id}' both map to ${outputPath}; their results will overwrite each other`
    );
  }

  const lensDef = lensDefinitions[task.lens];
  if (!lensDef) {
    console.warn(`Warning: no lens definition for '${task.lens}' (task ${task.task_id})`);
  }

  const totalFileLines = Object.values(task.file_line_counts).reduce((a, b) => a + b, 0);
  const description = `Audit ${task.unit_id} (${task.file_paths.length} file(s), ~${totalFileLines} lines) — ${task.lens} lens`;
  const prompt = buildPrompt(
    task,
    // Unknown lens: fall back to the bare lens name so a prompt is still produced.
    lensDef ?? { description: task.lens, do_not_report: "N/A" },
    auditResultSchema,
    findingSchema,
    outputPath,
    run_id,
    artifactsDir
  );

  if (totalFileLines > largestLines) {
    largestLines = totalFileLines;
    largestTask = task.task_id;
  }

  if (totalFileLines > LARGE_TASK_LINE_THRESHOLD) {
    console.warn(`Warning: large task ${task.task_id} (~${totalFileLines} lines) may hit quota limits`);
  }

  plan.push({ task_id: task.task_id, description, output_path: outputPath, prompt });
}

writeFileSync(dispatchPlanPath, JSON.stringify(plan, null, 2));

console.log(`Wrote dispatch-plan.json — ${plan.length} tasks ready for dispatch`);
if (largestTask) {
  console.log(`Largest task: ${largestTask} (~${largestLines} lines)`);
}

// The remaining output is addressed to the orchestrator that runs this script.
console.log("");
console.log("--- ORCHESTRATOR INSTRUCTIONS ---");
console.log("Read dispatch-plan.json. For each entry, fire one Agent call with:");
console.log(" description: <entry.description>");
console.log(" prompt: <entry.prompt>");
console.log(`Fire all ${plan.length} calls in a single message for parallel execution.`);
console.log(`When all complete, run: node dispatch/merge-results.mjs --run-id ${run_id}`);
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
import { dirname, resolve, join } from "node:path";
|
|
2
|
+
import { fileURLToPath } from "node:url";
|
|
3
|
+
import { readFileSync, existsSync } from "node:fs";
|
|
4
|
+
import { validateResult } from "./validate.mjs";
|
|
5
|
+
|
|
6
|
+
// Locate the package root: this script lives in <root>/dispatch/.
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
const PROJECT_ROOT = resolve(__dirname, "..");

// Positional arguments: <run_id> <task_id>. The task id is given unsanitized.
const run_id = process.argv[2];
const task_id = process.argv[3];

if (!run_id || !task_id) {
  console.error("Usage: node dispatch/validate-result.mjs <run_id> <task_id>");
  process.exit(1);
}

// Locate artifacts_dir: session-config.json may override the default location.
// NOTE(review): prepare-dispatch.mjs and merge-results.mjs always use the
// default directory and ignore this override — confirm which is intended.
let artifactsDir = join(PROJECT_ROOT, ".audit-artifacts");
const sessionConfigPath = join(artifactsDir, "session-config.json");
if (existsSync(sessionConfigPath)) {
  try {
    const cfg = JSON.parse(readFileSync(sessionConfigPath, "utf8"));
    if (cfg.artifacts_dir) artifactsDir = cfg.artifacts_dir;
  } catch (err) {
    // Keep the default, but report that the config was ignored.
    console.warn(`Warning: ignoring unreadable ${sessionConfigPath}: ${err.message}`);
  }
}

// Same file-name sanitization that prepare-dispatch.mjs applies when it chooses the output path.
const sanitized = task_id.replace(/[^a-zA-Z0-9_-]/g, "_");
const resultPath = join(artifactsDir, "runs", run_id, "task-results", sanitized + ".json");

if (!existsSync(resultPath)) {
  console.error(`File not found: ${resultPath}`);
  process.exit(1);
}

let resultObj;
try {
  resultObj = JSON.parse(readFileSync(resultPath, "utf8"));
} catch (e) {
  console.error(`Invalid JSON in ${resultPath}: ${e.message}`);
  process.exit(1);
}

// Look up this task's expected line counts; validation proceeds without them if unavailable.
const tasksPath = join(artifactsDir, "runs", run_id, "pending-audit-tasks.json");
let fileLineCounts = {};
if (existsSync(tasksPath)) {
  try {
    const tasks = JSON.parse(readFileSync(tasksPath, "utf8"));
    if (!Array.isArray(tasks)) {
      throw new TypeError("expected a JSON array of tasks");
    }
    const task = tasks.find((t) => t.task_id === task_id);
    fileLineCounts = task?.file_line_counts ?? {};
  } catch (err) {
    console.warn(
      `Warning: could not load ${tasksPath} (${err.message}); validating without expected line counts`
    );
  }
}

const { valid, errors } = validateResult(resultObj, fileLineCounts);

// The exit status is the contract with the subagent: 0 = valid, 1 = fix and retry.
if (valid) {
  console.log("✓ valid:", task_id);
  process.exit(0);
} else {
  console.error("✗ invalid:", task_id);
  console.error(JSON.stringify(errors, null, 2));
  process.exit(1);
}
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
import { dirname, resolve, join } from "node:path";
|
|
2
|
+
import { fileURLToPath } from "node:url";
|
|
3
|
+
import { readFileSync } from "node:fs";
|
|
4
|
+
import Ajv2020 from "ajv/dist/2020.js";
|
|
5
|
+
|
|
6
|
+
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
// This module lives in <root>/dispatch/, so the package root is one level up.
const PROJECT_ROOT = resolve(__dirname, "..");
const SCHEMAS_DIR = join(PROJECT_ROOT, "schemas");

/**
 * Read and parse a JSON schema from the schemas/ directory.
 *
 * @param {string} name - schema file name, e.g. "finding.schema.json"
 * @returns {object} the parsed schema
 * @throws {Error} when the file cannot be read or is not valid JSON; the
 *   message names the schema and its path, and the original error is
 *   attached as `cause`
 */
function loadSchema(name) {
  const schemaPath = join(SCHEMAS_DIR, name);
  try {
    return JSON.parse(readFileSync(schemaPath, "utf8"));
  } catch (err) {
    // A bare SyntaxError from JSON.parse does not say which file was malformed.
    throw new Error(`Failed to load schema '${name}' from ${schemaPath}: ${err.message}`, { cause: err });
  }
}
|
|
14
|
+
|
|
15
|
+
// Module-level cache: the Ajv instance and the compiled audit_result validator.
let _ajv = null;
let _validateFn = null;

/**
 * Return the compiled validator for audit_result.schema.json, building it on
 * first use and reusing it afterwards.
 *
 * finding.schema.json is registered on the Ajv instance before
 * audit_result.schema.json is compiled.
 *
 * @returns {Function} the compiled Ajv validate function
 */
function getValidator() {
  if (!_validateFn) {
    _ajv = new Ajv2020({ strict: false, allErrors: true });
    const findingSchema = loadSchema("finding.schema.json");
    _ajv.addSchema(findingSchema);
    const auditResultSchema = loadSchema("audit_result.schema.json");
    _validateFn = _ajv.compile(auditResultSchema);
  }
  return _validateFn;
}
|
|
25
|
+
|
|
26
|
+
/**
 * Render one Ajv error object as a single line:
 * "<instancePath>: <message> (<params as JSON>)".
 * An empty instancePath is shown as "(root)"; the params suffix is omitted
 * when the error has no params.
 *
 * @param {{instancePath?: string, message?: string, params?: object}} e
 * @returns {string}
 */
function formatAjvError(e) {
  const location = e.instancePath ? e.instancePath : "(root)";
  let details = "";
  if (e.params) {
    details = ` (${JSON.stringify(e.params)})`;
  }
  return `${location}: ${e.message}${details}`;
}
|
|
30
|
+
|
|
31
|
+
/**
 * Validate one task result: first against audit_result.schema.json, then
 * against three extra rules the schema does not express.
 *
 * Extra rules (checked only when the schema check passes):
 *  1. line range — an affected_files entry's line_end must not exceed the
 *     file's line count. The limit is the smaller of the result's own
 *     file_coverage total_lines and, when provided, the count in
 *     fileLineCounts, so a result cannot raise the limit by over-reporting
 *     total_lines.
 *  2. lens consistency — every finding's lens must equal the result's lens.
 *  3. scope — every affected_files path must appear in file_coverage.
 *     An out-of-scope path is reported once, by this rule only.
 *
 * @param {object} resultObj — parsed JSON from a task-results file
 * @param {Record<string, number>} fileLineCounts — from the task's file_line_counts
 * @returns {{ valid: boolean, errors: string[] }}
 */
export function validateResult(resultObj, fileLineCounts) {
  const validate = getValidator();
  if (!validate(resultObj)) {
    return { valid: false, errors: validate.errors.map(formatAjvError) };
  }

  const knownCounts = fileLineCounts ?? {};

  // Index file_coverage by path once; the first entry for a path wins.
  const coverageByPath = new Map();
  for (const fc of resultObj.file_coverage) {
    if (!coverageByPath.has(fc.path)) {
      coverageByPath.set(fc.path, fc);
    }
  }

  const errors = [];

  // Line range constraint
  for (const finding of resultObj.findings) {
    for (const entry of finding.affected_files) {
      if (entry.line_end === undefined) continue;

      const coverage = coverageByPath.get(entry.path);
      // Paths missing from file_coverage are reported by the scope check below.
      if (!coverage) continue;

      const expected = Object.prototype.hasOwnProperty.call(knownCounts, entry.path)
        ? knownCounts[entry.path]
        : undefined;
      const limits = [coverage.total_lines, expected].filter((n) => typeof n === "number");
      if (limits.length === 0) continue;

      const limit = Math.min(...limits);
      if (entry.line_end > limit) {
        errors.push(
          `finding '${finding.id}': line_end ${entry.line_end} exceeds total_lines ${limit} for ${entry.path}`
        );
      }
    }
  }

  // Lens consistency
  for (const finding of resultObj.findings) {
    if (finding.lens !== resultObj.lens) {
      errors.push(
        `finding '${finding.id}': lens '${finding.lens}' does not match task lens '${resultObj.lens}'`
      );
    }
  }

  // affected_files paths in scope
  for (const finding of resultObj.findings) {
    for (const entry of finding.affected_files) {
      if (!coverageByPath.has(entry.path)) {
        errors.push(
          `finding '${finding.id}': affected path '${entry.path}' not in task file_coverage`
        );
      }
    }
  }

  return errors.length > 0 ? { valid: false, errors } : { valid: true, errors: [] };
}
|
|
@@ -0,0 +1,553 @@
|
|
|
1
|
+
# Dispatch Automation Implementation Plan
|
|
2
|
+
|
|
3
|
+
## Background
|
|
4
|
+
|
|
5
|
+
The current audit-code workflow requires the LLM orchestrator to manually assemble
|
|
6
|
+
subagent prompts, handle schema normalization, and merge results — costing hundreds of
|
|
7
|
+
tokens per task and producing frequent schema violations. This plan replaces that with a
|
|
8
|
+
deterministic scripted dispatch layer so the orchestrator's only job is to fire Agent
|
|
9
|
+
tool calls with pre-built prompts, then run a merge script.
|
|
10
|
+
|
|
11
|
+
**Environment constraint:** Claude Desktop with no separate Anthropic API key. Subagent
|
|
12
|
+
dispatch must go through the `Agent` tool in the conversation runtime — no direct SDK
|
|
13
|
+
calls. All other steps must be zero-token scripts.
|
|
14
|
+
|
|
15
|
+
---
|
|
16
|
+
|
|
17
|
+
## Target workflow (per audit cycle)
|
|
18
|
+
|
|
19
|
+
```
|
|
20
|
+
1. node dist/index.js audit-code
|
|
21
|
+
→ emits run_id + pending-audit-tasks.json
|
|
22
|
+
|
|
23
|
+
2. node dispatch/prepare-dispatch.mjs --run-id <run_id>
|
|
24
|
+
→ reads tasks + schemas → writes dispatch-plan.json
|
|
25
|
+
(deterministic, 0 LLM tokens)
|
|
26
|
+
|
|
27
|
+
3. [Orchestrator reads dispatch-plan.json — small JSON array]
|
|
28
|
+
[Orchestrator fires N Agent calls in ONE message, verbatim prompts from plan]
|
|
29
|
+
|
|
30
|
+
Each subagent (×N, parallel):
|
|
31
|
+
- reads source files with Read tool
|
|
32
|
+
- performs lens audit
|
|
33
|
+
- writes result to task-results/<sanitized_task_id>.json using Write tool
|
|
34
|
+
- runs: node dispatch/validate-result.mjs <run_id> <task_id>
|
|
35
|
+
- if non-zero: fixes errors, rewrites, re-validates (max 3 attempts)
|
|
36
|
+
- if still failing after 3: writes empty-but-valid fallback result
|
|
37
|
+
|
|
38
|
+
4. node dispatch/merge-results.mjs --run-id <run_id>
|
|
39
|
+
→ validates all task-results/*.json
|
|
40
|
+
→ writes audit-results.json (passing results only)
|
|
41
|
+
→ writes failed-tasks.json (task_ids that failed validation)
|
|
42
|
+
(deterministic, 0 LLM tokens)
|
|
43
|
+
|
|
44
|
+
5. node dist/index.js worker-run --run-id <run_id>
|
|
45
|
+
→ ingests audit-results.json → coverage matrix → marks tasks complete
|
|
46
|
+
(deterministic, 0 LLM tokens)
|
|
47
|
+
|
|
48
|
+
6. Repeat from step 1 until no pending tasks remain.
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
Orchestrator token cost per cycle: **~50 tokens × N tasks** (read dispatch-plan + invoke Agent calls). Independent of source file sizes.
|
|
52
|
+
|
|
53
|
+
---
|
|
54
|
+
|
|
55
|
+
## Files to create
|
|
56
|
+
|
|
57
|
+
```
|
|
58
|
+
dispatch/
|
|
59
|
+
lens-definitions.json — lens descriptions embedded in every subagent prompt
|
|
60
|
+
validate.mjs — shared validation logic (imported by other scripts)
|
|
61
|
+
validate-result.mjs — CLI: validate one task-results file
|
|
62
|
+
prepare-dispatch.mjs — reads pending tasks → writes dispatch-plan.json
|
|
63
|
+
merge-results.mjs — merges validated task results → audit-results.json
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
## Files to modify
|
|
67
|
+
|
|
68
|
+
```
|
|
69
|
+
package.json — add ajv devDependency; add dispatch:* npm scripts
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
> **Do NOT add `dispatch/` to the `files` array in package.json.** These scripts are
|
|
73
|
+
> local dev tooling and must not be published to npm.
|
|
74
|
+
|
|
75
|
+
---
|
|
76
|
+
|
|
77
|
+
## Step 1 — Add `ajv` dependency
|
|
78
|
+
|
|
79
|
+
In `package.json`, add to `devDependencies`:
|
|
80
|
+
|
|
81
|
+
```json
|
|
82
|
+
"ajv": "^8.17.1"
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
Then run `npm install`.
|
|
86
|
+
|
|
87
|
+
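AJV v8 is required for JSON Schema draft 2020-12 support (which the existing schemas use); import the 2020-12 build from `ajv/dist/2020.js`, because the default `ajv` entry point targets draft-07.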
|
|
88
|
+
No other new dependencies are needed.
|
|
89
|
+
|
|
90
|
+
Also add npm scripts (optional convenience aliases):
|
|
91
|
+
|
|
92
|
+
```json
|
|
93
|
+
"dispatch:prepare": "node dispatch/prepare-dispatch.mjs",
|
|
94
|
+
"dispatch:merge": "node dispatch/merge-results.mjs",
|
|
95
|
+
"dispatch:validate": "node dispatch/validate-result.mjs"
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
---
|
|
99
|
+
|
|
100
|
+
## Step 2 — Create `dispatch/lens-definitions.json`
|
|
101
|
+
|
|
102
|
+
Each task's lens entry from this file is embedded in its subagent prompt. It must be accurate enough that
|
|
103
|
+
a subagent can scope its review correctly without reading any other files.
|
|
104
|
+
|
|
105
|
+
```json
|
|
106
|
+
{
|
|
107
|
+
"correctness": {
|
|
108
|
+
"description": "Logic errors, incorrect algorithm implementations, off-by-one bugs, type mismatches, wrong return values, incorrect state transitions, missing null/undefined guards, misuse of APIs. Focus on code that does the wrong thing.",
|
|
109
|
+
"do_not_report": "Style issues, naming problems, missing tests, or findings that belong to other lenses."
|
|
110
|
+
},
|
|
111
|
+
"maintainability": {
|
|
112
|
+
"description": "Code that is hard to change safely: excessive function length, deep nesting, tight coupling between unrelated modules, poor naming, magic constants, duplicated logic, inconsistent abstractions, unclear public APIs.",
|
|
113
|
+
"do_not_report": "Correctness bugs, test gaps, or operational concerns."
|
|
114
|
+
},
|
|
115
|
+
"tests": {
|
|
116
|
+
"description": "Test coverage gaps for important paths, tests that assert incorrect behavior (pinning bugs as expected), fragile or non-deterministic tests, missing negative/edge-case tests, tests that silently pass on stale builds (e.g. importing compiled dist/ rather than source).",
|
|
117
|
+
"do_not_report": "Source code bugs — report only issues with the tests themselves."
|
|
118
|
+
},
|
|
119
|
+
"security": {
|
|
120
|
+
"description": "Injection vulnerabilities (SQL, shell, path traversal), authentication/authorization flaws, secret exposure, insecure deserialization, privilege escalation, unsafe use of eval or child processes with user input.",
|
|
121
|
+
"do_not_report": "Performance or correctness issues that are not security-relevant."
|
|
122
|
+
},
|
|
123
|
+
"reliability": {
|
|
124
|
+
"description": "Failure modes without recovery, missing timeouts, unhandled promise rejections, race conditions, resource leaks (file handles, sockets, timers), incorrect retry logic, cascading failure risks.",
|
|
125
|
+
"do_not_report": "Correctness bugs that do not affect reliability under failure conditions."
|
|
126
|
+
},
|
|
127
|
+
"performance": {
|
|
128
|
+
"description": "Algorithmic inefficiencies (O(n²) where O(n) is possible), unnecessary re-computation, missing caching, synchronous blocking in hot paths, excessive memory allocation.",
|
|
129
|
+
"do_not_report": "Correctness bugs unrelated to performance."
|
|
130
|
+
},
|
|
131
|
+
"data_integrity": {
|
|
132
|
+
"description": "Missing input validation at trust boundaries, schema violations, inconsistent field naming across related schemas, data loss scenarios, missing required fields, enum values that are present in some schemas but not others.",
|
|
133
|
+
"do_not_report": "UI or presentation issues; operational or deployment concerns."
|
|
134
|
+
},
|
|
135
|
+
"operability": {
|
|
136
|
+
"description": "Missing or low-quality log output, error messages that don't help operators diagnose problems, missing progress indicators for long operations, no elapsed-time reporting, lack of dry-run or preview modes for destructive operations.",
|
|
137
|
+
"do_not_report": "Correctness bugs or deployment configuration."
|
|
138
|
+
},
|
|
139
|
+
"config_deployment": {
|
|
140
|
+
"description": "CI/CD pipeline correctness (wrong triggers, missing branch filters, floating version pins), deployment safety (no gate before publish, missing rollback), insecure secret handling in configs, mutable action tags that should be pinned to commit SHAs.",
|
|
141
|
+
"do_not_report": "Runtime code issues; findings that belong to other lenses."
|
|
142
|
+
}
|
|
143
|
+
}
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
---
|
|
147
|
+
|
|
148
|
+
## Step 3 — Create `dispatch/validate.mjs`
|
|
149
|
+
|
|
150
|
+
Shared validation module. Exports a single function `validateResult(resultObj, fileLineCounts)`.
|
|
151
|
+
|
|
152
|
+
### Interface
|
|
153
|
+
|
|
154
|
+
```js
|
|
155
|
+
/**
|
|
156
|
+
* @param {object} resultObj — parsed JSON from a task-results file
|
|
157
|
+
* @param {Record<string, number>} fileLineCounts — from the task's file_line_counts
|
|
158
|
+
* @returns {{ valid: boolean, errors: string[] }}
|
|
159
|
+
*/
|
|
160
|
+
export function validateResult(resultObj, fileLineCounts) { ... }
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
### Logic
|
|
164
|
+
|
|
165
|
+
```
|
|
166
|
+
1. AJV validate resultObj against schemas/audit_result.schema.json
|
|
167
|
+
- Load finding.schema.json first (addSchema) so $ref resolves
|
|
168
|
+
- Use the 2020-12 build (`ajv/dist/2020.js`) with `{ strict: false }` to avoid complaints about unknown keywords like $schema
|
|
169
|
+
- On failure: return { valid: false, errors: ajv.errors.map(e => formatAjvError(e)) }
|
|
170
|
+
|
|
171
|
+
2. Extra check — line range constraint:
|
|
172
|
+
For each finding in resultObj.findings:
|
|
173
|
+
For each entry in finding.affected_files:
|
|
174
|
+
if entry.line_end is defined:
|
|
175
|
+
look up total_lines from resultObj.file_coverage where path === entry.path
|
|
176
|
+
if total_lines is undefined: push error "affected_files path '${entry.path}' not in file_coverage"
|
|
177
|
+
else if entry.line_end > total_lines: push error
|
|
178
|
+
"finding '${finding.id}': line_end ${entry.line_end} exceeds total_lines ${total_lines} for ${entry.path}"
|
|
179
|
+
|
|
180
|
+
3. Extra check — lens consistency:
|
|
181
|
+
For each finding in resultObj.findings:
|
|
182
|
+
if finding.lens !== resultObj.lens:
|
|
183
|
+
push error "finding '${finding.id}': lens '${finding.lens}' does not match task lens '${resultObj.lens}'"
|
|
184
|
+
|
|
185
|
+
4. Extra check — affected_files paths in scope:
|
|
186
|
+
Collect allowed paths from resultObj.file_coverage[].path
|
|
187
|
+
For each finding's affected_files entry:
|
|
188
|
+
if entry.path not in allowed paths:
|
|
189
|
+
push error "finding '${finding.id}': affected path '${entry.path}' not in task file_coverage"
|
|
190
|
+
|
|
191
|
+
5. If any extra-check errors: return { valid: false, errors }
|
|
192
|
+
|
|
193
|
+
6. Return { valid: true, errors: [] }
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
### Schema loading
|
|
197
|
+
|
|
198
|
+
Schemas are resolved relative to the project root. Use this logic to find the project root:
|
|
199
|
+
|
|
200
|
+
```js
|
|
201
|
+
// dispatch/validate.mjs
|
|
202
|
+
import { createRequire } from "node:module";
|
|
203
|
+
import { dirname, resolve, join } from "node:path";
|
|
204
|
+
import { fileURLToPath } from "node:url";
|
|
205
|
+
import { readFileSync } from "node:fs";
|
|
206
|
+
import Ajv from "ajv";
|
|
207
|
+
|
|
208
|
+
const __filename = fileURLToPath(import.meta.url);
|
|
209
|
+
const __dirname = dirname(__filename);
|
|
210
|
+
// dispatch/ is one level below project root
|
|
211
|
+
const PROJECT_ROOT = resolve(__dirname, "..");
|
|
212
|
+
const SCHEMAS_DIR = join(PROJECT_ROOT, "schemas");
|
|
213
|
+
|
|
214
|
+
function loadSchema(name) {
|
|
215
|
+
return JSON.parse(readFileSync(join(SCHEMAS_DIR, name), "utf8"));
|
|
216
|
+
}
|
|
217
|
+
|
|
218
|
+
let _ajv = null;
|
|
219
|
+
function getAjv() {
|
|
220
|
+
if (_ajv) return _ajv;
|
|
221
|
+
_ajv = new Ajv({ strict: false, allErrors: true });
|
|
222
|
+
_ajv.addSchema(loadSchema("finding.schema.json"));
|
|
223
|
+
return _ajv;
|
|
224
|
+
}
|
|
225
|
+
```
|
|
226
|
+
|
|
227
|
+
---
|
|
228
|
+
|
|
229
|
+
## Step 4 — Create `dispatch/validate-result.mjs`
|
|
230
|
+
|
|
231
|
+
CLI wrapper for use by subagents after writing their result file.
|
|
232
|
+
|
|
233
|
+
### Usage
|
|
234
|
+
|
|
235
|
+
```
|
|
236
|
+
node dispatch/validate-result.mjs <run_id> <task_id>
|
|
237
|
+
```
|
|
238
|
+
|
|
239
|
+
- `run_id`: e.g. `20260424T152454170Z_audit_tasks_completed_001`
|
|
240
|
+
- `task_id`: e.g. `src-adapters:correctness` (unsanitized — the script sanitizes internally)
|
|
241
|
+
|
|
242
|
+
### Logic
|
|
243
|
+
|
|
244
|
+
```
|
|
245
|
+
1. Parse argv: run_id = process.argv[2], task_id = process.argv[3]
|
|
246
|
+
If either missing: print usage and exit 1
|
|
247
|
+
|
|
248
|
+
2. Locate artifacts_dir:
|
|
249
|
+
Read .audit-artifacts/session-config.json to find artifacts_dir.
|
|
250
|
+
If not present, default: join(PROJECT_ROOT, ".audit-artifacts")
|
|
251
|
+
|
|
252
|
+
3. Derive file path:
|
|
253
|
+
sanitized = task_id.replace(/[^a-zA-Z0-9_-]/g, "_")
|
|
254
|
+
resultPath = join(artifactsDir, "runs", run_id, "task-results", sanitized + ".json")
|
|
255
|
+
|
|
256
|
+
4. Read and parse resultPath. If file not found or invalid JSON:
|
|
257
|
+
print error, exit 1
|
|
258
|
+
|
|
259
|
+
5. Load the task from pending-audit-tasks.json to get file_line_counts:
|
|
260
|
+
tasksPath = join(artifactsDir, "runs", run_id, "pending-audit-tasks.json")
|
|
261
|
+
tasks = JSON.parse(readFileSync(tasksPath))
|
|
262
|
+
task = tasks.find(t => t.task_id === task_id)
|
|
263
|
+
fileLineCounts = task?.file_line_counts ?? {}
|
|
264
|
+
|
|
265
|
+
6. Call validateResult(resultObj, fileLineCounts) from validate.mjs
|
|
266
|
+
|
|
267
|
+
7. If valid: console.log("✓ valid:", task_id); exit 0
|
|
268
|
+
If invalid:
|
|
269
|
+
console.error("✗ invalid:", task_id);
|
|
270
|
+
console.error(JSON.stringify(errors, null, 2));
|
|
271
|
+
exit 1
|
|
272
|
+
```
|
|
273
|
+
|
|
274
|
+
---
|
|
275
|
+
|
|
276
|
+
## Step 5 — Create `dispatch/prepare-dispatch.mjs`
|
|
277
|
+
|
|
278
|
+
Core script. Reads pending tasks and produces a ready-to-use dispatch plan.
|
|
279
|
+
|
|
280
|
+
### Usage
|
|
281
|
+
|
|
282
|
+
```
|
|
283
|
+
node dispatch/prepare-dispatch.mjs --run-id <run_id>
|
|
284
|
+
```
|
|
285
|
+
|
|
286
|
+
### Logic
|
|
287
|
+
|
|
288
|
+
```
|
|
289
|
+
1. Parse --run-id <run_id> from argv. Error if missing.
|
|
290
|
+
|
|
291
|
+
2. Resolve paths:
|
|
292
|
+
artifactsDir = join(PROJECT_ROOT, ".audit-artifacts")
|
|
293
|
+
runDir = join(artifactsDir, "runs", run_id)
|
|
294
|
+
tasksPath = join(runDir, "pending-audit-tasks.json")
|
|
295
|
+
dispatchPlanPath = join(runDir, "dispatch-plan.json")
|
|
296
|
+
|
|
297
|
+
3. Read pending-audit-tasks.json — array of AuditTask objects.
|
|
298
|
+
If file not found: error and exit 1.
|
|
299
|
+
|
|
300
|
+
4. Load shared content (read once, reuse for all tasks):
|
|
301
|
+
lensDefinitions = read dispatch/lens-definitions.json
|
|
302
|
+
auditResultSchema = read schemas/audit_result.schema.json
|
|
303
|
+
findingSchema = read schemas/finding.schema.json
|
|
304
|
+
|
|
305
|
+
5. For each task in tasks:
|
|
306
|
+
a. sanitizedId = task.task_id.replace(/[^a-zA-Z0-9_-]/g, "_")
|
|
307
|
+
b. outputPath = join(runDir, "task-results", sanitizedId + ".json")
|
|
308
|
+
c. lensDef = lensDefinitions[task.lens]
|
|
309
|
+
d. totalFileLines = Object.values(task.file_line_counts).reduce((a, b) => a + b, 0)
|
|
310
|
+
e. description = `Audit ${task.unit_id} (${task.file_paths.length} file(s), ~${totalFileLines} lines) — ${task.lens} lens`
|
|
311
|
+
f. prompt = buildPrompt(task, lensDef, auditResultSchema, findingSchema, outputPath, run_id, artifactsDir)
|
|
312
|
+
g. Append { task_id, description, output_path: outputPath, prompt } to plan array
|
|
313
|
+
|
|
314
|
+
6. Ensure task-results/ directory exists:
|
|
315
|
+
mkdirSync(join(runDir, "task-results"), { recursive: true })
|
|
316
|
+
|
|
317
|
+
7. Write plan array to dispatchPlanPath as formatted JSON.
|
|
318
|
+
|
|
319
|
+
8. Print: "Wrote dispatch-plan.json — N tasks ready for dispatch"
|
|
320
|
+
Print: "Largest task: <task_id> (~N lines)"
|
|
321
|
+
Print: ""
|
|
322
|
+
Print: "--- ORCHESTRATOR INSTRUCTIONS ---"
|
|
323
|
+
Print: "Read dispatch-plan.json. For each entry, fire one Agent call with:"
|
|
324
|
+
Print: " description: <entry.description>"
|
|
325
|
+
Print: " prompt: <entry.prompt>"
|
|
326
|
+
Print: "Fire all N calls in a single message for parallel execution."
|
|
327
|
+
Print: "When all complete, run: node dispatch/merge-results.mjs --run-id <run_id>"
|
|
328
|
+
```
|
|
329
|
+
|
|
330
|
+
### `buildPrompt(task, lensDef, auditResultSchema, findingSchema, outputPath, runId, artifactsDir)`
|
|
331
|
+
|
|
332
|
+
Returns a string. Template (use template literals):
|
|
333
|
+
|
|
334
|
+
```
|
|
335
|
+
You are a code auditor. Perform a bounded audit of the files listed below under the specified lens.
|
|
336
|
+
|
|
337
|
+
## Task metadata
|
|
338
|
+
${JSON.stringify(task, null, 2)}
|
|
339
|
+
|
|
340
|
+
## Files to read
|
|
341
|
+
Read each path in task.file_paths using your Read tool. The repo root is the current working directory — paths are repo-relative (e.g. "src/foo.ts").
|
|
342
|
+
|
|
343
|
+
file_line_counts gives the expected total line count for each file. Use those exact values for file_coverage[].total_lines in your result.
|
|
344
|
+
|
|
345
|
+
## Lens: ${task.lens}
|
|
346
|
+
${lensDef.description}
|
|
347
|
+
|
|
348
|
+
Do NOT report: ${lensDef.do_not_report}
|
|
349
|
+
|
|
350
|
+
## Output format
|
|
351
|
+
Write your result as a single JSON **object** (not an array) to this exact path:
|
|
352
|
+
${outputPath}
|
|
353
|
+
|
|
354
|
+
The result must conform to the following schema:
|
|
355
|
+
|
|
356
|
+
### audit_result.schema.json
|
|
357
|
+
${JSON.stringify(auditResultSchema, null, 2)}
|
|
358
|
+
|
|
359
|
+
### finding.schema.json
|
|
360
|
+
${JSON.stringify(findingSchema, null, 2)}
|
|
361
|
+
|
|
362
|
+
## Hard constraints (violations will fail validation)
|
|
363
|
+
1. NEVER set line_end higher than the file's actual line count.
|
|
364
|
+
Use file_line_counts as your reference. If in doubt, leave line_end omitted.
|
|
365
|
+
2. Every finding MUST have ALL required fields:
|
|
366
|
+
id, title, category, severity, confidence, lens, summary, affected_files, evidence
|
|
367
|
+
3. lens on every finding must be exactly "${task.lens}"
|
|
368
|
+
4. No fields outside the schema. Forbidden: "recommendation", "tags", "description" (use "summary").
|
|
369
|
+
5. evidence[] must contain at least one specific file:line reference.
|
|
370
|
+
Format: "path/to/file.ts:42 - brief description of what you see there"
|
|
371
|
+
6. affected_files[] entries are OBJECTS with a "path" key — NOT plain strings.
|
|
372
|
+
Example: {"path": "src/foo.ts", "line_start": 10, "line_end": 20, "symbol": "myFunc"}
|
|
373
|
+
7. Only reference file paths that appear in this task's file_paths.
|
|
374
|
+
8. findings: [] is correct when you genuinely find nothing. Do not invent findings.
|
|
375
|
+
|
|
376
|
+
## Validation step (required)
|
|
377
|
+
After writing your result, run:
|
|
378
|
+
node dispatch/validate-result.mjs ${runId} ${task.task_id}
|
|
379
|
+
|
|
380
|
+
- If it exits 0: you are done. Stop.
|
|
381
|
+
- If it exits non-zero: read the error output, fix the JSON, rewrite the file, run again.
|
|
382
|
+
- Repeat up to 3 times.
|
|
383
|
+
|
|
384
|
+
If you cannot produce a valid result after 3 attempts, write this fallback (substituting real values):
|
|
385
|
+
${JSON.stringify({
|
|
386
|
+
task_id: task.task_id,
|
|
387
|
+
unit_id: task.unit_id,
|
|
388
|
+
pass_id: task.pass_id,
|
|
389
|
+
lens: task.lens,
|
|
390
|
+
file_coverage: task.file_paths.map(p => ({ path: p, total_lines: task.file_line_counts[p] })),
|
|
391
|
+
findings: [],
|
|
392
|
+
notes: ["Validation failed after 3 attempts — empty result written as fallback."]
|
|
393
|
+
}, null, 2)}
|
|
394
|
+
|
|
395
|
+
Then validate the fallback passes before finishing.
|
|
396
|
+
```
|
|
397
|
+
|
|
398
|
+
Note: the fallback JSON in the prompt is pre-computed in `buildPrompt` using the task
|
|
399
|
+
data, not generated by the subagent.
|
|
400
|
+
|
|
401
|
+
---
|
|
402
|
+
|
|
403
|
+
## Step 6 — Create `dispatch/merge-results.mjs`
|
|
404
|
+
|
|
405
|
+
### Usage
|
|
406
|
+
|
|
407
|
+
```
|
|
408
|
+
node dispatch/merge-results.mjs --run-id <run_id>
|
|
409
|
+
```
|
|
410
|
+
|
|
411
|
+
### Logic
|
|
412
|
+
|
|
413
|
+
```
|
|
414
|
+
1. Parse --run-id <run_id> from argv.
|
|
415
|
+
|
|
416
|
+
2. Resolve paths:
|
|
417
|
+
taskResultsDir = join(artifactsDir, "runs", run_id, "task-results")
|
|
418
|
+
auditResultsPath = join(artifactsDir, "runs", run_id, "audit-results.json")
|
|
419
|
+
failedTasksPath = join(artifactsDir, "runs", run_id, "failed-tasks.json")
|
|
420
|
+
tasksPath = join(artifactsDir, "runs", run_id, "pending-audit-tasks.json")
|
|
421
|
+
|
|
422
|
+
3. Read pending-audit-tasks.json to build fileLineCounts map:
|
|
423
|
+
lineCounts = {}
|
|
424
|
+
for each task: lineCounts[task.task_id] = task.file_line_counts
|
|
425
|
+
|
|
426
|
+
4. Read all *.json files from task-results/:
|
|
427
|
+
files = readdirSync(taskResultsDir).filter(f => f.endsWith(".json"))
|
|
428
|
+
|
|
429
|
+
5. For each file:
|
|
430
|
+
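a. Parse JSON (if the file is unreadable or not valid JSON: push { task_id: filename, errors: [parse error message] } to failing[] and continue with the next file)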
|
|
431
|
+
b. Call validateResult(resultObj, lineCounts[resultObj.task_id] ?? {})
|
|
432
|
+
c. If valid: push to passing[]
|
|
433
|
+
d. If invalid: push { task_id: resultObj?.task_id ?? filename, errors } to failing[]
|
|
434
|
+
|
|
435
|
+
6. Write passing array to audit-results.json (as AuditResult[] — array of passing objects)
|
|
436
|
+
|
|
437
|
+
7. If failing.length > 0:
|
|
438
|
+
Write failing array to failed-tasks.json
|
|
439
|
+
Print warning: "${failing.length} task(s) failed validation and were excluded:"
|
|
440
|
+
For each: print " ✗ ${f.task_id}: ${f.errors[0]}" (first error only for brevity)
|
|
441
|
+
|
|
442
|
+
8. Print: "✓ ${passing.length}/${total} tasks valid → ${auditResultsPath}"
|
|
443
|
+
If failing.length > 0: print " Re-run those tasks in the next cycle."
|
|
444
|
+
|
|
445
|
+
9. Exit 0 regardless (partial ingestion is safe — failed tasks remain pending for requeue).
|
|
446
|
+
```
|
|
447
|
+
|
|
448
|
+
---
|
|
449
|
+
|
|
450
|
+
## Step 7 — Update `session-config.json` (optional but recommended)
|
|
451
|
+
|
|
452
|
+
Add `dispatch_provider` field to `.audit-artifacts/session-config.json`:
|
|
453
|
+
|
|
454
|
+
```json
|
|
455
|
+
{
|
|
456
|
+
"provider": "local-subprocess",
|
|
457
|
+
"dispatch_provider": "claude-desktop",
|
|
458
|
+
"agent_task_batch_size": 10
|
|
459
|
+
}
|
|
460
|
+
```
|
|
461
|
+
|
|
462
|
+
This is metadata only for now — no code reads `dispatch_provider` yet. It documents intent and provides the hook for future multi-provider support.
|
|
463
|
+
|
|
464
|
+
---
|
|
465
|
+
|
|
466
|
+
## Testing procedure
|
|
467
|
+
|
|
468
|
+
### Unit test: `validate-result.mjs`
|
|
469
|
+
|
|
470
|
+
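1. Write a minimal valid result to `<artifacts_dir>/runs/<some_run_id>/task-results/test_correctness.json` (the script derives this path from `<run_id>` and the sanitized `task_id`, so a file anywhere else will not be found):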
|
|
471
|
+
```json
|
|
472
|
+
{
|
|
473
|
+
"task_id": "test:correctness",
|
|
474
|
+
"unit_id": "test",
|
|
475
|
+
"pass_id": "pass:correctness",
|
|
476
|
+
"lens": "correctness",
|
|
477
|
+
"file_coverage": [{"path": "src/foo.ts", "total_lines": 100}],
|
|
478
|
+
"findings": []
|
|
479
|
+
}
|
|
480
|
+
```
|
|
481
|
+
2. Run: `node dispatch/validate-result.mjs <some_run_id> test:correctness` — expect exit 0
|
|
482
|
+
3. Mutate the file: remove `lens` field — expect exit 1 with error mentioning `lens`
|
|
483
|
+
4. Mutate: add a finding whose `affected_files` entry for `src/foo.ts` has `line_end: 200` (the file's `total_lines` is 100) — expect exit 1 with an error mentioning `line_end`
|
|
484
|
+
|
|
485
|
+
### Integration test: `prepare-dispatch.mjs`
|
|
486
|
+
|
|
487
|
+
1. Run against the current pending tasks:
|
|
488
|
+
```
|
|
489
|
+
node dispatch/prepare-dispatch.mjs --run-id 20260424T152454170Z_audit_tasks_completed_001
|
|
490
|
+
```
|
|
491
|
+
2. Inspect `dispatch-plan.json`: each entry should have `task_id`, `description`, `output_path`, `prompt`
|
|
492
|
+
3. Verify `prompt` contains the task JSON, lens definition, both schemas, and the output path
|
|
493
|
+
|
|
494
|
+
### Integration test: `merge-results.mjs`
|
|
495
|
+
|
|
496
|
+
1. Write 2 valid and 1 invalid result to `task-results/`
|
|
497
|
+
2. Run: `node dispatch/merge-results.mjs --run-id <id>`
|
|
498
|
+
3. Verify `audit-results.json` contains exactly the 2 valid results
|
|
499
|
+
4. Verify `failed-tasks.json` contains the 1 invalid task
|
|
500
|
+
5. Verify exit code is 0
|
|
501
|
+
|
|
502
|
+
---
|
|
503
|
+
|
|
504
|
+
## Orchestrator usage reference
|
|
505
|
+
|
|
506
|
+
When `prepare-dispatch.mjs` finishes, it prints the instructions inline. For reference:
|
|
507
|
+
|
|
508
|
+
```
|
|
509
|
+
1. Run: node dispatch/prepare-dispatch.mjs --run-id <run_id>
|
|
510
|
+
2. Read: .audit-artifacts/runs/<run_id>/dispatch-plan.json
|
|
511
|
+
3. In ONE message, fire one Agent call per entry:
|
|
512
|
+
Agent({ description: entry.description, prompt: entry.prompt })
|
|
513
|
+
Fire all calls simultaneously — they run in parallel.
|
|
514
|
+
4. Wait for all subagents to complete.
|
|
515
|
+
5. Run: node dispatch/merge-results.mjs --run-id <run_id>
|
|
516
|
+
6. Run: node dist/index.js worker-run --run-id <run_id>
|
|
517
|
+
7. Run: node dist/index.js audit-code (to get next batch)
|
|
518
|
+
8. Repeat.
|
|
519
|
+
```
|
|
520
|
+
|
|
521
|
+
**Important:** The orchestrator should NOT read the pending-audit-tasks.json, NOT read
|
|
522
|
+
any source files, NOT compose any prompts. Everything is pre-built. Just read
|
|
523
|
+
`dispatch-plan.json` and fire the calls verbatim.
|
|
524
|
+
|
|
525
|
+
---
|
|
526
|
+
|
|
527
|
+
## Notes and caveats
|
|
528
|
+
|
|
529
|
+
### Large files (2000+ lines)
|
|
530
|
+
|
|
531
|
+
Tasks with very large files (e.g. `audit-code-wrapper-lib.mjs` at 2215 lines) will still
|
|
532
|
+
hit quota limits for subagents. The `prepare-dispatch.mjs` script should print a warning
|
|
533
|
+
for tasks exceeding a threshold (e.g. 1500 total lines). These tasks may need to be split
|
|
534
|
+
at the task-builder level — that is a separate concern and not addressed here.
|
|
535
|
+
|
|
536
|
+
### `audit_results_path` vs per-task files
|
|
537
|
+
|
|
538
|
+
The existing `renderWorkerPrompt.ts` tells subagents to write to a shared
|
|
539
|
+
`audit-results.json`. The new `prepare-dispatch.mjs`-generated prompts tell subagents to
|
|
540
|
+
write to per-task `task-results/<task_id>.json` files. These are two separate dispatch
|
|
541
|
+
paths — the old path (via `renderWorkerPrompt`) is still used for non-`claude-desktop`
|
|
542
|
+
providers and is not modified by this plan.
|
|
543
|
+
|
|
544
|
+
### Future: provider abstraction
|
|
545
|
+
|
|
546
|
+
`prepare-dispatch.mjs` output (`dispatch-plan.json`) is provider-agnostic. A future
|
|
547
|
+
`anthropic-direct` provider could read the same `dispatch-plan.json` and call
|
|
548
|
+
`messages.create()` for each entry via SDK, with no changes to `prepare-dispatch.mjs`.
|
|
549
|
+
|
|
550
|
+
### ajv and published package
|
|
551
|
+
|
|
552
|
+
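`ajv` is added as a devDependency. This is only safe while the `dispatch/` scripts are NOT in the `files` array
|
|
553
|
+
and are therefore not published. If `dispatch/**` is added to `files` (as `package.json` in this release does), `ajv` must move to `dependencies`, because `dispatch/validate.mjs` imports it at runtime and end users do not get devDependencies installed.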
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "auditor-lambda",
|
|
3
|
-
"version": "0.2.
|
|
3
|
+
"version": "0.2.17",
|
|
4
4
|
"private": false,
|
|
5
5
|
"description": "Portable hybrid code-auditing framework for arbitrary repositories.",
|
|
6
6
|
"type": "module",
|
|
@@ -11,6 +11,7 @@
|
|
|
11
11
|
"dist/**",
|
|
12
12
|
"audit-code.mjs",
|
|
13
13
|
"audit-code-wrapper-lib.mjs",
|
|
14
|
+
"dispatch/**",
|
|
14
15
|
"schemas/**",
|
|
15
16
|
"skills/audit-code/**",
|
|
16
17
|
"scripts/postinstall.mjs",
|
|
@@ -36,7 +37,10 @@
|
|
|
36
37
|
"prepublishOnly": "npm run verify:release",
|
|
37
38
|
"start": "node dist/index.js",
|
|
38
39
|
"audit-code": "node audit-code.mjs",
|
|
39
|
-
"sample-run": "node dist/index.js sample-run"
|
|
40
|
+
"sample-run": "node dist/index.js sample-run",
|
|
41
|
+
"dispatch:prepare": "node dispatch/prepare-dispatch.mjs",
|
|
42
|
+
"dispatch:merge": "node dispatch/merge-results.mjs",
|
|
43
|
+
"dispatch:validate": "node dispatch/validate-result.mjs"
|
|
40
44
|
},
|
|
41
45
|
"engines": {
|
|
42
46
|
"node": ">=20"
|
|
@@ -62,6 +66,7 @@
|
|
|
62
66
|
],
|
|
63
67
|
"devDependencies": {
|
|
64
68
|
"@types/node": "^24.3.0",
|
|
69
|
+
"ajv": "^8.17.1",
|
|
65
70
|
"typescript": "^5.9.2"
|
|
66
71
|
}
|
|
67
72
|
}
|
package/scripts/postinstall.mjs
CHANGED
|
@@ -1,19 +1,27 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
import { homedir } from 'os';
|
|
3
3
|
import { join, dirname } from 'path';
|
|
4
|
-
import { mkdirSync,
|
|
4
|
+
import { mkdirSync, existsSync, readFileSync, writeFileSync } from 'fs';
|
|
5
5
|
import { fileURLToPath } from 'url';
|
|
6
6
|
|
|
7
|
-
const pkgRoot = dirname(fileURLToPath(
|
|
8
|
-
const sourceFile = join(pkgRoot, '
|
|
7
|
+
const pkgRoot = dirname(dirname(fileURLToPath(import.meta.url)));
|
|
8
|
+
const sourceFile = join(pkgRoot, 'skills', 'audit-code', 'audit-code.prompt.md');
|
|
9
9
|
const destDir = join(homedir(), '.claude', 'commands');
|
|
10
10
|
const destFile = join(destDir, 'audit-code.md');
|
|
11
11
|
|
|
12
|
+
if (!existsSync(sourceFile)) {
|
|
13
|
+
console.warn(`audit-code: skill source not found at ${sourceFile} — skipping Claude command install`);
|
|
14
|
+
process.exit(0);
|
|
15
|
+
}
|
|
16
|
+
|
|
12
17
|
try {
|
|
13
18
|
mkdirSync(destDir, { recursive: true });
|
|
14
|
-
|
|
15
|
-
|
|
19
|
+
const action = existsSync(destFile) ? 'updated' : 'installed';
|
|
20
|
+
// Read then write to avoid Windows file-lock issues with copyFileSync
|
|
21
|
+
writeFileSync(destFile, readFileSync(sourceFile));
|
|
22
|
+
console.log(`audit-code: ${action} /audit-code Claude command → ${destFile}`);
|
|
16
23
|
} catch (err) {
|
|
17
|
-
// Non-fatal — CLI still works, just no slash command autocomplete
|
|
18
24
|
console.warn(`audit-code: could not install Claude command (${err.message})`);
|
|
25
|
+
console.warn(` To install manually, run:`);
|
|
26
|
+
console.warn(` cp "${sourceFile}" "${destFile}"`);
|
|
19
27
|
}
|
|
@@ -1,67 +1,111 @@
|
|
|
1
1
|
---
|
|
2
|
-
description: Autonomous local loop code auditing — steps the audit-code orchestrator and
|
|
2
|
+
description: Autonomous local loop code auditing — steps the audit-code orchestrator and dispatches parallel subagents until the audit completes
|
|
3
3
|
argument-hint: [target-dir]
|
|
4
|
-
allowed-tools: [Read, Write, Edit, Bash, Glob, Grep]
|
|
4
|
+
allowed-tools: [Read, Write, Edit, Bash, Glob, Grep, Agent]
|
|
5
5
|
---
|
|
6
6
|
|
|
7
7
|
# `/audit-code` Execution Directive
|
|
8
8
|
|
|
9
|
-
**SYSTEM DIRECTIVE:**
|
|
9
|
+
**SYSTEM DIRECTIVE:** You are the autonomous audit orchestrator. Your job is to advance the state machine, dispatch parallel subagents for code review work, and loop until the audit is complete. Do not ask the user for confirmation between steps.
|
|
10
10
|
|
|
11
|
-
|
|
11
|
+
---
|
|
12
|
+
|
|
13
|
+
## The Loop
|
|
14
|
+
|
|
15
|
+
Repeat Steps 1–5 until the audit status is `"complete"`.
|
|
16
|
+
|
|
17
|
+
---
|
|
18
|
+
|
|
19
|
+
### Step 1 — Advance the State Machine
|
|
20
|
+
|
|
21
|
+
Run:
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
node audit-code.mjs
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
_(Outside the `auditor-lambda` repo itself, use `audit-code` or `npx audit-code` instead.)_
|
|
28
|
+
|
|
29
|
+
Parse the JSON output. Check `audit_state.status`:
|
|
30
|
+
|
|
31
|
+
| Status | Action |
|
|
32
|
+
|--------|--------|
|
|
33
|
+
| `"complete"` | Go to **Step 6** |
|
|
34
|
+
| `"active"` | Deterministic progress was made — loop immediately back to Step 1 |
|
|
35
|
+
| `"blocked"` | LLM work needed — continue to Step 2 |
|
|
36
|
+
|
|
37
|
+
---
|
|
38
|
+
|
|
39
|
+
### Step 2 — Read the Task
|
|
12
40
|
|
|
13
|
-
|
|
41
|
+
Read `.audit-artifacts/dispatch/current-task.json`.
|
|
14
42
|
|
|
15
|
-
|
|
43
|
+
Note these fields:
|
|
44
|
+
- `run_id` — identifies this batch of audit work
|
|
45
|
+
- `artifacts_dir` — base artifacts directory
|
|
46
|
+
- `pending_audit_tasks_path` — path to the pending task list
|
|
47
|
+
- `worker_command` — JSON array; run this after the audit work is complete
|
|
48
|
+
|
|
49
|
+
---
|
|
50
|
+
|
|
51
|
+
### Step 3 — Prepare the Dispatch Plan
|
|
52
|
+
|
|
53
|
+
Run:
|
|
16
54
|
|
|
17
55
|
```bash
|
|
18
|
-
|
|
56
|
+
node dispatch/prepare-dispatch.mjs --run-id <run_id>
|
|
19
57
|
```
|
|
20
58
|
|
|
21
|
-
|
|
59
|
+
This reads every pending audit task, pre-computes a complete subagent prompt for each, and writes `dispatch-plan.json` to the same directory as `pending_audit_tasks_path`. It prints the task count and warns about any tasks exceeding 1500 lines.
|
|
60
|
+
|
|
61
|
+
Read `dispatch-plan.json`. It is a JSON array where each entry has:
|
|
62
|
+
- `task_id` — task identifier
|
|
63
|
+
- `description` — short label for the Agent call
|
|
64
|
+
- `output_path` — where the subagent writes its result
|
|
65
|
+
- `prompt` — the complete, ready-to-use subagent prompt (do not modify it)
|
|
66
|
+
|
|
67
|
+
---
|
|
68
|
+
|
|
69
|
+
### Step 4 — Dispatch All Subagents in Parallel
|
|
22
70
|
|
|
23
|
-
|
|
71
|
+
**In a single message**, fire one `Agent` call per entry in `dispatch-plan.json`:
|
|
24
72
|
|
|
25
|
-
|
|
26
|
-
|
|
73
|
+
```
|
|
74
|
+
Agent({ description: entry.description, prompt: entry.prompt })
|
|
75
|
+
```
|
|
27
76
|
|
|
28
|
-
|
|
77
|
+
All calls must be sent simultaneously — never await one before firing the next. This is the critical performance constraint. Wait for all to complete before proceeding.
|
|
29
78
|
|
|
30
|
-
-
|
|
31
|
-
|
|
32
|
-
|
|
79
|
+
Each subagent reads its assigned files, writes a validated JSON result to `output_path`, and self-validates via `node dispatch/validate-result.mjs`. You do not need to check individual subagent output.
|
|
80
|
+
|
|
81
|
+
---
|
|
33
82
|
|
|
34
|
-
|
|
83
|
+
### Step 5 — Merge and Ingest
|
|
35
84
|
|
|
36
|
-
|
|
37
|
-
2. Use your file-reading tool to examine the specific source code files mentioned.
|
|
38
|
-
3. Critically analyze the codebase. Use your deepest reasoning capabilities (e.g., chain of thought) to discover defects, logic errors, or systemic architectural issues requested in the prompt.
|
|
85
|
+
Run in sequence:
|
|
39
86
|
|
|
40
|
-
|
|
87
|
+
```bash
|
|
88
|
+
node dispatch/merge-results.mjs --run-id <run_id>
|
|
89
|
+
```
|
|
41
90
|
|
|
42
|
-
|
|
43
|
-
Do not use `echo` or generic terminal shell strings for large JSON structures to avoid breaking JSON escaping.
|
|
44
|
-
Instead, use your raw **File Edit Tool** to reliably save your results to the exact `audit_results_path` named in `.audit-artifacts/dispatch/current-task.json`.
|
|
45
|
-
If `current-tasks.json` exists, emit one `AuditResult` per assigned task in that batch.
|
|
91
|
+
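Then execute the `worker_command` from `current-task.json`. It is a JSON array of argv elements — join the elements with single spaces into a shell command, quoting any element that contains whitespace or shell metacharacters, and run it.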
|
|
46
92
|
|
|
47
|
-
|
|
93
|
+
Loop back to **Step 1**.
|
|
48
94
|
|
|
49
|
-
|
|
95
|
+
---
|
|
50
96
|
|
|
51
|
-
|
|
97
|
+
### Step 6 — Present Results
|
|
52
98
|
|
|
53
|
-
|
|
99
|
+
When `audit_state.status` is `"complete"`, stop the loop. Do **not** run the orchestrator again.
|
|
54
100
|
|
|
55
|
-
|
|
101
|
+
Read `audit-report.md` and present the completed audit to the user. Lead with the work blocks — they are the primary remediation handoff. Wait for the user to ask you to begin resolving one or more work blocks.
|
|
56
102
|
|
|
57
|
-
|
|
103
|
+
---
|
|
58
104
|
|
|
59
|
-
|
|
60
|
-
Instead, read the final deterministic report at:
|
|
105
|
+
## Edge Cases
|
|
61
106
|
|
|
62
|
-
- `
|
|
107
|
+
**Non-agent blocker:** If `audit_state.blockers` contains a message that requires operator input (not code review), stop and report the blocker verbatim to the user.
|
|
63
108
|
|
|
64
|
-
|
|
65
|
-
they are the primary remediation handoff units.
|
|
109
|
+
**Large task warnings:** `prepare-dispatch.mjs` warns about tasks exceeding ~1500 lines. If a subagent hits a quota limit and fails to produce output, `merge-results.mjs` excludes it silently — those tasks remain pending and are picked up in the next loop iteration. No manual intervention needed.
|
|
66
110
|
|
|
67
|
-
|
|
111
|
+
**Failed validation:** Subagents self-validate and retry up to 3 times before writing a fallback empty result. `merge-results.mjs` writes `failed-tasks.json` listing any tasks that still failed. Those tasks are requeued automatically in the next cycle.
|