auditor-lambda 0.9.2 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -9
- package/audit-code-wrapper-lib.mjs +19 -920
- package/dist/cli/auditStep.d.ts +1 -33
- package/dist/cli/dispatch.d.ts +47 -0
- package/dist/cli/dispatch.js +116 -1
- package/dist/cli/mergeAndIngestCommand.js +10 -0
- package/dist/cli/nextStepCommand.js +3 -1
- package/dist/cli/prompts.d.ts +2 -0
- package/dist/cli/prompts.js +9 -0
- package/dist/cli/semanticReviewStep.js +12 -1
- package/dist/cli/steps.d.ts +15 -0
- package/dist/cli.js +1 -8
- package/dist/io/artifacts.d.ts +9 -1
- package/dist/io/artifacts.js +7 -0
- package/dist/io/runArtifacts.d.ts +14 -0
- package/dist/io/runArtifacts.js +23 -0
- package/dist/orchestrator/designReviewPrompt.d.ts +4 -1
- package/dist/orchestrator/designReviewPrompt.js +43 -2
- package/dist/orchestrator/executorResult.d.ts +25 -0
- package/dist/orchestrator/intakeExecutors.d.ts +19 -1
- package/dist/orchestrator/intakeExecutors.js +89 -3
- package/dist/orchestrator/nextStep.d.ts +1 -0
- package/dist/orchestrator/nextStep.js +1 -1
- package/dist/orchestrator/state.js +8 -1
- package/dist/providers/constants.d.ts +1 -1
- package/dist/providers/constants.js +1 -1
- package/dist/reporting/synthesis.d.ts +8 -0
- package/dist/reporting/synthesis.js +16 -1
- package/dist/supervisor/operatorHandoff.js +2 -0
- package/dist/types/auditScope.d.ts +16 -2
- package/dist/validation/sessionConfig.js +35 -0
- package/docs/contracts.md +0 -16
- package/docs/operator-guide.md +6 -8
- package/package.json +1 -1
- package/schemas/audit_findings.schema.json +1 -0
- package/scripts/postinstall.mjs +0 -174
- package/skills/audit-code/SKILL.md +17 -1
- package/skills/audit-code/audit-code.prompt.md +25 -0
- package/dist/mcp/server.d.ts +0 -72
- package/dist/mcp/server.js +0 -765
package/dist/cli/auditStep.d.ts
CHANGED
|
@@ -1,7 +1,4 @@
|
|
|
1
|
-
import type { AuditResult } from "../types.js";
|
|
2
1
|
import type { AnalyzerSetting } from "@audit-tools/shared";
|
|
3
|
-
import type { RuntimeValidationReport } from "../types/runtimeValidation.js";
|
|
4
|
-
import type { ExternalAnalyzerResults } from "../types/externalAnalyzer.js";
|
|
5
2
|
export declare function runAuditStep(options: {
|
|
6
3
|
root: string;
|
|
7
4
|
artifactsDir: string;
|
|
@@ -23,36 +20,7 @@ export declare function ingestBatchAuditResults(options: {
|
|
|
23
20
|
batchDir: string;
|
|
24
21
|
}): Promise<{
|
|
25
22
|
batchFiles: string[];
|
|
26
|
-
bundle:
|
|
27
|
-
repo_manifest: import("../types.js").RepoManifest;
|
|
28
|
-
file_disposition: import("@audit-tools/shared").FileDisposition;
|
|
29
|
-
auto_fixes_applied: unknown;
|
|
30
|
-
unit_manifest: import("../types.js").UnitManifest;
|
|
31
|
-
graph_bundle: import("@audit-tools/shared").GraphBundle;
|
|
32
|
-
surface_manifest: import("@audit-tools/shared").SurfaceManifest;
|
|
33
|
-
critical_flows: import("@audit-tools/shared").CriticalFlowManifest;
|
|
34
|
-
flow_coverage: import("../types/flowCoverage.js").FlowCoverageManifest;
|
|
35
|
-
risk_register: import("@audit-tools/shared").RiskRegister;
|
|
36
|
-
design_assessment: import("../types/designAssessment.js").DesignAssessment;
|
|
37
|
-
analyzer_capability: import("../types/analyzerCapability.js").AnalyzerCapabilityRecord;
|
|
38
|
-
scope: import("../types/auditScope.js").AuditScopeManifest;
|
|
39
|
-
coverage_matrix: import("../types.js").CoverageMatrix;
|
|
40
|
-
runtime_validation_tasks: import("../types/runtimeValidation.js").RuntimeValidationTaskManifest;
|
|
41
|
-
runtime_validation_report: RuntimeValidationReport;
|
|
42
|
-
external_analyzer_results: ExternalAnalyzerResults;
|
|
43
|
-
syntax_resolution_status: unknown;
|
|
44
|
-
audit_results: AuditResult[];
|
|
45
|
-
audit_tasks: import("../types.js").AuditTask[];
|
|
46
|
-
audit_plan_metrics: import("../types/reviewPlanning.js").AuditPlanMetrics;
|
|
47
|
-
review_packets: import("../types/reviewPlanning.js").ReviewPacket[];
|
|
48
|
-
requeue_tasks: import("../types.js").AuditTask[];
|
|
49
|
-
audit_report: string;
|
|
50
|
-
audit_findings: import("@audit-tools/shared").AuditFindingsReport;
|
|
51
|
-
synthesis_narrative: import("../types/synthesisNarrative.js").SynthesisNarrativeRecord;
|
|
52
|
-
audit_state: import("../types/auditState.js").AuditState;
|
|
53
|
-
artifact_metadata: import("../types/artifactMetadata.js").ArtifactMetadataManifest;
|
|
54
|
-
tooling_manifest: import("../types/toolingManifest.js").ToolingManifest;
|
|
55
|
-
}>;
|
|
23
|
+
bundle: import("../io/artifacts.js").ArtifactBundle;
|
|
56
24
|
audit_state: import("../types/auditState.js").AuditState;
|
|
57
25
|
selected_obligation: string | null;
|
|
58
26
|
selected_executor: string;
|
package/dist/cli/dispatch.d.ts
CHANGED
|
@@ -20,9 +20,21 @@ export declare const ACTIVE_DISPATCH_FILENAME = "active-dispatch.json";
|
|
|
20
20
|
export interface ActiveDispatchState {
|
|
21
21
|
run_id: string;
|
|
22
22
|
created_at: string;
|
|
23
|
+
/** Emitted packets only (after canary/budget filtering). */
|
|
23
24
|
packet_count: number;
|
|
25
|
+
/** Tasks remaining this round (not-yet-done), not just emitted-packet tasks. */
|
|
24
26
|
task_count: number;
|
|
25
27
|
status: "active" | "merged";
|
|
28
|
+
/** "canary" on first contact when only the top packet was emitted; "fan_out" otherwise. */
|
|
29
|
+
phase: "canary" | "fan_out";
|
|
30
|
+
/** packet_id of the emitted canary packet when phase==="canary", else null. */
|
|
31
|
+
canary_packet_id: string | null;
|
|
32
|
+
/** Total packets that would have been emitted before a budget cap (present only when capped). */
|
|
33
|
+
budget_packet_count?: number;
|
|
34
|
+
/** packet_ids NOT emitted due to the budget cap. */
|
|
35
|
+
deferred_packet_ids?: string[];
|
|
36
|
+
/** task_ids NOT emitted due to the budget cap. */
|
|
37
|
+
deferred_task_ids?: string[];
|
|
26
38
|
}
|
|
27
39
|
export interface DispatchResultMapEntry {
|
|
28
40
|
packet_id: string;
|
|
@@ -34,6 +46,25 @@ export interface DispatchResultMap {
|
|
|
34
46
|
run_id: string;
|
|
35
47
|
entries: DispatchResultMapEntry[];
|
|
36
48
|
}
|
|
49
|
+
export declare const DEFAULT_DISPATCH_CONFIRM_THRESHOLD = 10;
|
|
50
|
+
export interface DispatchFanout {
|
|
51
|
+
agent_count: number;
|
|
52
|
+
wave_count: number;
|
|
53
|
+
confirmation_recommended: boolean;
|
|
54
|
+
dispatch_summary: string;
|
|
55
|
+
}
|
|
56
|
+
/**
|
|
57
|
+
* FINDING-012: pure-arithmetic fan-out summary the loader can gate on. Given the
|
|
58
|
+
* number of agents (packets emitted this round, after canary/budget filtering)
|
|
59
|
+
* and the resolved wave size, derive the wave count, a human-readable summary,
|
|
60
|
+
* and whether the agent count exceeds the confirmation threshold (default 10).
|
|
61
|
+
* No LLM call, no side effects, no prompting.
|
|
62
|
+
*/
|
|
63
|
+
export declare function computeDispatchFanout(params: {
|
|
64
|
+
agentCount: number;
|
|
65
|
+
waveSize: number;
|
|
66
|
+
confirmThreshold?: number;
|
|
67
|
+
}): DispatchFanout;
|
|
37
68
|
export interface PrepareDispatchResult {
|
|
38
69
|
run_id: string;
|
|
39
70
|
dispatch_plan_path: string;
|
|
@@ -43,6 +74,22 @@ export interface PrepareDispatchResult {
|
|
|
43
74
|
skipped_task_count: number;
|
|
44
75
|
/** Subagent parallelism resolved for this dispatch run. */
|
|
45
76
|
wave_size: number;
|
|
77
|
+
/** "canary" on first contact when only the top packet was emitted; "fan_out" otherwise. */
|
|
78
|
+
phase: "canary" | "fan_out";
|
|
79
|
+
/** packet_id of the emitted canary packet when phase==="canary", else null. */
|
|
80
|
+
canary_packet_id: string | null;
|
|
81
|
+
/** Total agents that will be launched this run (packet_count after canary/budget). */
|
|
82
|
+
agent_count: number;
|
|
83
|
+
/** ceil(agent_count / max(1, wave_size)). */
|
|
84
|
+
wave_count: number;
|
|
85
|
+
/** True when agent_count exceeds sessionConfig.dispatch?.confirm_threshold (default 10). */
|
|
86
|
+
confirmation_recommended: boolean;
|
|
87
|
+
/** Human-readable summary, e.g. "12 agents across 3 waves (wave_size=4)". */
|
|
88
|
+
dispatch_summary: string;
|
|
89
|
+
/** True when a max_packets budget capped the emitted packets this run. */
|
|
90
|
+
budget_capped: boolean;
|
|
91
|
+
/** Number of packets deferred (not emitted) due to the budget cap. */
|
|
92
|
+
deferred_packet_count: number;
|
|
46
93
|
largest_packet: {
|
|
47
94
|
packet_id: string;
|
|
48
95
|
total_lines: number;
|
package/dist/cli/dispatch.js
CHANGED
|
@@ -4,6 +4,7 @@ import { isAbsolute, join, relative, resolve } from "node:path";
|
|
|
4
4
|
import { isFileMissingError, readJsonFile, writeJsonFile, DEFAULT_EMPIRICAL_HALF_LIFE_HOURS, } from "@audit-tools/shared";
|
|
5
5
|
import { buildQuotaSource } from "@audit-tools/shared/quota/compositeQuotaSource";
|
|
6
6
|
import { loadArtifactBundle } from "../io/artifacts.js";
|
|
7
|
+
import { writePacketSchemaFiles } from "../io/runArtifacts.js";
|
|
7
8
|
import { orderTasksForPacketReview, buildReviewPackets, sizeIndexFromManifest, } from "../orchestrator/reviewPackets.js";
|
|
8
9
|
import { buildFileAnchorSummary } from "../orchestrator/fileAnchors.js";
|
|
9
10
|
import { resolveFreshSessionProviderName } from "../providers/index.js";
|
|
@@ -16,6 +17,29 @@ export const SMALL_MODEL_HINT_MAX_ESTIMATED_TOKENS = 3000;
|
|
|
16
17
|
export const DEEP_MODEL_HINT_MIN_ESTIMATED_TOKENS = 9000;
|
|
17
18
|
export const DISPATCH_RESULT_MAP_FILENAME = "dispatch-result-map.json";
|
|
18
19
|
export const ACTIVE_DISPATCH_FILENAME = "active-dispatch.json";
|
|
20
|
+
export const DEFAULT_DISPATCH_CONFIRM_THRESHOLD = 10;
|
|
21
|
+
/**
|
|
22
|
+
* FINDING-012: pure-arithmetic fan-out summary the loader can gate on. Given the
|
|
23
|
+
* number of agents (packets emitted this round, after canary/budget filtering)
|
|
24
|
+
* and the resolved wave size, derive the wave count, a human-readable summary,
|
|
25
|
+
* and whether the agent count exceeds the confirmation threshold (default 10).
|
|
26
|
+
* No LLM call, no side effects, no prompting.
|
|
27
|
+
*/
|
|
28
|
+
export function computeDispatchFanout(params) {
|
|
29
|
+
const agentCount = params.agentCount;
|
|
30
|
+
const waveSize = params.waveSize;
|
|
31
|
+
const waveCount = Math.ceil(agentCount / Math.max(1, waveSize));
|
|
32
|
+
const confirmThreshold = params.confirmThreshold ?? DEFAULT_DISPATCH_CONFIRM_THRESHOLD;
|
|
33
|
+
const confirmationRecommended = agentCount > confirmThreshold;
|
|
34
|
+
const dispatchSummary = `${agentCount} agent${agentCount !== 1 ? "s" : ""} across ` +
|
|
35
|
+
`${waveCount} wave${waveCount !== 1 ? "s" : ""} (wave_size=${waveSize})`;
|
|
36
|
+
return {
|
|
37
|
+
agent_count: agentCount,
|
|
38
|
+
wave_count: waveCount,
|
|
39
|
+
confirmation_recommended: confirmationRecommended,
|
|
40
|
+
dispatch_summary: dispatchSummary,
|
|
41
|
+
};
|
|
42
|
+
}
|
|
19
43
|
export function dispatchResultMapPath(runDir) {
|
|
20
44
|
return join(runDir, DISPATCH_RESULT_MAP_FILENAME);
|
|
21
45
|
}
|
|
@@ -200,6 +224,19 @@ export async function prepareDispatchArtifacts(params) {
|
|
|
200
224
|
const lensDefsPath = join(params.packageRoot, "dispatch", "lens-definitions.json");
|
|
201
225
|
const lensDefs = await readJsonFile(lensDefsPath);
|
|
202
226
|
await mkdir(taskResultsDir, { recursive: true });
|
|
227
|
+
// FINDING-009: make the AuditResult JSON-Schema (and the two sibling schemas
|
|
228
|
+
// it $refs) reachable from this run's task-results directory so packet workers
|
|
229
|
+
// can optionally self-validate before calling submit-packet.
|
|
230
|
+
await writePacketSchemaFiles(taskResultsDir, params.packageRoot);
|
|
231
|
+
// FINDING-011: read the prior dispatch state (if any) so a fan-out round can
|
|
232
|
+
// detect a preceding canary that never produced an accepted result.
|
|
233
|
+
let priorActiveDispatch = null;
|
|
234
|
+
try {
|
|
235
|
+
priorActiveDispatch = await readJsonFile(join(artifactsDir, ACTIVE_DISPATCH_FILENAME));
|
|
236
|
+
}
|
|
237
|
+
catch {
|
|
238
|
+
/* none yet */
|
|
239
|
+
}
|
|
203
240
|
const priorResultTaskIds = new Set();
|
|
204
241
|
for (const task of tasks) {
|
|
205
242
|
if (existsSync(taskResultPath(taskResultsDir, task.task_id))) {
|
|
@@ -230,6 +267,34 @@ export async function prepareDispatchArtifacts(params) {
|
|
|
230
267
|
if (resultPathSet.size !== resultPathByTaskId.size) {
|
|
231
268
|
throw new Error("prepare-dispatch generated duplicate result paths; task ids must be uniquely addressable.");
|
|
232
269
|
}
|
|
270
|
+
// Packets come back priority-ordered (high -> medium -> low), so packets[0] is
|
|
271
|
+
// the top-priority packet. Filtering composes in a fixed order: canary first
|
|
272
|
+
// (emit only the top packet on first contact), then the budget cap (top-K).
|
|
273
|
+
//
|
|
274
|
+
// FINDING-011: single-worker canary. On first contact with a multi-packet run,
|
|
275
|
+
// dispatch only the top packet; the held-back packets' tasks keep no result
|
|
276
|
+
// file, so they re-enter `dispatchTasks` on the next call (fan-out).
|
|
277
|
+
const firstContact = priorResultTaskIds.size === 0;
|
|
278
|
+
const canaryEnabled = sessionConfig.dispatch?.canary !== false; // default on
|
|
279
|
+
const doCanary = firstContact && canaryEnabled && packets.length > 1;
|
|
280
|
+
const canaryPacketId = doCanary ? packets[0].packet_id : null;
|
|
281
|
+
const phase = doCanary ? "canary" : "fan_out";
|
|
282
|
+
const postCanaryPackets = doCanary ? packets.slice(0, 1) : packets;
|
|
283
|
+
// FINDING-013: top-K coverage budget. Cap the (already priority-ordered)
|
|
284
|
+
// packets at max_packets; the remainder are recorded as DEFERRED and excluded
|
|
285
|
+
// from the completion check so the run can finish honestly under budget.
|
|
286
|
+
// Budget defaults OFF (no cap) so default behavior is unchanged. Canary takes
|
|
287
|
+
// precedence: a canary round only emits 1 packet regardless of the budget.
|
|
288
|
+
const maxPackets = sessionConfig.dispatch?.max_packets;
|
|
289
|
+
const budgetCapped = typeof maxPackets === "number" &&
|
|
290
|
+
maxPackets >= 0 &&
|
|
291
|
+
maxPackets < postCanaryPackets.length;
|
|
292
|
+
const emitPackets = budgetCapped
|
|
293
|
+
? postCanaryPackets.slice(0, maxPackets)
|
|
294
|
+
: postCanaryPackets;
|
|
295
|
+
const deferredPackets = budgetCapped
|
|
296
|
+
? postCanaryPackets.slice(maxPackets)
|
|
297
|
+
: [];
|
|
233
298
|
const plan = [];
|
|
234
299
|
const resultMapEntries = [];
|
|
235
300
|
for (const task of tasks) {
|
|
@@ -245,7 +310,7 @@ export async function prepareDispatchArtifacts(params) {
|
|
|
245
310
|
let largestLines = 0;
|
|
246
311
|
let largestEstimatedTokens = 0;
|
|
247
312
|
const warnings = [];
|
|
248
|
-
for (const packet of packets) {
|
|
313
|
+
for (const packet of emitPackets) {
|
|
249
314
|
const promptPath = packetPromptPath(taskResultsDir, packet.packet_id);
|
|
250
315
|
const packetTasks = packet.task_ids
|
|
251
316
|
.map((taskId) => tasksById.get(taskId))
|
|
@@ -396,6 +461,11 @@ export async function prepareDispatchArtifacts(params) {
|
|
|
396
461
|
"way to record results, and it writes them inside the artifacts directory for you.",
|
|
397
462
|
"Produce one JSON array containing exactly one AuditResult object for each listed task.",
|
|
398
463
|
"",
|
|
464
|
+
"Schema file (resolve relative to this prompt's directory): audit_result.schema.json",
|
|
465
|
+
" $refs resolved from the same directory: finding.schema.json, audit_task.schema.json",
|
|
466
|
+
"You MAY validate your JSON array against the schema before calling submit-packet. This is optional;",
|
|
467
|
+
" the submit command performs the authoritative validation and will report any errors.",
|
|
468
|
+
"",
|
|
399
469
|
"Required AuditResult fields:",
|
|
400
470
|
" task_id copy from the task metadata",
|
|
401
471
|
" unit_id copy from the task metadata",
|
|
@@ -524,20 +594,57 @@ export async function prepareDispatchArtifacts(params) {
|
|
|
524
594
|
}
|
|
525
595
|
}
|
|
526
596
|
}
|
|
597
|
+
// FINDING-011: when advancing past a canary, warn if it never produced an
|
|
598
|
+
// accepted result. submit-packet writes the per-task result file ONLY after
|
|
599
|
+
// validation passes, so presence of that file == ACCEPTED. We map the recorded
|
|
600
|
+
// canary packet_id back to its task ids via the result map and check whether
|
|
601
|
+
// those tasks now have accepted results (i.e. landed in priorResultTaskIds).
|
|
602
|
+
if (!doCanary && priorActiveDispatch?.phase === "canary" && priorActiveDispatch.canary_packet_id) {
|
|
603
|
+
const canaryAccepted = priorActiveDispatch.run_id === runId
|
|
604
|
+
? (await loadDispatchResultMap(runDir))?.entries
|
|
605
|
+
.filter((entry) => entry.packet_id === priorActiveDispatch.canary_packet_id)
|
|
606
|
+
.every((entry) => priorResultTaskIds.has(entry.task_id)) ?? false
|
|
607
|
+
: false;
|
|
608
|
+
if (!canaryAccepted) {
|
|
609
|
+
warnings.push({
|
|
610
|
+
code: "canary_not_accepted",
|
|
611
|
+
message: `Canary packet ${priorActiveDispatch.canary_packet_id} did not produce an accepted result before fan-out; remaining packets are being dispatched anyway.`,
|
|
612
|
+
});
|
|
613
|
+
}
|
|
614
|
+
}
|
|
527
615
|
const warningsPath = warnings.length > 0
|
|
528
616
|
? join(runDir, "dispatch-warnings.json")
|
|
529
617
|
: null;
|
|
530
618
|
if (warningsPath) {
|
|
531
619
|
await writeJsonFile(warningsPath, warnings);
|
|
532
620
|
}
|
|
621
|
+
// FINDING-013: record deferred packets/tasks so the completion obligation can
|
|
622
|
+
// exclude them under a budget cap (present only when actually capped).
|
|
623
|
+
const deferredPacketIds = deferredPackets.map((packet) => packet.packet_id);
|
|
624
|
+
const deferredTaskIds = deferredPackets.flatMap((packet) => packet.task_ids);
|
|
533
625
|
const activeDispatch = {
|
|
534
626
|
run_id: runId,
|
|
535
627
|
created_at: new Date().toISOString(),
|
|
536
628
|
packet_count: plan.length,
|
|
537
629
|
task_count: orderedTasks.length,
|
|
538
630
|
status: "active",
|
|
631
|
+
phase,
|
|
632
|
+
canary_packet_id: canaryPacketId,
|
|
633
|
+
...(budgetCapped
|
|
634
|
+
? {
|
|
635
|
+
budget_packet_count: postCanaryPackets.length,
|
|
636
|
+
deferred_packet_ids: deferredPacketIds,
|
|
637
|
+
deferred_task_ids: deferredTaskIds,
|
|
638
|
+
}
|
|
639
|
+
: {}),
|
|
539
640
|
};
|
|
540
641
|
await writeJsonFile(join(artifactsDir, ACTIVE_DISPATCH_FILENAME), activeDispatch);
|
|
642
|
+
// FINDING-012: pure-arithmetic fan-out summary the loader can gate on.
|
|
643
|
+
const fanout = computeDispatchFanout({
|
|
644
|
+
agentCount: plan.length,
|
|
645
|
+
waveSize: waveSchedule.wave_size,
|
|
646
|
+
confirmThreshold: sessionConfig.dispatch?.confirm_threshold,
|
|
647
|
+
});
|
|
541
648
|
return {
|
|
542
649
|
run_id: runId,
|
|
543
650
|
dispatch_plan_path: dispatchPlanPath,
|
|
@@ -546,6 +653,14 @@ export async function prepareDispatchArtifacts(params) {
|
|
|
546
653
|
task_count: orderedTasks.length,
|
|
547
654
|
skipped_task_count: priorResultTaskIds.size,
|
|
548
655
|
wave_size: waveSchedule.wave_size,
|
|
656
|
+
phase,
|
|
657
|
+
canary_packet_id: canaryPacketId,
|
|
658
|
+
agent_count: fanout.agent_count,
|
|
659
|
+
wave_count: fanout.wave_count,
|
|
660
|
+
confirmation_recommended: fanout.confirmation_recommended,
|
|
661
|
+
dispatch_summary: fanout.dispatch_summary,
|
|
662
|
+
budget_capped: budgetCapped,
|
|
663
|
+
deferred_packet_count: deferredPackets.length,
|
|
549
664
|
largest_packet: largestPacketId
|
|
550
665
|
? {
|
|
551
666
|
packet_id: largestPacketId,
|
|
package/dist/cli/mergeAndIngestCommand.js
CHANGED
|
@@ -7,6 +7,11 @@ import { DISPATCH_RESULT_MAP_FILENAME, ACTIVE_DISPATCH_FILENAME, loadDispatchRes
|
|
|
7
7
|
import { addFileLineCountHints } from "./lineIndex.js";
|
|
8
8
|
import { isCanonicalResultFilename, getArtifactsDir, getFlag } from "./args.js";
|
|
9
9
|
import { buildWorkerResult } from "./workerResult.js";
|
|
10
|
+
import { PACKET_SCHEMA_FILENAMES } from "../io/runArtifacts.js";
|
|
11
|
+
// Schema pointer files prepare-dispatch copies into task-results/ for optional
|
|
12
|
+
// worker self-validation. They are expected, not stray — skip them when
|
|
13
|
+
// scanning for spurious files.
|
|
14
|
+
const PACKET_SCHEMA_FILENAME_SET = new Set(PACKET_SCHEMA_FILENAMES);
|
|
10
15
|
export async function cmdMergeAndIngest(argv) {
|
|
11
16
|
const runId = getFlag(argv, "--run-id");
|
|
12
17
|
if (!runId)
|
|
@@ -64,6 +69,11 @@ export async function cmdMergeAndIngest(argv) {
|
|
|
64
69
|
const spuriousFiles = [];
|
|
65
70
|
const fallbackByTaskId = new Map();
|
|
66
71
|
for (const filename of files) {
|
|
72
|
+
// Schema pointer files (audit_result/finding/audit_task .schema.json) are
|
|
73
|
+
// copied into task-results/ by prepare-dispatch for optional worker
|
|
74
|
+
// self-validation; they are expected, not stray.
|
|
75
|
+
if (PACKET_SCHEMA_FILENAME_SET.has(filename))
|
|
76
|
+
continue;
|
|
67
77
|
const filePath = resolve(join(taskResultsDir, filename));
|
|
68
78
|
if (expectedPaths.has(filePath))
|
|
69
79
|
continue;
|
|
package/dist/cli/nextStepCommand.js
CHANGED
|
@@ -455,7 +455,9 @@ export async function cmdNextStep(argv) {
|
|
|
455
455
|
const designReviewResultsPath = join(artifactsDir, "incoming", "design-review-findings.json");
|
|
456
456
|
await mkdir(join(artifactsDir, "incoming"), { recursive: true });
|
|
457
457
|
const continueCommand = nextStepCommand(root, artifactsDir);
|
|
458
|
-
const prompt = renderDesignReviewPrompt(result.bundle);
|
|
458
|
+
const prompt = renderDesignReviewPrompt(result.bundle, {
|
|
459
|
+
max_units: sessionConfig.design_review?.max_units,
|
|
460
|
+
});
|
|
459
461
|
const fullPrompt = [
|
|
460
462
|
prompt,
|
|
461
463
|
"## Results path",
|
package/dist/cli/prompts.d.ts
CHANGED
|
@@ -10,6 +10,8 @@ export declare function renderDispatchReviewPrompt(params: {
|
|
|
10
10
|
dispatchQuotaPath: string | null;
|
|
11
11
|
hostCanRestrictSubagentTools: boolean;
|
|
12
12
|
hostCanSelectSubagentModel: boolean;
|
|
13
|
+
phase?: "canary" | "fan_out";
|
|
14
|
+
canaryPacketId?: string | null;
|
|
13
15
|
}): string;
|
|
14
16
|
export declare function renderSingleTaskFallbackStepPrompt(params: {
|
|
15
17
|
singleTaskPromptPath: string;
|
package/dist/cli/prompts.js
CHANGED
|
@@ -78,10 +78,19 @@ export function renderDispatchReviewPrompt(params) {
|
|
|
78
78
|
"",
|
|
79
79
|
"Launch one subagent for each entry in the plan.",
|
|
80
80
|
];
|
|
81
|
+
const canaryLines = params.phase === "canary"
|
|
82
|
+
? [
|
|
83
|
+
"",
|
|
84
|
+
"This is a CANARY round: the plan contains only the single top-priority packet. " +
|
|
85
|
+
"Dispatch it, run merge-and-ingest, then run next-step — the remaining packets fan out " +
|
|
86
|
+
"on the following step once this packet's result is accepted.",
|
|
87
|
+
]
|
|
88
|
+
: [];
|
|
81
89
|
return [
|
|
82
90
|
"# audit-code dispatch review",
|
|
83
91
|
"",
|
|
84
92
|
...dispatchDataLines,
|
|
93
|
+
...canaryLines,
|
|
85
94
|
"",
|
|
86
95
|
"Pass each `entry.prompt_path` literally to its subagent; do not load packet prompt files into this orchestrator context.",
|
|
87
96
|
"",
|
|
package/dist/cli/semanticReviewStep.js
CHANGED
|
@@ -64,7 +64,10 @@ export async function renderSemanticReviewStep(params) {
|
|
|
64
64
|
allowedCommands: [mergeCommand, continueCommand],
|
|
65
65
|
allowedMcpTools: ["auditor_merge_and_ingest", "auditor_continue_audit"],
|
|
66
66
|
progress: {
|
|
67
|
-
summary: `Dispatching ${dispatch.packet_count} review packet(s) covering ` +
|
|
67
|
+
summary: (dispatch.phase === "canary"
|
|
68
|
+
? `Canary: dispatching only the top-priority packet (${dispatch.canary_packet_id}) before fan-out. `
|
|
69
|
+
: "") +
|
|
70
|
+
`Dispatching ${dispatch.packet_count} review packet(s) covering ` +
|
|
68
71
|
`${dispatch.task_count} task(s) in waves of ${dispatch.wave_size}` +
|
|
69
72
|
(dispatch.skipped_task_count > 0
|
|
70
73
|
? `; ${dispatch.skipped_task_count} task(s) already completed.`
|
|
@@ -73,6 +76,12 @@ export async function renderSemanticReviewStep(params) {
|
|
|
73
76
|
pending_tasks: dispatch.task_count,
|
|
74
77
|
completed_tasks: dispatch.skipped_task_count,
|
|
75
78
|
wave_size: dispatch.wave_size,
|
|
79
|
+
phase: dispatch.phase,
|
|
80
|
+
canary_packet_id: dispatch.canary_packet_id,
|
|
81
|
+
agent_count: dispatch.agent_count,
|
|
82
|
+
wave_count: dispatch.wave_count,
|
|
83
|
+
confirmation_recommended: dispatch.confirmation_recommended,
|
|
84
|
+
dispatch_summary: dispatch.dispatch_summary,
|
|
76
85
|
},
|
|
77
86
|
stopCondition: "Dispatch every packet, run merge-and-ingest once, then run next-step.",
|
|
78
87
|
repoRoot: root,
|
|
@@ -91,6 +100,8 @@ export async function renderSemanticReviewStep(params) {
|
|
|
91
100
|
dispatchQuotaPath: dispatch.dispatch_quota_path,
|
|
92
101
|
hostCanRestrictSubagentTools: params.hostCanRestrictSubagentTools,
|
|
93
102
|
hostCanSelectSubagentModel: params.hostCanSelectSubagentModel,
|
|
103
|
+
phase: dispatch.phase,
|
|
104
|
+
canaryPacketId: dispatch.canary_packet_id,
|
|
94
105
|
}),
|
|
95
106
|
access: {
|
|
96
107
|
read_paths: [
|
package/dist/cli/steps.d.ts
CHANGED
|
@@ -17,6 +17,21 @@ export interface StepProgress {
|
|
|
17
17
|
completed_tasks?: number;
|
|
18
18
|
/** Subagent parallelism resolved for this dispatch run. */
|
|
19
19
|
wave_size?: number;
|
|
20
|
+
/** "canary" when only the top packet was emitted this round; "fan_out" otherwise. */
|
|
21
|
+
phase?: "canary" | "fan_out";
|
|
22
|
+
/** packet_id of the emitted canary packet when `phase === "canary"`. */
|
|
23
|
+
canary_packet_id?: string | null;
|
|
24
|
+
/** Total agents (packets) that will be launched this run. */
|
|
25
|
+
agent_count?: number;
|
|
26
|
+
/** Number of dispatch waves for this run (`ceil(agent_count / wave_size)`). */
|
|
27
|
+
wave_count?: number;
|
|
28
|
+
/**
|
|
29
|
+
* True when `agent_count` exceeds the configured confirm threshold and the
|
|
30
|
+
* loader should pause for user confirmation before fan-out (FINDING-012).
|
|
31
|
+
*/
|
|
32
|
+
confirmation_recommended?: boolean;
|
|
33
|
+
/** Human-readable fan-out summary, e.g. "12 agents across 3 waves (wave_size=4)". */
|
|
34
|
+
dispatch_summary?: string;
|
|
20
35
|
}
|
|
21
36
|
export interface StepArtifact {
|
|
22
37
|
contract_version: typeof STEP_CONTRACT_VERSION;
|
package/dist/cli.js
CHANGED
|
@@ -21,7 +21,6 @@ import { deriveAuditState } from "./orchestrator/state.js";
|
|
|
21
21
|
import { createFreshSessionProvider, resolveFreshSessionProviderName, } from "./providers/index.js";
|
|
22
22
|
import { getSessionConfigPath, loadSessionConfig, readSessionConfigFile, } from "./supervisor/sessionConfig.js";
|
|
23
23
|
import { clearDispatchFiles, ensureSupervisorDirs, } from "./io/runArtifacts.js";
|
|
24
|
-
import { runAuditCodeMcpServer } from "./mcp/server.js";
|
|
25
24
|
import { scheduleWave, buildProviderModelKey, readQuotaState, resolveLimits, resolveHostActiveSubagentLimit, computeMaxSafeConcurrency, getQuotaStatePath, lookupDiscoveredLimits, setQuotaStateDir, } from "./quota/index.js";
|
|
26
25
|
import { DIRECT_CLI_DEFAULTS, getFlag, hasFlag, fromBase64Url, taskResultPath, getArtifactsDir, getRootDir, warnIfNotGitRepo, getBatchResultsDir, getMaxRuns, getAgentBatchSize, getParallelWorkers, getTimeoutMs, getExplicitProvider, getHostModel, getHostMaxActiveSubagents, resolveRunProviderName, chunkArray, getUiMode, looksLikeCliFlag, countLines, } from "./cli/args.js";
|
|
27
26
|
import { ACTIVE_DISPATCH_FILENAME, loadDispatchResultMap, prepareDispatchArtifacts, } from "./cli/dispatch.js";
|
|
@@ -539,9 +538,6 @@ async function cmdCleanup(argv) {
|
|
|
539
538
|
dry_run: dryRun,
|
|
540
539
|
}, null, 2));
|
|
541
540
|
}
|
|
542
|
-
async function cmdMcp(argv) {
|
|
543
|
-
await runAuditCodeMcpServer(argv.slice(3));
|
|
544
|
-
}
|
|
545
541
|
async function cmdQuota(argv) {
|
|
546
542
|
const artifactsDir = getArtifactsDir(argv);
|
|
547
543
|
const sessionConfig = await loadSessionConfig(artifactsDir).catch(() => ({}));
|
|
@@ -707,9 +703,6 @@ async function main(argv) {
|
|
|
707
703
|
case "cleanup":
|
|
708
704
|
await cmdCleanup(argv);
|
|
709
705
|
return;
|
|
710
|
-
case "mcp":
|
|
711
|
-
await cmdMcp(argv);
|
|
712
|
-
return;
|
|
713
706
|
case "prepare-dispatch":
|
|
714
707
|
await cmdPrepareDispatch(argv);
|
|
715
708
|
return;
|
|
@@ -733,7 +726,7 @@ async function main(argv) {
|
|
|
733
726
|
return;
|
|
734
727
|
default:
|
|
735
728
|
console.error(`Unknown command: ${command}`);
|
|
736
|
-
console.error("Available commands: sample-run, advance-audit, next-step, run-to-completion, worker-run, import-external-analyzer, intake, plan, ingest-results, explain-task, update-runtime-validation, validate, validate-results, requeue, synthesize, cleanup, mcp, prepare-dispatch, merge-and-ingest, submit-packet, validate-result, quota, status, dispatch-status");
|
|
729
|
+
console.error("Available commands: sample-run, advance-audit, next-step, run-to-completion, worker-run, import-external-analyzer, intake, plan, ingest-results, explain-task, update-runtime-validation, validate, validate-results, requeue, synthesize, cleanup, prepare-dispatch, merge-and-ingest, submit-packet, validate-result, quota, status, dispatch-status");
|
|
737
730
|
process.exitCode = 1;
|
|
738
731
|
}
|
|
739
732
|
}
|
package/dist/io/artifacts.d.ts
CHANGED
|
@@ -12,6 +12,7 @@ import type { DesignAssessment } from "../types/designAssessment.js";
|
|
|
12
12
|
import type { AnalyzerCapabilityRecord } from "../types/analyzerCapability.js";
|
|
13
13
|
import type { AuditScopeManifest } from "../types/auditScope.js";
|
|
14
14
|
import type { ToolingManifest } from "../types/toolingManifest.js";
|
|
15
|
+
import type { ActiveDispatchState } from "../cli/dispatch.js";
|
|
15
16
|
type ArtifactPayloadMap = {
|
|
16
17
|
repo_manifest: RepoManifest;
|
|
17
18
|
file_disposition: FileDisposition;
|
|
@@ -45,8 +46,15 @@ type ArtifactPayloadMap = {
|
|
|
45
46
|
/**
|
|
46
47
|
* Audit artifacts accumulate phase-by-phase as the orchestrator advances.
|
|
47
48
|
* Missing keys mean the corresponding artifact has not been produced yet.
|
|
49
|
+
*
|
|
50
|
+
* `active_dispatch` is loaded specially (like `tooling_manifest`): it lives at
|
|
51
|
+
* the artifacts root rather than as a standard pruned artifact, and carries the
|
|
52
|
+
* in-flight dispatch phase plus any budget-deferred task ids the completion
|
|
53
|
+
* obligation must exclude.
|
|
48
54
|
*/
|
|
49
|
-
export type ArtifactBundle = Partial<ArtifactPayloadMap>;
|
|
55
|
+
export type ArtifactBundle = Partial<ArtifactPayloadMap> & {
|
|
56
|
+
active_dispatch?: ActiveDispatchState;
|
|
57
|
+
};
|
|
50
58
|
export type ArtifactBundleKey = keyof ArtifactPayloadMap;
|
|
51
59
|
type ArtifactPhase = "intake" | "analysis" | "execution" | "reporting" | "supervisor";
|
|
52
60
|
interface ArtifactDefinition<K extends ArtifactBundleKey = ArtifactBundleKey> {
|
package/dist/io/artifacts.js
CHANGED
|
@@ -77,6 +77,13 @@ export async function loadArtifactBundle(root) {
|
|
|
77
77
|
}
|
|
78
78
|
}
|
|
79
79
|
bundle.tooling_manifest = await buildToolingManifest();
|
|
80
|
+
// active-dispatch.json is written by prepare-dispatch at the artifacts root
|
|
81
|
+
// (not a standard ARTIFACT_DEFINITIONS entry). Load it so the completion
|
|
82
|
+
// obligation can exclude budget-deferred tasks. Absent on a fresh run.
|
|
83
|
+
const activeDispatch = await readOptionalJsonFile(join(root, "active-dispatch.json"));
|
|
84
|
+
if (activeDispatch !== undefined) {
|
|
85
|
+
bundle.active_dispatch = activeDispatch;
|
|
86
|
+
}
|
|
80
87
|
return bundle;
|
|
81
88
|
}
|
|
82
89
|
export async function writeCoreArtifacts(root, bundle, options = {}) {
|
|
package/dist/io/runArtifacts.d.ts
CHANGED
|
@@ -2,6 +2,20 @@ import type { AuditTask } from "../types.js";
|
|
|
2
2
|
import type { WorkerTask } from "../types/workerSession.js";
|
|
3
3
|
import type { RunPaths, DispatchBatchRun } from "./runArtifactTypes.js";
|
|
4
4
|
export type { RunPaths, DispatchBatchRun } from "./runArtifactTypes.js";
|
|
5
|
+
/**
|
|
6
|
+
* Schema files copied into a dispatch run's `task-results/` directory so packet
|
|
7
|
+
* workers can optionally self-validate before submit. `audit_result.schema.json`
|
|
8
|
+
* `$ref`s the other two by relative filename, so all three must sit side-by-side
|
|
9
|
+
* for a validator to resolve them. Exported so merge-and-ingest can recognize
|
|
10
|
+
* them as legitimate (not stray) files in `task-results/`.
|
|
11
|
+
*/
|
|
12
|
+
export declare const PACKET_SCHEMA_FILENAMES: readonly ["audit_result.schema.json", "finding.schema.json", "audit_task.schema.json"];
|
|
13
|
+
/**
|
|
14
|
+
* Copy {@link PACKET_SCHEMA_FILENAMES} into `targetDir` under their canonical
|
|
15
|
+
* filenames, making the AuditResult schema reachable from a dispatch run's
|
|
16
|
+
* `task-results/` directory.
|
|
17
|
+
*/
|
|
18
|
+
export declare function writePacketSchemaFiles(targetDir: string, pkgRoot: string): Promise<void>;
|
|
5
19
|
export declare function buildRunId(obligationId: string | null, index: number, now?: Date): string;
|
|
6
20
|
export declare function getRunPaths(artifactsDir: string, runId: string): RunPaths;
|
|
7
21
|
export declare function ensureSupervisorDirs(artifactsDir: string): Promise<void>;
|
package/dist/io/runArtifacts.js
CHANGED
|
@@ -7,6 +7,29 @@ const packageRoot = resolve(moduleDir, "..", "..");
|
|
|
7
7
|
const auditResultSchemaPath = join(packageRoot, "schemas", "audit_result.schema.json");
|
|
8
8
|
const auditResultsSchemaPath = join(packageRoot, "schemas", "audit_results.schema.json");
|
|
9
9
|
const findingSchemaPath = join(packageRoot, "schemas", "finding.schema.json");
|
|
10
|
+
/**
|
|
11
|
+
* Schema files copied into a dispatch run's `task-results/` directory so packet
|
|
12
|
+
* workers can optionally self-validate before submit. `audit_result.schema.json`
|
|
13
|
+
* `$ref`s the other two by relative filename, so all three must sit side-by-side
|
|
14
|
+
* for a validator to resolve them. Exported so merge-and-ingest can recognize
|
|
15
|
+
* them as legitimate (not stray) files in `task-results/`.
|
|
16
|
+
*/
|
|
17
|
+
export const PACKET_SCHEMA_FILENAMES = [
|
|
18
|
+
"audit_result.schema.json",
|
|
19
|
+
"finding.schema.json",
|
|
20
|
+
"audit_task.schema.json",
|
|
21
|
+
];
|
|
22
|
+
/**
|
|
23
|
+
* Copy {@link PACKET_SCHEMA_FILENAMES} into `targetDir` under their canonical
|
|
24
|
+
* filenames, making the AuditResult schema reachable from a dispatch run's
|
|
25
|
+
* `task-results/` directory.
|
|
26
|
+
*/
|
|
27
|
+
export async function writePacketSchemaFiles(targetDir, pkgRoot) {
|
|
28
|
+
await mkdir(targetDir, { recursive: true });
|
|
29
|
+
for (const name of PACKET_SCHEMA_FILENAMES) {
|
|
30
|
+
await writeFile(join(targetDir, name), await readFile(join(pkgRoot, "schemas", name), "utf8"), "utf8");
|
|
31
|
+
}
|
|
32
|
+
}
|
|
10
33
|
const CURRENT_TASK_FILENAME = "current-task.json";
|
|
11
34
|
const CURRENT_PROMPT_FILENAME = "current-prompt.md";
|
|
12
35
|
const CURRENT_TASKS_FILENAME = "current-tasks.json";
|
|
@@ -1,2 +1,5 @@
|
|
|
1
1
|
import type { ArtifactBundle } from "../io/artifacts.js";
|
|
2
|
-
export
|
|
2
|
+
export interface DesignReviewOptions {
|
|
3
|
+
max_units?: number;
|
|
4
|
+
}
|
|
5
|
+
export declare function renderDesignReviewPrompt(bundle: ArtifactBundle, options?: DesignReviewOptions): string;
|
|
@@ -45,6 +45,39 @@ function summarizeRisk(bundle) {
|
|
|
45
45
|
...lines,
|
|
46
46
|
].join("\n");
|
|
47
47
|
}
|
|
48
|
+
function buildPrioritizedReadingList(bundle, maxUnits) {
|
|
49
|
+
const items = bundle.risk_register?.items ?? [];
|
|
50
|
+
const units = bundle.unit_manifest?.units ?? [];
|
|
51
|
+
if (items.length === 0 && units.length === 0) {
|
|
52
|
+
return "No risk or unit data available; read the repository root files to orient yourself.";
|
|
53
|
+
}
|
|
54
|
+
// Build a map from unit_id → file list for fast lookup
|
|
55
|
+
const unitFiles = new Map();
|
|
56
|
+
for (const unit of units) {
|
|
57
|
+
unitFiles.set(unit.unit_id, unit.files);
|
|
58
|
+
}
|
|
59
|
+
// Sort risk items by score descending, then take the top-N
|
|
60
|
+
const sorted = [...items].sort((a, b) => b.risk_score - a.risk_score);
|
|
61
|
+
const top = sorted.slice(0, maxUnits);
|
|
62
|
+
if (top.length === 0) {
|
|
63
|
+
// Fall back to listing all units if no risk data
|
|
64
|
+
const allUnits = units.slice(0, maxUnits);
|
|
65
|
+
const lines = allUnits.map((u) => `- **${u.unit_id}** — ${u.files.join(", ")}`);
|
|
66
|
+
return [
|
|
67
|
+
`Top ${allUnits.length} unit(s) (no risk scores available):`,
|
|
68
|
+
...lines,
|
|
69
|
+
].join("\n");
|
|
70
|
+
}
|
|
71
|
+
const lines = top.map((item) => {
|
|
72
|
+
const files = unitFiles.get(item.unit_id);
|
|
73
|
+
const fileList = files && files.length > 0 ? files.join(", ") : "(files unknown)";
|
|
74
|
+
return `- **${item.unit_id}** (risk score: ${item.risk_score}) — ${fileList}`;
|
|
75
|
+
});
|
|
76
|
+
return [
|
|
77
|
+
`Top ${top.length} highest-risk unit(s) by risk score (out of ${items.length} total):`,
|
|
78
|
+
...lines,
|
|
79
|
+
].join("\n");
|
|
80
|
+
}
|
|
48
81
|
function summarizeSurfaces(bundle) {
|
|
49
82
|
const surfaces = bundle.surface_manifest?.surfaces ?? [];
|
|
50
83
|
if (surfaces.length === 0)
|
|
@@ -76,8 +109,12 @@ function formatDeterministicFindings(findings) {
|
|
|
76
109
|
...lines,
|
|
77
110
|
].join("\n");
|
|
78
111
|
}
|
|
79
|
-
export function renderDesignReviewPrompt(bundle) {
|
|
112
|
+
export function renderDesignReviewPrompt(bundle, options = {}) {
|
|
80
113
|
const deterministicFindings = bundle.design_assessment?.findings ?? [];
|
|
114
|
+
const unitCount = bundle.unit_manifest?.units.length ?? 0;
|
|
115
|
+
const defaultMaxUnits = Math.max(5, Math.min(20, Math.ceil(unitCount / 5)));
|
|
116
|
+
const maxUnits = options.max_units ?? defaultMaxUnits;
|
|
117
|
+
const prioritizedReadingList = buildPrioritizedReadingList(bundle, maxUnits);
|
|
81
118
|
return [
|
|
82
119
|
"# Project design review",
|
|
83
120
|
"",
|
|
@@ -117,7 +154,11 @@ export function renderDesignReviewPrompt(bundle) {
|
|
|
117
154
|
"",
|
|
118
155
|
"## What to assess",
|
|
119
156
|
"",
|
|
120
|
-
|
|
157
|
+
`Focus on the ${maxUnits} highest-risk units listed below; you need not read the entire repository, though you may follow any thread that demands more context. Produce findings about:`,
|
|
158
|
+
"",
|
|
159
|
+
"### Prioritised reading list",
|
|
160
|
+
"",
|
|
161
|
+
prioritizedReadingList,
|
|
121
162
|
"",
|
|
122
163
|
"- **Tool and library opportunities**: third-party tools, libraries, or frameworks that would improve the project. Concrete suggestions with rationale, not generic advice.",
|
|
123
164
|
"- **Architecture pattern improvements**: structural changes that would improve extensibility, testability, or maintainability. Consider whether the current abstractions match the problem domain.",
|