auditor-lambda 0.9.1 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -9
- package/audit-code-wrapper-lib.mjs +19 -915
- package/dispatch/merge-results.mjs +1 -1
- package/dist/cli/auditStep.d.ts +1 -33
- package/dist/cli/dispatch.d.ts +47 -0
- package/dist/cli/dispatch.js +116 -1
- package/dist/cli/mergeAndIngestCommand.js +55 -8
- package/dist/cli/nextStepCommand.js +43 -15
- package/dist/cli/prompts.d.ts +2 -0
- package/dist/cli/prompts.js +9 -0
- package/dist/cli/reviewRun.js +1 -1
- package/dist/cli/runToCompletion.js +21 -8
- package/dist/cli/semanticReviewStep.js +12 -1
- package/dist/cli/steps.d.ts +15 -0
- package/dist/cli.js +1 -8
- package/dist/io/artifacts.d.ts +9 -1
- package/dist/io/artifacts.js +7 -0
- package/dist/io/runArtifacts.d.ts +14 -0
- package/dist/io/runArtifacts.js +23 -0
- package/dist/orchestrator/designReviewPrompt.d.ts +4 -1
- package/dist/orchestrator/designReviewPrompt.js +43 -2
- package/dist/orchestrator/executorResult.d.ts +25 -0
- package/dist/orchestrator/intakeExecutors.d.ts +19 -1
- package/dist/orchestrator/intakeExecutors.js +89 -3
- package/dist/orchestrator/nextStep.d.ts +1 -0
- package/dist/orchestrator/nextStep.js +1 -1
- package/dist/orchestrator/state.js +8 -1
- package/dist/providers/constants.d.ts +1 -1
- package/dist/providers/constants.js +1 -1
- package/dist/reporting/synthesis.d.ts +8 -0
- package/dist/reporting/synthesis.js +16 -1
- package/dist/supervisor/operatorHandoff.js +8 -1
- package/dist/types/auditScope.d.ts +16 -2
- package/dist/validation/sessionConfig.js +35 -0
- package/docs/contracts.md +0 -16
- package/docs/operator-guide.md +6 -8
- package/package.json +1 -1
- package/schemas/audit_findings.schema.json +1 -0
- package/scripts/postinstall.mjs +0 -174
- package/skills/audit-code/SKILL.md +17 -1
- package/skills/audit-code/audit-code.prompt.md +25 -0
- package/dist/mcp/server.d.ts +0 -72
- package/dist/mcp/server.js +0 -765
|
@@ -15,7 +15,7 @@ const artifactsDir = artifactsDirIdx !== -1 && process.argv[artifactsDirIdx + 1]
|
|
|
15
15
|
: join(process.cwd(), ".audit-artifacts");
|
|
16
16
|
|
|
17
17
|
const taskResultsDir = join(artifactsDir, "runs", runId, "task-results");
|
|
18
|
-
const auditResultsPath = join(artifactsDir, "runs", runId, "
|
|
18
|
+
const auditResultsPath = join(artifactsDir, "runs", runId, "run-results.json");
|
|
19
19
|
const failedTasksPath = join(artifactsDir, "runs", runId, "failed-tasks.json");
|
|
20
20
|
const tasksPath = join(artifactsDir, "runs", runId, "pending-audit-tasks.json");
|
|
21
21
|
|
package/dist/cli/auditStep.d.ts
CHANGED
|
@@ -1,7 +1,4 @@
|
|
|
1
|
-
import type { AuditResult } from "../types.js";
|
|
2
1
|
import type { AnalyzerSetting } from "@audit-tools/shared";
|
|
3
|
-
import type { RuntimeValidationReport } from "../types/runtimeValidation.js";
|
|
4
|
-
import type { ExternalAnalyzerResults } from "../types/externalAnalyzer.js";
|
|
5
2
|
export declare function runAuditStep(options: {
|
|
6
3
|
root: string;
|
|
7
4
|
artifactsDir: string;
|
|
@@ -23,36 +20,7 @@ export declare function ingestBatchAuditResults(options: {
|
|
|
23
20
|
batchDir: string;
|
|
24
21
|
}): Promise<{
|
|
25
22
|
batchFiles: string[];
|
|
26
|
-
bundle:
|
|
27
|
-
repo_manifest: import("../types.js").RepoManifest;
|
|
28
|
-
file_disposition: import("@audit-tools/shared").FileDisposition;
|
|
29
|
-
auto_fixes_applied: unknown;
|
|
30
|
-
unit_manifest: import("../types.js").UnitManifest;
|
|
31
|
-
graph_bundle: import("@audit-tools/shared").GraphBundle;
|
|
32
|
-
surface_manifest: import("@audit-tools/shared").SurfaceManifest;
|
|
33
|
-
critical_flows: import("@audit-tools/shared").CriticalFlowManifest;
|
|
34
|
-
flow_coverage: import("../types/flowCoverage.js").FlowCoverageManifest;
|
|
35
|
-
risk_register: import("@audit-tools/shared").RiskRegister;
|
|
36
|
-
design_assessment: import("../types/designAssessment.js").DesignAssessment;
|
|
37
|
-
analyzer_capability: import("../types/analyzerCapability.js").AnalyzerCapabilityRecord;
|
|
38
|
-
scope: import("../types/auditScope.js").AuditScopeManifest;
|
|
39
|
-
coverage_matrix: import("../types.js").CoverageMatrix;
|
|
40
|
-
runtime_validation_tasks: import("../types/runtimeValidation.js").RuntimeValidationTaskManifest;
|
|
41
|
-
runtime_validation_report: RuntimeValidationReport;
|
|
42
|
-
external_analyzer_results: ExternalAnalyzerResults;
|
|
43
|
-
syntax_resolution_status: unknown;
|
|
44
|
-
audit_results: AuditResult[];
|
|
45
|
-
audit_tasks: import("../types.js").AuditTask[];
|
|
46
|
-
audit_plan_metrics: import("../types/reviewPlanning.js").AuditPlanMetrics;
|
|
47
|
-
review_packets: import("../types/reviewPlanning.js").ReviewPacket[];
|
|
48
|
-
requeue_tasks: import("../types.js").AuditTask[];
|
|
49
|
-
audit_report: string;
|
|
50
|
-
audit_findings: import("@audit-tools/shared").AuditFindingsReport;
|
|
51
|
-
synthesis_narrative: import("../types/synthesisNarrative.js").SynthesisNarrativeRecord;
|
|
52
|
-
audit_state: import("../types/auditState.js").AuditState;
|
|
53
|
-
artifact_metadata: import("../types/artifactMetadata.js").ArtifactMetadataManifest;
|
|
54
|
-
tooling_manifest: import("../types/toolingManifest.js").ToolingManifest;
|
|
55
|
-
}>;
|
|
23
|
+
bundle: import("../io/artifacts.js").ArtifactBundle;
|
|
56
24
|
audit_state: import("../types/auditState.js").AuditState;
|
|
57
25
|
selected_obligation: string | null;
|
|
58
26
|
selected_executor: string;
|
package/dist/cli/dispatch.d.ts
CHANGED
|
@@ -20,9 +20,21 @@ export declare const ACTIVE_DISPATCH_FILENAME = "active-dispatch.json";
|
|
|
20
20
|
export interface ActiveDispatchState {
|
|
21
21
|
run_id: string;
|
|
22
22
|
created_at: string;
|
|
23
|
+
/** Emitted packets only (after canary/budget filtering). */
|
|
23
24
|
packet_count: number;
|
|
25
|
+
/** Tasks remaining this round (not-yet-done), not just emitted-packet tasks. */
|
|
24
26
|
task_count: number;
|
|
25
27
|
status: "active" | "merged";
|
|
28
|
+
/** "canary" on first contact when only the top packet was emitted; "fan_out" otherwise. */
|
|
29
|
+
phase: "canary" | "fan_out";
|
|
30
|
+
/** packet_id of the emitted canary packet when phase==="canary", else null. */
|
|
31
|
+
canary_packet_id: string | null;
|
|
32
|
+
/** Total packets that would have been emitted before a budget cap (present only when capped). */
|
|
33
|
+
budget_packet_count?: number;
|
|
34
|
+
/** packet_ids NOT emitted due to the budget cap. */
|
|
35
|
+
deferred_packet_ids?: string[];
|
|
36
|
+
/** task_ids NOT emitted due to the budget cap. */
|
|
37
|
+
deferred_task_ids?: string[];
|
|
26
38
|
}
|
|
27
39
|
export interface DispatchResultMapEntry {
|
|
28
40
|
packet_id: string;
|
|
@@ -34,6 +46,25 @@ export interface DispatchResultMap {
|
|
|
34
46
|
run_id: string;
|
|
35
47
|
entries: DispatchResultMapEntry[];
|
|
36
48
|
}
|
|
49
|
+
export declare const DEFAULT_DISPATCH_CONFIRM_THRESHOLD = 10;
|
|
50
|
+
export interface DispatchFanout {
|
|
51
|
+
agent_count: number;
|
|
52
|
+
wave_count: number;
|
|
53
|
+
confirmation_recommended: boolean;
|
|
54
|
+
dispatch_summary: string;
|
|
55
|
+
}
|
|
56
|
+
/**
|
|
57
|
+
* FINDING-012: pure-arithmetic fan-out summary the loader can gate on. Given the
|
|
58
|
+
* number of agents (packets emitted this round, after canary/budget filtering)
|
|
59
|
+
* and the resolved wave size, derive the wave count, a human-readable summary,
|
|
60
|
+
* and whether the agent count exceeds the confirmation threshold (default 10).
|
|
61
|
+
* No LLM call, no side effects, no prompting.
|
|
62
|
+
*/
|
|
63
|
+
export declare function computeDispatchFanout(params: {
|
|
64
|
+
agentCount: number;
|
|
65
|
+
waveSize: number;
|
|
66
|
+
confirmThreshold?: number;
|
|
67
|
+
}): DispatchFanout;
|
|
37
68
|
export interface PrepareDispatchResult {
|
|
38
69
|
run_id: string;
|
|
39
70
|
dispatch_plan_path: string;
|
|
@@ -43,6 +74,22 @@ export interface PrepareDispatchResult {
|
|
|
43
74
|
skipped_task_count: number;
|
|
44
75
|
/** Subagent parallelism resolved for this dispatch run. */
|
|
45
76
|
wave_size: number;
|
|
77
|
+
/** "canary" on first contact when only the top packet was emitted; "fan_out" otherwise. */
|
|
78
|
+
phase: "canary" | "fan_out";
|
|
79
|
+
/** packet_id of the emitted canary packet when phase==="canary", else null. */
|
|
80
|
+
canary_packet_id: string | null;
|
|
81
|
+
/** Total agents that will be launched this run (packet_count after canary/budget). */
|
|
82
|
+
agent_count: number;
|
|
83
|
+
/** ceil(agent_count / max(1, wave_size)). */
|
|
84
|
+
wave_count: number;
|
|
85
|
+
/** True when agent_count exceeds sessionConfig.dispatch?.confirm_threshold (default 10). */
|
|
86
|
+
confirmation_recommended: boolean;
|
|
87
|
+
/** Human-readable summary, e.g. "12 agents across 3 waves (wave_size=4)". */
|
|
88
|
+
dispatch_summary: string;
|
|
89
|
+
/** True when a max_packets budget capped the emitted packets this run. */
|
|
90
|
+
budget_capped: boolean;
|
|
91
|
+
/** Number of packets deferred (not emitted) due to the budget cap. */
|
|
92
|
+
deferred_packet_count: number;
|
|
46
93
|
largest_packet: {
|
|
47
94
|
packet_id: string;
|
|
48
95
|
total_lines: number;
|
package/dist/cli/dispatch.js
CHANGED
|
@@ -4,6 +4,7 @@ import { isAbsolute, join, relative, resolve } from "node:path";
|
|
|
4
4
|
import { isFileMissingError, readJsonFile, writeJsonFile, DEFAULT_EMPIRICAL_HALF_LIFE_HOURS, } from "@audit-tools/shared";
|
|
5
5
|
import { buildQuotaSource } from "@audit-tools/shared/quota/compositeQuotaSource";
|
|
6
6
|
import { loadArtifactBundle } from "../io/artifacts.js";
|
|
7
|
+
import { writePacketSchemaFiles } from "../io/runArtifacts.js";
|
|
7
8
|
import { orderTasksForPacketReview, buildReviewPackets, sizeIndexFromManifest, } from "../orchestrator/reviewPackets.js";
|
|
8
9
|
import { buildFileAnchorSummary } from "../orchestrator/fileAnchors.js";
|
|
9
10
|
import { resolveFreshSessionProviderName } from "../providers/index.js";
|
|
@@ -16,6 +17,29 @@ export const SMALL_MODEL_HINT_MAX_ESTIMATED_TOKENS = 3000;
|
|
|
16
17
|
export const DEEP_MODEL_HINT_MIN_ESTIMATED_TOKENS = 9000;
|
|
17
18
|
export const DISPATCH_RESULT_MAP_FILENAME = "dispatch-result-map.json";
|
|
18
19
|
export const ACTIVE_DISPATCH_FILENAME = "active-dispatch.json";
|
|
20
|
+
export const DEFAULT_DISPATCH_CONFIRM_THRESHOLD = 10;
|
|
21
|
+
/**
|
|
22
|
+
* FINDING-012: pure-arithmetic fan-out summary the loader can gate on. Given the
|
|
23
|
+
* number of agents (packets emitted this round, after canary/budget filtering)
|
|
24
|
+
* and the resolved wave size, derive the wave count, a human-readable summary,
|
|
25
|
+
* and whether the agent count exceeds the confirmation threshold (default 10).
|
|
26
|
+
* No LLM call, no side effects, no prompting.
|
|
27
|
+
*/
|
|
28
|
+
export function computeDispatchFanout(params) {
|
|
29
|
+
const agentCount = params.agentCount;
|
|
30
|
+
const waveSize = params.waveSize;
|
|
31
|
+
const waveCount = Math.ceil(agentCount / Math.max(1, waveSize));
|
|
32
|
+
const confirmThreshold = params.confirmThreshold ?? DEFAULT_DISPATCH_CONFIRM_THRESHOLD;
|
|
33
|
+
const confirmationRecommended = agentCount > confirmThreshold;
|
|
34
|
+
const dispatchSummary = `${agentCount} agent${agentCount !== 1 ? "s" : ""} across ` +
|
|
35
|
+
`${waveCount} wave${waveCount !== 1 ? "s" : ""} (wave_size=${waveSize})`;
|
|
36
|
+
return {
|
|
37
|
+
agent_count: agentCount,
|
|
38
|
+
wave_count: waveCount,
|
|
39
|
+
confirmation_recommended: confirmationRecommended,
|
|
40
|
+
dispatch_summary: dispatchSummary,
|
|
41
|
+
};
|
|
42
|
+
}
|
|
19
43
|
export function dispatchResultMapPath(runDir) {
|
|
20
44
|
return join(runDir, DISPATCH_RESULT_MAP_FILENAME);
|
|
21
45
|
}
|
|
@@ -200,6 +224,19 @@ export async function prepareDispatchArtifacts(params) {
|
|
|
200
224
|
const lensDefsPath = join(params.packageRoot, "dispatch", "lens-definitions.json");
|
|
201
225
|
const lensDefs = await readJsonFile(lensDefsPath);
|
|
202
226
|
await mkdir(taskResultsDir, { recursive: true });
|
|
227
|
+
// FINDING-009: make the AuditResult JSON-Schema (and the two sibling schemas
|
|
228
|
+
// it $refs) reachable from this run's task-results directory so packet workers
|
|
229
|
+
// can optionally self-validate before calling submit-packet.
|
|
230
|
+
await writePacketSchemaFiles(taskResultsDir, params.packageRoot);
|
|
231
|
+
// FINDING-011: read the prior dispatch state (if any) so a fan-out round can
|
|
232
|
+
// detect a preceding canary that never produced an accepted result.
|
|
233
|
+
let priorActiveDispatch = null;
|
|
234
|
+
try {
|
|
235
|
+
priorActiveDispatch = await readJsonFile(join(artifactsDir, ACTIVE_DISPATCH_FILENAME));
|
|
236
|
+
}
|
|
237
|
+
catch {
|
|
238
|
+
/* none yet */
|
|
239
|
+
}
|
|
203
240
|
const priorResultTaskIds = new Set();
|
|
204
241
|
for (const task of tasks) {
|
|
205
242
|
if (existsSync(taskResultPath(taskResultsDir, task.task_id))) {
|
|
@@ -230,6 +267,34 @@ export async function prepareDispatchArtifacts(params) {
|
|
|
230
267
|
if (resultPathSet.size !== resultPathByTaskId.size) {
|
|
231
268
|
throw new Error("prepare-dispatch generated duplicate result paths; task ids must be uniquely addressable.");
|
|
232
269
|
}
|
|
270
|
+
// Packets come back priority-ordered (high -> medium -> low), so packets[0] is
|
|
271
|
+
// the top-priority packet. Filtering composes in a fixed order: canary first
|
|
272
|
+
// (emit only the top packet on first contact), then the budget cap (top-K).
|
|
273
|
+
//
|
|
274
|
+
// FINDING-011: single-worker canary. On first contact with a multi-packet run,
|
|
275
|
+
// dispatch only the top packet; the held-back packets' tasks keep no result
|
|
276
|
+
// file, so they re-enter `dispatchTasks` on the next call (fan-out).
|
|
277
|
+
const firstContact = priorResultTaskIds.size === 0;
|
|
278
|
+
const canaryEnabled = sessionConfig.dispatch?.canary !== false; // default on
|
|
279
|
+
const doCanary = firstContact && canaryEnabled && packets.length > 1;
|
|
280
|
+
const canaryPacketId = doCanary ? packets[0].packet_id : null;
|
|
281
|
+
const phase = doCanary ? "canary" : "fan_out";
|
|
282
|
+
const postCanaryPackets = doCanary ? packets.slice(0, 1) : packets;
|
|
283
|
+
// FINDING-013: top-K coverage budget. Cap the (already priority-ordered)
|
|
284
|
+
// packets at max_packets; the remainder are recorded as DEFERRED and excluded
|
|
285
|
+
// from the completion check so the run can finish honestly under budget.
|
|
286
|
+
// Budget defaults OFF (no cap) so default behavior is unchanged. Canary takes
|
|
287
|
+
// precedence: a canary round only emits 1 packet regardless of the budget.
|
|
288
|
+
const maxPackets = sessionConfig.dispatch?.max_packets;
|
|
289
|
+
const budgetCapped = typeof maxPackets === "number" &&
|
|
290
|
+
maxPackets >= 0 &&
|
|
291
|
+
maxPackets < postCanaryPackets.length;
|
|
292
|
+
const emitPackets = budgetCapped
|
|
293
|
+
? postCanaryPackets.slice(0, maxPackets)
|
|
294
|
+
: postCanaryPackets;
|
|
295
|
+
const deferredPackets = budgetCapped
|
|
296
|
+
? postCanaryPackets.slice(maxPackets)
|
|
297
|
+
: [];
|
|
233
298
|
const plan = [];
|
|
234
299
|
const resultMapEntries = [];
|
|
235
300
|
for (const task of tasks) {
|
|
@@ -245,7 +310,7 @@ export async function prepareDispatchArtifacts(params) {
|
|
|
245
310
|
let largestLines = 0;
|
|
246
311
|
let largestEstimatedTokens = 0;
|
|
247
312
|
const warnings = [];
|
|
248
|
-
for (const packet of packets) {
|
|
313
|
+
for (const packet of emitPackets) {
|
|
249
314
|
const promptPath = packetPromptPath(taskResultsDir, packet.packet_id);
|
|
250
315
|
const packetTasks = packet.task_ids
|
|
251
316
|
.map((taskId) => tasksById.get(taskId))
|
|
@@ -396,6 +461,11 @@ export async function prepareDispatchArtifacts(params) {
|
|
|
396
461
|
"way to record results, and it writes them inside the artifacts directory for you.",
|
|
397
462
|
"Produce one JSON array containing exactly one AuditResult object for each listed task.",
|
|
398
463
|
"",
|
|
464
|
+
"Schema file (resolve relative to this prompt's directory): audit_result.schema.json",
|
|
465
|
+
" $refs resolved from the same directory: finding.schema.json, audit_task.schema.json",
|
|
466
|
+
"You MAY validate your JSON array against the schema before calling submit-packet. This is optional;",
|
|
467
|
+
" the submit command performs the authoritative validation and will report any errors.",
|
|
468
|
+
"",
|
|
399
469
|
"Required AuditResult fields:",
|
|
400
470
|
" task_id copy from the task metadata",
|
|
401
471
|
" unit_id copy from the task metadata",
|
|
@@ -524,20 +594,57 @@ export async function prepareDispatchArtifacts(params) {
|
|
|
524
594
|
}
|
|
525
595
|
}
|
|
526
596
|
}
|
|
597
|
+
// FINDING-011: when advancing past a canary, warn if it never produced an
|
|
598
|
+
// accepted result. submit-packet writes the per-task result file ONLY after
|
|
599
|
+
// validation passes, so presence of that file == ACCEPTED. We map the recorded
|
|
600
|
+
// canary packet_id back to its task ids via the result map and check whether
|
|
601
|
+
// those tasks now have accepted results (i.e. landed in priorResultTaskIds).
|
|
602
|
+
if (!doCanary && priorActiveDispatch?.phase === "canary" && priorActiveDispatch.canary_packet_id) {
|
|
603
|
+
const canaryAccepted = priorActiveDispatch.run_id === runId
|
|
604
|
+
? (await loadDispatchResultMap(runDir))?.entries
|
|
605
|
+
.filter((entry) => entry.packet_id === priorActiveDispatch.canary_packet_id)
|
|
606
|
+
.every((entry) => priorResultTaskIds.has(entry.task_id)) ?? false
|
|
607
|
+
: false;
|
|
608
|
+
if (!canaryAccepted) {
|
|
609
|
+
warnings.push({
|
|
610
|
+
code: "canary_not_accepted",
|
|
611
|
+
message: `Canary packet ${priorActiveDispatch.canary_packet_id} did not produce an accepted result before fan-out; remaining packets are being dispatched anyway.`,
|
|
612
|
+
});
|
|
613
|
+
}
|
|
614
|
+
}
|
|
527
615
|
const warningsPath = warnings.length > 0
|
|
528
616
|
? join(runDir, "dispatch-warnings.json")
|
|
529
617
|
: null;
|
|
530
618
|
if (warningsPath) {
|
|
531
619
|
await writeJsonFile(warningsPath, warnings);
|
|
532
620
|
}
|
|
621
|
+
// FINDING-013: record deferred packets/tasks so the completion obligation can
|
|
622
|
+
// exclude them under a budget cap (present only when actually capped).
|
|
623
|
+
const deferredPacketIds = deferredPackets.map((packet) => packet.packet_id);
|
|
624
|
+
const deferredTaskIds = deferredPackets.flatMap((packet) => packet.task_ids);
|
|
533
625
|
const activeDispatch = {
|
|
534
626
|
run_id: runId,
|
|
535
627
|
created_at: new Date().toISOString(),
|
|
536
628
|
packet_count: plan.length,
|
|
537
629
|
task_count: orderedTasks.length,
|
|
538
630
|
status: "active",
|
|
631
|
+
phase,
|
|
632
|
+
canary_packet_id: canaryPacketId,
|
|
633
|
+
...(budgetCapped
|
|
634
|
+
? {
|
|
635
|
+
budget_packet_count: postCanaryPackets.length,
|
|
636
|
+
deferred_packet_ids: deferredPacketIds,
|
|
637
|
+
deferred_task_ids: deferredTaskIds,
|
|
638
|
+
}
|
|
639
|
+
: {}),
|
|
539
640
|
};
|
|
540
641
|
await writeJsonFile(join(artifactsDir, ACTIVE_DISPATCH_FILENAME), activeDispatch);
|
|
642
|
+
// FINDING-012: pure-arithmetic fan-out summary the loader can gate on.
|
|
643
|
+
const fanout = computeDispatchFanout({
|
|
644
|
+
agentCount: plan.length,
|
|
645
|
+
waveSize: waveSchedule.wave_size,
|
|
646
|
+
confirmThreshold: sessionConfig.dispatch?.confirm_threshold,
|
|
647
|
+
});
|
|
541
648
|
return {
|
|
542
649
|
run_id: runId,
|
|
543
650
|
dispatch_plan_path: dispatchPlanPath,
|
|
@@ -546,6 +653,14 @@ export async function prepareDispatchArtifacts(params) {
|
|
|
546
653
|
task_count: orderedTasks.length,
|
|
547
654
|
skipped_task_count: priorResultTaskIds.size,
|
|
548
655
|
wave_size: waveSchedule.wave_size,
|
|
656
|
+
phase,
|
|
657
|
+
canary_packet_id: canaryPacketId,
|
|
658
|
+
agent_count: fanout.agent_count,
|
|
659
|
+
wave_count: fanout.wave_count,
|
|
660
|
+
confirmation_recommended: fanout.confirmation_recommended,
|
|
661
|
+
dispatch_summary: fanout.dispatch_summary,
|
|
662
|
+
budget_capped: budgetCapped,
|
|
663
|
+
deferred_packet_count: deferredPackets.length,
|
|
549
664
|
largest_packet: largestPacketId
|
|
550
665
|
? {
|
|
551
666
|
packet_id: largestPacketId,
|
|
@@ -7,6 +7,11 @@ import { DISPATCH_RESULT_MAP_FILENAME, ACTIVE_DISPATCH_FILENAME, loadDispatchRes
|
|
|
7
7
|
import { addFileLineCountHints } from "./lineIndex.js";
|
|
8
8
|
import { isCanonicalResultFilename, getArtifactsDir, getFlag } from "./args.js";
|
|
9
9
|
import { buildWorkerResult } from "./workerResult.js";
|
|
10
|
+
import { PACKET_SCHEMA_FILENAMES } from "../io/runArtifacts.js";
|
|
11
|
+
// Schema pointer files prepare-dispatch copies into task-results/ for optional
|
|
12
|
+
// worker self-validation. They are expected, not stray — skip them when
|
|
13
|
+
// scanning for spurious files.
|
|
14
|
+
const PACKET_SCHEMA_FILENAME_SET = new Set(PACKET_SCHEMA_FILENAMES);
|
|
10
15
|
export async function cmdMergeAndIngest(argv) {
|
|
11
16
|
const runId = getFlag(argv, "--run-id");
|
|
12
17
|
if (!runId)
|
|
@@ -14,9 +19,28 @@ export async function cmdMergeAndIngest(argv) {
|
|
|
14
19
|
const artifactsDir = getArtifactsDir(argv);
|
|
15
20
|
const runDir = join(artifactsDir, "runs", runId);
|
|
16
21
|
const taskResultsDir = join(runDir, "task-results");
|
|
17
|
-
const auditResultsPath = join(runDir, "
|
|
22
|
+
const auditResultsPath = join(runDir, "run-results.json");
|
|
18
23
|
const taskPath = join(runDir, "task.json");
|
|
19
24
|
const tasksPath = join(runDir, "pending-audit-tasks.json");
|
|
25
|
+
const mergeCompletePath = join(runDir, "merge-complete.json");
|
|
26
|
+
// Idempotency: a fully-merged run is terminal. A stray re-invocation for the
|
|
27
|
+
// same run-id (e.g. after the run already advanced to the next deepening
|
|
28
|
+
// round, which rewrites this run dir's pending-audit-tasks.json to the *next*
|
|
29
|
+
// round's tasks) must be a clean no-op — not a spurious "all results missing"
|
|
30
|
+
// hard failure that also truncates the transient results file. Replay the
|
|
31
|
+
// recorded summary and exit 0.
|
|
32
|
+
let priorSummary = null;
|
|
33
|
+
try {
|
|
34
|
+
priorSummary = await readJsonFile(mergeCompletePath);
|
|
35
|
+
}
|
|
36
|
+
catch (e) {
|
|
37
|
+
if (!isFileMissingError(e))
|
|
38
|
+
throw e;
|
|
39
|
+
}
|
|
40
|
+
if (priorSummary) {
|
|
41
|
+
console.log(JSON.stringify({ ...priorSummary, idempotent_replay: true }, null, 2));
|
|
42
|
+
return;
|
|
43
|
+
}
|
|
20
44
|
const workerTask = await readJsonFile(taskPath);
|
|
21
45
|
const resultMap = await loadDispatchResultMap(runDir);
|
|
22
46
|
if (!resultMap) {
|
|
@@ -42,9 +66,14 @@ export async function cmdMergeAndIngest(argv) {
|
|
|
42
66
|
const passing = [];
|
|
43
67
|
const failing = [];
|
|
44
68
|
const seenTaskIds = new Set();
|
|
45
|
-
|
|
69
|
+
const spuriousFiles = [];
|
|
46
70
|
const fallbackByTaskId = new Map();
|
|
47
71
|
for (const filename of files) {
|
|
72
|
+
// Schema pointer files (audit_result/finding/audit_task .schema.json) are
|
|
73
|
+
// copied into task-results/ by prepare-dispatch for optional worker
|
|
74
|
+
// self-validation; they are expected, not stray.
|
|
75
|
+
if (PACKET_SCHEMA_FILENAME_SET.has(filename))
|
|
76
|
+
continue;
|
|
48
77
|
const filePath = resolve(join(taskResultsDir, filename));
|
|
49
78
|
if (expectedPaths.has(filePath))
|
|
50
79
|
continue;
|
|
@@ -68,10 +97,16 @@ export async function cmdMergeAndIngest(argv) {
|
|
|
68
97
|
// task-results/ dir are legitimate and must not inflate the count or bury
|
|
69
98
|
// the real stray-file signal (3 -> 191 over a run before this fix).
|
|
70
99
|
if (!isCanonicalResultFilename(filename)) {
|
|
71
|
-
|
|
72
|
-
process.stderr.write(`[merge-and-ingest] Warning: unexpected file in task-results/: ${filename}\n`);
|
|
100
|
+
spuriousFiles.push(filename);
|
|
73
101
|
}
|
|
74
102
|
}
|
|
103
|
+
// Collapse stray-file warnings into a single stderr line so the real summary
|
|
104
|
+
// (emitted as the sole stdout JSON payload) is never buried under a wall of
|
|
105
|
+
// per-file warnings.
|
|
106
|
+
if (spuriousFiles.length > 0) {
|
|
107
|
+
process.stderr.write(`[merge-and-ingest] Warning: ${spuriousFiles.length} unexpected file(s) in ` +
|
|
108
|
+
`task-results/ ignored: ${spuriousFiles.join(", ")}\n`);
|
|
109
|
+
}
|
|
75
110
|
for (const task of allTasks) {
|
|
76
111
|
const entry = entryByTaskId.get(task.task_id);
|
|
77
112
|
if (!entry) {
|
|
@@ -134,14 +169,18 @@ export async function cmdMergeAndIngest(argv) {
|
|
|
134
169
|
failing.push({ task_id: taskId ?? task.task_id, errors: resultErrors });
|
|
135
170
|
}
|
|
136
171
|
}
|
|
137
|
-
await writeJsonFile(auditResultsPath, passing);
|
|
138
172
|
const failedTasksPath = join(runDir, "failed-tasks.json");
|
|
139
173
|
if (failing.length > 0) {
|
|
140
174
|
await writeJsonFile(failedTasksPath, failing);
|
|
141
175
|
}
|
|
142
176
|
if (passing.length === 0 && failing.length > 0) {
|
|
177
|
+
// Nothing merged and at least one failure: a blocked no-op. Do NOT write the
|
|
178
|
+
// transient results file here — truncating it to [] reads as catastrophic
|
|
179
|
+
// data loss on a re-run when the cumulative audit_results.jsonl store is in
|
|
180
|
+
// fact intact and the first merge had simply already succeeded.
|
|
143
181
|
throw new Error(`All ${failing.length} assigned task result(s) were missing or invalid; blocked before ingestion. See ${failedTasksPath}`);
|
|
144
182
|
}
|
|
183
|
+
await writeJsonFile(auditResultsPath, passing);
|
|
145
184
|
const findingCount = passing.reduce((sum, result) => sum + result.findings.length, 0);
|
|
146
185
|
let result = null;
|
|
147
186
|
if (passing.length > 0) {
|
|
@@ -197,12 +236,12 @@ export async function cmdMergeAndIngest(argv) {
|
|
|
197
236
|
errors: [],
|
|
198
237
|
});
|
|
199
238
|
await writeJsonFile(workerTask.result_path, workerResult);
|
|
200
|
-
|
|
239
|
+
const summaryPayload = {
|
|
201
240
|
run_id: runId,
|
|
202
241
|
status,
|
|
203
242
|
accepted_count: passing.length,
|
|
204
243
|
rejected_count: failing.length,
|
|
205
|
-
spurious_file_count:
|
|
244
|
+
spurious_file_count: spuriousFiles.length,
|
|
206
245
|
finding_count: findingCount,
|
|
207
246
|
audit_results_path: auditResultsPath,
|
|
208
247
|
...(retryDispatchPath ? { retry_dispatch_path: retryDispatchPath } : {}),
|
|
@@ -212,7 +251,15 @@ export async function cmdMergeAndIngest(argv) {
|
|
|
212
251
|
progress_summary: workerResult.summary,
|
|
213
252
|
next_likely_step: workerResult.next_likely_step,
|
|
214
253
|
} : {}),
|
|
215
|
-
}
|
|
254
|
+
};
|
|
255
|
+
// Record a completion marker for a fully-merged run so a stray re-invocation
|
|
256
|
+
// replays this summary (above) instead of re-processing — and possibly
|
|
257
|
+
// clobbering — terminal state. Only on full success: a partial merge is meant
|
|
258
|
+
// to be re-run after the failed packets are retried, so it stays replayable.
|
|
259
|
+
if (failing.length === 0) {
|
|
260
|
+
await writeJsonFile(mergeCompletePath, summaryPayload);
|
|
261
|
+
}
|
|
262
|
+
console.log(JSON.stringify(summaryPayload, null, 2));
|
|
216
263
|
if (failing.length > 0) {
|
|
217
264
|
process.exitCode = 2;
|
|
218
265
|
}
|
|
@@ -35,6 +35,42 @@ async function runDeterministicForNextStep(params) {
|
|
|
35
35
|
const FINALIZATION_CYCLE_TOLERANCE = 16;
|
|
36
36
|
const seenStateSignatures = new Set();
|
|
37
37
|
const obligationTrail = [];
|
|
38
|
+
// Build the terminal step for a deterministic loop that has stopped advancing
|
|
39
|
+
// (hit the run backstop or the finalization cycle guard). A rendered report is
|
|
40
|
+
// the deliverable: if synthesis already produced one — or the state is formally
|
|
41
|
+
// complete — present it instead of reporting the stopped loop as a bare
|
|
42
|
+
// "blocked" failure. A completed audit must never surface as blocked just
|
|
43
|
+
// because finalization kept churning (e.g. a runtime_validation <-> synthesis
|
|
44
|
+
// ping-pong, or revision churn from filesystem retries) after the report was
|
|
45
|
+
// written. With no report yet, the stop is a genuine block.
|
|
46
|
+
async function terminalStep(bundle, state, blockedReason) {
|
|
47
|
+
const reportRendered = state.status === "complete" || Boolean(bundle.audit_report);
|
|
48
|
+
await writeHandoffOnly({
|
|
49
|
+
root: params.root,
|
|
50
|
+
artifactsDir: params.artifactsDir,
|
|
51
|
+
bundle,
|
|
52
|
+
audit_state: state,
|
|
53
|
+
progress_summary: reportRendered && state.status !== "complete"
|
|
54
|
+
? `Audit report already rendered; ending run. ${blockedReason}`
|
|
55
|
+
: blockedReason,
|
|
56
|
+
providerName: LOCAL_SUBPROCESS_PROVIDER_NAME,
|
|
57
|
+
});
|
|
58
|
+
if (!reportRendered) {
|
|
59
|
+
return { kind: "blocked", state, bundle, reason: blockedReason };
|
|
60
|
+
}
|
|
61
|
+
const promoted = await promoteFinalAuditReport({
|
|
62
|
+
artifactsDir: params.artifactsDir,
|
|
63
|
+
repoRoot: params.root,
|
|
64
|
+
});
|
|
65
|
+
return {
|
|
66
|
+
kind: "complete",
|
|
67
|
+
state,
|
|
68
|
+
bundle,
|
|
69
|
+
finalReportPath: promoted.promoted
|
|
70
|
+
? join(params.root, AUDIT_REPORT_FILENAME)
|
|
71
|
+
: join(params.artifactsDir, AUDIT_REPORT_FILENAME),
|
|
72
|
+
};
|
|
73
|
+
}
|
|
38
74
|
for (let index = 0; index < params.maxRuns; index++) {
|
|
39
75
|
const bundle = await loadArtifactBundle(params.artifactsDir);
|
|
40
76
|
const decision = decideNextStep(bundle);
|
|
@@ -318,24 +354,14 @@ async function runDeterministicForNextStep(params) {
|
|
|
318
354
|
`progress; stopping. Cycling obligations: ${cycle.join(" -> ")}.`,
|
|
319
355
|
timestamp: new Date().toISOString(),
|
|
320
356
|
});
|
|
321
|
-
return {
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
bundle: result.updated_bundle,
|
|
325
|
-
reason: "Finalization is not converging: deterministic executors kept revisiting " +
|
|
326
|
-
`prior artifact states (${cycle.join(" -> ")}). The report has been ` +
|
|
327
|
-
"rendered; review whether these obligations are erroneously invalidating each other.",
|
|
328
|
-
};
|
|
357
|
+
return await terminalStep(result.updated_bundle, result.audit_state, "Finalization is not converging: deterministic executors kept revisiting " +
|
|
358
|
+
`prior artifact states (${cycle.join(" -> ")}). Review whether these ` +
|
|
359
|
+
"obligations are erroneously invalidating each other.");
|
|
329
360
|
}
|
|
330
361
|
}
|
|
331
362
|
const bundle = await loadArtifactBundle(params.artifactsDir);
|
|
332
363
|
const state = deriveAuditState(bundle);
|
|
333
|
-
return {
|
|
334
|
-
kind: "blocked",
|
|
335
|
-
state,
|
|
336
|
-
bundle,
|
|
337
|
-
reason: `Reached max run limit (${params.maxRuns}) before a review, report, or blocker step was ready.`,
|
|
338
|
-
};
|
|
364
|
+
return await terminalStep(bundle, state, `Reached max run limit (${params.maxRuns}) before a review, report, or blocker step was ready.`);
|
|
339
365
|
}
|
|
340
366
|
export async function cmdNextStep(argv) {
|
|
341
367
|
const root = getRootDir(argv);
|
|
@@ -429,7 +455,9 @@ export async function cmdNextStep(argv) {
|
|
|
429
455
|
const designReviewResultsPath = join(artifactsDir, "incoming", "design-review-findings.json");
|
|
430
456
|
await mkdir(join(artifactsDir, "incoming"), { recursive: true });
|
|
431
457
|
const continueCommand = nextStepCommand(root, artifactsDir);
|
|
432
|
-
const prompt = renderDesignReviewPrompt(result.bundle);
|
|
458
|
+
const prompt = renderDesignReviewPrompt(result.bundle, {
|
|
459
|
+
max_units: sessionConfig.design_review?.max_units,
|
|
460
|
+
});
|
|
433
461
|
const fullPrompt = [
|
|
434
462
|
prompt,
|
|
435
463
|
"## Results path",
|
package/dist/cli/prompts.d.ts
CHANGED
|
@@ -10,6 +10,8 @@ export declare function renderDispatchReviewPrompt(params: {
|
|
|
10
10
|
dispatchQuotaPath: string | null;
|
|
11
11
|
hostCanRestrictSubagentTools: boolean;
|
|
12
12
|
hostCanSelectSubagentModel: boolean;
|
|
13
|
+
phase?: "canary" | "fan_out";
|
|
14
|
+
canaryPacketId?: string | null;
|
|
13
15
|
}): string;
|
|
14
16
|
export declare function renderSingleTaskFallbackStepPrompt(params: {
|
|
15
17
|
singleTaskPromptPath: string;
|
package/dist/cli/prompts.js
CHANGED
|
@@ -78,10 +78,19 @@ export function renderDispatchReviewPrompt(params) {
|
|
|
78
78
|
"",
|
|
79
79
|
"Launch one subagent for each entry in the plan.",
|
|
80
80
|
];
|
|
81
|
+
const canaryLines = params.phase === "canary"
|
|
82
|
+
? [
|
|
83
|
+
"",
|
|
84
|
+
"This is a CANARY round: the plan contains only the single top-priority packet. " +
|
|
85
|
+
"Dispatch it, run merge-and-ingest, then run next-step — the remaining packets fan out " +
|
|
86
|
+
"on the following step once this packet's result is accepted.",
|
|
87
|
+
]
|
|
88
|
+
: [];
|
|
81
89
|
return [
|
|
82
90
|
"# audit-code dispatch review",
|
|
83
91
|
"",
|
|
84
92
|
...dispatchDataLines,
|
|
93
|
+
...canaryLines,
|
|
85
94
|
"",
|
|
86
95
|
"Pass each `entry.prompt_path` literally to its subagent; do not load packet prompt files into this orchestrator context.",
|
|
87
96
|
"",
|
package/dist/cli/reviewRun.js
CHANGED
|
@@ -90,7 +90,7 @@ export async function ensureSemanticReviewRun(params) {
|
|
|
90
90
|
const paths = getRunPaths(params.artifactsDir, runId);
|
|
91
91
|
const pendingTasks = await addFileLineCountHints(params.root, buildPendingAuditTasks(params.bundle));
|
|
92
92
|
const pendingTasksPath = join(paths.runDir, "pending-audit-tasks.json");
|
|
93
|
-
const auditResultsPath = join(paths.runDir, "
|
|
93
|
+
const auditResultsPath = join(paths.runDir, "run-results.json");
|
|
94
94
|
const taskReadPaths = new Set();
|
|
95
95
|
for (const pt of pendingTasks) {
|
|
96
96
|
for (const fp of pt.file_paths)
|
|
@@ -70,7 +70,7 @@ async function buildParallelWaveSlots(params) {
|
|
|
70
70
|
runCount += 1;
|
|
71
71
|
const slotRunId = buildRunId(obligationId, runCount);
|
|
72
72
|
const slotPaths = getRunPaths(artifactsDir, slotRunId);
|
|
73
|
-
const slotAuditResultsPath = join(slotPaths.runDir, "
|
|
73
|
+
const slotAuditResultsPath = join(slotPaths.runDir, "run-results.json");
|
|
74
74
|
const slotPendingTasksPath = join(slotPaths.runDir, "pending-audit-tasks.json");
|
|
75
75
|
const slotReadPaths = new Set();
|
|
76
76
|
for (const t of group) {
|
|
@@ -398,7 +398,7 @@ async function runSingleWorkerStep(params) {
|
|
|
398
398
|
? join(paths.runDir, "pending-audit-tasks.json")
|
|
399
399
|
: undefined;
|
|
400
400
|
const providerAuditResultsPath = preferredExecutor === "agent"
|
|
401
|
-
? join(paths.runDir, "
|
|
401
|
+
? join(paths.runDir, "run-results.json")
|
|
402
402
|
: auditResultsPath;
|
|
403
403
|
const providerReadPaths = new Set();
|
|
404
404
|
if (pendingAuditTasks) {
|
|
@@ -694,7 +694,7 @@ export async function cmdRunToCompletion(argv) {
|
|
|
694
694
|
const blockPaths = getRunPaths(artifactsDir, blockRunId);
|
|
695
695
|
const blockPendingTasks = await addFileLineCountHints(root, buildPendingAuditTasks(bundle));
|
|
696
696
|
const blockPendingTasksPath = join(blockPaths.runDir, "pending-audit-tasks.json");
|
|
697
|
-
const blockAuditResultsPath = join(blockPaths.runDir, "
|
|
697
|
+
const blockAuditResultsPath = join(blockPaths.runDir, "run-results.json");
|
|
698
698
|
const blockReadPaths = new Set();
|
|
699
699
|
for (const pt of blockPendingTasks) {
|
|
700
700
|
for (const fp of pt.file_paths)
|
|
@@ -1031,23 +1031,36 @@ export async function cmdRunToCompletion(argv) {
|
|
|
1031
1031
|
const bundle = await loadArtifactBundle(artifactsDir);
|
|
1032
1032
|
const decision = decideNextStep(bundle);
|
|
1033
1033
|
const state = decision.state;
|
|
1034
|
-
if
|
|
1034
|
+
// A rendered report is the deliverable: if synthesis already produced one (or
|
|
1035
|
+
// the state is formally complete), finish the run on it instead of stranding
|
|
1036
|
+
// it in the artifacts dir behind a bare "max run limit" non-completion. This
|
|
1037
|
+
// mirrors next-step's terminalStep so both loops present a completed audit the
|
|
1038
|
+
// same way, even when finalization churned (runtime_validation <-> synthesis
|
|
1039
|
+
// ping-pong, or filesystem-retry revision churn) up to the backstop. With no
|
|
1040
|
+
// report yet, the run limit is a genuine non-terminal stop.
|
|
1041
|
+
const reportRendered = state.status === "complete" || Boolean(bundle.audit_report);
|
|
1042
|
+
if (reportRendered) {
|
|
1035
1043
|
await clearDispatchFiles(artifactsDir);
|
|
1036
1044
|
}
|
|
1045
|
+
const terminalState = reportRendered && state.status !== "complete"
|
|
1046
|
+
? { ...state, status: "complete" }
|
|
1047
|
+
: state;
|
|
1037
1048
|
await emitEnvelope({
|
|
1038
1049
|
root,
|
|
1039
1050
|
artifactsDir,
|
|
1040
1051
|
bundle,
|
|
1041
|
-
audit_state:
|
|
1052
|
+
audit_state: terminalState,
|
|
1042
1053
|
selected_obligation: lastResult?.obligation_id ?? decision.selected_obligation,
|
|
1043
1054
|
selected_executor: lastResult?.selected_executor ?? decision.selected_executor,
|
|
1044
1055
|
progress_made: anyProgress,
|
|
1045
1056
|
artifacts_written: Array.from(artifactsWritten),
|
|
1046
|
-
progress_summary:
|
|
1047
|
-
|
|
1057
|
+
progress_summary: reportRendered && state.status !== "complete"
|
|
1058
|
+
? `Audit report already rendered; completing the run after reaching the max run limit (${maxRuns}) during finalization.`
|
|
1059
|
+
: `Reached max run limit (${maxRuns}) before terminal state.`,
|
|
1060
|
+
next_likely_step: reportRendered ? null : decision.selected_obligation,
|
|
1048
1061
|
providerName: provider.name,
|
|
1049
1062
|
});
|
|
1050
|
-
if (
|
|
1063
|
+
if (reportRendered) {
|
|
1051
1064
|
await promoteFinalAuditReport({ artifactsDir, repoRoot: root });
|
|
1052
1065
|
}
|
|
1053
1066
|
}
|