auditor-lambda 0.10.0 → 0.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -24,6 +24,17 @@ export declare function digestId(value: string): string;
24
24
  export declare function safeArtifactStem(value: string): string;
25
25
  export declare function artifactNameForId(value: string, extension: string): string;
26
26
  export declare function quoteCommandArg(value: string): string;
27
+ /**
28
+ * Normalize a generated command token to POSIX path separators. These command
29
+ * strings are embedded in step prompts and `allowed_commands` and run by the
30
+ * host — often through a bash-like shell, which treats `\` as an escape and
31
+ * silently corrupts Windows absolute paths (`node C:\a\b.mjs` collapses to
32
+ * `node C:ab.mjs`). Node accepts forward slashes on Windows, and `/` survives
33
+ * bash, PowerShell, and cmd alike. Only tokens that actually carry a backslash
34
+ * are touched, and no non-path argument in this CLI contains one, so this is a
35
+ * targeted normalization rather than a blanket rewrite.
36
+ */
37
+ export declare function toPosixCommandToken(value: string): string;
27
38
  export declare function renderCommand(argv: string[]): string;
28
39
  export declare function summarizeLaunchExit(result: {
29
40
  accepted?: boolean;
package/dist/cli/args.js CHANGED
@@ -80,8 +80,21 @@ export function artifactNameForId(value, extension) {
80
80
  export function quoteCommandArg(value) {
81
81
  return /[\s"]/u.test(value) ? `"${value.replace(/"/g, '\\"')}"` : value;
82
82
  }
83
/**
 * Rewrite every backslash in a generated command token as a forward slash.
 *
 * Rendered command strings are embedded in step prompts and in
 * `allowed_commands`, and the host frequently runs them through a bash-like
 * shell. Such a shell reads `\` as an escape character, so a Windows absolute
 * path is silently mangled (`node C:\a\b.mjs` becomes `node C:ab.mjs`).
 * Forward slashes are accepted by Node on Windows and pass through bash,
 * PowerShell, and cmd intact.
 *
 * A token without a backslash comes back unchanged. No non-path argument in
 * this CLI contains a backslash, which keeps this a targeted normalization
 * rather than a blanket rewrite.
 */
export function toPosixCommandToken(value) {
  // Splitting on "\" and re-joining with "/" swaps every occurrence; a token
  // with no backslash is a single segment and is reproduced as-is.
  const segments = value.split("\\");
  return segments.join("/");
}
83
96
  export function renderCommand(argv) {
84
- return argv.map((item) => quoteCommandArg(item)).join(" ");
97
+ return argv.map((item) => quoteCommandArg(toPosixCommandToken(item))).join(" ");
85
98
  }
86
99
  export function summarizeLaunchExit(result) {
87
100
  if (result.accepted !== false && !result.error) {
@@ -9,7 +9,7 @@ import { orderTasksForPacketReview, buildReviewPackets, sizeIndexFromManifest, }
9
9
  import { buildFileAnchorSummary } from "../orchestrator/fileAnchors.js";
10
10
  import { resolveFreshSessionProviderName } from "../providers/index.js";
11
11
  import { loadSessionConfig } from "../supervisor/sessionConfig.js";
12
- import { scheduleWave, buildProviderModelKey, resolveHostModel, readQuotaState, resolveHostActiveSubagentLimit, lookupDiscoveredLimits, mergeDiscoveredLimits, } from "../quota/index.js";
12
+ import { computeDispatchCapacity, buildProviderModelKey, resolveHostModel, readQuotaState, resolveHostActiveSubagentLimit, lookupDiscoveredLimits, mergeDiscoveredLimits, } from "../quota/index.js";
13
13
  import { taskResultPath, packetPromptPath, artifactNameForId, toBase64Url, fromBase64Url, getFlag, } from "./args.js";
14
14
  export const LARGE_FILE_PACKET_TARGET_LINES = 2500;
15
15
  export const SMALL_MODEL_HINT_MAX_LINES = 500;
@@ -274,7 +274,17 @@ export async function prepareDispatchArtifacts(params) {
274
274
  // FINDING-011: single-worker canary. On first contact with a multi-packet run,
275
275
  // dispatch only the top packet; the held-back packets' tasks keep no result
276
276
  // file, so they re-enter `dispatchTasks` on the next call (fan-out).
277
- const firstContact = priorResultTaskIds.size === 0;
277
+ //
278
+ // Graduation signal: the canary fires only on the FIRST dispatch of a run and
279
+ // then fans out. "First dispatch" is recorded directly by active-dispatch.json
280
+ // (written at the end of every prepareDispatch), so derive it from
281
+ // priorActiveDispatch.run_id. The previous signal — "no pending task has a
282
+ // result file" (priorResultTaskIds.size === 0) — silently broke: merge-and-ingest
283
+ // prunes accepted task_ids out of pending-audit-tasks.json, so post-canary the
284
+ // still-pending tasks have no result files, priorResultTaskIds stayed empty, and
285
+ // the canary re-fired every cycle (1 packet forever, never reaching fan-out).
286
+ const priorDispatchThisRun = priorActiveDispatch?.run_id === runId;
287
+ const firstContact = !priorDispatchThisRun;
278
288
  const canaryEnabled = sessionConfig.dispatch?.canary !== false; // default on
279
289
  const doCanary = firstContact && canaryEnabled && packets.length > 1;
280
290
  const canaryPacketId = doCanary ? packets[0].packet_id : null;
@@ -554,17 +564,27 @@ export async function prepareDispatchArtifacts(params) {
554
564
  DEFAULT_EMPIRICAL_HALF_LIFE_HOURS;
555
565
  const quotaSource = buildQuotaSource({ halfLifeHours });
556
566
  const quotaSourceSnapshot = await quotaSource.queryCurrentUsage(quotaProviderKey).catch(() => null);
557
- const waveSchedule = scheduleWave({
567
+ // Size the dispatch just-in-time against the full pending layout (one token
568
+ // estimate per emitted packet) and the host pool's current limits, rather than
569
+ // a preset wave size. `parallel_workers` is no longer the ambition — it is
570
+ // folded into hostConcurrencyLimit as a ceiling (resolveHostActiveSubagentLimit).
571
+ // Today there is a single pool (the conversation host's subagents); a
572
+ // heterogeneous provider pool slots in here without changing the call.
573
+ const hostPool = {
574
+ id: quotaProviderKey,
558
575
  providerName: quotaProviderName,
559
- sessionConfig,
560
576
  hostModel,
561
- requestedConcurrency: sessionConfig.parallel_workers ?? plan.length,
562
- estimatedSlotTokens: perPacketTokens,
563
- quotaStateEntry,
564
577
  hostConcurrencyLimit,
578
+ quotaStateEntry,
565
579
  discoveredLimits,
566
580
  quotaSourceSnapshot,
581
+ };
582
+ const dispatchCapacity = computeDispatchCapacity({
583
+ pools: [hostPool],
584
+ sessionConfig,
585
+ pendingItemTokens: perPacketTokens,
567
586
  });
587
+ const waveSchedule = dispatchCapacity.primary.schedule;
568
588
  const dispatchQuota = {
569
589
  contract_version: "audit-code-dispatch-quota/v1alpha2",
570
590
  run_id: runId,
@@ -573,9 +593,9 @@ export async function prepareDispatchArtifacts(params) {
573
593
  confidence: waveSchedule.confidence,
574
594
  source: waveSchedule.source,
575
595
  host_concurrency_limit: waveSchedule.host_concurrency_limit,
576
- wave_size: waveSchedule.wave_size,
577
- estimated_wave_tokens: waveSchedule.estimated_wave_tokens,
578
- cooldown_until: waveSchedule.cooldown_until,
596
+ wave_size: dispatchCapacity.total_slots,
597
+ estimated_wave_tokens: dispatchCapacity.estimated_wave_tokens,
598
+ cooldown_until: dispatchCapacity.cooldown_until,
579
599
  quota_source_snapshot: waveSchedule.quota_source_snapshot ?? null,
580
600
  backoff_state: null,
581
601
  };
@@ -642,7 +662,7 @@ export async function prepareDispatchArtifacts(params) {
642
662
  // FINDING-012: pure-arithmetic fan-out summary the loader can gate on.
643
663
  const fanout = computeDispatchFanout({
644
664
  agentCount: plan.length,
645
- waveSize: waveSchedule.wave_size,
665
+ waveSize: dispatchCapacity.total_slots,
646
666
  confirmThreshold: sessionConfig.dispatch?.confirm_threshold,
647
667
  });
648
668
  return {
@@ -652,7 +672,7 @@ export async function prepareDispatchArtifacts(params) {
652
672
  packet_count: plan.length,
653
673
  task_count: orderedTasks.length,
654
674
  skipped_task_count: priorResultTaskIds.size,
655
- wave_size: waveSchedule.wave_size,
675
+ wave_size: dispatchCapacity.total_slots,
656
676
  phase,
657
677
  canary_packet_id: canaryPacketId,
658
678
  agent_count: fanout.agent_count,
@@ -65,6 +65,13 @@ export async function cmdMergeAndIngest(argv) {
65
65
  }
66
66
  const passing = [];
67
67
  const failing = [];
68
+ // Pending tasks that were NOT dispatched this round (canary emitted only the
69
+ // top packet, or a budget cap deferred packets). They are not failures — they
70
+ // re-enter dispatch on the next round — so they are tracked separately and must
71
+ // never inflate rejected_count, force a non-zero exit, or gate the completion
72
+ // marker. Conflating them with failures is what produced the spurious
73
+ // "180 failed" + exit-2 on every canary merge.
74
+ const notDispatched = [];
68
75
  const seenTaskIds = new Set();
69
76
  const spuriousFiles = [];
70
77
  const fallbackByTaskId = new Map();
@@ -110,10 +117,9 @@ export async function cmdMergeAndIngest(argv) {
110
117
  for (const task of allTasks) {
111
118
  const entry = entryByTaskId.get(task.task_id);
112
119
  if (!entry) {
113
- failing.push({
114
- task_id: task.task_id,
115
- errors: ["Missing dispatch result-map entry for assigned task."],
116
- });
120
+ // No result-map entry => this pending task was not dispatched this round.
121
+ // Leave it pending for the next dispatch; it is not a failure.
122
+ notDispatched.push(task.task_id);
117
123
  continue;
118
124
  }
119
125
  const filePath = entry.result_path;
@@ -197,7 +203,11 @@ export async function cmdMergeAndIngest(argv) {
197
203
  try {
198
204
  const dispatch = await readJsonFile(activeDispatchPath);
199
205
  if (dispatch.run_id === runId) {
200
- dispatch.status = failing.length > 0 ? "active" : "merged";
206
+ // "merged" only when this round is fully drained: every dispatched task
207
+ // accepted AND nothing held back. A canary (notDispatched > 0) stays
208
+ // "active" because the fan-out round on the same run-id still has to merge.
209
+ dispatch.status =
210
+ failing.length > 0 || notDispatched.length > 0 ? "active" : "merged";
201
211
  await writeJsonFile(activeDispatchPath, dispatch);
202
212
  }
203
213
  }
@@ -221,7 +231,11 @@ export async function cmdMergeAndIngest(argv) {
221
231
  process.stderr.write(`[merge-and-ingest] ${passing.length} accepted, ${failing.length} failed. ` +
222
232
  `Retry packets: ${failedPacketIds.join(", ")}\n`);
223
233
  }
224
- const status = failing.length > 0
234
+ // "partial" whenever work remains for this run — either genuine dispatched
235
+ // failures (failing) or tasks held back this round (notDispatched). The exit
236
+ // code below distinguishes the two: only genuine failures exit non-zero, so a
237
+ // canary reports status "partial" but exits 0 (progressing, not an error).
238
+ const status = failing.length > 0 || notDispatched.length > 0
225
239
  ? "partial"
226
240
  : (result?.progress_made ? "completed" : "no_progress");
227
241
  const workerResult = buildWorkerResult({
@@ -241,6 +255,7 @@ export async function cmdMergeAndIngest(argv) {
241
255
  status,
242
256
  accepted_count: passing.length,
243
257
  rejected_count: failing.length,
258
+ not_dispatched_count: notDispatched.length,
244
259
  spurious_file_count: spuriousFiles.length,
245
260
  finding_count: findingCount,
246
261
  audit_results_path: auditResultsPath,
@@ -254,9 +269,11 @@ export async function cmdMergeAndIngest(argv) {
254
269
  };
255
270
  // Record a completion marker for a fully-merged run so a stray re-invocation
256
271
  // replays this summary (above) instead of re-processing — and possibly
257
- // clobbering — terminal state. Only on full success: a partial merge is meant
258
- // to be re-run after the failed packets are retried, so it stays replayable.
259
- if (failing.length === 0) {
272
+ // clobbering — terminal state. Only when this round is fully drained: genuine
273
+ // failures stay replayable for retry, and a canary (notDispatched > 0) must NOT
274
+ // be marked complete or the fan-out merge on the same run-id would short-circuit
275
+ // to an idempotent replay and silently drop the fan-out results.
276
+ if (failing.length === 0 && notDispatched.length === 0) {
260
277
  await writeJsonFile(mergeCompletePath, summaryPayload);
261
278
  }
262
279
  console.log(JSON.stringify(summaryPayload, null, 2));
@@ -107,6 +107,8 @@ export function renderDispatchReviewPrompt(params) {
107
107
  "",
108
108
  "**After all waves complete:**",
109
109
  "",
110
+ "Run these backend commands directly — do not pipe them through a token-compression wrapper (e.g. `opentoken wrap`). Their JSON / prompt-contract output is parsed verbatim by the backend and this host, and wrapping corrupts it.",
111
+ "",
110
112
  "Run exactly:",
111
113
  "",
112
114
  ` ${mergeCommand}`,
@@ -3,6 +3,8 @@ export { resolveLimits, lookupKnownModel, classifyProvider, readQuotaState, writ
3
3
  export type { LimitResolutionResult, ResolveLimitsOptions, ProviderType, ResolvedLimits, LimitSource, LimitConfidence, HostConcurrencyLimit, HostConcurrencyLimitSource, QuotaState, QuotaStateEntry, ConcurrencyBucket, WaveSchedule, BackoffState, ObservedWaveOutcome, RateLimitDetectionResult, SlidingWindowResult, QuotaSource, QuotaUsageSnapshot, ErrorParser, } from "@audit-tools/shared";
4
4
  export { scheduleWave, buildProviderModelKey, resolveHostModel } from "@audit-tools/shared";
5
5
  export type { ScheduleWaveOptions } from "@audit-tools/shared";
6
+ export { computeDispatchCapacity } from "@audit-tools/shared";
7
+ export type { CapacityPool, PoolDispatchAllocation, DispatchCapacity, } from "@audit-tools/shared";
6
8
  export { detectHostActiveSubagentLimit, resolveHostActiveSubagentLimit, } from "./hostLimits.js";
7
9
  export { lookupDiscoveredLimits, updateDiscoveredLimits, mergeDiscoveredLimits, readDiscoveredLimitsCache, writeDiscoveredLimitsCache, } from "./discoveredLimits.js";
8
10
  export type { DiscoveredRateLimits, DiscoveredLimitsCache, DiscoveredLimitsCacheEntry } from "./discoveredLimits.js";
@@ -4,6 +4,10 @@ export { resolveLimits, lookupKnownModel, classifyProvider, readQuotaState, writ
4
4
  // both orchestrators). Auditor passes its discovered-limits via the structural
5
5
  // DiscoveredRateLimitsInput the shared scheduler accepts.
6
6
  export { scheduleWave, buildProviderModelKey, resolveHostModel } from "@audit-tools/shared";
7
+ // Capacity model: the JIT, multi-pool-capable layer both orchestrators size
8
+ // dispatch with. Single host pool today; heterogeneous provider pools slot in
9
+ // without changing call sites.
10
+ export { computeDispatchCapacity } from "@audit-tools/shared";
7
11
  // Auditor-specific: discovered limits, header extraction
8
12
  export { detectHostActiveSubagentLimit, resolveHostActiveSubagentLimit, } from "./hostLimits.js";
9
13
  export { lookupDiscoveredLimits, updateDiscoveredLimits, mergeDiscoveredLimits, readDiscoveredLimitsCache, writeDiscoveredLimitsCache, } from "./discoveredLimits.js";
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "auditor-lambda",
3
- "version": "0.10.0",
3
+ "version": "0.10.1",
4
4
  "private": false,
5
5
  "description": "Portable hybrid code-auditing framework for arbitrary repositories.",
6
6
  "type": "module",