auditor-lambda 0.3.21 → 0.3.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2685,6 +2685,11 @@ export async function runAuditCodeWrapper({
2685
2685
  return;
2686
2686
  }
2687
2687
 
2688
+ if (argv[0] === 'quota') {
2689
+ await runDistCommand('quota', argv.slice(1), { ensureArtifactsDir: true });
2690
+ return;
2691
+ }
2692
+
2688
2693
  if (argv[0] === 'submit-packet') {
2689
2694
  await runDistCommand('submit-packet', argv.slice(1));
2690
2695
  return;
package/dist/cli.js CHANGED
@@ -32,6 +32,7 @@ import { buildReviewPackets, orderTasksForPacketReview, } from "./orchestrator/r
32
32
  import { buildFileAnchorSummary, } from "./orchestrator/fileAnchors.js";
33
33
  import { LOCAL_SUBPROCESS_PROVIDER_NAME } from "./providers/constants.js";
34
34
  import { runAuditCodeMcpServer } from "./mcp/server.js";
35
+ import { scheduleWave, buildProviderModelKey, readQuotaState, recordWaveOutcome, resolveLimits, probeProvider, computeMaxSafeConcurrency, getQuotaStatePath, } from "./quota/index.js";
35
36
  const packageRoot = resolve(dirname(fileURLToPath(import.meta.url)), "..");
36
37
  const ADVANCE_AUDIT_CONTRACT_VERSION = "audit-code/v1alpha1";
37
38
  const WORKER_RESULT_CONTRACT_VERSION = "audit-code-worker-result/v1alpha1";
@@ -178,6 +179,27 @@ function getTimeoutMs(argv, sessionConfig) {
178
179
  function getExplicitProvider(argv) {
179
180
  return getFlag(argv, "--provider");
180
181
  }
182
/**
 * Read the value of the `--host-model` CLI flag.
 *
 * @param {string[]} argv - CLI argument vector handed to `getFlag`.
 * @returns {*} Whatever `getFlag` yields for `--host-model`, or `null` when
 *   that result is `null`/`undefined`.
 */
function getHostModel(argv) {
    const flagValue = getFlag(argv, "--host-model");
    return flagValue ?? null;
}
185
/**
 * Resolve the quota probe mode.
 *
 * Precedence: the `--quota-probe` flag, then `sessionConfig.quota.probe`,
 * then `"auto"`. Any value other than `"auto"`, `"never"` or `"force"` is
 * replaced by `"auto"`.
 *
 * @param {string[]} argv - CLI argument vector handed to `getFlag`.
 * @param {object} sessionConfig - Session configuration; `quota` is optional.
 * @returns {"auto"|"never"|"force"} The effective probe mode.
 */
function getQuotaProbeMode(argv, sessionConfig) {
    const requested = getFlag(argv, "--quota-probe") ?? sessionConfig.quota?.probe ?? "auto";
    switch (requested) {
        case "auto":
        case "never":
        case "force":
            return requested;
        default:
            return "auto";
    }
}
191
/**
 * Decide whether an error message looks like a provider rate-limit failure.
 *
 * The input is coerced with `String()` so that Error objects and nullish
 * values do not throw. The status code 429 is only recognised as a
 * standalone number, so durations or ids that merely contain the digits
 * (e.g. "14290ms") are not misclassified.
 *
 * @param {*} errorText - Error message (or any value convertible to a string).
 * @returns {boolean} `true` when the text mentions a standalone 429,
 *   "rate limit" or "rate_limit" (case-insensitive).
 */
function detectRateLimitError(errorText) {
    const lower = String(errorText ?? "").toLowerCase();
    // 429 must not be embedded in a longer number or decimal fraction.
    const mentionsStatus429 = /(?<![\d.])429(?!\d)/.test(lower);
    return mentionsStatus429 || lower.includes("rate limit") || lower.includes("rate_limit");
}
195
/**
 * Compute the ISO timestamp until which dispatch should cool down.
 *
 * @param {string|null|undefined} resetAtHeader - Reset time as a string that
 *   `Date` can parse. Falsy or unparseable values are ignored.
 * @returns {string} The parsed reset time in ISO-8601 form, or 60 seconds
 *   from now when no usable reset time was supplied.
 */
function defaultCooldownUntil(resetAtHeader) {
    const sixtySecondsFromNow = () => new Date(Date.now() + 60_000).toISOString();
    if (!resetAtHeader) {
        return sixtySecondsFromNow();
    }
    const resetMs = new Date(resetAtHeader).getTime();
    return Number.isNaN(resetMs) ? sixtySecondsFromNow() : new Date(resetMs).toISOString();
}
181
203
  function resolveRunProviderName(argv, sessionConfig) {
182
204
  return resolveFreshSessionProviderName(getExplicitProvider(argv), sessionConfig);
183
205
  }
@@ -528,14 +550,30 @@ function renderDispatchReviewPrompt(params) {
528
550
  const toolsLine = params.hostCanRestrictSubagentTools
529
551
  ? "Restrict review subagents to read/search plus the packet submit command named in their prompt. Do not give them source edit/write tools."
530
552
  : "Do not ask the user about per-subagent tool restrictions; this host did not report a callable restriction facility.";
553
+ const fileLines = params.dispatchQuotaPath
554
+ ? [
555
+ "Dispatch is prepared. Read both of these files:",
556
+ "",
557
+ ` Dispatch plan: ${params.dispatchPlanPath}`,
558
+ ` Dispatch quota: ${params.dispatchQuotaPath}`,
559
+ "",
560
+ "The quota file contains a `wave_size` field. Dispatch at most `wave_size` subagents at a time. If `cooldown_until` is non-null, wait until that timestamp before starting the first wave.",
561
+ "",
562
+ "For each wave: launch up to `wave_size` subagents in parallel (one per plan entry), wait for all of them to finish, then start the next wave. Repeat until all entries are dispatched.",
563
+ ]
564
+ : [
565
+ "Dispatch is prepared. Read only this dispatch plan JSON:",
566
+ "",
567
+ ` ${params.dispatchPlanPath}`,
568
+ "",
569
+ "Launch one host subagent for each entry in the plan.",
570
+ ];
531
571
  return [
532
572
  "# audit-code dispatch review",
533
573
  "",
534
- "Dispatch is prepared. Read only this dispatch plan JSON:",
535
- "",
536
- ` ${params.dispatchPlanPath}`,
574
+ ...fileLines,
537
575
  "",
538
- "Launch one host subagent for each entry in the plan. Pass the packet prompt path literally; do not load packet prompt files into this orchestrator context.",
576
+ "Pass each packet prompt path literally to its subagent; do not load packet prompt files into this orchestrator context.",
539
577
  "",
540
578
  "Subagent prompt shape:",
541
579
  "",
@@ -544,9 +582,9 @@ function renderDispatchReviewPrompt(params) {
544
582
  modelLine,
545
583
  toolsLine,
546
584
  "",
547
- "Wait for all review subagents to finish. Each subagent must submit its packet through the submit command printed in its packet prompt and stop after successful submission.",
585
+ "Each subagent must submit its packet through the submit command printed in its packet prompt and stop after successful submission.",
548
586
  "",
549
- "Then run exactly:",
587
+ "After all waves complete, run exactly:",
550
588
  "",
551
589
  ` ${mergeCommand}`,
552
590
  "",
@@ -1198,6 +1236,7 @@ async function cmdNextStep(argv) {
1198
1236
  repoRoot: root,
1199
1237
  artifactPaths: {
1200
1238
  dispatch_plan: dispatch.dispatch_plan_path,
1239
+ dispatch_quota: dispatch.dispatch_quota_path,
1201
1240
  dispatch_warnings: dispatch.dispatch_warnings_path,
1202
1241
  active_review_task: result.activeReviewRun.task_path,
1203
1242
  pending_audit_tasks: result.activeReviewRun.pending_audit_tasks_path ?? null,
@@ -1207,6 +1246,7 @@ async function cmdNextStep(argv) {
1207
1246
  artifactsDir,
1208
1247
  activeReviewRun: result.activeReviewRun,
1209
1248
  dispatchPlanPath: dispatch.dispatch_plan_path,
1249
+ dispatchQuotaPath: dispatch.dispatch_quota_path,
1210
1250
  hostCanRestrictSubagentTools,
1211
1251
  hostCanSelectSubagentModel,
1212
1252
  }),
@@ -1238,6 +1278,7 @@ async function cmdRunToCompletion(argv) {
1238
1278
  const agentBatchSize = getAgentBatchSize(argv, sessionConfig);
1239
1279
  const parallelWorkers = getParallelWorkers(argv, sessionConfig);
1240
1280
  const timeoutMs = getTimeoutMs(argv, sessionConfig);
1281
+ const hostModel = getHostModel(argv);
1241
1282
  const selfCliPath = resolve(argv[1] ?? process.argv[1] ?? "");
1242
1283
  const batchResultsDir = getBatchResultsDir(argv);
1243
1284
  if (batchResultsDir && getFlag(argv, "--results")) {
@@ -1375,8 +1416,27 @@ async function cmdRunToCompletion(argv) {
1375
1416
  return;
1376
1417
  }
1377
1418
  if (preferredExecutor === "agent" && parallelWorkers > 1) {
1419
+ const quotaState = await readQuotaState();
1420
+ const providerModelKey = buildProviderModelKey(provider.name, hostModel);
1421
+ const quotaStateEntry = quotaState.entries[providerModelKey] ?? null;
1422
+ const waveSchedule = scheduleWave({
1423
+ providerName: resolveFreshSessionProviderName(getExplicitProvider(argv), sessionConfig),
1424
+ sessionConfig,
1425
+ hostModel,
1426
+ requestedConcurrency: parallelWorkers,
1427
+ quotaStateEntry,
1428
+ });
1429
+ const waveSize = waveSchedule.wave_size;
1430
+ if (waveSchedule.cooldown_until) {
1431
+ const waitMs = new Date(waveSchedule.cooldown_until).getTime() - Date.now();
1432
+ if (waitMs > 0) {
1433
+ const cappedWait = Math.min(waitMs, 120_000);
1434
+ process.stderr.write(`[quota] Cooldown active — waiting ${Math.ceil(cappedWait / 1000)}s before next wave.\n`);
1435
+ await new Promise((r) => setTimeout(r, cappedWait));
1436
+ }
1437
+ }
1378
1438
  const allPendingTasks = buildPendingAuditTasks(bundle);
1379
- const taskGroups = chunkArray(allPendingTasks.slice(0, parallelWorkers * agentBatchSize), agentBatchSize);
1439
+ const taskGroups = chunkArray(allPendingTasks.slice(0, waveSize * agentBatchSize), agentBatchSize);
1380
1440
  const workerSlots = [];
1381
1441
  for (const rawGroup of taskGroups) {
1382
1442
  const group = await addFileLineCountHints(root, rawGroup);
@@ -1530,6 +1590,16 @@ async function cmdRunToCompletion(argv) {
1530
1590
  });
1531
1591
  artifactsWritten.add("run-ledger.json");
1532
1592
  }
1593
+ // Record outcome for adaptive learning (best-effort — never blocks dispatch)
1594
+ {
1595
+ const hasRateLimit = batchErrors.some(detectRateLimitError);
1596
+ await recordWaveOutcome(providerModelKey, {
1597
+ concurrency: workerSlots.length,
1598
+ estimated_tokens: waveSize * agentBatchSize * 900,
1599
+ outcome: hasRateLimit ? "rate_limited" : batchErrors.length > 0 ? "timeout" : "success",
1600
+ cooldown_until: hasRateLimit ? defaultCooldownUntil(null) : null,
1601
+ }, sessionConfig.quota?.empirical_half_life_hours ?? 24).catch(() => undefined);
1602
+ }
1533
1603
  if (batchErrors.length > 0) {
1534
1604
  const bundleAfter = await loadArtifactBundle(artifactsDir);
1535
1605
  const blockedState = buildBlockedAuditState({
@@ -2117,6 +2187,7 @@ async function prepareDispatchArtifacts(params) {
2117
2187
  }
2118
2188
  const tasks = await readJsonFile(tasksPath);
2119
2189
  const bundle = await loadArtifactBundle(artifactsDir);
2190
+ const sessionConfig = params.sessionConfig ?? (await loadSessionConfig(artifactsDir).catch(() => ({})));
2120
2191
  const lensDefsPath = join(packageRoot, "dispatch", "lens-definitions.json");
2121
2192
  const lensDefs = await readJsonFile(lensDefsPath);
2122
2193
  await mkdir(taskResultsDir, { recursive: true });
@@ -2342,6 +2413,52 @@ async function prepareDispatchArtifacts(params) {
2342
2413
  run_id: runId,
2343
2414
  entries: resultMapEntries,
2344
2415
  });
2416
+ // Compute and write dispatch-quota.json
2417
+ const hostModel = params.hostModel ?? null;
2418
+ const avgPacketTokens = plan.length > 0
2419
+ ? Math.floor(plan.reduce((s, p) => s + p.complexity.estimated_tokens, 0) / plan.length)
2420
+ : 0;
2421
+ const quotaProviderName = resolveFreshSessionProviderName(undefined, sessionConfig);
2422
+ const quotaProviderKey = buildProviderModelKey(quotaProviderName, hostModel);
2423
+ const quotaState = await readQuotaState().catch(() => ({ version: 1, entries: {} }));
2424
+ const quotaStateEntry = quotaState.entries[quotaProviderKey] ?? null;
2425
+ const waveSchedule = scheduleWave({
2426
+ providerName: quotaProviderName,
2427
+ sessionConfig,
2428
+ hostModel,
2429
+ requestedConcurrency: sessionConfig.parallel_workers ?? 1,
2430
+ estimatedPacketTokens: avgPacketTokens,
2431
+ quotaStateEntry,
2432
+ });
2433
+ const dispatchQuota = {
2434
+ contract_version: "audit-code-dispatch-quota/v1alpha1",
2435
+ run_id: runId,
2436
+ model: hostModel,
2437
+ resolved_limits: waveSchedule.resolved_limits,
2438
+ confidence: waveSchedule.confidence,
2439
+ source: waveSchedule.source,
2440
+ wave_size: waveSchedule.wave_size,
2441
+ estimated_wave_tokens: waveSchedule.estimated_wave_tokens,
2442
+ cooldown_until: waveSchedule.cooldown_until,
2443
+ };
2444
+ const dispatchQuotaPath = join(runDir, "dispatch-quota.json");
2445
+ await writeJsonFile(dispatchQuotaPath, dispatchQuota);
2446
+ // Warn about packets that exceed the context budget only when we have reliable limit
2447
+ // information (confidence medium/high). Low-confidence limits are conservative defaults
2448
+ // and would produce misleading warnings since the real context window is unknown.
2449
+ if (waveSchedule.confidence !== "low") {
2450
+ const contextBudget = waveSchedule.resolved_limits.context_tokens - waveSchedule.resolved_limits.output_tokens;
2451
+ for (const p of plan) {
2452
+ if (p.complexity.estimated_tokens > contextBudget) {
2453
+ warnings.push({
2454
+ code: "oversized_packet",
2455
+ message: `Packet ${p.packet_id} estimated tokens (${p.complexity.estimated_tokens}) exceed ` +
2456
+ `context budget (${contextBudget}). This packet may fail at dispatch. ` +
2457
+ `Set quota.default_context_tokens or quota.models in session-config.json to override.`,
2458
+ });
2459
+ }
2460
+ }
2461
+ }
2345
2462
  const warningsPath = warnings.length > 0
2346
2463
  ? join(runDir, "dispatch-warnings.json")
2347
2464
  : null;
@@ -2351,6 +2468,7 @@ async function prepareDispatchArtifacts(params) {
2351
2468
  return {
2352
2469
  run_id: runId,
2353
2470
  dispatch_plan_path: dispatchPlanPath,
2471
+ dispatch_quota_path: dispatchQuotaPath,
2354
2472
  packet_count: plan.length,
2355
2473
  task_count: orderedTasks.length,
2356
2474
  largest_packet: largestPacketId
@@ -2372,6 +2490,7 @@ async function cmdPrepareDispatch(argv) {
2372
2490
  runId,
2373
2491
  artifactsDir: getArtifactsDir(argv),
2374
2492
  root: getFlag(argv, "--root") ? getRootDir(argv) : undefined,
2493
+ hostModel: getHostModel(argv),
2375
2494
  });
2376
2495
  console.log(JSON.stringify(result, null, 2));
2377
2496
  }
@@ -2923,6 +3042,45 @@ async function cmdCleanup(argv) {
2923
3042
  async function cmdMcp(argv) {
2924
3043
  await runAuditCodeMcpServer(argv.slice(3));
2925
3044
  }
3045
/**
 * `quota` CLI command: print a JSON report of the quota picture for the
 * resolved provider/model pair.
 *
 * The report contains the resolved limits with their source and confidence,
 * the probe result, the learned caps from the persisted quota state (or
 * `null` when no entry exists for the key), the wave schedule, and the path
 * of the quota state file. Failures while loading the session config or the
 * quota state fall back to empty values.
 *
 * @param {string[]} argv - CLI argument vector.
 * @returns {Promise<void>}
 */
async function cmdQuota(argv) {
    const artifactsDir = getArtifactsDir(argv);
    const sessionConfig = await loadSessionConfig(artifactsDir).catch(() => ({}));
    const explicitProvider = getExplicitProvider(argv);
    const hostModel = getHostModel(argv);
    const probeMode = getQuotaProbeMode(argv, sessionConfig);
    const providerName = resolveFreshSessionProviderName(explicitProvider, sessionConfig);
    const providerModelKey = buildProviderModelKey(providerName, hostModel);
    const resolution = resolveLimits({ providerName, sessionConfig, hostModel });
    const probeResult = await probeProvider(providerName, probeMode);
    const { entries } = await readQuotaState().catch(() => ({ version: 1, entries: {} }));
    const quotaStateEntry = entries[providerModelKey] ?? null;
    const halfLifeHours = sessionConfig.quota?.empirical_half_life_hours ?? 24;
    const waveSchedule = scheduleWave({
        providerName,
        sessionConfig,
        hostModel,
        requestedConcurrency: sessionConfig.parallel_workers ?? 1,
        quotaStateEntry,
    });
    // Learned caps exist only once a state entry has been recorded for this key.
    let learnedCaps = null;
    if (quotaStateEntry) {
        learnedCaps = {
            max_safe_concurrency: computeMaxSafeConcurrency(quotaStateEntry, halfLifeHours),
            cooldown_until: quotaStateEntry.cooldown_until,
            last_429_at: quotaStateEntry.last_429_at,
        };
    }
    const report = {
        provider: providerName,
        model: hostModel,
        provider_model_key: providerModelKey,
        resolved_limits: resolution.limits,
        confidence: resolution.confidence,
        source: resolution.source,
        probe: probeResult,
        learned_caps: learnedCaps,
        wave_schedule: waveSchedule,
        quota_state_path: getQuotaStatePath(),
    };
    console.log(JSON.stringify(report, null, 2));
}
2926
3084
  async function main(argv) {
2927
3085
  const command = argv[2] ?? "sample-run";
2928
3086
  switch (command) {
@@ -2989,9 +3147,12 @@ async function main(argv) {
2989
3147
  case "validate-result":
2990
3148
  await cmdValidateResult(argv);
2991
3149
  return;
3150
+ case "quota":
3151
+ await cmdQuota(argv);
3152
+ return;
2992
3153
  default:
2993
3154
  console.error(`Unknown command: ${command}`);
2994
- console.error("Available commands: sample-run, advance-audit, next-step, run-to-completion, worker-run, import-external-analyzer, intake, plan, ingest-results, explain-task, update-runtime-validation, validate, validate-results, requeue, synthesize, cleanup, mcp, prepare-dispatch, merge-and-ingest, submit-packet, validate-result");
3155
+ console.error("Available commands: sample-run, advance-audit, next-step, run-to-completion, worker-run, import-external-analyzer, intake, plan, ingest-results, explain-task, update-runtime-validation, validate, validate-results, requeue, synthesize, cleanup, mcp, prepare-dispatch, merge-and-ingest, submit-packet, validate-result, quota");
2995
3156
  process.exitCode = 1;
2996
3157
  }
2997
3158
  }
@@ -6,6 +6,11 @@ export interface BuildReviewPacketOptions {
6
6
  lineIndex?: Record<string, number>;
7
7
  maxTasksPerPacket?: number;
8
8
  targetPacketLines?: number;
9
+ /**
10
+ * Available context budget in tokens (context_tokens − reserved_output_tokens).
11
+ * When provided, targetPacketLines is capped to fit within this budget.
12
+ */
13
+ maxContextTokens?: number;
9
14
  }
10
15
  export declare function buildReviewPackets(tasks: AuditTask[], options?: BuildReviewPacketOptions): ReviewPacket[];
11
16
  export declare function orderTasksForPacketReview(tasks: AuditTask[], options?: BuildReviewPacketOptions): AuditTask[];
@@ -949,7 +949,11 @@ function buildPacket(tasks, packetIndex, lineIndex, graphEdges = [], graphBundle
949
949
  }
950
950
  function buildReviewPacketPlanningData(tasks, options = {}) {
951
951
  const maxTasksPerPacket = options.maxTasksPerPacket ?? DEFAULT_MAX_TASKS_PER_PACKET;
952
- const targetPacketLines = options.targetPacketLines ?? DEFAULT_TARGET_PACKET_LINES;
952
+ const configuredTargetLines = options.targetPacketLines ?? DEFAULT_TARGET_PACKET_LINES;
953
+ const targetPacketLines = options.maxContextTokens != null
954
+ ? Math.min(configuredTargetLines, Math.max(1, Math.floor((options.maxContextTokens - ESTIMATED_PACKET_PROMPT_TOKENS) /
955
+ ESTIMATED_TOKENS_PER_LINE)))
956
+ : configuredTargetLines;
953
957
  const graphEdges = collectGraphEdges(options.graphBundle);
954
958
  const groups = buildTaskGroups(tasks);
955
959
  const planningGraphEdges = buildPlanningGraphEdges(groups, graphEdges, options.graphBundle, options.lineIndex, targetPacketLines);
@@ -0,0 +1,8 @@
1
+ export { resolveLimits, lookupKnownModel, classifyProvider } from "./limits.js";
2
+ export type { LimitResolutionResult, ResolveLimitsOptions, ProviderType } from "./limits.js";
3
+ export { readQuotaState, writeQuotaState, computeMaxSafeConcurrency, recordWaveOutcome, getQuotaStatePath, decayWeight, applyDecayToEntry, } from "./state.js";
4
+ export { scheduleWave, buildProviderModelKey } from "./scheduler.js";
5
+ export type { ScheduleWaveOptions } from "./scheduler.js";
6
+ export { probeProvider } from "./probe.js";
7
+ export type { ProbeResult } from "./probe.js";
8
+ export type { ResolvedLimits, LimitSource, LimitConfidence, QuotaState, QuotaStateEntry, ConcurrencyBucket, WaveSchedule, DispatchQuota, ObservedWaveOutcome, } from "./types.js";
@@ -0,0 +1,4 @@
1
+ export { resolveLimits, lookupKnownModel, classifyProvider } from "./limits.js";
2
+ export { readQuotaState, writeQuotaState, computeMaxSafeConcurrency, recordWaveOutcome, getQuotaStatePath, decayWeight, applyDecayToEntry, } from "./state.js";
3
+ export { scheduleWave, buildProviderModelKey } from "./scheduler.js";
4
+ export { probeProvider } from "./probe.js";
@@ -0,0 +1,16 @@
1
+ import type { ResolvedProviderName, SessionConfig } from "../types/sessionConfig.js";
2
+ import type { LimitConfidence, LimitSource, ResolvedLimits } from "./types.js";
3
+ export type ProviderType = "hosted" | "local" | "unknown";
4
+ export declare function classifyProvider(providerName: ResolvedProviderName): ProviderType;
5
+ export declare function lookupKnownModel(modelKey: string): Pick<ResolvedLimits, "context_tokens" | "output_tokens"> | undefined;
6
+ export interface LimitResolutionResult {
7
+ limits: ResolvedLimits;
8
+ source: LimitSource;
9
+ confidence: LimitConfidence;
10
+ }
11
+ export interface ResolveLimitsOptions {
12
+ providerName: ResolvedProviderName;
13
+ sessionConfig: SessionConfig;
14
+ hostModel?: string | null;
15
+ }
16
+ export declare function resolveLimits(options: ResolveLimitsOptions): LimitResolutionResult;
@@ -0,0 +1,77 @@
1
+ // RPM/TPM are omitted here — they are tier-dependent and must come from learning.
2
+ const KNOWN_MODEL_LIMITS = {
3
+ "anthropic/claude-opus-4-7": { context_tokens: 200_000, output_tokens: 32_000 },
4
+ "anthropic/claude-sonnet-4-6": { context_tokens: 200_000, output_tokens: 8_192 },
5
+ "anthropic/claude-haiku-4-5": { context_tokens: 200_000, output_tokens: 8_192 },
6
+ "anthropic/claude-opus-4-5": { context_tokens: 200_000, output_tokens: 8_192 },
7
+ "anthropic/claude-sonnet-4-5": { context_tokens: 200_000, output_tokens: 8_192 },
8
+ "openai/gpt-4o": { context_tokens: 128_000, output_tokens: 16_384 },
9
+ "openai/gpt-4o-mini": { context_tokens: 128_000, output_tokens: 16_384 },
10
+ "google/gemini-2.0-flash": { context_tokens: 1_048_576, output_tokens: 8_192 },
11
+ "google/gemini-1.5-pro": { context_tokens: 2_097_152, output_tokens: 8_192 },
12
+ };
13
/**
 * Classify a provider name into a coarse provider type.
 *
 * @param {string} providerName - Resolved provider name.
 * @returns {"hosted"|"local"|"unknown"} `"hosted"` for `claude-code` and
 *   `opencode`, `"local"` for `local-subprocess`, and `"unknown"` for
 *   everything else (including `subprocess-template` and `vscode-task`).
 */
export function classifyProvider(providerName) {
    if (providerName === "claude-code" || providerName === "opencode") {
        return "hosted";
    }
    if (providerName === "local-subprocess") {
        return "local";
    }
    return "unknown";
}
26
/**
 * Look up static context/output limits for a model key.
 *
 * The key is trimmed and lower-cased before the lookup. Only the table's own
 * entries are consulted, so keys that collide with inherited object members
 * (such as "constructor" or "__proto__") are reported as unknown instead of
 * returning a non-limits value.
 *
 * @param {string} modelKey - Model identifier to look up.
 * @returns {{context_tokens: number, output_tokens: number}|undefined} The
 *   table entry, or `undefined` when the model is not listed.
 */
export function lookupKnownModel(modelKey) {
    const normalizedKey = modelKey.toLowerCase().trim();
    return Object.prototype.hasOwnProperty.call(KNOWN_MODEL_LIMITS, normalizedKey)
        ? KNOWN_MODEL_LIMITS[normalizedKey]
        : undefined;
}
29
/**
 * Build the fallback limits used when nothing model-specific is known.
 *
 * @param {object} sessionConfig - Session configuration; `quota` is optional.
 * @returns {object} Limits with `context_tokens` taken from
 *   `quota.default_context_tokens` (default 32000), `output_tokens` taken
 *   from `quota.reserved_output_tokens` (default 4096), and all rate fields
 *   set to `null`.
 */
function defaultLimits(sessionConfig) {
    const {
        default_context_tokens: contextTokens,
        reserved_output_tokens: outputTokens,
    } = sessionConfig.quota ?? {};
    return {
        context_tokens: contextTokens ?? 32_000,
        output_tokens: outputTokens ?? 4_096,
        requests_per_minute: null,
        input_tokens_per_minute: null,
        output_tokens_per_minute: null,
    };
}
39
/**
 * Resolve the limits to use for a provider/model, with provenance.
 *
 * Resolution order:
 *   1. `sessionConfig.quota.models[hostModel]` — source `"explicit_config"`,
 *      confidence `"high"`; missing context/output values fall back to the
 *      defaults, missing rate values become `null`.
 *   2. The static known-model table — source `"known_metadata"`, confidence
 *      `"medium"`; only context/output are known, rate fields are `null`.
 *   3. Conservative defaults — source `"default"`, confidence `"low"`.
 *
 * `options.providerName` is accepted but not consulted by this function.
 *
 * @param {{providerName: string, sessionConfig: object, hostModel?: string|null}} options
 * @returns {{limits: object, source: string, confidence: string}}
 */
export function resolveLimits(options) {
    const { sessionConfig, hostModel } = options;
    const quota = sessionConfig.quota ?? {};
    const fallback = defaultLimits(sessionConfig);

    // 1. Explicit per-model configuration wins.
    const explicit = hostModel ? quota.models?.[hostModel] : undefined;
    if (explicit) {
        const limits = {
            context_tokens: explicit.context_tokens ?? fallback.context_tokens,
            output_tokens: explicit.output_tokens ?? fallback.output_tokens,
            requests_per_minute: explicit.requests_per_minute ?? null,
            input_tokens_per_minute: explicit.input_tokens_per_minute ?? null,
            output_tokens_per_minute: explicit.output_tokens_per_minute ?? null,
        };
        return { limits, source: "explicit_config", confidence: "high" };
    }

    // 2. Static metadata: context/output sizes only, no rate information.
    const known = hostModel ? lookupKnownModel(hostModel) : undefined;
    if (known) {
        const limits = {
            context_tokens: known.context_tokens,
            output_tokens: known.output_tokens,
            requests_per_minute: null,
            input_tokens_per_minute: null,
            output_tokens_per_minute: null,
        };
        return { limits, source: "known_metadata", confidence: "medium" };
    }

    // 3. Nothing specific is known.
    return { limits: fallback, source: "default", confidence: "low" };
}
@@ -0,0 +1,13 @@
1
+ export interface ProbeResult {
2
+ supported: boolean;
3
+ reason: string;
4
+ }
5
+ /**
6
+ * Probe a provider to discover its rate limits.
7
+ *
8
+ * Only subprocess-template supports direct probing since it is the only
9
+ * provider where the auditor controls the API call. IDE providers
10
+ * (claude-code, opencode) select the model internally; their limits come
11
+ * from known-model metadata or learned behavior.
12
+ */
13
+ export declare function probeProvider(providerName: string, probeMode?: "auto" | "never" | "force"): Promise<ProbeResult>;
@@ -0,0 +1,21 @@
1
/**
 * Report whether a provider's rate limits can be probed directly.
 *
 * At present every call resolves to `supported: false`; only the reason
 * differs. Probing is disabled outright when `probeMode` is `"never"`.
 * Providers other than `subprocess-template` are reported as not applicable,
 * and the `subprocess-template` probe itself is not implemented yet.
 *
 * @param {string} providerName - Provider to probe.
 * @param {"auto"|"never"|"force"} [probeMode="auto"] - Probe mode.
 * @returns {Promise<{supported: boolean, reason: string}>}
 */
export async function probeProvider(providerName, probeMode = "auto") {
    const unsupported = (reason) => ({ supported: false, reason });
    if (probeMode === "never") {
        return unsupported("probe disabled by config");
    }
    if (providerName === "subprocess-template") {
        return unsupported("subprocess-template probe not yet implemented");
    }
    return unsupported(`probe not applicable for ${providerName} — limits come from known-model metadata or learned behavior`);
}
@@ -0,0 +1,14 @@
1
+ import type { ResolvedProviderName, SessionConfig } from "../types/sessionConfig.js";
2
+ import type { QuotaStateEntry, WaveSchedule } from "./types.js";
3
+ export interface ScheduleWaveOptions {
4
+ providerName: ResolvedProviderName;
5
+ sessionConfig: SessionConfig;
6
+ hostModel: string | null;
7
+ requestedConcurrency: number;
8
+ /** Average estimated tokens per packet/worker. Used for TPM budget. */
9
+ estimatedPacketTokens?: number;
10
+ quotaStateEntry?: QuotaStateEntry | null;
11
+ }
12
+ export declare function scheduleWave(options: ScheduleWaveOptions): WaveSchedule;
13
+ /** Build the state key used for indexing quota-state.json entries. */
14
+ export declare function buildProviderModelKey(providerName: string, hostModel: string | null | undefined): string;
@@ -0,0 +1,76 @@
1
+ import { classifyProvider, resolveLimits } from "./limits.js";
2
+ import { computeMaxSafeConcurrency } from "./state.js";
3
/**
 * Decide how many workers may be dispatched in the next wave.
 *
 * When `quota.enabled` is `false` the requested concurrency is returned
 * untouched, with default limits and no cooldown. Otherwise:
 *   - an unexpired `cooldown_until` on the state entry forces a wave of 1
 *     and is echoed back to the caller;
 *   - without an active cooldown the wave is the minimum of the requested
 *     concurrency, the RPM cap, the input-TPM cap (only when a per-packet
 *     token estimate is given), and either the learned cap (when a state
 *     entry exists) or a conservative cap for hosted providers whose limits
 *     came from defaults.
 * The final wave size is never below 1.
 *
 * @param {object} options
 * @param {string} options.providerName - Resolved provider name.
 * @param {object} options.sessionConfig - Session configuration; `quota` is optional.
 * @param {string|null} options.hostModel - Host model, if known.
 * @param {number} options.requestedConcurrency - Concurrency the caller asked for.
 * @param {number} [options.estimatedPacketTokens=0] - Average tokens per packet/worker.
 * @param {object|null} [options.quotaStateEntry=null] - Persisted state entry for this key.
 * @returns {object} Wave schedule: `wave_size`, `estimated_wave_tokens`,
 *   `cooldown_until`, `confidence`, `source`, `resolved_limits`, `model`.
 */
export function scheduleWave(options) {
    const { providerName, sessionConfig, hostModel, requestedConcurrency } = options;
    const { estimatedPacketTokens = 0, quotaStateEntry = null } = options;
    const quota = sessionConfig.quota ?? {};

    // Quota handling switched off: pass the request straight through.
    if (quota.enabled === false) {
        return {
            wave_size: requestedConcurrency,
            estimated_wave_tokens: requestedConcurrency * estimatedPacketTokens,
            cooldown_until: null,
            confidence: "high",
            source: "default",
            resolved_limits: {
                context_tokens: quota.default_context_tokens ?? 32_000,
                output_tokens: quota.reserved_output_tokens ?? 4_096,
                requests_per_minute: null,
                input_tokens_per_minute: null,
                output_tokens_per_minute: null,
            },
            model: hostModel,
        };
    }

    const safetyMargin = quota.safety_margin ?? 0.8;
    const halfLifeHours = quota.empirical_half_life_hours ?? 24;
    const providerType = classifyProvider(providerName);
    const { limits, source, confidence } = resolveLimits({ providerName, sessionConfig, hostModel });

    // A cooldown only counts while its expiry is still in the future.
    const recordedCooldown = quotaStateEntry?.cooldown_until;
    const cooldownUntil = recordedCooldown && new Date(recordedCooldown).getTime() > Date.now()
        ? recordedCooldown
        : null;

    let waveSize;
    if (cooldownUntil) {
        waveSize = 1;
    }
    else {
        const caps = [requestedConcurrency];
        if (limits.requests_per_minute != null) {
            caps.push(Math.max(1, Math.floor(limits.requests_per_minute * safetyMargin)));
        }
        if (limits.input_tokens_per_minute != null && estimatedPacketTokens > 0) {
            caps.push(Math.max(1, Math.floor((limits.input_tokens_per_minute * safetyMargin) / estimatedPacketTokens)));
        }
        if (quotaStateEntry) {
            caps.push(computeMaxSafeConcurrency(quotaStateEntry, halfLifeHours));
        }
        else if (providerType === "hosted" && source === "default") {
            // Hosted provider, no learned data, limits from defaults: stay conservative.
            caps.push(quota.unknown_hosted_concurrency ?? 1);
        }
        // Other providers without learned data keep the requested concurrency.
        waveSize = Math.min(...caps);
    }
    waveSize = Math.max(1, waveSize);

    return {
        wave_size: waveSize,
        estimated_wave_tokens: waveSize * estimatedPacketTokens,
        cooldown_until: cooldownUntil,
        confidence,
        source,
        resolved_limits: limits,
        model: hostModel,
    };
}
73
/**
 * Build the key under which a provider/model pair is stored in the quota
 * state. A falsy `hostModel` is represented by the wildcard `*`.
 *
 * @param {string} providerName - Provider name.
 * @param {string|null|undefined} hostModel - Host model, if known.
 * @returns {string} `"<provider>/<model>"` or `"<provider>/*"`.
 */
export function buildProviderModelKey(providerName, hostModel) {
    const modelPart = hostModel ? hostModel : "*";
    return `${providerName}/${modelPart}`;
}
@@ -0,0 +1,12 @@
1
+ import type { ObservedWaveOutcome, QuotaState, QuotaStateEntry } from "./types.js";
2
+ export declare function getQuotaStatePath(): string;
3
+ export declare function decayWeight(weight: number, elapsedHours: number, halfLifeHours: number): number;
4
+ export declare function applyDecayToEntry(entry: QuotaStateEntry, halfLifeHours: number): QuotaStateEntry;
5
+ export declare function readQuotaState(): Promise<QuotaState>;
6
+ export declare function writeQuotaState(state: QuotaState): Promise<void>;
7
+ /**
8
+ * Returns the highest concurrency level for which decayed success evidence
9
+ * exceeds failure evidence, with a minimum of 1.
10
+ */
11
+ export declare function computeMaxSafeConcurrency(entry: QuotaStateEntry, halfLifeHours: number, maxToCheck?: number): number;
12
+ export declare function recordWaveOutcome(providerModelKey: string, outcome: ObservedWaveOutcome, halfLifeHours: number): Promise<void>;
@@ -0,0 +1,101 @@
1
+ import { mkdir, readFile, writeFile } from "node:fs/promises";
2
+ import { homedir } from "node:os";
3
+ import { join } from "node:path";
4
// Quota state is kept in a per-user directory under the home directory.
const STATE_DIR = join(homedir(), ".audit-code");
const STATE_PATH = join(STATE_DIR, "quota-state.json");
// Minimum decayed success weight a bucket needs before it is trusted.
const MIN_EVIDENCE_WEIGHT = 0.5;
/**
 * Location of the persisted quota state file.
 *
 * @returns {string} Path of `quota-state.json` inside the `.audit-code`
 *   directory of the current user's home directory.
 */
export function getQuotaStatePath() {
    return STATE_PATH;
}
11
/**
 * Apply exponential half-life decay to an evidence weight.
 *
 * @param {number} weight - Current weight.
 * @param {number} elapsedHours - Hours since the weight was last updated.
 * @param {number} halfLifeHours - Hours after which a weight halves.
 * @returns {number} `weight * 0.5^(elapsedHours / halfLifeHours)`, or `0`
 *   when the weight or the half-life is zero or negative.
 */
export function decayWeight(weight, elapsedHours, halfLifeHours) {
    const nothingToDecay = halfLifeHours <= 0 || weight <= 0;
    if (nothingToDecay) {
        return 0;
    }
    const halfLivesElapsed = elapsedHours / halfLifeHours;
    return weight * 0.5 ** halfLivesElapsed;
}
16
/**
 * Return a view of a quota state entry with every bucket weight decayed for
 * the time elapsed since `entry.updated_at`.
 *
 * The entry is returned unchanged (same object) when less than 0.001 hours
 * have elapsed, when `updated_at` lies in the future, or when `updated_at`
 * cannot be parsed. The unparseable case matters: the elapsed time is then
 * NaN, and decaying by NaN would turn every weight into NaN and corrupt the
 * entry for all later updates.
 *
 * @param {object} entry - State entry with `updated_at` and `buckets`.
 * @param {number} halfLifeHours - Half-life passed through to `decayWeight`.
 * @returns {object} The original entry, or a shallow copy whose `buckets`
 *   hold freshly decayed weights.
 */
export function applyDecayToEntry(entry, halfLifeHours) {
    const MS_PER_HOUR = 1000 * 60 * 60;
    const elapsedHours = (Date.now() - new Date(entry.updated_at).getTime()) / MS_PER_HOUR;
    // Written as a negated >= so that NaN also takes the early return.
    if (!(elapsedHours >= 0.001)) {
        return entry;
    }
    const decayed = {};
    for (const [key, bucket] of Object.entries(entry.buckets)) {
        decayed[key] = {
            success_weight: decayWeight(bucket.success_weight, elapsedHours, halfLifeHours),
            failure_weight: decayWeight(bucket.failure_weight, elapsedHours, halfLifeHours),
        };
    }
    return { ...entry, buckets: decayed };
}
29
/**
 * Structural check for a parsed quota state document.
 *
 * `typeof x === "object"` is also true for `null` and arrays, so `entries`
 * is checked explicitly for both; otherwise callers that index
 * `state.entries[key]` would crash or read array slots.
 *
 * @param {*} value - Parsed JSON value.
 * @returns {boolean} `true` when `value` is a non-array object with
 *   `version === 1` and an `entries` member that is a non-null, non-array
 *   object.
 */
function isQuotaState(value) {
    if (value === null || typeof value !== "object" || Array.isArray(value)) {
        return false;
    }
    const entries = value["entries"];
    return (value["version"] === 1 &&
        entries !== null &&
        typeof entries === "object" &&
        !Array.isArray(entries));
}
36
/**
 * Load the persisted quota state.
 *
 * Never rejects because of a missing, unreadable, malformed or structurally
 * invalid state file: all of those cases yield a fresh empty state.
 *
 * @returns {Promise<{version: 1, entries: object}>} The stored state, or
 *   `{ version: 1, entries: {} }` when none is usable.
 */
export async function readQuotaState() {
    try {
        const fileText = await readFile(STATE_PATH, "utf8");
        const candidate = JSON.parse(fileText);
        if (isQuotaState(candidate)) {
            return candidate;
        }
    }
    catch {
        // Missing or malformed file: fall through to the empty state.
    }
    return { version: 1, entries: {} };
}
48
/**
 * Persist the quota state as pretty-printed JSON (2-space indent, trailing
 * newline), creating the state directory first if it does not exist.
 *
 * @param {object} state - Quota state to store.
 * @returns {Promise<void>} Rejects if the directory or file cannot be written.
 */
export async function writeQuotaState(state) {
    await mkdir(STATE_DIR, { recursive: true });
    const serialized = `${JSON.stringify(state, null, 2)}\n`;
    await writeFile(STATE_PATH, serialized, "utf8");
}
52
/**
 * Derive the learned concurrency cap from a state entry.
 *
 * Buckets are examined in order from concurrency 1 upwards after decay. A
 * level is trusted when its success weight is at least the minimum evidence
 * weight and strictly greater than its failure weight. The scan stops at the
 * first missing or untrusted level, so the result is the top of the
 * unbroken trusted run starting at 1.
 *
 * @param {object} entry - State entry for a provider/model key.
 * @param {number} halfLifeHours - Half-life used to decay the evidence.
 * @param {number} [maxToCheck=32] - Highest concurrency level to examine.
 * @returns {number} The highest trusted level, never less than 1.
 */
export function computeMaxSafeConcurrency(entry, halfLifeHours, maxToCheck = 32) {
    const { buckets } = applyDecayToEntry(entry, halfLifeHours);
    const isTrusted = (bucket) => Boolean(bucket) &&
        bucket.success_weight >= MIN_EVIDENCE_WEIGHT &&
        bucket.success_weight > bucket.failure_weight;
    let level = 1;
    while (level <= maxToCheck && isTrusted(buckets[String(level)])) {
        level += 1;
    }
    // `level` now sits one past the last trusted level (or is still 1).
    return Math.max(1, level - 1);
}
73
/**
 * Create an empty state entry stamped with the current time.
 *
 * @returns {{updated_at: string, buckets: object, cooldown_until: null, last_429_at: null}}
 */
function blankEntry() {
    const createdAt = new Date().toISOString();
    return {
        updated_at: createdAt,
        buckets: {},
        cooldown_until: null,
        last_429_at: null,
    };
}
76
/**
 * Fold the outcome of one dispatch wave into the persisted quota state.
 *
 * The entry for `providerModelKey` is loaded (or created), decayed to the
 * present, updated, and written back:
 *   - success at concurrency N adds one unit of success weight to every
 *     bucket from 1 to N;
 *   - any other outcome adds one unit of failure weight to buckets N through
 *     N + 4 and stores `outcome.cooldown_until` when one is supplied.
 * `last_429_at` is stamped only for `"rate_limited"` outcomes, so a plain
 * timeout is no longer reported as a 429.
 *
 * @param {string} providerModelKey - State key for the provider/model pair.
 * @param {object} outcome - Observed outcome: `concurrency`, `outcome`
 *   (`"success"`, `"rate_limited"` or `"timeout"`), optional `cooldown_until`.
 * @param {number} halfLifeHours - Half-life used to decay existing evidence.
 * @returns {Promise<void>} Rejects if the state cannot be written.
 */
export async function recordWaveOutcome(providerModelKey, outcome, halfLifeHours) {
    const state = await readQuotaState();
    const entry = applyDecayToEntry(state.entries[providerModelKey] ?? blankEntry(), halfLifeHours);
    const bump = (level, field) => {
        const bucket = entry.buckets[String(level)] ?? { success_weight: 0, failure_weight: 0 };
        bucket[field] += 1.0;
        entry.buckets[String(level)] = bucket;
    };
    if (outcome.outcome === "success") {
        // Success at N proves 1..N are all safe.
        for (let n = 1; n <= outcome.concurrency; n++) {
            bump(n, "success_weight");
        }
    }
    else {
        // Only a real rate-limit response counts as a 429 sighting.
        if (outcome.outcome === "rate_limited") {
            entry.last_429_at = new Date().toISOString();
        }
        if (outcome.cooldown_until) {
            entry.cooldown_until = outcome.cooldown_until;
        }
        // Failure at N marks N and the next four levels as unsafe.
        for (let n = outcome.concurrency; n <= outcome.concurrency + 4; n++) {
            bump(n, "failure_weight");
        }
    }
    entry.updated_at = new Date().toISOString();
    state.entries[providerModelKey] = entry;
    await writeQuotaState(state);
}
@@ -0,0 +1,50 @@
1
+ export type LimitSource = "explicit_config" | "cli_flags" | "known_metadata" | "learned" | "default"; // Where a set of resolved limits came from (same values as the dispatch-quota schema's "source" enum).
2
+ export type LimitConfidence = "high" | "medium" | "low"; // How confident the scheduler is in the resolved limits (same values as the schema's "confidence" enum).
3
/** The rate and context limits used to compute a wave size. */
+ export interface ResolvedLimits {
4
+ context_tokens: number; // presumably the model's context window in tokens — confirm
5
+ output_tokens: number; // presumably the per-request output token allowance — confirm
6
+ requests_per_minute: number | null; // null is permitted; presumably means "not known" — confirm
7
+ input_tokens_per_minute: number | null; // null is permitted; presumably means "not known" — confirm
8
+ output_tokens_per_minute: number | null; // null is permitted; presumably means "not known" — confirm
9
+ }
10
/** Evidence tallies for one concurrency level; recordWaveOutcome adds 1.0 per observed wave. */
+ export interface ConcurrencyBucket {
11
+ success_weight: number; // Incremented for every level 1..N when a wave at concurrency N succeeds.
12
+ failure_weight: number; // Incremented for levels N..N+4 when a wave at concurrency N does not succeed.
13
+ }
14
/** Learned state for one provider/model key; one value of QuotaState.entries. */
+ export interface QuotaStateEntry {
15
+ updated_at: string; // ISO-8601 timestamp, refreshed on every recordWaveOutcome call.
16
+ buckets: Record<string, ConcurrencyBucket>; // Keyed by the concurrency level rendered as a string ("1", "2", ...).
17
+ cooldown_until: string | null; // Copied from a non-success outcome's cooldown_until when one is supplied; null on a blank entry.
18
+ last_429_at: string | null; // ISO-8601 timestamp stamped by recordWaveOutcome; null on a blank entry.
19
+ }
20
/** State object returned by readQuotaState() and handed to writeQuotaState(). */
+ export interface QuotaState {
21
+ version: 1; // Only the literal 1 is accepted by this type.
22
+ entries: Record<string, QuotaStateEntry>; // Indexed by the providerModelKey passed to recordWaveOutcome.
23
+ }
24
/** Carries the same fields as DispatchQuota except contract_version and run_id; presumably what scheduleWave returns — confirm. */
+ export interface WaveSchedule {
25
+ wave_size: number;
26
+ estimated_wave_tokens: number;
27
+ cooldown_until: string | null;
28
+ confidence: LimitConfidence;
29
+ source: LimitSource;
30
+ resolved_limits: ResolvedLimits;
31
+ model: string | null;
32
+ }
33
/** Quota schedule for a prepare-dispatch run; mirrors the audit-code-dispatch-quota/v1alpha1 JSON schema. */
+ export interface DispatchQuota {
34
+ contract_version: "audit-code-dispatch-quota/v1alpha1"; // Fixed contract identifier (a const in the schema).
35
+ run_id: string; // The dispatch run this quota schedule applies to.
36
+ model: string | null; // The host model this schedule was computed for, or null if unknown.
37
+ resolved_limits: ResolvedLimits; // The rate and context limits used to compute the wave size.
38
+ confidence: LimitConfidence; // How confident the scheduler is in the resolved limits.
39
+ source: LimitSource; // Where the resolved limits came from.
40
+ wave_size: number; // Maximum number of packets to dispatch in a single wave.
41
+ estimated_wave_tokens: number; // Estimated total input tokens for one wave at the recommended wave_size.
42
+ cooldown_until: string | null; // If non-null, the host should wait until this timestamp before launching the next wave.
43
+ }
44
/** One observed wave result, as consumed by recordWaveOutcome. */
+ export interface ObservedWaveOutcome {
45
+ concurrency: number; // Concurrency level the wave ran at; selects which buckets are updated.
46
+ estimated_tokens: number; // Not read by recordWaveOutcome.
47
+ outcome: "success" | "rate_limited" | "timeout"; // "success" adds success weight; any other value adds failure weight.
48
+ cooldown_until?: string | null; // When truthy on a non-success outcome, stored as the entry's cooldown_until.
49
+ reset_at?: string | null; // Not read by recordWaveOutcome.
50
+ }
@@ -0,0 +1 @@
1
+ export {};
@@ -20,6 +20,33 @@ export interface VSCodeTaskConfig {
20
20
  command_template: string[];
21
21
  env?: Record<string, string>;
22
22
  }
23
/** Optional per-model limit values; the value type of QuotaConfig.models. Every field may be omitted. */
+ export interface QuotaModelLimits {
24
+ context_tokens?: number;
25
+ output_tokens?: number;
26
+ requests_per_minute?: number;
27
+ input_tokens_per_minute?: number;
28
+ output_tokens_per_minute?: number;
29
+ }
30
/** Quota-scheduling settings; exposed as the optional SessionConfig.quota field. */
+ export interface QuotaConfig {
31
+ /** Set to false to disable all quota scheduling (default: true). */
32
+ enabled?: boolean;
33
+ /** Whether to probe the provider for live limits (default: "auto"). */
34
+ probe?: "auto" | "never" | "force"; // The --quota-probe CLI flag, when given, is consulted before this value.
35
+ /** Fraction of known limits to actually use (default: 0.8). */
36
+ safety_margin?: number;
37
+ /** Concurrency ceiling for hosted providers with no learned data (default: 1). */
38
+ unknown_hosted_concurrency?: number;
39
+ /** Concurrency for local providers with no learned data (default: "unlimited"). */
40
+ unknown_local_concurrency?: number | "unlimited";
41
+ /** Assumed context window when the model is not recognized (default: 32000). */
42
+ default_context_tokens?: number;
43
+ /** Tokens reserved for model output per request (default: 4096). */
44
+ reserved_output_tokens?: number;
45
+ /** Half-life of empirical success/failure evidence in hours (default: 24). */
46
+ empirical_half_life_hours?: number;
47
+ /** Per-model overrides keyed by "provider/model". */
48
+ models?: Record<string, QuotaModelLimits>;
49
+ }
23
50
  export declare const PROVIDER_SECTION_KEYS: {
24
51
  readonly "subprocess-template": "subprocess_template";
25
52
  readonly "claude-code": "claude_code";
@@ -40,4 +67,5 @@ export interface SessionConfig {
40
67
  vscode_task?: VSCodeTaskConfig;
41
68
  agent_task_batch_size?: number;
42
69
  parallel_workers?: number;
70
+ quota?: QuotaConfig;
43
71
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "auditor-lambda",
3
- "version": "0.3.21",
3
+ "version": "0.3.22",
4
4
  "private": false,
5
5
  "description": "Portable hybrid code-auditing framework for arbitrary repositories.",
6
6
  "type": "module",
@@ -0,0 +1,77 @@
1
+ {
2
+ "$schema": "http://json-schema.org/draft-07/schema#",
3
+ "$id": "audit-code-dispatch-quota/v1alpha1",
4
+ "title": "DispatchQuota",
5
+ "description": "Quota schedule for a prepare-dispatch run. Written beside dispatch-plan.json. Hosts must launch at most wave_size packets per wave, then re-read this file before the next wave to pick up any updated limits.",
6
+ "type": "object",
7
+ "required": [
8
+ "contract_version",
9
+ "run_id",
10
+ "model",
11
+ "resolved_limits",
12
+ "confidence",
13
+ "source",
14
+ "wave_size",
15
+ "estimated_wave_tokens",
16
+ "cooldown_until"
17
+ ],
18
+ "additionalProperties": false,
19
+ "properties": {
20
+ "contract_version": {
21
+ "type": "string",
22
+ "const": "audit-code-dispatch-quota/v1alpha1"
23
+ },
24
+ "run_id": {
25
+ "type": "string",
26
+ "description": "The dispatch run this quota schedule applies to."
27
+ },
28
+ "model": {
29
+ "type": ["string", "null"],
30
+ "description": "The host model this schedule was computed for, or null if unknown."
31
+ },
32
+ "resolved_limits": {
33
+ "type": "object",
34
+ "description": "The rate and context limits used to compute the wave size.",
35
+ "required": [
36
+ "context_tokens",
37
+ "output_tokens",
38
+ "requests_per_minute",
39
+ "input_tokens_per_minute",
40
+ "output_tokens_per_minute"
41
+ ],
42
+ "additionalProperties": false,
43
+ "properties": {
44
+ "context_tokens": { "type": "integer", "minimum": 1 },
45
+ "output_tokens": { "type": "integer", "minimum": 1 },
46
+ "requests_per_minute": { "type": ["integer", "null"], "minimum": 1 },
47
+ "input_tokens_per_minute": { "type": ["integer", "null"], "minimum": 1 },
48
+ "output_tokens_per_minute": { "type": ["integer", "null"], "minimum": 1 }
49
+ }
50
+ },
51
+ "confidence": {
52
+ "type": "string",
53
+ "enum": ["high", "medium", "low"],
54
+ "description": "How confident the scheduler is in the resolved limits."
55
+ },
56
+ "source": {
57
+ "type": "string",
58
+ "enum": ["explicit_config", "cli_flags", "known_metadata", "learned", "default"],
59
+ "description": "Where the resolved limits came from."
60
+ },
61
+ "wave_size": {
62
+ "type": "integer",
63
+ "minimum": 1,
64
+ "description": "Maximum number of packets to dispatch in a single wave."
65
+ },
66
+ "estimated_wave_tokens": {
67
+ "type": "integer",
68
+ "minimum": 0,
69
+ "description": "Estimated total input tokens for one wave at the recommended wave_size."
70
+ },
71
+ "cooldown_until": {
72
+ "type": ["string", "null"],
73
+ "format": "date-time",
74
+ "description": "If non-null, the host should wait until this timestamp before launching the next wave."
75
+ }
76
+ }
77
+ }