auditor-lambda 0.3.33 → 0.3.34

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -28,11 +28,11 @@ import { buildAuditCodeHandoff, writeAuditCodeHandoffArtifacts, } from "./superv
28
28
  import { getSessionConfigPath, loadSessionConfig, readSessionConfigFile, } from "./supervisor/sessionConfig.js";
29
29
  import { clearDispatchFiles, buildRunId, ensureSupervisorDirs, getRunPaths, writeDispatchBatchFiles, writeWorkerTaskFiles, } from "./io/runArtifacts.js";
30
30
  import { renderWorkerPrompt } from "./prompts/renderWorkerPrompt.js";
31
- import { buildReviewPackets, orderTasksForPacketReview, } from "./orchestrator/reviewPackets.js";
31
+ import { buildReviewPackets, orderTasksForPacketReview, estimateTaskGroupTokens, } from "./orchestrator/reviewPackets.js";
32
32
  import { buildFileAnchorSummary, } from "./orchestrator/fileAnchors.js";
33
33
  import { LOCAL_SUBPROCESS_PROVIDER_NAME } from "./providers/constants.js";
34
34
  import { runAuditCodeMcpServer } from "./mcp/server.js";
35
- import { scheduleWave, buildProviderModelKey, readQuotaState, recordWaveOutcome, resolveLimits, resolveHostActiveSubagentLimit, probeProvider, computeMaxSafeConcurrency, getQuotaStatePath, } from "./quota/index.js";
35
+ import { scheduleWave, buildProviderModelKey, readQuotaState, recordWaveOutcome, resolveLimits, resolveHostActiveSubagentLimit, probeProvider, computeMaxSafeConcurrency, getQuotaStatePath, detectRateLimitError, computeCooldownUntil, runSlidingWindow, LearnedQuotaSource, CompositeQuotaSource, } from "./quota/index.js";
36
36
  const packageRoot = resolve(dirname(fileURLToPath(import.meta.url)), "..");
37
37
  const ADVANCE_AUDIT_CONTRACT_VERSION = "audit-code/v1alpha1";
38
38
  const WORKER_RESULT_CONTRACT_VERSION = "audit-code-worker-result/v1alpha1";
@@ -101,7 +101,7 @@ export function resolveHostDispatchCapability(options) {
101
101
  if (options.sessionConfig.host_can_dispatch_subagents !== undefined) {
102
102
  return options.sessionConfig.host_can_dispatch_subagents;
103
103
  }
104
- return optionalBooleanEnv((options.env ?? process.env).AUDIT_CODE_HOST_CAN_DISPATCH) ?? false;
104
+ return optionalBooleanEnv((options.env ?? process.env).AUDIT_CODE_HOST_CAN_DISPATCH) ?? true;
105
105
  }
106
106
  function toBase64Url(value) {
107
107
  return Buffer.from(value, "utf8").toString("base64url");
@@ -228,18 +228,6 @@ function getQuotaProbeMode(argv, sessionConfig) {
228
228
  return raw;
229
229
  return "auto";
230
230
  }
231
- function detectRateLimitError(errorText) {
232
- const lower = errorText.toLowerCase();
233
- return lower.includes("429") || lower.includes("rate limit") || lower.includes("rate_limit");
234
- }
235
- function defaultCooldownUntil(resetAtHeader) {
236
- if (resetAtHeader) {
237
- const t = new Date(resetAtHeader).getTime();
238
- if (!Number.isNaN(t))
239
- return new Date(t).toISOString();
240
- }
241
- return new Date(Date.now() + 60_000).toISOString();
242
- }
243
231
  function resolveRunProviderName(argv, sessionConfig) {
244
232
  return resolveFreshSessionProviderName(getExplicitProvider(argv), sessionConfig);
245
233
  }
@@ -1422,11 +1410,15 @@ async function cmdRunToCompletion(argv) {
1422
1410
  const quotaState = await readQuotaState();
1423
1411
  const providerModelKey = buildProviderModelKey(provider.name, hostModel);
1424
1412
  const quotaStateEntry = quotaState.entries[providerModelKey] ?? null;
1413
+ const allCandidateTasks = buildPendingAuditTasks(bundle);
1414
+ const candidateGroups = chunkArray(allCandidateTasks.slice(0, parallelWorkers * agentBatchSize), agentBatchSize);
1415
+ const slotTokenEstimates = candidateGroups.map((g) => estimateTaskGroupTokens(g));
1425
1416
  const waveSchedule = scheduleWave({
1426
1417
  providerName: resolveFreshSessionProviderName(getExplicitProvider(argv), sessionConfig),
1427
1418
  sessionConfig,
1428
1419
  hostModel,
1429
1420
  requestedConcurrency: parallelWorkers,
1421
+ estimatedSlotTokens: slotTokenEstimates,
1430
1422
  quotaStateEntry,
1431
1423
  });
1432
1424
  const waveSize = waveSchedule.wave_size;
@@ -1438,8 +1430,7 @@ async function cmdRunToCompletion(argv) {
1438
1430
  await new Promise((r) => setTimeout(r, cappedWait));
1439
1431
  }
1440
1432
  }
1441
- const allPendingTasks = buildPendingAuditTasks(bundle);
1442
- const taskGroups = chunkArray(allPendingTasks.slice(0, waveSize * agentBatchSize), agentBatchSize);
1433
+ const taskGroups = candidateGroups.slice(0, waveSize);
1443
1434
  const workerSlots = [];
1444
1435
  for (const rawGroup of taskGroups) {
1445
1436
  const group = await addFileLineCountHints(root, rawGroup);
@@ -1478,7 +1469,7 @@ async function cmdRunToCompletion(argv) {
1478
1469
  pending_audit_tasks_path: slot.pendingTasksPath,
1479
1470
  })), workerSlots.flatMap((slot) => slot.group));
1480
1471
  const parallelStartedAt = new Date().toISOString();
1481
- const launchResults = await Promise.allSettled(workerSlots.map((slot) => provider.launch({
1472
+ const { results: launchResults } = await runSlidingWindow(workerSlots.map((slot) => () => provider.launch({
1482
1473
  repoRoot: root,
1483
1474
  runId: slot.runId,
1484
1475
  obligationId,
@@ -1489,7 +1480,7 @@ async function cmdRunToCompletion(argv) {
1489
1480
  stderrPath: slot.paths.stderrPath,
1490
1481
  uiMode,
1491
1482
  timeoutMs,
1492
- })));
1483
+ })), waveSize);
1493
1484
  const launchErrorsByRunId = new Map();
1494
1485
  for (let index = 0; index < launchResults.length; index++) {
1495
1486
  const outcome = launchResults[index];
@@ -1601,12 +1592,14 @@ async function cmdRunToCompletion(argv) {
1601
1592
  }
1602
1593
  // Record outcome for adaptive learning (best-effort — never blocks dispatch)
1603
1594
  {
1604
- const hasRateLimit = batchErrors.some(detectRateLimitError);
1595
+ const rateLimitResults = batchErrors.map((e) => detectRateLimitError(e));
1596
+ const rateLimitHit = rateLimitResults.find((r) => r.isRateLimited);
1597
+ const retryAfterMs = rateLimitHit?.retryAfterMs ?? null;
1605
1598
  await recordWaveOutcome(providerModelKey, {
1606
1599
  concurrency: workerSlots.length,
1607
- estimated_tokens: waveSize * agentBatchSize * 900,
1608
- outcome: hasRateLimit ? "rate_limited" : batchErrors.length > 0 ? "timeout" : "success",
1609
- cooldown_until: hasRateLimit ? defaultCooldownUntil(null) : null,
1600
+ estimated_tokens: slotTokenEstimates.slice(0, workerSlots.length).reduce((a, b) => a + b, 0),
1601
+ outcome: rateLimitHit ? "rate_limited" : batchErrors.length > 0 ? "timeout" : "success",
1602
+ cooldown_until: rateLimitHit ? computeCooldownUntil(retryAfterMs) : null,
1610
1603
  }, sessionConfig.quota?.empirical_half_life_hours ?? 24).catch(() => undefined);
1611
1604
  }
1612
1605
  if (batchErrors.length > 0) {
@@ -2455,12 +2448,10 @@ async function prepareDispatchArtifacts(params) {
2455
2448
  });
2456
2449
  // Compute and write dispatch-quota.json
2457
2450
  const hostModel = params.hostModel ?? null;
2458
- const avgPacketTokens = plan.length > 0
2459
- ? Math.floor(plan.reduce((s, p) => s + p.complexity.estimated_tokens, 0) / plan.length)
2460
- : 0;
2451
+ const perPacketTokens = plan.map((p) => p.complexity.estimated_tokens);
2461
2452
  const quotaProviderName = resolveFreshSessionProviderName(undefined, sessionConfig);
2462
2453
  const quotaProviderKey = buildProviderModelKey(quotaProviderName, hostModel);
2463
- const quotaState = await readQuotaState().catch(() => ({ version: 1, entries: {} }));
2454
+ const quotaState = await readQuotaState().catch(() => ({ version: 2, entries: {} }));
2464
2455
  const quotaStateEntry = quotaState.entries[quotaProviderKey] ?? null;
2465
2456
  const hostConcurrencyLimit = resolveHostActiveSubagentLimit({
2466
2457
  explicitLimit: params.hostActiveSubagentLimit,
@@ -2471,12 +2462,12 @@ async function prepareDispatchArtifacts(params) {
2471
2462
  sessionConfig,
2472
2463
  hostModel,
2473
2464
  requestedConcurrency: sessionConfig.parallel_workers ?? plan.length,
2474
- estimatedPacketTokens: avgPacketTokens,
2465
+ estimatedSlotTokens: perPacketTokens,
2475
2466
  quotaStateEntry,
2476
2467
  hostConcurrencyLimit,
2477
2468
  });
2478
2469
  const dispatchQuota = {
2479
- contract_version: "audit-code-dispatch-quota/v1alpha1",
2470
+ contract_version: "audit-code-dispatch-quota/v1alpha2",
2480
2471
  run_id: runId,
2481
2472
  model: hostModel,
2482
2473
  resolved_limits: waveSchedule.resolved_limits,
@@ -2486,6 +2477,8 @@ async function prepareDispatchArtifacts(params) {
2486
2477
  wave_size: waveSchedule.wave_size,
2487
2478
  estimated_wave_tokens: waveSchedule.estimated_wave_tokens,
2488
2479
  cooldown_until: waveSchedule.cooldown_until,
2480
+ quota_source_snapshot: waveSchedule.quota_source_snapshot ?? null,
2481
+ backoff_state: null,
2489
2482
  };
2490
2483
  const dispatchQuotaPath = join(runDir, "dispatch-quota.json");
2491
2484
  await writeJsonFile(dispatchQuotaPath, dispatchQuota);
@@ -3212,13 +3205,15 @@ async function cmdQuota(argv) {
3212
3205
  const providerModelKey = buildProviderModelKey(providerName, hostModel);
3213
3206
  const { limits, source, confidence } = resolveLimits({ providerName, sessionConfig, hostModel });
3214
3207
  const probeResult = await probeProvider(providerName, probeMode);
3215
- const quotaState = await readQuotaState().catch(() => ({ version: 1, entries: {} }));
3208
+ const quotaState = await readQuotaState().catch(() => ({ version: 2, entries: {} }));
3216
3209
  const quotaStateEntry = quotaState.entries[providerModelKey] ?? null;
3217
3210
  const halfLifeHours = sessionConfig.quota?.empirical_half_life_hours ?? 24;
3218
3211
  const hostConcurrencyLimit = resolveHostActiveSubagentLimit({
3219
3212
  explicitLimit: getHostMaxActiveSubagents(argv),
3220
3213
  sessionConfig,
3221
3214
  });
3215
+ const quotaSource = new CompositeQuotaSource([new LearnedQuotaSource(halfLifeHours)]);
3216
+ const quotaSourceSnapshot = await quotaSource.queryCurrentUsage(providerModelKey).catch(() => null);
3222
3217
  const waveSchedule = scheduleWave({
3223
3218
  providerName,
3224
3219
  sessionConfig,
@@ -3226,6 +3221,7 @@ async function cmdQuota(argv) {
3226
3221
  requestedConcurrency: sessionConfig.parallel_workers ?? 1,
3227
3222
  quotaStateEntry,
3228
3223
  hostConcurrencyLimit,
3224
+ quotaSourceSnapshot,
3229
3225
  });
3230
3226
  console.log(JSON.stringify({
3231
3227
  provider: providerName,
@@ -3243,6 +3239,7 @@ async function cmdQuota(argv) {
3243
3239
  last_429_at: quotaStateEntry.last_429_at,
3244
3240
  }
3245
3241
  : null,
3242
+ quota_source_snapshot: quotaSourceSnapshot,
3246
3243
  wave_schedule: waveSchedule,
3247
3244
  quota_state_path: getQuotaStatePath(),
3248
3245
  }, null, 2));
@@ -1,6 +1,9 @@
1
1
  import type { AuditTask } from "../types.js";
2
2
  import type { AuditPlanMetrics, ReviewPacket } from "../types/reviewPlanning.js";
3
3
  import type { GraphBundle } from "../types/graph.js";
4
+ export declare const ESTIMATED_TOKENS_PER_LINE = 4;
5
+ export declare const ESTIMATED_PACKET_PROMPT_TOKENS = 900;
6
+ export declare function estimateTaskGroupTokens(tasks: AuditTask[]): number;
4
7
  export interface BuildReviewPacketOptions {
5
8
  graphBundle?: GraphBundle;
6
9
  lineIndex?: Record<string, number>;
@@ -2,8 +2,19 @@ import { createHash } from "node:crypto";
2
2
  import { LENS_ORDER } from "./unitBuilder.js";
3
3
  const DEFAULT_MAX_TASKS_PER_PACKET = 0;
4
4
  const DEFAULT_TARGET_PACKET_LINES = 8000;
5
- const ESTIMATED_TOKENS_PER_LINE = 4;
6
- const ESTIMATED_PACKET_PROMPT_TOKENS = 900;
5
/** Rough number of prompt tokens attributed to each source line. */
export const ESTIMATED_TOKENS_PER_LINE = 4;
/** Fixed token overhead attributed to every packet/worker prompt. */
export const ESTIMATED_PACKET_PROMPT_TOKENS = 900;
/**
 * Estimate the prompt size of one worker slot.
 *
 * The estimate is the fixed prompt overhead plus a per-line cost for every
 * file line count recorded on the tasks' `file_line_counts` maps.
 *
 * @param {Array<{file_line_counts?: Record<string, number>}>} tasks - Tasks in the slot.
 * @returns {number} Estimated tokens; always a finite number.
 */
export function estimateTaskGroupTokens(tasks) {
    let totalLines = 0;
    for (const task of tasks ?? []) {
        const lineCounts = task?.file_line_counts;
        if (!lineCounts) {
            continue;
        }
        for (const count of Object.values(lineCounts)) {
            // A NaN, string or negative count would otherwise turn the whole
            // estimate into NaN (or a concatenated string) for the caller.
            if (typeof count === "number" && Number.isFinite(count) && count > 0) {
                totalLines += count;
            }
        }
    }
    return ESTIMATED_PACKET_PROMPT_TOKENS + totalLines * ESTIMATED_TOKENS_PER_LINE;
}
7
18
  const PACKET_EXPANSION_MIN_CONFIDENCE = 0.65;
8
19
  const HIGH_FAN_DEGREE_THRESHOLD = 12;
9
20
  const HIGH_FAN_EXPANSION_CONFIDENCE = 0.99;
@@ -0,0 +1,7 @@
1
+ import type { QuotaSource, QuotaUsageSnapshot } from "./quotaSource.js";
2
+ export declare class CompositeQuotaSource implements QuotaSource {
3
+ readonly name = "composite";
4
+ private sources;
5
+ constructor(sources: QuotaSource[]);
6
+ queryCurrentUsage(providerModelKey: string): Promise<QuotaUsageSnapshot | null>;
7
+ }
@@ -0,0 +1,20 @@
1
/**
 * Quota source that delegates to an ordered list of other sources and
 * returns the first usable snapshot.
 */
export class CompositeQuotaSource {
    name = "composite";
    sources;
    /**
     * @param {Array<{queryCurrentUsage(key: string): Promise<object|null>}>} sources
     *   Sources to consult, in priority order.
     */
    constructor(sources) {
        this.sources = sources;
    }
    /**
     * Ask each source in turn for a usage snapshot.
     *
     * @param {string} providerModelKey - Key passed through to every source.
     * @returns {Promise<object|null>} First truthy snapshot, or null when
     *   every source failed or had nothing to report.
     */
    async queryCurrentUsage(providerModelKey) {
        for (const candidate of this.sources) {
            let usage = null;
            try {
                usage = await candidate.queryCurrentUsage(providerModelKey);
            }
            catch {
                // A failing source is skipped so the next one can answer.
                continue;
            }
            if (usage) {
                return usage;
            }
        }
        return null;
    }
}
@@ -0,0 +1,6 @@
1
+ import type { RateLimitDetectionResult } from "../errorParsing.js";
2
+ import type { ErrorParser } from "./genericErrorParser.js";
3
+ export declare class ClaudeCodeErrorParser implements ErrorParser {
4
+ readonly name = "claude-code";
5
+ parse(text: string): RateLimitDetectionResult;
6
+ }
@@ -0,0 +1,39 @@
1
/**
 * Rate-limit detector for line-delimited JSON log output.
 *
 * Each line that starts with `{` is parsed as JSON. A line counts as a
 * rate-limit event when its `status_code` is 429, its `type` is
 * `rate_limit_error`, or it is an error-level entry whose `message`
 * mentions a rate limit.
 */
export class ClaudeCodeErrorParser {
    name = "claude-code";
    /**
     * Normalise a retry hint to a positive finite number.
     * Numeric strings are accepted; anything else yields null so that a
     * malformed hint can never leak out as a non-number.
     */
    static #toPositiveNumber(raw) {
        const value = typeof raw === "string" && raw.trim() !== "" ? Number(raw) : raw;
        return typeof value === "number" && Number.isFinite(value) && value > 0 ? value : null;
    }
    /**
     * @param {string} text - Raw output, possibly many lines.
     * @returns {{isRateLimited: boolean, retryAfterMs: number|null, rawMatch: string|null}}
     *   Result for the first rate-limited line, or a negative result.
     */
    parse(text) {
        for (const line of text.split("\n")) {
            const trimmed = line.trim();
            if (!trimmed.startsWith("{")) {
                continue;
            }
            let obj;
            try {
                obj = JSON.parse(trimmed);
            }
            catch {
                // Not valid JSON, skip
                continue;
            }
            const level = obj["level"];
            const type = obj["type"];
            const message = obj["message"] ?? "";
            const statusCode = obj["status_code"];
            // Accept the status both as a number and as its string form.
            const hasStatus429 = statusCode === 429 || statusCode === "429";
            const isRateLimited = hasStatus429 ||
                type === "rate_limit_error" ||
                (level === "error" && /\brate.?limit/i.test(message));
            if (!isRateLimited) {
                continue;
            }
            const explicitMs = ClaudeCodeErrorParser.#toPositiveNumber(obj["retry_after_ms"]);
            const retryAfter = ClaudeCodeErrorParser.#toPositiveNumber(obj["retry_after"]);
            let extractedMs = null;
            if (explicitMs !== null) {
                extractedMs = explicitMs;
            }
            else if (retryAfter !== null) {
                // `retry_after` carries no unit: small values are read as
                // seconds, larger ones as milliseconds.
                extractedMs = retryAfter < 600 ? retryAfter * 1000 : retryAfter;
            }
            return {
                isRateLimited: true,
                retryAfterMs: extractedMs,
                rawMatch: `claude-code-stderr:${statusCode ?? type ?? "rate_limit"}`,
            };
        }
        return { isRateLimited: false, retryAfterMs: null, rawMatch: null };
    }
}
@@ -0,0 +1,9 @@
1
+ import type { RateLimitDetectionResult } from "../errorParsing.js";
2
+ export interface ErrorParser {
3
+ readonly name: string;
4
+ parse(text: string): RateLimitDetectionResult;
5
+ }
6
+ export declare class GenericErrorParser implements ErrorParser {
7
+ readonly name = "generic";
8
+ parse(text: string): RateLimitDetectionResult;
9
+ }
@@ -0,0 +1,7 @@
1
+ import { detectRateLimitError } from "../errorParsing.js";
2
/**
 * Provider-agnostic error parser: it applies the shared
 * `detectRateLimitError` heuristics to the text unchanged.
 */
export class GenericErrorParser {
    name = "generic";
    /**
     * @param {string} text - Raw error output.
     * @returns {object} Whatever `detectRateLimitError` reports for the text.
     */
    parse(text) {
        const detection = detectRateLimitError(text);
        return detection;
    }
}
@@ -0,0 +1,5 @@
1
+ export type { ErrorParser } from "./genericErrorParser.js";
2
+ export { GenericErrorParser } from "./genericErrorParser.js";
3
+ export { ClaudeCodeErrorParser } from "./claudeCodeErrorParser.js";
4
+ import type { ErrorParser } from "./genericErrorParser.js";
5
+ export declare function getErrorParserForProvider(providerName: string): ErrorParser;
@@ -0,0 +1,12 @@
1
+ export { GenericErrorParser } from "./genericErrorParser.js";
2
+ export { ClaudeCodeErrorParser } from "./claudeCodeErrorParser.js";
3
+ import { GenericErrorParser } from "./genericErrorParser.js";
4
+ import { ClaudeCodeErrorParser } from "./claudeCodeErrorParser.js";
5
// Provider name -> factory for its dedicated parser. A Map is used instead of
// a plain object so that names such as "constructor" or "toString" cannot
// resolve to inherited Object.prototype members and be invoked as factories.
const PROVIDER_PARSERS = new Map([
    ["claude-code", () => new ClaudeCodeErrorParser()],
]);
// Shared fallback instance for providers without a dedicated parser.
const genericParser = new GenericErrorParser();
/**
 * Pick the error parser for a provider.
 *
 * @param {string} providerName - Provider identifier, e.g. "claude-code".
 * @returns {{name: string, parse(text: string): object}} A fresh dedicated
 *   parser when one is registered, otherwise the shared generic parser.
 */
export function getErrorParserForProvider(providerName) {
    const factory = PROVIDER_PARSERS.get(providerName);
    return factory ? factory() : genericParser;
}
@@ -0,0 +1,7 @@
1
+ export interface RateLimitDetectionResult {
2
+ isRateLimited: boolean;
3
+ retryAfterMs: number | null;
4
+ rawMatch: string | null;
5
+ }
6
+ export declare function detectRateLimitError(text: string): RateLimitDetectionResult;
7
+ export declare function computeCooldownUntil(retryAfterMs: number | null, defaultMs?: number): string;
@@ -0,0 +1,69 @@
1
// Free-text markers treated as evidence of throttling. They are tried in
// order and the first match becomes the reported `rawMatch`.
const RATE_LIMIT_PATTERNS = [
    /\b429\b/i, // bare status code
    /\btoo many requests\b/i, // status phrase
    /\brate.?limit/i, // "rate limit", "rate_limit", "ratelimit", ...
    /\boverloaded\b/i,
    /\bresource.?exhausted\b/i,
    /\bquota.?exceeded\b/i,
];
9
/**
 * Extract a JSON object embedded in free text.
 *
 * Parsing starts at the first `{`. If the remainder of the text is not valid
 * JSON (for example because a log suffix follows the object), a second
 * attempt is made on the span that ends at the last `}`.
 *
 * @param {string} text - Text that may contain a JSON object.
 * @returns {object|null} The parsed object, or null when none can be parsed.
 */
function tryParseJson(text) {
    const start = text.indexOf("{");
    if (start === -1) {
        return null;
    }
    const parseOrNull = (candidate) => {
        try {
            return JSON.parse(candidate);
        }
        catch {
            return null;
        }
    };
    const fromTail = parseOrNull(text.slice(start));
    if (fromTail !== null) {
        return fromTail;
    }
    const end = text.lastIndexOf("}");
    // Nothing to trim: either there is no closing brace after the opening one,
    // or the text already ended on it and the first attempt covered that span.
    if (end <= start || end === text.length - 1) {
        return null;
    }
    return parseOrNull(text.slice(start, end + 1));
}
20
/**
 * Read a retry delay, in milliseconds, from a parsed error payload.
 *
 * Fields are consulted in priority order and an unusable value falls
 * through to the next field instead of ending the search:
 *   1. `headers["retry-after"]` / `headers["Retry-After"]` - delay in
 *      seconds, or an HTTP-date to wait until.
 *   2. `retry_after` - unit not stated; values below 600 are read as
 *      seconds, larger values as milliseconds.
 *   3. `retry_after_ms` - always milliseconds.
 *
 * @param {object} obj - Parsed error payload.
 * @returns {number|null} Positive delay in milliseconds, or null if absent.
 */
function extractRetryAfterMs(obj) {
    const toPositiveNumber = (raw) => {
        const value = typeof raw === "string" && raw.trim() !== "" ? Number(raw) : raw;
        return typeof value === "number" && Number.isFinite(value) && value > 0 ? value : null;
    };
    const headers = obj["headers"];
    const headerValue = headers?.["retry-after"] ?? headers?.["Retry-After"];
    const headerSeconds = toPositiveNumber(headerValue);
    if (headerSeconds !== null) {
        // The header's numeric form is defined in seconds, whatever its size.
        return headerSeconds * 1000;
    }
    if (typeof headerValue === "string") {
        // Non-numeric header: try the HTTP-date form and wait until that time.
        const waitMs = Date.parse(headerValue) - Date.now();
        if (Number.isFinite(waitMs) && waitMs > 0) {
            return waitMs;
        }
    }
    const bodyDelay = toPositiveNumber(obj["retry_after"]);
    if (bodyDelay !== null) {
        // If the value looks like seconds (< 600), convert to ms
        return bodyDelay < 600 ? bodyDelay * 1000 : bodyDelay;
    }
    // An explicit millisecond field is never rescaled.
    return toPositiveNumber(obj["retry_after_ms"]);
}
34
/**
 * Detect a rate-limit error from a JSON payload embedded in `text`.
 *
 * A payload is rate limited when its `status` is 429 (number or string),
 * or when either its top-level `type` or its nested `error.type` is
 * `rate_limit_error`.
 *
 * @param {string} text - Raw error text.
 * @returns {{isRateLimited: true, retryAfterMs: number|null, rawMatch: string}|null}
 *   Detection result, or null when there is no JSON payload or it is not a
 *   rate-limit error.
 */
function detectFromJson(text) {
    const obj = tryParseJson(text);
    if (!obj) {
        return null;
    }
    const status = obj["status"];
    const type = obj["type"];
    const errorType = obj["error"]?.["type"];
    let matchedBy = null;
    if (status === 429 || status === "429") {
        matchedBy = "status=429";
    }
    else if (type === "rate_limit_error" || errorType === "rate_limit_error") {
        // Report the value that actually matched. The outer `type` can be an
        // unrelated wrapper value when the match came from `error.type`.
        matchedBy = "type=rate_limit_error";
    }
    if (matchedBy === null) {
        return null;
    }
    return {
        isRateLimited: true,
        retryAfterMs: extractRetryAfterMs(obj),
        rawMatch: `json:${matchedBy}`,
    };
}
53
/**
 * Decide whether an error text describes a rate-limit condition.
 *
 * Structured detection via `detectFromJson` is tried first. If it yields
 * nothing, the text is scanned with `RATE_LIMIT_PATTERNS` in order.
 *
 * @param {string} text - Raw error text.
 * @returns {{isRateLimited: boolean, retryAfterMs: number|null, rawMatch: string|null}}
 *   The structured result when available; otherwise a pattern result whose
 *   `retryAfterMs` is always null.
 */
export function detectRateLimitError(text) {
    const structured = detectFromJson(text);
    if (structured) {
        return structured;
    }
    let firstHit = null;
    for (let i = 0; firstHit === null && i < RATE_LIMIT_PATTERNS.length; i += 1) {
        firstHit = RATE_LIMIT_PATTERNS[i].exec(text);
    }
    if (firstHit === null) {
        return { isRateLimited: false, retryAfterMs: null, rawMatch: null };
    }
    return { isRateLimited: true, retryAfterMs: null, rawMatch: firstHit[0] };
}
65
/** Cooldown applied when no usable retry hint is available. */
const DEFAULT_COOLDOWN_MS = 60_000;
/**
 * Compute the ISO-8601 timestamp at which a cooldown ends.
 *
 * @param {number|null} retryAfterMs - Delay hint in milliseconds. Anything
 *   other than a positive finite number is ignored.
 * @param {number} [defaultMs] - Delay used when the hint is ignored. A
 *   non-finite or negative value falls back to the built-in default.
 * @returns {string} ISO timestamp for now + delay. Never throws on bad input.
 */
export function computeCooldownUntil(retryAfterMs, defaultMs = DEFAULT_COOLDOWN_MS) {
    // Largest time value a Date can represent; anything beyond it makes
    // toISOString() throw a RangeError.
    const MAX_DATE_MS = 8.64e15;
    const fallbackMs = typeof defaultMs === "number" && Number.isFinite(defaultMs) && defaultMs >= 0
        ? defaultMs
        : DEFAULT_COOLDOWN_MS;
    const hintUsable = typeof retryAfterMs === "number" && Number.isFinite(retryAfterMs) && retryAfterMs > 0;
    const delayMs = hintUsable ? retryAfterMs : fallbackMs;
    return new Date(Math.min(Date.now() + delayMs, MAX_DATE_MS)).toISOString();
}
@@ -0,0 +1,6 @@
1
+ export declare class FileLockTimeoutError extends Error {
2
+ constructor(lockPath: string);
3
+ }
4
+ export declare function acquireLock(lockPath: string, timeoutMs?: number): Promise<void>;
5
+ export declare function releaseLock(lockPath: string): Promise<void>;
6
+ export declare function withFileLock<T>(lockPath: string, fn: () => Promise<T>, timeoutMs?: number): Promise<T>;
@@ -0,0 +1,64 @@
1
+ import { open, unlink, stat } from "node:fs/promises";
2
// A lock file whose mtime is older than this is considered abandoned.
const STALE_LOCK_MS = 30_000;
// Pause between two attempts to create the lock file.
const RETRY_INTERVAL_MS = 50;
// How long acquireLock keeps trying when the caller gives no timeout.
const DEFAULT_TIMEOUT_MS = 10_000;
/**
 * Raised when a lock could not be acquired before the timeout elapsed.
 */
export class FileLockTimeoutError extends Error {
    name = "FileLockTimeoutError";
    /**
     * @param {string} lockPath - Path of the lock file that stayed held.
     */
    constructor(lockPath) {
        super(`Timed out acquiring lock: ${lockPath}`);
    }
}
11
/**
 * Report whether the lock file was last modified more than
 * STALE_LOCK_MS ago.
 *
 * @param {string} lockPath - Path of the lock file.
 * @returns {Promise<boolean>} True when the file is older than the
 *   threshold; false when it is fresh or cannot be inspected.
 */
async function isLockStale(lockPath) {
    let lockInfo;
    try {
        lockInfo = await stat(lockPath);
    }
    catch {
        // An unreadable or missing lock is never reported as stale.
        return false;
    }
    const ageMs = Date.now() - lockInfo.mtimeMs;
    return ageMs > STALE_LOCK_MS;
}
20
/**
 * Acquire a lock by exclusively creating `lockPath`.
 *
 * The lock file is created with the "wx" flag, so creation fails with
 * EEXIST while another holder's file is present. In that case a stale file
 * is removed and creation retried at once; otherwise the call sleeps
 * RETRY_INTERVAL_MS between attempts until the timeout passes.
 *
 * @param {string} lockPath - Path of the lock file.
 * @param {number} [timeoutMs] - Maximum time to keep trying.
 * @returns {Promise<void>} Resolves once the lock file has been created.
 * @throws {FileLockTimeoutError} When the deadline passes without success.
 * @throws Any error other than EEXIST raised while creating the file.
 */
export async function acquireLock(lockPath, timeoutMs = DEFAULT_TIMEOUT_MS) {
    const giveUpAt = Date.now() + timeoutMs;
    for (;;) {
        let created = false;
        try {
            const handle = await open(lockPath, "wx");
            await handle.close();
            created = true;
        }
        catch (error) {
            if (error.code !== "EEXIST") {
                throw error;
            }
        }
        if (created) {
            return;
        }
        if (await isLockStale(lockPath)) {
            let removed = false;
            try {
                await unlink(lockPath);
                removed = true;
            }
            catch {
                // Another process may have already cleaned it up
            }
            if (removed) {
                // Retry immediately; the stale file is gone.
                continue;
            }
        }
        if (Date.now() >= giveUpAt) {
            throw new FileLockTimeoutError(lockPath);
        }
        await new Promise((wake) => setTimeout(wake, RETRY_INTERVAL_MS));
    }
}
47
/**
 * Release a lock by deleting its lock file.
 *
 * @param {string} lockPath - Path of the lock file.
 * @returns {Promise<void>} Resolves when the file is gone; a file that is
 *   already missing is not an error.
 * @throws Any deletion error other than ENOENT.
 */
export async function releaseLock(lockPath) {
    await unlink(lockPath).catch((error) => {
        if (error.code === "ENOENT") {
            return;
        }
        throw error;
    });
}
56
/**
 * Run `fn` while holding the lock at `lockPath`, releasing it afterwards.
 *
 * @template T
 * @param {string} lockPath - Path of the lock file.
 * @param {() => Promise<T>} fn - Work to perform under the lock.
 * @param {number} [timeoutMs] - Passed through to acquireLock.
 * @returns {Promise<T>} The value `fn` resolved with.
 * @throws Whatever acquireLock or `fn` throws. A release failure is thrown
 *   only when `fn` itself succeeded.
 */
export async function withFileLock(lockPath, fn, timeoutMs) {
    await acquireLock(lockPath, timeoutMs);
    let result;
    try {
        result = await fn();
    }
    catch (fnError) {
        // Still release the lock, but never let a release failure replace the
        // error that actually interrupted the work.
        try {
            await releaseLock(lockPath);
        }
        catch {
            // Deliberately dropped: fnError is the failure callers must see.
        }
        throw fnError;
    }
    await releaseLock(lockPath);
    return result;
}
@@ -1,9 +1,19 @@
1
1
  export { resolveLimits, lookupKnownModel, classifyProvider } from "./limits.js";
2
2
  export type { LimitResolutionResult, ResolveLimitsOptions, ProviderType } from "./limits.js";
3
3
  export { detectHostActiveSubagentLimit, resolveHostActiveSubagentLimit, } from "./hostLimits.js";
4
- export { readQuotaState, writeQuotaState, computeMaxSafeConcurrency, recordWaveOutcome, getQuotaStatePath, decayWeight, applyDecayToEntry, } from "./state.js";
4
+ export { readQuotaState, writeQuotaState, computeMaxSafeConcurrency, recordWaveOutcome, getQuotaStatePath, decayWeight, applyDecayToEntry, computeBackoffCooldownMs, computeBackoffFailureWeight, computeRampUpConcurrency, } from "./state.js";
5
5
  export { scheduleWave, buildProviderModelKey } from "./scheduler.js";
6
6
  export type { ScheduleWaveOptions } from "./scheduler.js";
7
+ export { detectRateLimitError, computeCooldownUntil } from "./errorParsing.js";
8
+ export { acquireLock, releaseLock, withFileLock, FileLockTimeoutError } from "./fileLock.js";
9
+ export { runSlidingWindow } from "./slidingWindow.js";
10
+ export type { SlidingWindowResult } from "./slidingWindow.js";
11
+ export type { RateLimitDetectionResult } from "./errorParsing.js";
7
12
  export { probeProvider } from "./probe.js";
8
13
  export type { ProbeResult } from "./probe.js";
14
+ export type { QuotaSource, QuotaUsageSnapshot } from "./quotaSource.js";
15
+ export type { ErrorParser } from "./errorParsers/index.js";
16
+ export { GenericErrorParser, ClaudeCodeErrorParser, getErrorParserForProvider } from "./errorParsers/index.js";
17
+ export { LearnedQuotaSource } from "./learnedQuotaSource.js";
18
+ export { CompositeQuotaSource } from "./compositeQuotaSource.js";
9
19
  export type { ResolvedLimits, LimitSource, LimitConfidence, HostConcurrencyLimit, HostConcurrencyLimitSource, QuotaState, QuotaStateEntry, ConcurrencyBucket, WaveSchedule, DispatchQuota, ObservedWaveOutcome, } from "./types.js";
@@ -1,5 +1,11 @@
1
1
  export { resolveLimits, lookupKnownModel, classifyProvider } from "./limits.js";
2
2
  export { detectHostActiveSubagentLimit, resolveHostActiveSubagentLimit, } from "./hostLimits.js";
3
- export { readQuotaState, writeQuotaState, computeMaxSafeConcurrency, recordWaveOutcome, getQuotaStatePath, decayWeight, applyDecayToEntry, } from "./state.js";
3
+ export { readQuotaState, writeQuotaState, computeMaxSafeConcurrency, recordWaveOutcome, getQuotaStatePath, decayWeight, applyDecayToEntry, computeBackoffCooldownMs, computeBackoffFailureWeight, computeRampUpConcurrency, } from "./state.js";
4
4
  export { scheduleWave, buildProviderModelKey } from "./scheduler.js";
5
+ export { detectRateLimitError, computeCooldownUntil } from "./errorParsing.js";
6
+ export { acquireLock, releaseLock, withFileLock, FileLockTimeoutError } from "./fileLock.js";
7
+ export { runSlidingWindow } from "./slidingWindow.js";
5
8
  export { probeProvider } from "./probe.js";
9
+ export { GenericErrorParser, ClaudeCodeErrorParser, getErrorParserForProvider } from "./errorParsers/index.js";
10
+ export { LearnedQuotaSource } from "./learnedQuotaSource.js";
11
+ export { CompositeQuotaSource } from "./compositeQuotaSource.js";
@@ -0,0 +1,7 @@
1
+ import type { QuotaSource, QuotaUsageSnapshot } from "./quotaSource.js";
2
+ export declare class LearnedQuotaSource implements QuotaSource {
3
+ readonly name = "learned";
4
+ private halfLifeHours;
5
+ constructor(halfLifeHours?: number);
6
+ queryCurrentUsage(providerModelKey: string): Promise<QuotaUsageSnapshot | null>;
7
+ }
@@ -0,0 +1,25 @@
1
+ import { readQuotaState, computeMaxSafeConcurrency } from "./state.js";
2
/**
 * Quota source that answers from the persisted quota state rather than from
 * a live query.
 */
export class LearnedQuotaSource {
    name = "learned";
    halfLifeHours;
    /**
     * @param {number} [halfLifeHours=24] - Forwarded to
     *   computeMaxSafeConcurrency on every query.
     */
    constructor(halfLifeHours = 24) {
        this.halfLifeHours = halfLifeHours;
    }
    /**
     * Build a usage snapshot from the stored entry for `providerModelKey`.
     *
     * @param {string} providerModelKey - Key into the quota state entries.
     * @returns {Promise<object|null>} Snapshot, or null when no entry exists.
     *   While the entry's cooldown is still in the future the snapshot
     *   reports `remaining_pct: 0` and the cooldown end as `reset_at`.
     */
    async queryCurrentUsage(providerModelKey) {
        const { entries } = await readQuotaState();
        const learned = entries[providerModelKey];
        if (!learned) {
            return null;
        }
        const safeConcurrency = computeMaxSafeConcurrency(learned, this.halfLifeHours);
        const cooldownEndsAt = learned.cooldown_until;
        const coolingDown = cooldownEndsAt != null &&
            new Date(cooldownEndsAt).getTime() > Date.now();
        const snapshot = {
            remaining_pct: null,
            reset_at: null,
            requests_remaining: safeConcurrency,
            tokens_remaining: null,
            captured_at: learned.updated_at,
            source: "learned",
        };
        if (coolingDown) {
            snapshot.remaining_pct = 0;
            snapshot.reset_at = cooldownEndsAt;
        }
        return snapshot;
    }
}
@@ -5,9 +5,6 @@ export interface ProbeResult {
5
5
  /**
6
6
  * Probe a provider to discover its rate limits.
7
7
  *
8
- * Only subprocess-template supports direct probing since it is the only
9
- * provider where the auditor controls the API call. IDE providers
10
- * (claude-code, opencode) select the model internally; their limits come
11
- * from known-model metadata or learned behavior.
8
+ * @deprecated Phase 3A replaces this with the QuotaSource abstraction.
12
9
  */
13
10
  export declare function probeProvider(providerName: string, probeMode?: "auto" | "never" | "force"): Promise<ProbeResult>;
@@ -1,10 +1,7 @@
1
1
  /**
2
2
  * Probe a provider to discover its rate limits.
3
3
  *
4
- * Only subprocess-template supports direct probing since it is the only
5
- * provider where the auditor controls the API call. IDE providers
6
- * (claude-code, opencode) select the model internally; their limits come
7
- * from known-model metadata or learned behavior.
4
+ * @deprecated Phase 3A replaces this with the QuotaSource abstraction.
8
5
  */
9
6
  export async function probeProvider(providerName, probeMode = "auto") {
10
7
  if (probeMode === "never") {
@@ -0,0 +1,12 @@
1
+ export interface QuotaUsageSnapshot {
2
+ remaining_pct: number | null;
3
+ reset_at: string | null;
4
+ requests_remaining: number | null;
5
+ tokens_remaining: number | null;
6
+ captured_at: string;
7
+ source: string;
8
+ }
9
+ export interface QuotaSource {
10
+ readonly name: string;
11
+ queryCurrentUsage(providerModelKey: string): Promise<QuotaUsageSnapshot | null>;
12
+ }
@@ -0,0 +1 @@
1
+ export {};
@@ -1,14 +1,18 @@
1
1
  import type { ResolvedProviderName, SessionConfig } from "../types/sessionConfig.js";
2
2
  import type { HostConcurrencyLimit, QuotaStateEntry, WaveSchedule } from "./types.js";
3
+ import type { QuotaUsageSnapshot } from "./quotaSource.js";
3
4
  export interface ScheduleWaveOptions {
4
5
  providerName: ResolvedProviderName;
5
6
  sessionConfig: SessionConfig;
6
7
  hostModel: string | null;
7
8
  requestedConcurrency: number;
8
- /** Average estimated tokens per packet/worker. Used for TPM budget. */
9
+ /** Per-slot estimated tokens (one entry per worker slot). Used for TPM budget. */
10
+ estimatedSlotTokens?: number[];
11
+ /** @deprecated Use estimatedSlotTokens instead. Average tokens per slot — used as fallback. */
9
12
  estimatedPacketTokens?: number;
10
13
  quotaStateEntry?: QuotaStateEntry | null;
11
14
  hostConcurrencyLimit?: HostConcurrencyLimit | null;
15
+ quotaSourceSnapshot?: QuotaUsageSnapshot | null;
12
16
  }
13
17
  export declare function scheduleWave(options: ScheduleWaveOptions): WaveSchedule;
14
18
  /** Build the state key used for indexing quota-state.json entries. */
@@ -1,7 +1,20 @@
1
1
  import { classifyProvider, resolveLimits } from "./limits.js";
2
- import { computeMaxSafeConcurrency } from "./state.js";
2
+ import { computeMaxSafeConcurrency, computeRampUpConcurrency } from "./state.js";
3
/**
 * Add up the first `n` entries of an array.
 *
 * @param {number[]} sorted - Values to sum from the front; the caller is
 *   responsible for their order.
 * @param {number} n - How many leading entries to include; values beyond
 *   the array length are clamped to it.
 * @returns {number} Sum of the included entries, 0 when none are included.
 */
function sumTopN(sorted, n) {
    const upperBound = Math.min(n, sorted.length);
    let total = 0;
    let index = 0;
    while (index < upperBound) {
        total += sorted[index];
        index += 1;
    }
    return total;
}
3
9
  export function scheduleWave(options) {
4
- const { providerName, sessionConfig, hostModel, requestedConcurrency, estimatedPacketTokens = 0, quotaStateEntry = null, hostConcurrencyLimit = null, } = options;
10
+ const { providerName, sessionConfig, hostModel, requestedConcurrency, estimatedSlotTokens, estimatedPacketTokens = 0, quotaStateEntry = null, hostConcurrencyLimit = null, quotaSourceSnapshot = null, } = options;
11
+ // Descending sort so sumTopN picks the largest slots
12
+ const slotsSorted = estimatedSlotTokens
13
+ ? [...estimatedSlotTokens].sort((a, b) => b - a)
14
+ : null;
15
+ const avgTokens = slotsSorted && slotsSorted.length > 0
16
+ ? Math.floor(slotsSorted.reduce((a, b) => a + b, 0) / slotsSorted.length)
17
+ : estimatedPacketTokens;
5
18
  const quota = sessionConfig.quota ?? {};
6
19
  const applyHostConcurrencyLimit = (waveSize) => {
7
20
  if (hostConcurrencyLimit === null)
@@ -19,7 +32,7 @@ export function scheduleWave(options) {
19
32
  };
20
33
  return {
21
34
  wave_size: waveSize,
22
- estimated_wave_tokens: waveSize * estimatedPacketTokens,
35
+ estimated_wave_tokens: slotsSorted ? sumTopN(slotsSorted, waveSize) : waveSize * avgTokens,
23
36
  cooldown_until: null,
24
37
  confidence: "high",
25
38
  source: "default",
@@ -48,12 +61,25 @@ export function scheduleWave(options) {
48
61
  waveSize = Math.min(waveSize, rpmCap);
49
62
  }
50
63
  // Cap by input tokens-per-minute
51
- if (limits.input_tokens_per_minute != null && estimatedPacketTokens > 0) {
52
- const tpmCap = Math.max(1, Math.floor((limits.input_tokens_per_minute * safetyMargin) / estimatedPacketTokens));
53
- waveSize = Math.min(waveSize, tpmCap);
64
+ if (limits.input_tokens_per_minute != null && avgTokens > 0) {
65
+ const tpmBudget = limits.input_tokens_per_minute * safetyMargin;
66
+ if (slotsSorted && slotsSorted.length > 0) {
67
+ let candidateSize = waveSize;
68
+ while (candidateSize > 1 && sumTopN(slotsSorted, candidateSize) > tpmBudget) {
69
+ candidateSize--;
70
+ }
71
+ waveSize = Math.max(1, candidateSize);
72
+ }
73
+ else {
74
+ const tpmCap = Math.max(1, Math.floor(tpmBudget / avgTokens));
75
+ waveSize = Math.min(waveSize, tpmCap);
76
+ }
54
77
  }
55
78
  if (quotaStateEntry) {
56
- const learnedCap = computeMaxSafeConcurrency(quotaStateEntry, halfLifeHours);
79
+ const rampUp = quota.ramp_up_enabled !== false;
80
+ const learnedCap = rampUp
81
+ ? computeRampUpConcurrency(quotaStateEntry, halfLifeHours)
82
+ : computeMaxSafeConcurrency(quotaStateEntry, halfLifeHours);
57
83
  waveSize = Math.min(waveSize, learnedCap);
58
84
  }
59
85
  else {
@@ -61,22 +87,38 @@ export function scheduleWave(options) {
61
87
  const fallbackCap = providerType === "local"
62
88
  ? quota.unknown_local_concurrency
63
89
  : (quota.unknown_hosted_concurrency ?? 1);
64
- if (typeof fallbackCap === "number" && Number.isFinite(fallbackCap)) {
90
+ if (fallbackCap === "unlimited") {
91
+ // no cap — "unlimited" intentionally skips clamping
92
+ }
93
+ else if (typeof fallbackCap === "number" && Number.isFinite(fallbackCap)) {
65
94
  waveSize = Math.min(waveSize, Math.max(1, Math.floor(fallbackCap)));
66
95
  }
67
96
  }
68
97
  }
98
+ // Apply real-time quota source data if available
99
+ if (quotaSourceSnapshot && !cooldownUntil) {
100
+ if (quotaSourceSnapshot.remaining_pct != null && quotaSourceSnapshot.remaining_pct < 0.1) {
101
+ waveSize = 1;
102
+ if (quotaSourceSnapshot.reset_at) {
103
+ cooldownUntil = quotaSourceSnapshot.reset_at;
104
+ }
105
+ }
106
+ else if (quotaSourceSnapshot.remaining_pct != null && quotaSourceSnapshot.remaining_pct < 0.3) {
107
+ waveSize = Math.min(waveSize, Math.max(1, Math.floor(waveSize * 0.5)));
108
+ }
109
+ }
69
110
  waveSize = applyHostConcurrencyLimit(waveSize);
70
111
  waveSize = Math.max(1, waveSize);
71
112
  return {
72
113
  wave_size: waveSize,
73
- estimated_wave_tokens: waveSize * estimatedPacketTokens,
114
+ estimated_wave_tokens: slotsSorted ? sumTopN(slotsSorted, waveSize) : waveSize * avgTokens,
74
115
  cooldown_until: cooldownUntil,
75
116
  confidence,
76
117
  source,
77
118
  resolved_limits: limits,
78
119
  host_concurrency_limit: hostConcurrencyLimit,
79
120
  model: hostModel,
121
+ quota_source_snapshot: quotaSourceSnapshot,
80
122
  };
81
123
  }
82
124
  /** Build the state key used for indexing quota-state.json entries. */
@@ -0,0 +1,4 @@
1
/** Aggregate outcome of a sliding-window run. */
export interface SlidingWindowResult<T> {
    /** Settled outcomes; the implementation stores each one at the index of the task that produced it. */
    results: Array<PromiseSettledResult<T>>;
}
/**
 * Runs the given task factories with a bounded number in flight at once.
 *
 * @param tasks Factories that each start one unit of work when called.
 * @param concurrency Upper bound on the number of tasks started in the initial batch.
 * @param onComplete Optional callback invoked with the task index and its settled result.
 * @returns A promise for the collected settled results.
 */
export declare function runSlidingWindow<T>(tasks: (() => Promise<T>)[], concurrency: number, onComplete?: (index: number, result: PromiseSettledResult<T>) => void): Promise<SlidingWindowResult<T>>;
@@ -0,0 +1,28 @@
1
/**
 * Runs `tasks` with a bounded number in flight, starting the next pending task
 * as soon as a running one settles.
 *
 * Task failures never reject the returned promise: each outcome is recorded as
 * a `{ status: "fulfilled", value }` or `{ status: "rejected", reason }` object
 * at the same index as the task that produced it.
 *
 * @param {Array<() => Promise<*>>} tasks - Task factories; each is invoked once.
 * @param {number} concurrency - Maximum number of tasks running at the same
 *   time. Fractions are floored, and values that are not a number or are below
 *   1 are treated as 1, so every task is always executed.
 * @param {(index: number, result: object) => void} [onComplete] - Called right
 *   after each task settles. An exception thrown here is not caught and
 *   rejects the returned promise.
 * @returns {Promise<{ results: object[] }>} Settled outcomes in task order.
 */
export async function runSlidingWindow(tasks, concurrency, onComplete) {
    const results = new Array(tasks.length);
    // Normalise the lane count: previously 0 / negative / NaN started no lanes
    // and returned a sparse array, and a fractional value started one too many.
    const requested = Math.floor(Number(concurrency));
    const laneCount = Math.min(Number.isNaN(requested) ? 1 : Math.max(1, requested), tasks.length);
    let nextIndex = 0;
    // Each lane keeps claiming the next unstarted index until none remain.
    // A loop is used instead of recursion so no promise chain grows per task.
    async function runLane() {
        while (nextIndex < tasks.length) {
            const index = nextIndex++;
            let result;
            try {
                const value = await tasks[index]();
                result = { status: "fulfilled", value };
            }
            catch (reason) {
                result = { status: "rejected", reason };
            }
            results[index] = result;
            onComplete?.(index, result);
        }
    }
    await Promise.all(Array.from({ length: laneCount }, () => runLane()));
    return { results };
}
@@ -9,4 +9,7 @@ export declare function writeQuotaState(state: QuotaState): Promise<void>;
9
9
  * exceeds failure evidence, with a minimum of 1.
10
10
  */
11
11
  export declare function computeMaxSafeConcurrency(entry: QuotaStateEntry, halfLifeHours: number, maxToCheck?: number): number;
12
+ export declare function computeRampUpConcurrency(entry: QuotaStateEntry, halfLifeHours: number, maxToCheck?: number): number;
13
+ export declare function computeBackoffCooldownMs(consecutive429Count: number): number;
14
+ export declare function computeBackoffFailureWeight(consecutive429Count: number): number;
12
15
  export declare function recordWaveOutcome(providerModelKey: string, outcome: ObservedWaveOutcome, halfLifeHours: number): Promise<void>;
@@ -1,6 +1,7 @@
1
1
  import { mkdir, readFile, writeFile } from "node:fs/promises";
2
2
  import { homedir } from "node:os";
3
3
  import { join } from "node:path";
4
+ import { withFileLock } from "./fileLock.js";
4
5
  const STATE_DIR = join(homedir(), ".audit-code");
5
6
  const STATE_PATH = join(STATE_DIR, "quota-state.json");
6
7
  // A bucket needs at least this much success weight before we trust it.
@@ -27,31 +28,38 @@ export function applyDecayToEntry(entry, halfLifeHours) {
27
28
  return { ...entry, buckets: decayed };
28
29
  }
29
30
  function isQuotaState(value) {
30
- return (value !== null &&
31
- typeof value === "object" &&
32
- !Array.isArray(value) &&
33
- value["version"] === 1 &&
34
- typeof value["entries"] === "object");
31
+ if (value === null || typeof value !== "object" || Array.isArray(value))
32
+ return false;
33
+ const obj = value;
34
+ const version = obj["version"];
35
+ return (version === 1 || version === 2) && typeof obj["entries"] === "object";
35
36
  }
36
37
  export async function readQuotaState() {
37
38
  try {
38
39
  const raw = await readFile(STATE_PATH, "utf8");
39
40
  const parsed = JSON.parse(raw);
40
- if (isQuotaState(parsed))
41
+ if (isQuotaState(parsed)) {
42
+ if (parsed.version === 1) {
43
+ for (const entry of Object.values(parsed.entries)) {
44
+ entry.consecutive_429_count ??= 0;
45
+ }
46
+ }
41
47
  return parsed;
42
- process.stderr.write(`[quota] ignoring invalid quota state at ${STATE_PATH}: expected { version: 1, entries: object }\n`);
48
+ }
49
+ process.stderr.write(`[quota] ignoring invalid quota state at ${STATE_PATH}: expected { version: 1|2, entries: object }\n`);
43
50
  }
44
51
  catch (error) {
45
52
  if (error.code === "ENOENT") {
46
- return { version: 1, entries: {} };
53
+ return { version: 2, entries: {} };
47
54
  }
48
55
  process.stderr.write(`[quota] ignoring unreadable quota state at ${STATE_PATH}: ${error instanceof Error ? error.message : String(error)}\n`);
49
56
  }
50
- return { version: 1, entries: {} };
57
+ return { version: 2, entries: {} };
51
58
  }
52
59
  export async function writeQuotaState(state) {
53
60
  await mkdir(STATE_DIR, { recursive: true });
54
- await writeFile(STATE_PATH, JSON.stringify(state, null, 2) + "\n", "utf8");
61
+ const normalized = { ...state, version: 2 };
62
+ await writeFile(STATE_PATH, JSON.stringify(normalized, null, 2) + "\n", "utf8");
55
63
  }
56
64
  /**
57
65
  * Returns the highest concurrency level for which decayed success evidence
@@ -74,14 +82,39 @@ export function computeMaxSafeConcurrency(entry, halfLifeHours, maxToCheck = 32)
74
82
  }
75
83
  return maxSafe;
76
84
  }
85
// Decayed success weight the current safe bucket needs before probing one level higher.
const RAMP_UP_MIN_SUCCESSES = 2;
/**
 * Like `computeMaxSafeConcurrency`, but allows a one-step probe above it.
 *
 * Returns `maxSafe + 1` when the decayed bucket for the max-safe level has a
 * success weight of at least RAMP_UP_MIN_SUCCESSES and a failure weight of
 * exactly 0; otherwise returns the max-safe level unchanged.
 *
 * @param {object} entry - Quota-state entry for one provider/model key.
 * @param {number} halfLifeHours - Half-life passed through to the decay helpers.
 * @param {number} [maxToCheck=32] - Forwarded to `computeMaxSafeConcurrency`.
 * @returns {number} Concurrency level to use as the learned cap.
 */
export function computeRampUpConcurrency(entry, halfLifeHours, maxToCheck = 32) {
    const safeLevel = computeMaxSafeConcurrency(entry, halfLifeHours, maxToCheck);
    const { buckets } = applyDecayToEntry(entry, halfLifeHours);
    const evidence = buckets[String(safeLevel)];
    if (!evidence) {
        return safeLevel;
    }
    const cleanRecord = evidence.success_weight >= RAMP_UP_MIN_SUCCESSES && evidence.failure_weight === 0;
    return cleanRecord ? safeLevel + 1 : safeLevel;
}
77
97
  function blankEntry() {
78
98
  return { updated_at: new Date().toISOString(), buckets: {}, cooldown_until: null, last_429_at: null };
79
99
  }
100
// Cooldown after the first rate-limit hit (one minute).
const BASE_COOLDOWN_MS = 60_000;
// Upper bound on any computed cooldown (fifteen minutes).
const MAX_COOLDOWN_MS = 15 * 60_000;
/**
 * Exponential backoff: the cooldown starts at BASE_COOLDOWN_MS and doubles for
 * every consecutive 429 after the first, capped at MAX_COOLDOWN_MS.
 *
 * @param {number} consecutive429Count - Length of the current 429 streak;
 *   values of 1 or less all yield the base cooldown.
 * @returns {number} Cooldown duration in milliseconds.
 */
export function computeBackoffCooldownMs(consecutive429Count) {
    const doublings = Math.max(0, consecutive429Count - 1);
    const uncapped = BASE_COOLDOWN_MS * 2 ** doublings;
    return uncapped < MAX_COOLDOWN_MS ? uncapped : Math.min(uncapped, MAX_COOLDOWN_MS);
}
106
/**
 * Failure weight to record for a rate-limit hit: 1.0 for the first hit in a
 * streak, plus 0.5 for each additional consecutive hit.
 *
 * @param {number} consecutive429Count - Length of the current 429 streak;
 *   values of 1 or less all yield 1.0.
 * @returns {number} Weight to add to the affected buckets' failure evidence.
 */
export function computeBackoffFailureWeight(consecutive429Count) {
    const repeats = Math.max(0, consecutive429Count - 1);
    return 1.0 + 0.5 * repeats;
}
109
// Lock file that sits next to the quota-state file.
const LOCK_PATH = `${STATE_PATH}.lock`;
/**
 * Records one observed wave outcome for a provider/model key by running
 * `recordWaveOutcomeUnsafe` inside `withFileLock` on LOCK_PATH.
 * NOTE(review): presumably the lock serialises concurrent read-modify-write
 * cycles on the state file; confirm against `withFileLock` in fileLock.js.
 *
 * @param {string} providerModelKey - Key of the quota-state entry to update.
 * @param {object} outcome - Observed wave outcome.
 * @param {number} halfLifeHours - Half-life forwarded to the update.
 * @returns {Promise<void>}
 */
export async function recordWaveOutcome(providerModelKey, outcome, halfLifeHours) {
    const applyUpdate = () => recordWaveOutcomeUnsafe(providerModelKey, outcome, halfLifeHours);
    await withFileLock(LOCK_PATH, applyUpdate);
}
113
+ async function recordWaveOutcomeUnsafe(providerModelKey, outcome, halfLifeHours) {
81
114
  const state = await readQuotaState();
82
115
  const entry = applyDecayToEntry(state.entries[providerModelKey] ?? blankEntry(), halfLifeHours);
83
116
  if (outcome.outcome === "success") {
84
- // Success at N proves 1..N are all safe
117
+ entry.consecutive_429_count = 0;
85
118
  for (let n = 1; n <= outcome.concurrency; n++) {
86
119
  const bucket = entry.buckets[String(n)] ?? { success_weight: 0, failure_weight: 0 };
87
120
  bucket.success_weight += 1.0;
@@ -89,13 +122,23 @@ export async function recordWaveOutcome(providerModelKey, outcome, halfLifeHours
89
122
  }
90
123
  }
91
124
  else {
125
+ const prev429Count = entry.consecutive_429_count ?? 0;
126
+ const new429Count = outcome.outcome === "rate_limited" ? prev429Count + 1 : prev429Count;
127
+ entry.consecutive_429_count = new429Count;
92
128
  entry.last_429_at = new Date().toISOString();
93
- if (outcome.cooldown_until)
129
+ if (outcome.outcome === "rate_limited" && new429Count > 0) {
130
+ const backoffMs = computeBackoffCooldownMs(new429Count);
131
+ entry.cooldown_until = new Date(Date.now() + backoffMs).toISOString();
132
+ }
133
+ else if (outcome.cooldown_until) {
94
134
  entry.cooldown_until = outcome.cooldown_until;
95
- // Failure at N marks N and above as unsafe
135
+ }
136
+ const failureWeight = outcome.outcome === "rate_limited"
137
+ ? computeBackoffFailureWeight(new429Count)
138
+ : 1.0;
96
139
  for (let n = outcome.concurrency; n <= outcome.concurrency + 4; n++) {
97
140
  const bucket = entry.buckets[String(n)] ?? { success_weight: 0, failure_weight: 0 };
98
- bucket.failure_weight += 1.0;
141
+ bucket.failure_weight += failureWeight;
99
142
  entry.buckets[String(n)] = bucket;
100
143
  }
101
144
  }
@@ -22,9 +22,10 @@ export interface QuotaStateEntry {
22
22
  buckets: Record<string, ConcurrencyBucket>;
23
23
  cooldown_until: string | null;
24
24
  last_429_at: string | null;
25
+ consecutive_429_count?: number;
25
26
  }
26
27
  export interface QuotaState {
27
- version: 1;
28
+ version: 1 | 2;
28
29
  entries: Record<string, QuotaStateEntry>;
29
30
  }
30
31
  export interface WaveSchedule {
@@ -36,9 +37,15 @@ export interface WaveSchedule {
36
37
  resolved_limits: ResolvedLimits;
37
38
  host_concurrency_limit: HostConcurrencyLimit | null;
38
39
  model: string | null;
40
+ quota_source_snapshot?: import("./quotaSource.js").QuotaUsageSnapshot | null;
41
+ }
42
+ export interface BackoffState {
43
+ consecutive_429_count: number;
44
+ current_cooldown_ms: number;
45
+ current_failure_weight: number;
39
46
  }
40
47
  export interface DispatchQuota {
41
- contract_version: "audit-code-dispatch-quota/v1alpha1";
48
+ contract_version: "audit-code-dispatch-quota/v1alpha1" | "audit-code-dispatch-quota/v1alpha2";
42
49
  run_id: string;
43
50
  model: string | null;
44
51
  resolved_limits: ResolvedLimits;
@@ -48,6 +55,8 @@ export interface DispatchQuota {
48
55
  wave_size: number;
49
56
  estimated_wave_tokens: number;
50
57
  cooldown_until: string | null;
58
+ quota_source_snapshot?: import("./quotaSource.js").QuotaUsageSnapshot | null;
59
+ backoff_state?: BackoffState | null;
51
60
  }
52
61
  export interface ObservedWaveOutcome {
53
62
  concurrency: number;
@@ -44,6 +44,8 @@ export interface QuotaConfig {
44
44
  reserved_output_tokens?: number;
45
45
  /** Half-life of empirical success/failure evidence in hours (default: 24). */
46
46
  empirical_half_life_hours?: number;
47
+ /** Allow the scheduler to try concurrency maxSafe+1 after consecutive successes (default: true). */
48
+ ramp_up_enabled?: boolean;
47
49
  /** Hard host ceiling for simultaneously active conversation subagents. */
48
50
  host_active_subagent_limit?: number;
49
51
  /** Per-model overrides keyed by "provider/model". */
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "auditor-lambda",
3
- "version": "0.3.33",
3
+ "version": "0.3.34",
4
4
  "private": false,
5
5
  "description": "Portable hybrid code-auditing framework for arbitrary repositories.",
6
6
  "type": "module",
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "$schema": "http://json-schema.org/draft-07/schema#",
3
- "$id": "audit-code-dispatch-quota/v1alpha1",
3
+ "$id": "audit-code-dispatch-quota/v1alpha2",
4
4
  "title": "DispatchQuota",
5
5
  "description": "Quota schedule for a prepare-dispatch run. Written beside dispatch-plan.json. Hosts must launch at most wave_size packets per wave, then re-read this file before the next wave to pick up any updated limits.",
6
6
  "type": "object",
@@ -20,7 +20,7 @@
20
20
  "properties": {
21
21
  "contract_version": {
22
22
  "type": "string",
23
- "const": "audit-code-dispatch-quota/v1alpha1"
23
+ "enum": ["audit-code-dispatch-quota/v1alpha1", "audit-code-dispatch-quota/v1alpha2"]
24
24
  },
25
25
  "run_id": {
26
26
  "type": "string",
@@ -97,6 +97,27 @@
97
97
  "type": ["string", "null"],
98
98
  "format": "date-time",
99
99
  "description": "If non-null, the host should wait until this timestamp before launching the next wave."
100
+ },
101
+ "quota_source_snapshot": {
102
+ "type": ["object", "null"],
103
+ "description": "Real-time usage snapshot from a QuotaSource, if available.",
104
+ "properties": {
105
+ "remaining_pct": { "type": ["number", "null"] },
106
+ "reset_at": { "type": ["string", "null"], "format": "date-time" },
107
+ "requests_remaining": { "type": ["integer", "null"] },
108
+ "tokens_remaining": { "type": ["integer", "null"] },
109
+ "captured_at": { "type": "string", "format": "date-time" },
110
+ "source": { "type": "string" }
111
+ }
112
+ },
113
+ "backoff_state": {
114
+ "type": ["object", "null"],
115
+ "description": "Exponential backoff state for repeated rate-limit errors.",
116
+ "properties": {
117
+ "consecutive_429_count": { "type": "integer", "minimum": 0 },
118
+ "current_cooldown_ms": { "type": "integer", "minimum": 0 },
119
+ "current_failure_weight": { "type": "number", "minimum": 0 }
120
+ }
100
121
  }
101
122
  }
102
123
  }