npm - auditor-lambda - Versions diffs - 0.3.33 → 0.3.34 - Mend

auditor-lambda 0.3.33 → 0.3.34

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33)

package/dist/cli.js +27 -30
package/dist/orchestrator/reviewPackets.d.ts +3 -0
package/dist/orchestrator/reviewPackets.js +13 -2
package/dist/quota/compositeQuotaSource.d.ts +7 -0
package/dist/quota/compositeQuotaSource.js +20 -0
package/dist/quota/errorParsers/claudeCodeErrorParser.d.ts +6 -0
package/dist/quota/errorParsers/claudeCodeErrorParser.js +39 -0
package/dist/quota/errorParsers/genericErrorParser.d.ts +9 -0
package/dist/quota/errorParsers/genericErrorParser.js +7 -0
package/dist/quota/errorParsers/index.d.ts +5 -0
package/dist/quota/errorParsers/index.js +12 -0
package/dist/quota/errorParsing.d.ts +7 -0
package/dist/quota/errorParsing.js +69 -0
package/dist/quota/fileLock.d.ts +6 -0
package/dist/quota/fileLock.js +64 -0
package/dist/quota/index.d.ts +11 -1
package/dist/quota/index.js +7 -1
package/dist/quota/learnedQuotaSource.d.ts +7 -0
package/dist/quota/learnedQuotaSource.js +25 -0
package/dist/quota/probe.d.ts +1 -4
package/dist/quota/probe.js +1 -4
package/dist/quota/quotaSource.d.ts +12 -0
package/dist/quota/quotaSource.js +1 -0
package/dist/quota/scheduler.d.ts +5 -1
package/dist/quota/scheduler.js +51 -9
package/dist/quota/slidingWindow.d.ts +4 -0
package/dist/quota/slidingWindow.js +28 -0
package/dist/quota/state.d.ts +3 -0
package/dist/quota/state.js +57 -14
package/dist/quota/types.d.ts +11 -2
package/dist/types/sessionConfig.d.ts +2 -0
package/package.json +1 -1
package/schemas/dispatch_quota.schema.json +23 -2

package/dist/cli.js CHANGED Viewed

@@ -28,11 +28,11 @@ import { buildAuditCodeHandoff, writeAuditCodeHandoffArtifacts, } from "./superv
 import { getSessionConfigPath, loadSessionConfig, readSessionConfigFile, } from "./supervisor/sessionConfig.js";
 import { clearDispatchFiles, buildRunId, ensureSupervisorDirs, getRunPaths, writeDispatchBatchFiles, writeWorkerTaskFiles, } from "./io/runArtifacts.js";
 import { renderWorkerPrompt } from "./prompts/renderWorkerPrompt.js";
-import { buildReviewPackets, orderTasksForPacketReview, } from "./orchestrator/reviewPackets.js";
+import { buildReviewPackets, orderTasksForPacketReview, estimateTaskGroupTokens, } from "./orchestrator/reviewPackets.js";
 import { buildFileAnchorSummary, } from "./orchestrator/fileAnchors.js";
 import { LOCAL_SUBPROCESS_PROVIDER_NAME } from "./providers/constants.js";
 import { runAuditCodeMcpServer } from "./mcp/server.js";
-import { scheduleWave, buildProviderModelKey, readQuotaState, recordWaveOutcome, resolveLimits, resolveHostActiveSubagentLimit, probeProvider, computeMaxSafeConcurrency, getQuotaStatePath, } from "./quota/index.js";
+import { scheduleWave, buildProviderModelKey, readQuotaState, recordWaveOutcome, resolveLimits, resolveHostActiveSubagentLimit, probeProvider, computeMaxSafeConcurrency, getQuotaStatePath, detectRateLimitError, computeCooldownUntil, runSlidingWindow, LearnedQuotaSource, CompositeQuotaSource, } from "./quota/index.js";
 const packageRoot = resolve(dirname(fileURLToPath(import.meta.url)), "..");
 const ADVANCE_AUDIT_CONTRACT_VERSION = "audit-code/v1alpha1";
 const WORKER_RESULT_CONTRACT_VERSION = "audit-code-worker-result/v1alpha1";
@@ -101,7 +101,7 @@ export function resolveHostDispatchCapability(options) {
     if (options.sessionConfig.host_can_dispatch_subagents !== undefined) {
         return options.sessionConfig.host_can_dispatch_subagents;
     }
-    return optionalBooleanEnv((options.env ?? process.env).AUDIT_CODE_HOST_CAN_DISPATCH) ?? false;
+    return optionalBooleanEnv((options.env ?? process.env).AUDIT_CODE_HOST_CAN_DISPATCH) ?? true;
 }
 function toBase64Url(value) {
     return Buffer.from(value, "utf8").toString("base64url");
@@ -228,18 +228,6 @@ function getQuotaProbeMode(argv, sessionConfig) {
         return raw;
     return "auto";
 }
-function detectRateLimitError(errorText) {
-    const lower = errorText.toLowerCase();
-    return lower.includes("429") || lower.includes("rate limit") || lower.includes("rate_limit");
-}
-function defaultCooldownUntil(resetAtHeader) {
-    if (resetAtHeader) {
-        const t = new Date(resetAtHeader).getTime();
-        if (!Number.isNaN(t))
-            return new Date(t).toISOString();
-    }
-    return new Date(Date.now() + 60_000).toISOString();
-}
 function resolveRunProviderName(argv, sessionConfig) {
     return resolveFreshSessionProviderName(getExplicitProvider(argv), sessionConfig);
 }
@@ -1422,11 +1410,15 @@ async function cmdRunToCompletion(argv) {
             const quotaState = await readQuotaState();
             const providerModelKey = buildProviderModelKey(provider.name, hostModel);
             const quotaStateEntry = quotaState.entries[providerModelKey] ?? null;
+            const allCandidateTasks = buildPendingAuditTasks(bundle);
+            const candidateGroups = chunkArray(allCandidateTasks.slice(0, parallelWorkers * agentBatchSize), agentBatchSize);
+            const slotTokenEstimates = candidateGroups.map((g) => estimateTaskGroupTokens(g));
             const waveSchedule = scheduleWave({
                 providerName: resolveFreshSessionProviderName(getExplicitProvider(argv), sessionConfig),
                 sessionConfig,
                 hostModel,
                 requestedConcurrency: parallelWorkers,
+                estimatedSlotTokens: slotTokenEstimates,
                 quotaStateEntry,
             });
             const waveSize = waveSchedule.wave_size;
@@ -1438,8 +1430,7 @@ async function cmdRunToCompletion(argv) {
                     await new Promise((r) => setTimeout(r, cappedWait));
                 }
             }
-            const allPendingTasks = buildPendingAuditTasks(bundle);
-            const taskGroups = chunkArray(allPendingTasks.slice(0, waveSize * agentBatchSize), agentBatchSize);
+            const taskGroups = candidateGroups.slice(0, waveSize);
             const workerSlots = [];
             for (const rawGroup of taskGroups) {
                 const group = await addFileLineCountHints(root, rawGroup);
@@ -1478,7 +1469,7 @@ async function cmdRunToCompletion(argv) {
                 pending_audit_tasks_path: slot.pendingTasksPath,
             })), workerSlots.flatMap((slot) => slot.group));
             const parallelStartedAt = new Date().toISOString();
-            const launchResults = await Promise.allSettled(workerSlots.map((slot) => provider.launch({
+            const { results: launchResults } = await runSlidingWindow(workerSlots.map((slot) => () => provider.launch({
                 repoRoot: root,
                 runId: slot.runId,
                 obligationId,
@@ -1489,7 +1480,7 @@ async function cmdRunToCompletion(argv) {
                 stderrPath: slot.paths.stderrPath,
                 uiMode,
                 timeoutMs,
-            })));
+            })), waveSize);
             const launchErrorsByRunId = new Map();
             for (let index = 0; index < launchResults.length; index++) {
                 const outcome = launchResults[index];
@@ -1601,12 +1592,14 @@ async function cmdRunToCompletion(argv) {
             }
             // Record outcome for adaptive learning (best-effort — never blocks dispatch)
             {
-                const hasRateLimit = batchErrors.some(detectRateLimitError);
+                const rateLimitResults = batchErrors.map((e) => detectRateLimitError(e));
+                const rateLimitHit = rateLimitResults.find((r) => r.isRateLimited);
+                const retryAfterMs = rateLimitHit?.retryAfterMs ?? null;
                 await recordWaveOutcome(providerModelKey, {
                     concurrency: workerSlots.length,
-                    estimated_tokens: waveSize * agentBatchSize * 900,
-                    outcome: hasRateLimit ? "rate_limited" : batchErrors.length > 0 ? "timeout" : "success",
-                    cooldown_until: hasRateLimit ? defaultCooldownUntil(null) : null,
+                    estimated_tokens: slotTokenEstimates.slice(0, workerSlots.length).reduce((a, b) => a + b, 0),
+                    outcome: rateLimitHit ? "rate_limited" : batchErrors.length > 0 ? "timeout" : "success",
+                    cooldown_until: rateLimitHit ? computeCooldownUntil(retryAfterMs) : null,
                 }, sessionConfig.quota?.empirical_half_life_hours ?? 24).catch(() => undefined);
             }
             if (batchErrors.length > 0) {
@@ -2455,12 +2448,10 @@ async function prepareDispatchArtifacts(params) {
     });
     // Compute and write dispatch-quota.json
     const hostModel = params.hostModel ?? null;
-    const avgPacketTokens = plan.length > 0
-        ? Math.floor(plan.reduce((s, p) => s + p.complexity.estimated_tokens, 0) / plan.length)
-        : 0;
+    const perPacketTokens = plan.map((p) => p.complexity.estimated_tokens);
     const quotaProviderName = resolveFreshSessionProviderName(undefined, sessionConfig);
     const quotaProviderKey = buildProviderModelKey(quotaProviderName, hostModel);
-    const quotaState = await readQuotaState().catch(() => ({ version: 1, entries: {} }));
+    const quotaState = await readQuotaState().catch(() => ({ version: 2, entries: {} }));
     const quotaStateEntry = quotaState.entries[quotaProviderKey] ?? null;
     const hostConcurrencyLimit = resolveHostActiveSubagentLimit({
         explicitLimit: params.hostActiveSubagentLimit,
@@ -2471,12 +2462,12 @@ async function prepareDispatchArtifacts(params) {
         sessionConfig,
         hostModel,
         requestedConcurrency: sessionConfig.parallel_workers ?? plan.length,
-        estimatedPacketTokens: avgPacketTokens,
+        estimatedSlotTokens: perPacketTokens,
         quotaStateEntry,
         hostConcurrencyLimit,
     });
     const dispatchQuota = {
-        contract_version: "audit-code-dispatch-quota/v1alpha1",
+        contract_version: "audit-code-dispatch-quota/v1alpha2",
         run_id: runId,
         model: hostModel,
         resolved_limits: waveSchedule.resolved_limits,
@@ -2486,6 +2477,8 @@ async function prepareDispatchArtifacts(params) {
         wave_size: waveSchedule.wave_size,
         estimated_wave_tokens: waveSchedule.estimated_wave_tokens,
         cooldown_until: waveSchedule.cooldown_until,
+        quota_source_snapshot: waveSchedule.quota_source_snapshot ?? null,
+        backoff_state: null,
     };
     const dispatchQuotaPath = join(runDir, "dispatch-quota.json");
     await writeJsonFile(dispatchQuotaPath, dispatchQuota);
@@ -3212,13 +3205,15 @@ async function cmdQuota(argv) {
     const providerModelKey = buildProviderModelKey(providerName, hostModel);
     const { limits, source, confidence } = resolveLimits({ providerName, sessionConfig, hostModel });
     const probeResult = await probeProvider(providerName, probeMode);
-    const quotaState = await readQuotaState().catch(() => ({ version: 1, entries: {} }));
+    const quotaState = await readQuotaState().catch(() => ({ version: 2, entries: {} }));
     const quotaStateEntry = quotaState.entries[providerModelKey] ?? null;
     const halfLifeHours = sessionConfig.quota?.empirical_half_life_hours ?? 24;
     const hostConcurrencyLimit = resolveHostActiveSubagentLimit({
         explicitLimit: getHostMaxActiveSubagents(argv),
         sessionConfig,
     });
+    const quotaSource = new CompositeQuotaSource([new LearnedQuotaSource(halfLifeHours)]);
+    const quotaSourceSnapshot = await quotaSource.queryCurrentUsage(providerModelKey).catch(() => null);
     const waveSchedule = scheduleWave({
         providerName,
         sessionConfig,
@@ -3226,6 +3221,7 @@ async function cmdQuota(argv) {
         requestedConcurrency: sessionConfig.parallel_workers ?? 1,
         quotaStateEntry,
         hostConcurrencyLimit,
+        quotaSourceSnapshot,
     });
     console.log(JSON.stringify({
         provider: providerName,
@@ -3243,6 +3239,7 @@ async function cmdQuota(argv) {
                 last_429_at: quotaStateEntry.last_429_at,
             }
             : null,
+        quota_source_snapshot: quotaSourceSnapshot,
         wave_schedule: waveSchedule,
         quota_state_path: getQuotaStatePath(),
     }, null, 2));

package/dist/orchestrator/reviewPackets.d.ts CHANGED Viewed

@@ -1,6 +1,9 @@
 import type { AuditTask } from "../types.js";
 import type { AuditPlanMetrics, ReviewPacket } from "../types/reviewPlanning.js";
 import type { GraphBundle } from "../types/graph.js";
+export declare const ESTIMATED_TOKENS_PER_LINE = 4;
+export declare const ESTIMATED_PACKET_PROMPT_TOKENS = 900;
+export declare function estimateTaskGroupTokens(tasks: AuditTask[]): number;
 export interface BuildReviewPacketOptions {
     graphBundle?: GraphBundle;
     lineIndex?: Record<string, number>;

package/dist/orchestrator/reviewPackets.js CHANGED Viewed

@@ -2,8 +2,19 @@ import { createHash } from "node:crypto";
 import { LENS_ORDER } from "./unitBuilder.js";
 const DEFAULT_MAX_TASKS_PER_PACKET = 0;
 const DEFAULT_TARGET_PACKET_LINES = 8000;
-const ESTIMATED_TOKENS_PER_LINE = 4;
-const ESTIMATED_PACKET_PROMPT_TOKENS = 900;
+export const ESTIMATED_TOKENS_PER_LINE = 4;
+export const ESTIMATED_PACKET_PROMPT_TOKENS = 900;
+export function estimateTaskGroupTokens(tasks) { // Estimates the token cost of one task group: fixed prompt overhead plus a per-line cost.
+    let totalLines = 0;
+    for (const task of tasks) {
+        if (task.file_line_counts) { // tasks without file_line_counts contribute 0 lines
+            for (const count of Object.values(task.file_line_counts)) {
+                totalLines += count; // NOTE(review): a file listed by several tasks is counted once per task — confirm intended
+            }
+        }
+    }
+    return ESTIMATED_PACKET_PROMPT_TOKENS + totalLines * ESTIMATED_TOKENS_PER_LINE; // NOTE(review): cli.js calls this before addFileLineCountHints() runs on the group — confirm file_line_counts is already populated there
+}
 const PACKET_EXPANSION_MIN_CONFIDENCE = 0.65;
 const HIGH_FAN_DEGREE_THRESHOLD = 12;
 const HIGH_FAN_EXPANSION_CONFIDENCE = 0.99;

package/dist/quota/compositeQuotaSource.d.ts ADDED Viewed

@@ -0,0 +1,7 @@
+import type { QuotaSource, QuotaUsageSnapshot } from "./quotaSource.js";
+export declare class CompositeQuotaSource implements QuotaSource {
+    readonly name = "composite";
+    private sources;
+    constructor(sources: QuotaSource[]);
+    queryCurrentUsage(providerModelKey: string): Promise<QuotaUsageSnapshot | null>;
+}

package/dist/quota/compositeQuotaSource.js ADDED Viewed

@@ -0,0 +1,20 @@
+export class CompositeQuotaSource { // Wraps an ordered list of quota sources and answers with the first snapshot any of them yields.
+    name = "composite";
+    sources;
+    constructor(sources) { // sources: objects exposing queryCurrentUsage(providerModelKey)
+        this.sources = sources;
+    }
+    async queryCurrentUsage(providerModelKey) { // Resolves to the first truthy snapshot, or null when no source produces one.
+        for (const source of this.sources) { // sources are awaited one at a time, in array order
+            try {
+                const snapshot = await source.queryCurrentUsage(providerModelKey);
+                if (snapshot)
+                    return snapshot;
+            }
+            catch {
+                // Skip failing sources, try next. NOTE(review): the error is discarded without logging.
+            }
+        }
+        return null;
+    }
+}

package/dist/quota/errorParsers/claudeCodeErrorParser.d.ts ADDED Viewed

@@ -0,0 +1,6 @@
+import type { RateLimitDetectionResult } from "../errorParsing.js";
+import type { ErrorParser } from "./genericErrorParser.js";
+export declare class ClaudeCodeErrorParser implements ErrorParser {
+    readonly name = "claude-code";
+    parse(text: string): RateLimitDetectionResult;
+}

package/dist/quota/errorParsers/claudeCodeErrorParser.js ADDED Viewed

@@ -0,0 +1,39 @@
+export class ClaudeCodeErrorParser { // Detects rate-limit errors in text made of one JSON object per line.
+    name = "claude-code";
+    parse(text) { // Returns { isRateLimited, retryAfterMs, rawMatch } for the first matching line; a "not rate limited" result otherwise.
+        for (const line of text.split("\n")) {
+            const trimmed = line.trim();
+            if (!trimmed.startsWith("{")) // only lines that look like a JSON object are parsed
+                continue;
+            try {
+                const obj = JSON.parse(trimmed);
+                const level = obj["level"];
+                const type = obj["type"];
+                const message = obj["message"] ?? "";
+                const statusCode = obj["status_code"];
+                if (statusCode === 429 || // strict comparison: a string "429" does not match here
+                    type === "rate_limit_error" ||
+                    (level === "error" && /\brate.?limit/i.test(message))) { // NOTE(review): a non-string message is coerced to a string by test()
+                    const retryAfter = obj["retry_after"];
+                    const retryAfterMs = obj["retry_after_ms"];
+                    let extractedMs = null;
+                    if (retryAfterMs != null && retryAfterMs > 0) { // an explicit millisecond value takes precedence
+                        extractedMs = retryAfterMs;
+                    }
+                    else if (retryAfter != null && retryAfter > 0) {
+                        extractedMs = retryAfter < 600 ? retryAfter * 1000 : retryAfter; // heuristic: values below 600 are read as seconds; NOTE(review): a retry_after of 600 s or more is passed through as milliseconds
+                    }
+                    return {
+                        isRateLimited: true,
+                        retryAfterMs: extractedMs,
+                        rawMatch: `claude-code-stderr:${statusCode ?? type ?? "rate_limit"}`,
+                    };
+                }
+            }
+            catch {
+                // Not valid JSON, skip
+            }
+        }
+        return { isRateLimited: false, retryAfterMs: null, rawMatch: null };
+    }
+}

package/dist/quota/errorParsers/genericErrorParser.d.ts ADDED Viewed

@@ -0,0 +1,9 @@
+import type { RateLimitDetectionResult } from "../errorParsing.js";
+export interface ErrorParser {
+    readonly name: string;
+    parse(text: string): RateLimitDetectionResult;
+}
+export declare class GenericErrorParser implements ErrorParser {
+    readonly name = "generic";
+    parse(text: string): RateLimitDetectionResult;
+}

package/dist/quota/errorParsers/genericErrorParser.js ADDED Viewed

@@ -0,0 +1,7 @@
+import { detectRateLimitError } from "../errorParsing.js";
+export class GenericErrorParser { // Parser that delegates to detectRateLimitError from ../errorParsing.js.
+    name = "generic";
+    parse(text) { // Returns whatever detectRateLimitError(text) returns.
+        return detectRateLimitError(text);
+    }
+}

package/dist/quota/errorParsers/index.d.ts ADDED Viewed

@@ -0,0 +1,5 @@
+export type { ErrorParser } from "./genericErrorParser.js";
+export { GenericErrorParser } from "./genericErrorParser.js";
+export { ClaudeCodeErrorParser } from "./claudeCodeErrorParser.js";
+import type { ErrorParser } from "./genericErrorParser.js";
+export declare function getErrorParserForProvider(providerName: string): ErrorParser;

package/dist/quota/errorParsers/index.js ADDED Viewed

@@ -0,0 +1,12 @@
+export { GenericErrorParser } from "./genericErrorParser.js";
+export { ClaudeCodeErrorParser } from "./claudeCodeErrorParser.js";
+import { GenericErrorParser } from "./genericErrorParser.js";
+import { ClaudeCodeErrorParser } from "./claudeCodeErrorParser.js";
+const PROVIDER_PARSERS = {
+    "claude-code": () => new ClaudeCodeErrorParser(),
+};
+const genericParser = new GenericErrorParser();
+export function getErrorParserForProvider(providerName) { // Returns a new provider-specific parser when one is registered, else the shared generic parser.
+    const factory = PROVIDER_PARSERS[providerName]; // NOTE(review): plain-object lookup also resolves inherited keys such as "toString" or "constructor"; consider an Object.hasOwn guard
+    return factory ? factory() : genericParser;
+}

package/dist/quota/errorParsing.d.ts ADDED Viewed

@@ -0,0 +1,7 @@
+export interface RateLimitDetectionResult {
+    isRateLimited: boolean;
+    retryAfterMs: number | null;
+    rawMatch: string | null;
+}
+export declare function detectRateLimitError(text: string): RateLimitDetectionResult;
+export declare function computeCooldownUntil(retryAfterMs: number | null, defaultMs?: number): string;

package/dist/quota/errorParsing.js ADDED Viewed

@@ -0,0 +1,69 @@
+const RATE_LIMIT_PATTERNS = [
+    /\b429\b/i,
+    /\btoo many requests\b/i,
+    /\brate.?limit/i,
+    /\boverloaded\b/i,
+    /\bresource.?exhausted\b/i,
+    /\bquota.?exceeded\b/i,
+];
+function tryParseJson(text) { // Parses text from its first "{" to the end as JSON; returns null when there is no "{" or parsing fails.
+    const jsonStart = text.indexOf("{");
+    if (jsonStart === -1)
+        return null;
+    try {
+        return JSON.parse(text.slice(jsonStart)); // NOTE(review): any non-whitespace text after the JSON value makes JSON.parse throw, so this returns null
+    }
+    catch {
+        return null;
+    }
+}
+function extractRetryAfterMs(obj) { // Picks the first non-nullish retry hint from headers / retry_after / retry_after_ms and returns it in milliseconds, or null.
+    const headers = obj["headers"];
+    const retryAfter = headers?.["retry-after"] ??
+        headers?.["Retry-After"] ??
+        obj["retry_after"] ??
+        obj["retry_after_ms"]; // NOTE(review): retry_after_ms goes through the seconds heuristic below, so e.g. 500 becomes 500000 ms
+    if (retryAfter == null)
+        return null;
+    const val = typeof retryAfter === "string" ? Number(retryAfter) : retryAfter; // an HTTP-date Retry-After string becomes NaN and is rejected below
+    if (!Number.isFinite(val) || val <= 0)
+        return null;
+    // If the value looks like seconds (< 600), convert to ms
+    return val < 600 ? val * 1000 : val; // NOTE(review): a delay of 600 seconds or more is returned unchanged, i.e. read as milliseconds
+}
+function detectFromJson(text) { // Structured detection: returns a rate-limit result when the embedded JSON signals one, else null.
+    const obj = tryParseJson(text);
+    if (!obj)
+        return null;
+    const status = obj["status"];
+    const type = obj["type"];
+    const errorObj = obj["error"];
+    const errorType = errorObj?.["type"];
+    const isRateLimited = status === 429 || // strict comparison: a string "429" does not match
+        type === "rate_limit_error" ||
+        errorType === "rate_limit_error";
+    if (!isRateLimited)
+        return null; // null (not a negative result) so the caller can fall back to pattern matching
+    return {
+        isRateLimited: true,
+        retryAfterMs: extractRetryAfterMs(obj),
+        rawMatch: `json:${status === 429 ? "status=429" : `type=${type ?? errorType}`}`,
+    };
+}
+export function detectRateLimitError(text) { // Returns { isRateLimited, retryAfterMs, rawMatch }; tries JSON detection first, then RATE_LIMIT_PATTERNS.
+    const jsonResult = detectFromJson(text);
+    if (jsonResult)
+        return jsonResult;
+    for (const pattern of RATE_LIMIT_PATTERNS) { // first matching pattern wins; NOTE(review): a standalone "429" or the word "overloaded" anywhere in text counts as a hit
+        const match = pattern.exec(text);
+        if (match) {
+            return { isRateLimited: true, retryAfterMs: null, rawMatch: match[0] }; // the pattern path never yields a retry delay
+        }
+    }
+    return { isRateLimited: false, retryAfterMs: null, rawMatch: null };
+}
+const DEFAULT_COOLDOWN_MS = 60_000;
+export function computeCooldownUntil(retryAfterMs, defaultMs = DEFAULT_COOLDOWN_MS) { // Returns an ISO-8601 timestamp that lies the chosen delay after the current time.
+    const ms = retryAfterMs != null && retryAfterMs > 0 ? retryAfterMs : defaultMs; // null, zero or negative delays fall back to defaultMs
+    return new Date(Date.now() + ms).toISOString();
+}

package/dist/quota/fileLock.d.ts ADDED Viewed

@@ -0,0 +1,6 @@
+export declare class FileLockTimeoutError extends Error {
+    constructor(lockPath: string);
+}
+export declare function acquireLock(lockPath: string, timeoutMs?: number): Promise<void>;
+export declare function releaseLock(lockPath: string): Promise<void>;
+export declare function withFileLock<T>(lockPath: string, fn: () => Promise<T>, timeoutMs?: number): Promise<T>;

package/dist/quota/fileLock.js ADDED Viewed

@@ -0,0 +1,64 @@
+import { open, unlink, stat } from "node:fs/promises";
+const STALE_LOCK_MS = 30_000;
+const RETRY_INTERVAL_MS = 50;
+const DEFAULT_TIMEOUT_MS = 10_000;
+export class FileLockTimeoutError extends Error { // Thrown by acquireLock when the deadline passes before the lock file can be created.
+    constructor(lockPath) {
+        super(`Timed out acquiring lock: ${lockPath}`);
+        this.name = "FileLockTimeoutError";
+    }
+}
+async function isLockStale(lockPath) { // True when the lock file's mtime is more than STALE_LOCK_MS in the past.
+    try {
+        const info = await stat(lockPath);
+        return Date.now() - info.mtimeMs > STALE_LOCK_MS; // NOTE(review): nothing in this module refreshes mtime, so a holder running longer than STALE_LOCK_MS looks stale
+    }
+    catch {
+        return false; // any stat failure (including a lock that just disappeared) is reported as not stale
+    }
+}
+export async function acquireLock(lockPath, timeoutMs = DEFAULT_TIMEOUT_MS) { // Creates lockPath exclusively, retrying until it succeeds or timeoutMs elapses (then throws FileLockTimeoutError).
+    const deadline = Date.now() + timeoutMs;
+    while (true) {
+        try {
+            const fd = await open(lockPath, "wx"); // "wx" fails with EEXIST when the file already exists
+            await fd.close();
+            return;
+        }
+        catch (err) {
+            if (err.code !== "EEXIST") // any other failure is rethrown to the caller
+                throw err;
+        }
+        if (await isLockStale(lockPath)) { // NOTE(review): two waiters can both see a stale lock; the slower unlink may then delete the faster one's fresh lock
+            try {
+                await unlink(lockPath);
+                continue; // retry at once, skipping the deadline check and the sleep
+            }
+            catch {
+                // Another process may have already cleaned it up
+            }
+        }
+        if (Date.now() >= deadline) {
+            throw new FileLockTimeoutError(lockPath);
+        }
+        await new Promise((r) => setTimeout(r, RETRY_INTERVAL_MS));
+    }
+}
+export async function releaseLock(lockPath) { // Deletes the lock file; a file that is already gone is not an error.
+    try {
+        await unlink(lockPath); // NOTE(review): no ownership check — this removes the file even if another process created it
+    }
+    catch (err) {
+        if (err.code !== "ENOENT") // only "file not found" is tolerated
+            throw err;
+    }
+}
+export async function withFileLock(lockPath, fn, timeoutMs) { // Runs fn while holding the lock and resolves to fn's result; the lock is released on both success and failure.
+    await acquireLock(lockPath, timeoutMs); // an undefined timeoutMs selects acquireLock's default
+    try {
+        return await fn();
+    }
+    finally {
+        await releaseLock(lockPath); // NOTE(review): if this throws, its error replaces fn's result or error
+    }
+}

package/dist/quota/index.d.ts CHANGED Viewed

@@ -1,9 +1,19 @@
 export { resolveLimits, lookupKnownModel, classifyProvider } from "./limits.js";
 export type { LimitResolutionResult, ResolveLimitsOptions, ProviderType } from "./limits.js";
 export { detectHostActiveSubagentLimit, resolveHostActiveSubagentLimit, } from "./hostLimits.js";
-export { readQuotaState, writeQuotaState, computeMaxSafeConcurrency, recordWaveOutcome, getQuotaStatePath, decayWeight, applyDecayToEntry, } from "./state.js";
+export { readQuotaState, writeQuotaState, computeMaxSafeConcurrency, recordWaveOutcome, getQuotaStatePath, decayWeight, applyDecayToEntry, computeBackoffCooldownMs, computeBackoffFailureWeight, computeRampUpConcurrency, } from "./state.js";
 export { scheduleWave, buildProviderModelKey } from "./scheduler.js";
 export type { ScheduleWaveOptions } from "./scheduler.js";
+export { detectRateLimitError, computeCooldownUntil } from "./errorParsing.js";
+export { acquireLock, releaseLock, withFileLock, FileLockTimeoutError } from "./fileLock.js";
+export { runSlidingWindow } from "./slidingWindow.js";
+export type { SlidingWindowResult } from "./slidingWindow.js";
+export type { RateLimitDetectionResult } from "./errorParsing.js";
 export { probeProvider } from "./probe.js";
 export type { ProbeResult } from "./probe.js";
+export type { QuotaSource, QuotaUsageSnapshot } from "./quotaSource.js";
+export type { ErrorParser } from "./errorParsers/index.js";
+export { GenericErrorParser, ClaudeCodeErrorParser, getErrorParserForProvider } from "./errorParsers/index.js";
+export { LearnedQuotaSource } from "./learnedQuotaSource.js";
+export { CompositeQuotaSource } from "./compositeQuotaSource.js";
 export type { ResolvedLimits, LimitSource, LimitConfidence, HostConcurrencyLimit, HostConcurrencyLimitSource, QuotaState, QuotaStateEntry, ConcurrencyBucket, WaveSchedule, DispatchQuota, ObservedWaveOutcome, } from "./types.js";

package/dist/quota/index.js CHANGED Viewed

@@ -1,5 +1,11 @@
 export { resolveLimits, lookupKnownModel, classifyProvider } from "./limits.js";
 export { detectHostActiveSubagentLimit, resolveHostActiveSubagentLimit, } from "./hostLimits.js";
-export { readQuotaState, writeQuotaState, computeMaxSafeConcurrency, recordWaveOutcome, getQuotaStatePath, decayWeight, applyDecayToEntry, } from "./state.js";
+export { readQuotaState, writeQuotaState, computeMaxSafeConcurrency, recordWaveOutcome, getQuotaStatePath, decayWeight, applyDecayToEntry, computeBackoffCooldownMs, computeBackoffFailureWeight, computeRampUpConcurrency, } from "./state.js";
 export { scheduleWave, buildProviderModelKey } from "./scheduler.js";
+export { detectRateLimitError, computeCooldownUntil } from "./errorParsing.js";
+export { acquireLock, releaseLock, withFileLock, FileLockTimeoutError } from "./fileLock.js";
+export { runSlidingWindow } from "./slidingWindow.js";
 export { probeProvider } from "./probe.js";
+export { GenericErrorParser, ClaudeCodeErrorParser, getErrorParserForProvider } from "./errorParsers/index.js";
+export { LearnedQuotaSource } from "./learnedQuotaSource.js";
+export { CompositeQuotaSource } from "./compositeQuotaSource.js";

package/dist/quota/learnedQuotaSource.d.ts ADDED Viewed

@@ -0,0 +1,7 @@
+import type { QuotaSource, QuotaUsageSnapshot } from "./quotaSource.js";
+export declare class LearnedQuotaSource implements QuotaSource {
+    readonly name = "learned";
+    private halfLifeHours;
+    constructor(halfLifeHours?: number);
+    queryCurrentUsage(providerModelKey: string): Promise<QuotaUsageSnapshot | null>;
+}

package/dist/quota/learnedQuotaSource.js ADDED Viewed

@@ -0,0 +1,25 @@
+import { readQuotaState, computeMaxSafeConcurrency } from "./state.js";
+export class LearnedQuotaSource { // Quota source that builds its snapshot from the stored quota state returned by readQuotaState().
+    name = "learned";
+    halfLifeHours;
+    constructor(halfLifeHours = 24) { // halfLifeHours is forwarded to computeMaxSafeConcurrency
+        this.halfLifeHours = halfLifeHours;
+    }
+    async queryCurrentUsage(providerModelKey) { // Resolves to a snapshot for the key, or null when the state has no entry for it.
+        const state = await readQuotaState();
+        const entry = state.entries[providerModelKey];
+        if (!entry)
+            return null;
+        const maxSafe = computeMaxSafeConcurrency(entry, this.halfLifeHours);
+        const isInCooldown = entry.cooldown_until != null &&
+            new Date(entry.cooldown_until).getTime() > Date.now(); // an unparseable date gives NaN, which compares false
+        return {
+            remaining_pct: isInCooldown ? 0 : null, // 0 during a cooldown, otherwise unknown (null)
+            reset_at: isInCooldown ? entry.cooldown_until : null,
+            requests_remaining: maxSafe, // NOTE(review): carries computeMaxSafeConcurrency's result, not a remaining-request count — confirm consumers expect this
+            tokens_remaining: null,
+            captured_at: entry.updated_at,
+            source: "learned",
+        };
+    }
+}

package/dist/quota/probe.d.ts CHANGED Viewed

@@ -5,9 +5,6 @@ export interface ProbeResult {
 /**
  * Probe a provider to discover its rate limits.
  *
- * Only subprocess-template supports direct probing since it is the only
- * provider where the auditor controls the API call. IDE providers
- * (claude-code, opencode) select the model internally; their limits come
- * from known-model metadata or learned behavior.
+ * @deprecated Phase 3A replaces this with the QuotaSource abstraction.
  */
 export declare function probeProvider(providerName: string, probeMode?: "auto" | "never" | "force"): Promise<ProbeResult>;

package/dist/quota/probe.js CHANGED Viewed

@@ -1,10 +1,7 @@
 /**
  * Probe a provider to discover its rate limits.
  *
- * Only subprocess-template supports direct probing since it is the only
- * provider where the auditor controls the API call. IDE providers
- * (claude-code, opencode) select the model internally; their limits come
- * from known-model metadata or learned behavior.
+ * @deprecated Phase 3A replaces this with the QuotaSource abstraction.
  */
 export async function probeProvider(providerName, probeMode = "auto") {
     if (probeMode === "never") {

package/dist/quota/quotaSource.d.ts ADDED Viewed

@@ -0,0 +1,12 @@
+export interface QuotaUsageSnapshot {
+    remaining_pct: number | null;
+    reset_at: string | null;
+    requests_remaining: number | null;
+    tokens_remaining: number | null;
+    captured_at: string;
+    source: string;
+}
+export interface QuotaSource {
+    readonly name: string;
+    queryCurrentUsage(providerModelKey: string): Promise<QuotaUsageSnapshot | null>;
+}

package/dist/quota/quotaSource.js ADDED Viewed

@@ -0,0 +1 @@
+export {};

package/dist/quota/scheduler.d.ts CHANGED Viewed

@@ -1,14 +1,18 @@
 import type { ResolvedProviderName, SessionConfig } from "../types/sessionConfig.js";
 import type { HostConcurrencyLimit, QuotaStateEntry, WaveSchedule } from "./types.js";
+import type { QuotaUsageSnapshot } from "./quotaSource.js";
 export interface ScheduleWaveOptions {
     providerName: ResolvedProviderName;
     sessionConfig: SessionConfig;
     hostModel: string | null;
     requestedConcurrency: number;
-    /** Average estimated tokens per packet/worker. Used for TPM budget. */
+    /** Per-slot estimated tokens (one entry per worker slot). Used for TPM budget. */
+    estimatedSlotTokens?: number[];
+    /** @deprecated Use estimatedSlotTokens instead. Average tokens per slot — used as fallback. */
     estimatedPacketTokens?: number;
     quotaStateEntry?: QuotaStateEntry | null;
     hostConcurrencyLimit?: HostConcurrencyLimit | null;
+    quotaSourceSnapshot?: QuotaUsageSnapshot | null;
 }
 export declare function scheduleWave(options: ScheduleWaveOptions): WaveSchedule;
 /** Build the state key used for indexing quota-state.json entries. */

package/dist/quota/scheduler.js CHANGED Viewed

@@ -1,7 +1,20 @@
 import { classifyProvider, resolveLimits } from "./limits.js";
-import { computeMaxSafeConcurrency } from "./state.js";
+import { computeMaxSafeConcurrency, computeRampUpConcurrency } from "./state.js";
+function sumTopN(sorted, n) { // Sums the first n entries of sorted (fewer if the array is shorter); it does not sort.
+    let sum = 0;
+    for (let i = 0; i < Math.min(n, sorted.length); i++) // n <= 0 or an empty array gives 0
+        sum += sorted[i];
+    return sum;
+}
 export function scheduleWave(options) {
-    const { providerName, sessionConfig, hostModel, requestedConcurrency, estimatedPacketTokens = 0, quotaStateEntry = null, hostConcurrencyLimit = null, } = options;
+    const { providerName, sessionConfig, hostModel, requestedConcurrency, estimatedSlotTokens, estimatedPacketTokens = 0, quotaStateEntry = null, hostConcurrencyLimit = null, quotaSourceSnapshot = null, } = options;
+    // Descending sort so sumTopN picks the largest slots
+    const slotsSorted = estimatedSlotTokens
+        ? [...estimatedSlotTokens].sort((a, b) => b - a)
+        : null;
+    const avgTokens = slotsSorted && slotsSorted.length > 0
+        ? Math.floor(slotsSorted.reduce((a, b) => a + b, 0) / slotsSorted.length)
+        : estimatedPacketTokens;
     const quota = sessionConfig.quota ?? {};
     const applyHostConcurrencyLimit = (waveSize) => {
         if (hostConcurrencyLimit === null)
@@ -19,7 +32,7 @@ export function scheduleWave(options) {
         };
         return {
             wave_size: waveSize,
-            estimated_wave_tokens: waveSize * estimatedPacketTokens,
+            estimated_wave_tokens: slotsSorted ? sumTopN(slotsSorted, waveSize) : waveSize * avgTokens,
             cooldown_until: null,
             confidence: "high",
             source: "default",
@@ -48,12 +61,25 @@ export function scheduleWave(options) {
             waveSize = Math.min(waveSize, rpmCap);
         }
         // Cap by input tokens-per-minute
-        if (limits.input_tokens_per_minute != null && estimatedPacketTokens > 0) {
-            const tpmCap = Math.max(1, Math.floor((limits.input_tokens_per_minute * safetyMargin) / estimatedPacketTokens));
-            waveSize = Math.min(waveSize, tpmCap);
+        if (limits.input_tokens_per_minute != null && avgTokens > 0) {
+            const tpmBudget = limits.input_tokens_per_minute * safetyMargin;
+            if (slotsSorted && slotsSorted.length > 0) {
+                let candidateSize = waveSize;
+                while (candidateSize > 1 && sumTopN(slotsSorted, candidateSize) > tpmBudget) {
+                    candidateSize--;
+                }
+                waveSize = Math.max(1, candidateSize);
+            }
+            else {
+                const tpmCap = Math.max(1, Math.floor(tpmBudget / avgTokens));
+                waveSize = Math.min(waveSize, tpmCap);
+            }
         }
         if (quotaStateEntry) {
-            const learnedCap = computeMaxSafeConcurrency(quotaStateEntry, halfLifeHours);
+            const rampUp = quota.ramp_up_enabled !== false;
+            const learnedCap = rampUp
+                ? computeRampUpConcurrency(quotaStateEntry, halfLifeHours)
+                : computeMaxSafeConcurrency(quotaStateEntry, halfLifeHours);
             waveSize = Math.min(waveSize, learnedCap);
         }
         else {
@@ -61,22 +87,38 @@ export function scheduleWave(options) {
             const fallbackCap = providerType === "local"
                 ? quota.unknown_local_concurrency
                 : (quota.unknown_hosted_concurrency ?? 1);
-            if (typeof fallbackCap === "number" && Number.isFinite(fallbackCap)) {
+            if (fallbackCap === "unlimited") {
+                // no cap — "unlimited" intentionally skips clamping
+            }
+            else if (typeof fallbackCap === "number" && Number.isFinite(fallbackCap)) {
                 waveSize = Math.min(waveSize, Math.max(1, Math.floor(fallbackCap)));
             }
         }
     }
+    // Apply real-time quota source data if available
+    if (quotaSourceSnapshot && !cooldownUntil) {
+        if (quotaSourceSnapshot.remaining_pct != null && quotaSourceSnapshot.remaining_pct < 0.1) {
+            waveSize = 1;
+            if (quotaSourceSnapshot.reset_at) {
+                cooldownUntil = quotaSourceSnapshot.reset_at;
+            }
+        }
+        else if (quotaSourceSnapshot.remaining_pct != null && quotaSourceSnapshot.remaining_pct < 0.3) {
+            waveSize = Math.min(waveSize, Math.max(1, Math.floor(waveSize * 0.5)));
+        }
+    }
     waveSize = applyHostConcurrencyLimit(waveSize);
     waveSize = Math.max(1, waveSize);
     return {
         wave_size: waveSize,
-        estimated_wave_tokens: waveSize * estimatedPacketTokens,
+        estimated_wave_tokens: slotsSorted ? sumTopN(slotsSorted, waveSize) : waveSize * avgTokens,
         cooldown_until: cooldownUntil,
         confidence,
         source,
         resolved_limits: limits,
         host_concurrency_limit: hostConcurrencyLimit,
         model: hostModel,
+        quota_source_snapshot: quotaSourceSnapshot,
     };
 }
 /** Build the state key used for indexing quota-state.json entries. */

package/dist/quota/slidingWindow.d.ts ADDED Viewed

@@ -0,0 +1,4 @@
+export interface SlidingWindowResult<T> {
+    results: PromiseSettledResult<T>[];
+}
+export declare function runSlidingWindow<T>(tasks: Array<() => Promise<T>>, concurrency: number, onComplete?: (index: number, result: PromiseSettledResult<T>) => void): Promise<SlidingWindowResult<T>>;

package/dist/quota/slidingWindow.js ADDED Viewed

@@ -0,0 +1,28 @@
+export async function runSlidingWindow(tasks, concurrency, onComplete) {
+    const results = new Array(tasks.length);
+    let nextIndex = 0;
+    async function runOne(index) {
+        let result;
+        try {
+            const value = await tasks[index]();
+            result = { status: "fulfilled", value };
+        }
+        catch (reason) {
+            result = { status: "rejected", reason };
+        }
+        results[index] = result;
+        onComplete?.(index, result);
+        if (nextIndex < tasks.length) {
+            const next = nextIndex++;
+            await runOne(next);
+        }
+    }
+    const initialBatch = Math.min(concurrency, tasks.length);
+    const runners = [];
+    for (let i = 0; i < initialBatch; i++) {
+        const idx = nextIndex++;
+        runners.push(runOne(idx));
+    }
+    await Promise.all(runners);
+    return { results };
+}

package/dist/quota/state.d.ts CHANGED Viewed

@@ -9,4 +9,7 @@ export declare function writeQuotaState(state: QuotaState): Promise<void>;
  * exceeds failure evidence, with a minimum of 1.
  */
 export declare function computeMaxSafeConcurrency(entry: QuotaStateEntry, halfLifeHours: number, maxToCheck?: number): number;
+export declare function computeRampUpConcurrency(entry: QuotaStateEntry, halfLifeHours: number, maxToCheck?: number): number;
+export declare function computeBackoffCooldownMs(consecutive429Count: number): number;
+export declare function computeBackoffFailureWeight(consecutive429Count: number): number;
 export declare function recordWaveOutcome(providerModelKey: string, outcome: ObservedWaveOutcome, halfLifeHours: number): Promise<void>;

package/dist/quota/state.js CHANGED Viewed

@@ -1,6 +1,7 @@
 import { mkdir, readFile, writeFile } from "node:fs/promises";
 import { homedir } from "node:os";
 import { join } from "node:path";
+import { withFileLock } from "./fileLock.js";
 const STATE_DIR = join(homedir(), ".audit-code");
 const STATE_PATH = join(STATE_DIR, "quota-state.json");
 // A bucket needs at least this much success weight before we trust it.
@@ -27,31 +28,38 @@ export function applyDecayToEntry(entry, halfLifeHours) {
     return { ...entry, buckets: decayed };
 }
 function isQuotaState(value) {
-    return (value !== null &&
-        typeof value === "object" &&
-        !Array.isArray(value) &&
-        value["version"] === 1 &&
-        typeof value["entries"] === "object");
+    if (value === null || typeof value !== "object" || Array.isArray(value))
+        return false;
+    const obj = value;
+    const version = obj["version"];
+    return (version === 1 || version === 2) && typeof obj["entries"] === "object";
 }
 export async function readQuotaState() {
     try {
         const raw = await readFile(STATE_PATH, "utf8");
         const parsed = JSON.parse(raw);
-        if (isQuotaState(parsed))
+        if (isQuotaState(parsed)) {
+            if (parsed.version === 1) {
+                for (const entry of Object.values(parsed.entries)) {
+                    entry.consecutive_429_count ??= 0;
+                }
+            }
             return parsed;
-        process.stderr.write(`[quota] ignoring invalid quota state at ${STATE_PATH}: expected { version: 1, entries: object }\n`);
+        }
+        process.stderr.write(`[quota] ignoring invalid quota state at ${STATE_PATH}: expected { version: 1|2, entries: object }\n`);
     }
     catch (error) {
         if (error.code === "ENOENT") {
-            return { version: 1, entries: {} };
+            return { version: 2, entries: {} };
         }
         process.stderr.write(`[quota] ignoring unreadable quota state at ${STATE_PATH}: ${error instanceof Error ? error.message : String(error)}\n`);
     }
-    return { version: 1, entries: {} };
+    return { version: 2, entries: {} };
 }
 export async function writeQuotaState(state) {
     await mkdir(STATE_DIR, { recursive: true });
-    await writeFile(STATE_PATH, JSON.stringify(state, null, 2) + "\n", "utf8");
+    const normalized = { ...state, version: 2 };
+    await writeFile(STATE_PATH, JSON.stringify(normalized, null, 2) + "\n", "utf8");
 }
 /**
  * Returns the highest concurrency level for which decayed success evidence
@@ -74,14 +82,39 @@ export function computeMaxSafeConcurrency(entry, halfLifeHours, maxToCheck = 32)
     }
     return maxSafe;
 }
+const RAMP_UP_MIN_SUCCESSES = 2;
+export function computeRampUpConcurrency(entry, halfLifeHours, maxToCheck = 32) {
+    const maxSafe = computeMaxSafeConcurrency(entry, halfLifeHours, maxToCheck);
+    const decayed = applyDecayToEntry(entry, halfLifeHours);
+    const bucket = decayed.buckets[String(maxSafe)];
+    if (bucket &&
+        bucket.success_weight >= RAMP_UP_MIN_SUCCESSES &&
+        bucket.failure_weight === 0) {
+        return maxSafe + 1;
+    }
+    return maxSafe;
+}
 function blankEntry() {
     return { updated_at: new Date().toISOString(), buckets: {}, cooldown_until: null, last_429_at: null };
 }
+const BASE_COOLDOWN_MS = 60_000;
+const MAX_COOLDOWN_MS = 15 * 60_000;
+export function computeBackoffCooldownMs(consecutive429Count) {
+    const ms = BASE_COOLDOWN_MS * Math.pow(2, Math.max(0, consecutive429Count - 1));
+    return Math.min(ms, MAX_COOLDOWN_MS);
+}
+export function computeBackoffFailureWeight(consecutive429Count) {
+    return 1.0 + 0.5 * Math.max(0, consecutive429Count - 1);
+}
+const LOCK_PATH = STATE_PATH + ".lock";
 export async function recordWaveOutcome(providerModelKey, outcome, halfLifeHours) {
+    await withFileLock(LOCK_PATH, () => recordWaveOutcomeUnsafe(providerModelKey, outcome, halfLifeHours));
+}
+async function recordWaveOutcomeUnsafe(providerModelKey, outcome, halfLifeHours) {
     const state = await readQuotaState();
     const entry = applyDecayToEntry(state.entries[providerModelKey] ?? blankEntry(), halfLifeHours);
     if (outcome.outcome === "success") {
-        // Success at N proves 1..N are all safe
+        entry.consecutive_429_count = 0;
         for (let n = 1; n <= outcome.concurrency; n++) {
             const bucket = entry.buckets[String(n)] ?? { success_weight: 0, failure_weight: 0 };
             bucket.success_weight += 1.0;
@@ -89,13 +122,23 @@ export async function recordWaveOutcome(providerModelKey, outcome, halfLifeHours
         }
     }
     else {
+        const prev429Count = entry.consecutive_429_count ?? 0;
+        const new429Count = outcome.outcome === "rate_limited" ? prev429Count + 1 : prev429Count;
+        entry.consecutive_429_count = new429Count;
         entry.last_429_at = new Date().toISOString();
-        if (outcome.cooldown_until)
+        if (outcome.outcome === "rate_limited" && new429Count > 0) {
+            const backoffMs = computeBackoffCooldownMs(new429Count);
+            entry.cooldown_until = new Date(Date.now() + backoffMs).toISOString();
+        }
+        else if (outcome.cooldown_until) {
             entry.cooldown_until = outcome.cooldown_until;
-        // Failure at N marks N and above as unsafe
+        }
+        const failureWeight = outcome.outcome === "rate_limited"
+            ? computeBackoffFailureWeight(new429Count)
+            : 1.0;
         for (let n = outcome.concurrency; n <= outcome.concurrency + 4; n++) {
             const bucket = entry.buckets[String(n)] ?? { success_weight: 0, failure_weight: 0 };
-            bucket.failure_weight += 1.0;
+            bucket.failure_weight += failureWeight;
             entry.buckets[String(n)] = bucket;
         }
     }

package/dist/quota/types.d.ts CHANGED Viewed

@@ -22,9 +22,10 @@ export interface QuotaStateEntry {
     buckets: Record<string, ConcurrencyBucket>;
     cooldown_until: string | null;
     last_429_at: string | null;
+    consecutive_429_count?: number;
 }
 export interface QuotaState {
-    version: 1;
+    version: 1 | 2;
     entries: Record<string, QuotaStateEntry>;
 }
 export interface WaveSchedule {
@@ -36,9 +37,15 @@ export interface WaveSchedule {
     resolved_limits: ResolvedLimits;
     host_concurrency_limit: HostConcurrencyLimit | null;
     model: string | null;
+    quota_source_snapshot?: import("./quotaSource.js").QuotaUsageSnapshot | null;
+}
+export interface BackoffState {
+    consecutive_429_count: number;
+    current_cooldown_ms: number;
+    current_failure_weight: number;
 }
 export interface DispatchQuota {
-    contract_version: "audit-code-dispatch-quota/v1alpha1";
+    contract_version: "audit-code-dispatch-quota/v1alpha1" | "audit-code-dispatch-quota/v1alpha2";
     run_id: string;
     model: string | null;
     resolved_limits: ResolvedLimits;
@@ -48,6 +55,8 @@ export interface DispatchQuota {
     wave_size: number;
     estimated_wave_tokens: number;
     cooldown_until: string | null;
+    quota_source_snapshot?: import("./quotaSource.js").QuotaUsageSnapshot | null;
+    backoff_state?: BackoffState | null;
 }
 export interface ObservedWaveOutcome {
     concurrency: number;

package/dist/types/sessionConfig.d.ts CHANGED Viewed

@@ -44,6 +44,8 @@ export interface QuotaConfig {
     reserved_output_tokens?: number;
     /** Half-life of empirical success/failure evidence in hours (default: 24). */
     empirical_half_life_hours?: number;
+    /** Allow the scheduler to try concurrency maxSafe+1 after consecutive successes (default: true). */
+    ramp_up_enabled?: boolean;
     /** Hard host ceiling for simultaneously active conversation subagents. */
     host_active_subagent_limit?: number;
     /** Per-model overrides keyed by "provider/model". */

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "auditor-lambda",
-  "version": "0.3.33",
+  "version": "0.3.34",
   "private": false,
   "description": "Portable hybrid code-auditing framework for arbitrary repositories.",
   "type": "module",

package/schemas/dispatch_quota.schema.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "$schema": "http://json-schema.org/draft-07/schema#",
-  "$id": "audit-code-dispatch-quota/v1alpha1",
+  "$id": "audit-code-dispatch-quota/v1alpha2",
   "title": "DispatchQuota",
   "description": "Quota schedule for a prepare-dispatch run. Written beside dispatch-plan.json. Hosts must launch at most wave_size packets per wave, then re-read this file before the next wave to pick up any updated limits.",
   "type": "object",
@@ -20,7 +20,7 @@
   "properties": {
     "contract_version": {
       "type": "string",
-      "const": "audit-code-dispatch-quota/v1alpha1"
+      "enum": ["audit-code-dispatch-quota/v1alpha1", "audit-code-dispatch-quota/v1alpha2"]
     },
     "run_id": {
       "type": "string",
@@ -97,6 +97,27 @@
       "type": ["string", "null"],
       "format": "date-time",
       "description": "If non-null, the host should wait until this timestamp before launching the next wave."
+    },
+    "quota_source_snapshot": {
+      "type": ["object", "null"],
+      "description": "Real-time usage snapshot from a QuotaSource, if available.",
+      "properties": {
+        "remaining_pct": { "type": ["number", "null"] },
+        "reset_at": { "type": ["string", "null"], "format": "date-time" },
+        "requests_remaining": { "type": ["integer", "null"] },
+        "tokens_remaining": { "type": ["integer", "null"] },
+        "captured_at": { "type": "string", "format": "date-time" },
+        "source": { "type": "string" }
+      }
+    },
+    "backoff_state": {
+      "type": ["object", "null"],
+      "description": "Exponential backoff state for repeated rate-limit errors.",
+      "properties": {
+        "consecutive_429_count": { "type": "integer", "minimum": 0 },
+        "current_cooldown_ms": { "type": "integer", "minimum": 0 },
+        "current_failure_weight": { "type": "number", "minimum": 0 }
+      }
     }
   }
 }