npm - auditor-lambda - Versions diffs - 0.3.21 → 0.3.23 - Mend

auditor-lambda 0.3.21 → 0.3.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19) hide show

package/audit-code-wrapper-lib.mjs +5 -0
package/dist/cli.js +169 -8
package/dist/orchestrator/reviewPackets.d.ts +5 -0
package/dist/orchestrator/reviewPackets.js +5 -1
package/dist/quota/index.d.ts +8 -0
package/dist/quota/index.js +4 -0
package/dist/quota/limits.d.ts +16 -0
package/dist/quota/limits.js +77 -0
package/dist/quota/probe.d.ts +13 -0
package/dist/quota/probe.js +21 -0
package/dist/quota/scheduler.d.ts +14 -0
package/dist/quota/scheduler.js +76 -0
package/dist/quota/state.d.ts +12 -0
package/dist/quota/state.js +101 -0
package/dist/quota/types.d.ts +50 -0
package/dist/quota/types.js +1 -0
package/dist/types/sessionConfig.d.ts +28 -0
package/package.json +1 -1
package/schemas/dispatch_quota.schema.json +77 -0

package/audit-code-wrapper-lib.mjs CHANGED Viewed

@@ -2685,6 +2685,11 @@ export async function runAuditCodeWrapper({
     return;
   }
+  if (argv[0] === 'quota') {
+    await runDistCommand('quota', argv.slice(1), { ensureArtifactsDir: true });
+    return;
+  }
   if (argv[0] === 'submit-packet') {
     await runDistCommand('submit-packet', argv.slice(1));
     return;

package/dist/cli.js CHANGED Viewed

@@ -32,6 +32,7 @@ import { buildReviewPackets, orderTasksForPacketReview, } from "./orchestrator/r
 import { buildFileAnchorSummary, } from "./orchestrator/fileAnchors.js";
 import { LOCAL_SUBPROCESS_PROVIDER_NAME } from "./providers/constants.js";
 import { runAuditCodeMcpServer } from "./mcp/server.js";
+import { scheduleWave, buildProviderModelKey, readQuotaState, recordWaveOutcome, resolveLimits, probeProvider, computeMaxSafeConcurrency, getQuotaStatePath, } from "./quota/index.js";
 const packageRoot = resolve(dirname(fileURLToPath(import.meta.url)), "..");
 const ADVANCE_AUDIT_CONTRACT_VERSION = "audit-code/v1alpha1";
 const WORKER_RESULT_CONTRACT_VERSION = "audit-code-worker-result/v1alpha1";
@@ -178,6 +179,27 @@ function getTimeoutMs(argv, sessionConfig) {
 /** Return the value of the `--provider` CLI flag exactly as `getFlag` reports it. */
 function getExplicitProvider(argv) {
     const explicitProvider = getFlag(argv, "--provider");
     return explicitProvider;
 }
+/**
+ * Read the `--host-model` CLI flag.
+ * Yields null when the flag lookup produces null or undefined.
+ */
+function getHostModel(argv) {
+    const flagValue = getFlag(argv, "--host-model");
+    return flagValue ?? null;
+}
+/**
+ * Resolve the quota probe mode: the `--quota-probe` flag wins, then
+ * `sessionConfig.quota.probe`, then "auto". Any value other than
+ * "auto", "never" or "force" is replaced by "auto".
+ */
+function getQuotaProbeMode(argv, sessionConfig) {
+    const requested = getFlag(argv, "--quota-probe") ?? sessionConfig.quota?.probe ?? "auto";
+    switch (requested) {
+        case "auto":
+        case "never":
+        case "force":
+            return requested;
+        default:
+            return "auto";
+    }
+}
+/**
+ * Decide whether an error message describes a provider rate limit.
+ *
+ * The HTTP status 429 only counts when it stands alone as a number, so
+ * durations or ids such as "14290ms" or "0.429s" are not mistaken for it.
+ * The phrasings "rate limit", "rate_limit" and "too many requests" are
+ * matched case-insensitively.
+ *
+ * @param {unknown} errorText - Error message; non-string values are stringified.
+ * @returns {boolean} True when the text looks like a rate-limit failure.
+ */
+function detectRateLimitError(errorText) {
+    // Coerce defensively: this is used as an Array.prototype.some callback over
+    // collected batch errors and must not throw on a non-string entry.
+    const lower = String(errorText ?? "").toLowerCase();
+    // Reject "429" when it is embedded in a longer digit run or is a decimal fraction.
+    const hasStandalone429 = /(?<![\d.])429(?!\d)/.test(lower);
+    return (hasStandalone429 ||
+        lower.includes("rate limit") ||
+        lower.includes("rate_limit") ||
+        lower.includes("too many requests"));
+}
+/**
+ * Compute the ISO timestamp at which a rate-limit cooldown ends.
+ * A parseable `resetAtHeader` is used as-is; otherwise the cooldown
+ * ends 60 seconds from now.
+ */
+function defaultCooldownUntil(resetAtHeader) {
+    const parsedMs = resetAtHeader ? new Date(resetAtHeader).getTime() : Number.NaN;
+    const untilMs = Number.isNaN(parsedMs) ? Date.now() + 60_000 : parsedMs;
+    return new Date(untilMs).toISOString();
+}
 /** Resolve the provider for this run from the explicit `--provider` flag and the session config. */
 function resolveRunProviderName(argv, sessionConfig) {
     const explicitProvider = getExplicitProvider(argv);
     return resolveFreshSessionProviderName(explicitProvider, sessionConfig);
 }
@@ -528,14 +550,30 @@ function renderDispatchReviewPrompt(params) {
     const toolsLine = params.hostCanRestrictSubagentTools
         ? "Restrict review subagents to read/search plus the packet submit command named in their prompt. Do not give them source edit/write tools."
         : "Do not ask the user about per-subagent tool restrictions; this host did not report a callable restriction facility.";
+    const fileLines = params.dispatchQuotaPath
+        ? [
+            "Dispatch is prepared. Read both of these files:",
+            "",
+            `  Dispatch plan:  ${params.dispatchPlanPath}`,
+            `  Dispatch quota: ${params.dispatchQuotaPath}`,
+            "",
+            "The quota file contains a `wave_size` field. Dispatch at most `wave_size` subagents at a time. If `cooldown_until` is non-null, wait until that timestamp before starting the first wave.",
+            "",
+            "For each wave: launch up to `wave_size` subagents in parallel (one per plan entry), wait for all of them to finish, then start the next wave. Repeat until all entries are dispatched.",
+        ]
+        : [
+            "Dispatch is prepared. Read only this dispatch plan JSON:",
+            "",
+            `  ${params.dispatchPlanPath}`,
+            "",
+            "Launch one host subagent for each entry in the plan.",
+        ];
     return [
         "# audit-code dispatch review",
         "",
-        "Dispatch is prepared. Read only this dispatch plan JSON:",
-        "",
-        `  ${params.dispatchPlanPath}`,
+        ...fileLines,
         "",
-        "Launch one host subagent for each entry in the plan. Pass the packet prompt path literally; do not load packet prompt files into this orchestrator context.",
+        "Pass each packet prompt path literally to its subagent; do not load packet prompt files into this orchestrator context.",
         "",
         "Subagent prompt shape:",
         "",
@@ -544,9 +582,9 @@ function renderDispatchReviewPrompt(params) {
         modelLine,
         toolsLine,
         "",
-        "Wait for all review subagents to finish. Each subagent must submit its packet through the submit command printed in its packet prompt and stop after successful submission.",
+        "Each subagent must submit its packet through the submit command printed in its packet prompt and stop after successful submission.",
         "",
-        "Then run exactly:",
+        "After all waves complete, run exactly:",
         "",
         `  ${mergeCommand}`,
         "",
@@ -1198,6 +1236,7 @@ async function cmdNextStep(argv) {
         repoRoot: root,
         artifactPaths: {
             dispatch_plan: dispatch.dispatch_plan_path,
+            dispatch_quota: dispatch.dispatch_quota_path,
             dispatch_warnings: dispatch.dispatch_warnings_path,
             active_review_task: result.activeReviewRun.task_path,
             pending_audit_tasks: result.activeReviewRun.pending_audit_tasks_path ?? null,
@@ -1207,6 +1246,7 @@ async function cmdNextStep(argv) {
             artifactsDir,
             activeReviewRun: result.activeReviewRun,
             dispatchPlanPath: dispatch.dispatch_plan_path,
+            dispatchQuotaPath: dispatch.dispatch_quota_path,
             hostCanRestrictSubagentTools,
             hostCanSelectSubagentModel,
         }),
@@ -1238,6 +1278,7 @@ async function cmdRunToCompletion(argv) {
     const agentBatchSize = getAgentBatchSize(argv, sessionConfig);
     const parallelWorkers = getParallelWorkers(argv, sessionConfig);
     const timeoutMs = getTimeoutMs(argv, sessionConfig);
+    const hostModel = getHostModel(argv);
     const selfCliPath = resolve(argv[1] ?? process.argv[1] ?? "");
     const batchResultsDir = getBatchResultsDir(argv);
     if (batchResultsDir && getFlag(argv, "--results")) {
@@ -1375,8 +1416,27 @@ async function cmdRunToCompletion(argv) {
             return;
         }
         if (preferredExecutor === "agent" && parallelWorkers > 1) {
+            const quotaState = await readQuotaState();
+            const providerModelKey = buildProviderModelKey(provider.name, hostModel);
+            const quotaStateEntry = quotaState.entries[providerModelKey] ?? null;
+            const waveSchedule = scheduleWave({
+                providerName: resolveFreshSessionProviderName(getExplicitProvider(argv), sessionConfig),
+                sessionConfig,
+                hostModel,
+                requestedConcurrency: parallelWorkers,
+                quotaStateEntry,
+            });
+            const waveSize = waveSchedule.wave_size;
+            if (waveSchedule.cooldown_until) {
+                const waitMs = new Date(waveSchedule.cooldown_until).getTime() - Date.now();
+                if (waitMs > 0) {
+                    const cappedWait = Math.min(waitMs, 120_000);
+                    process.stderr.write(`[quota] Cooldown active — waiting ${Math.ceil(cappedWait / 1000)}s before next wave.\n`);
+                    await new Promise((r) => setTimeout(r, cappedWait));
+                }
+            }
             const allPendingTasks = buildPendingAuditTasks(bundle);
-            const taskGroups = chunkArray(allPendingTasks.slice(0, parallelWorkers * agentBatchSize), agentBatchSize);
+            const taskGroups = chunkArray(allPendingTasks.slice(0, waveSize * agentBatchSize), agentBatchSize);
             const workerSlots = [];
             for (const rawGroup of taskGroups) {
                 const group = await addFileLineCountHints(root, rawGroup);
@@ -1530,6 +1590,16 @@ async function cmdRunToCompletion(argv) {
                 });
                 artifactsWritten.add("run-ledger.json");
             }
+            // Record outcome for adaptive learning (best-effort — never blocks dispatch)
+            {
+                const hasRateLimit = batchErrors.some(detectRateLimitError);
+                await recordWaveOutcome(providerModelKey, {
+                    concurrency: workerSlots.length,
+                    estimated_tokens: waveSize * agentBatchSize * 900,
+                    outcome: hasRateLimit ? "rate_limited" : batchErrors.length > 0 ? "timeout" : "success",
+                    cooldown_until: hasRateLimit ? defaultCooldownUntil(null) : null,
+                }, sessionConfig.quota?.empirical_half_life_hours ?? 24).catch(() => undefined);
+            }
             if (batchErrors.length > 0) {
                 const bundleAfter = await loadArtifactBundle(artifactsDir);
                 const blockedState = buildBlockedAuditState({
@@ -2117,6 +2187,7 @@ async function prepareDispatchArtifacts(params) {
     }
     const tasks = await readJsonFile(tasksPath);
     const bundle = await loadArtifactBundle(artifactsDir);
+    const sessionConfig = params.sessionConfig ?? (await loadSessionConfig(artifactsDir).catch(() => ({})));
     const lensDefsPath = join(packageRoot, "dispatch", "lens-definitions.json");
     const lensDefs = await readJsonFile(lensDefsPath);
     await mkdir(taskResultsDir, { recursive: true });
@@ -2342,6 +2413,52 @@ async function prepareDispatchArtifacts(params) {
         run_id: runId,
         entries: resultMapEntries,
     });
+    // Compute and write dispatch-quota.json
+    const hostModel = params.hostModel ?? null;
+    const avgPacketTokens = plan.length > 0
+        ? Math.floor(plan.reduce((s, p) => s + p.complexity.estimated_tokens, 0) / plan.length)
+        : 0;
+    const quotaProviderName = resolveFreshSessionProviderName(undefined, sessionConfig);
+    const quotaProviderKey = buildProviderModelKey(quotaProviderName, hostModel);
+    const quotaState = await readQuotaState().catch(() => ({ version: 1, entries: {} }));
+    const quotaStateEntry = quotaState.entries[quotaProviderKey] ?? null;
+    const waveSchedule = scheduleWave({
+        providerName: quotaProviderName,
+        sessionConfig,
+        hostModel,
+        requestedConcurrency: sessionConfig.parallel_workers ?? 1,
+        estimatedPacketTokens: avgPacketTokens,
+        quotaStateEntry,
+    });
+    const dispatchQuota = {
+        contract_version: "audit-code-dispatch-quota/v1alpha1",
+        run_id: runId,
+        model: hostModel,
+        resolved_limits: waveSchedule.resolved_limits,
+        confidence: waveSchedule.confidence,
+        source: waveSchedule.source,
+        wave_size: waveSchedule.wave_size,
+        estimated_wave_tokens: waveSchedule.estimated_wave_tokens,
+        cooldown_until: waveSchedule.cooldown_until,
+    };
+    const dispatchQuotaPath = join(runDir, "dispatch-quota.json");
+    await writeJsonFile(dispatchQuotaPath, dispatchQuota);
+    // Warn about packets that exceed the context budget only when we have reliable limit
+    // information (confidence medium/high). Low-confidence limits are conservative defaults
+    // and would produce misleading warnings since the real context window is unknown.
+    if (waveSchedule.confidence !== "low") {
+        const contextBudget = waveSchedule.resolved_limits.context_tokens - waveSchedule.resolved_limits.output_tokens;
+        for (const p of plan) {
+            if (p.complexity.estimated_tokens > contextBudget) {
+                warnings.push({
+                    code: "oversized_packet",
+                    message: `Packet ${p.packet_id} estimated tokens (${p.complexity.estimated_tokens}) exceed ` +
+                        `context budget (${contextBudget}). This packet may fail at dispatch. ` +
+                        `Set quota.default_context_tokens or quota.models in session-config.json to override.`,
+                });
+            }
+        }
+    }
     const warningsPath = warnings.length > 0
         ? join(runDir, "dispatch-warnings.json")
         : null;
@@ -2351,6 +2468,7 @@ async function prepareDispatchArtifacts(params) {
     return {
         run_id: runId,
         dispatch_plan_path: dispatchPlanPath,
+        dispatch_quota_path: dispatchQuotaPath,
         packet_count: plan.length,
         task_count: orderedTasks.length,
         largest_packet: largestPacketId
@@ -2372,6 +2490,7 @@ async function cmdPrepareDispatch(argv) {
         runId,
         artifactsDir: getArtifactsDir(argv),
         root: getFlag(argv, "--root") ? getRootDir(argv) : undefined,
+        hostModel: getHostModel(argv),
     });
     console.log(JSON.stringify(result, null, 2));
 }
@@ -2923,6 +3042,45 @@ async function cmdCleanup(argv) {
 /** `mcp` command: start the MCP server with the arguments that follow the command name. */
 async function cmdMcp(argv) {
     const serverArgs = argv.slice(3);
     await runAuditCodeMcpServer(serverArgs);
 }
+/**
+ * `quota` command: print a JSON report for the selected provider/model with
+ * the resolved limits, the probe result, the caps learned from persisted
+ * quota state (null when there is no entry) and the resulting wave schedule.
+ */
+async function cmdQuota(argv) {
+    // A missing or unreadable session config is treated as an empty one.
+    const config = await loadSessionConfig(getArtifactsDir(argv)).catch(() => ({}));
+    const explicitProvider = getExplicitProvider(argv);
+    const model = getHostModel(argv);
+    const probeMode = getQuotaProbeMode(argv, config);
+    const provider = resolveFreshSessionProviderName(explicitProvider, config);
+    const stateKey = buildProviderModelKey(provider, model);
+    const resolved = resolveLimits({ providerName: provider, sessionConfig: config, hostModel: model });
+    const probe = await probeProvider(provider, probeMode);
+    const state = await readQuotaState().catch(() => ({ version: 1, entries: {} }));
+    const learnedEntry = state.entries[stateKey] ?? null;
+    const halfLifeHours = config.quota?.empirical_half_life_hours ?? 24;
+    const waveSchedule = scheduleWave({
+        providerName: provider,
+        sessionConfig: config,
+        hostModel: model,
+        requestedConcurrency: config.parallel_workers ?? 1,
+        quotaStateEntry: learnedEntry,
+    });
+    let learnedCaps = null;
+    if (learnedEntry) {
+        learnedCaps = {
+            max_safe_concurrency: computeMaxSafeConcurrency(learnedEntry, halfLifeHours),
+            cooldown_until: learnedEntry.cooldown_until,
+            last_429_at: learnedEntry.last_429_at,
+        };
+    }
+    const report = {
+        provider,
+        model,
+        provider_model_key: stateKey,
+        resolved_limits: resolved.limits,
+        confidence: resolved.confidence,
+        source: resolved.source,
+        probe,
+        learned_caps: learnedCaps,
+        wave_schedule: waveSchedule,
+        quota_state_path: getQuotaStatePath(),
+    };
+    console.log(JSON.stringify(report, null, 2));
+}
 async function main(argv) {
     const command = argv[2] ?? "sample-run";
     switch (command) {
@@ -2989,9 +3147,12 @@ async function main(argv) {
         case "validate-result":
             await cmdValidateResult(argv);
             return;
+        case "quota":
+            await cmdQuota(argv);
+            return;
         default:
             console.error(`Unknown command: ${command}`);
-            console.error("Available commands: sample-run, advance-audit, next-step, run-to-completion, worker-run, import-external-analyzer, intake, plan, ingest-results, explain-task, update-runtime-validation, validate, validate-results, requeue, synthesize, cleanup, mcp, prepare-dispatch, merge-and-ingest, submit-packet, validate-result");
+            console.error("Available commands: sample-run, advance-audit, next-step, run-to-completion, worker-run, import-external-analyzer, intake, plan, ingest-results, explain-task, update-runtime-validation, validate, validate-results, requeue, synthesize, cleanup, mcp, prepare-dispatch, merge-and-ingest, submit-packet, validate-result, quota");
             process.exitCode = 1;
     }
 }

package/dist/orchestrator/reviewPackets.d.ts CHANGED Viewed

@@ -6,6 +6,11 @@ export interface BuildReviewPacketOptions {
     lineIndex?: Record<string, number>;
     maxTasksPerPacket?: number;
     targetPacketLines?: number;
+    /**
+     * Available context budget in tokens (context_tokens − reserved_output_tokens).
+     * When provided, targetPacketLines is capped to fit within this budget.
+     */
+    maxContextTokens?: number;
 }
 export declare function buildReviewPackets(tasks: AuditTask[], options?: BuildReviewPacketOptions): ReviewPacket[];
 export declare function orderTasksForPacketReview(tasks: AuditTask[], options?: BuildReviewPacketOptions): AuditTask[];

package/dist/orchestrator/reviewPackets.js CHANGED Viewed

@@ -949,7 +949,11 @@ function buildPacket(tasks, packetIndex, lineIndex, graphEdges = [], graphBundle
 }
 function buildReviewPacketPlanningData(tasks, options = {}) {
     const maxTasksPerPacket = options.maxTasksPerPacket ?? DEFAULT_MAX_TASKS_PER_PACKET;
-    const targetPacketLines = options.targetPacketLines ?? DEFAULT_TARGET_PACKET_LINES;
+    const configuredTargetLines = options.targetPacketLines ?? DEFAULT_TARGET_PACKET_LINES;
+    const targetPacketLines = options.maxContextTokens != null
+        ? Math.min(configuredTargetLines, Math.max(1, Math.floor((options.maxContextTokens - ESTIMATED_PACKET_PROMPT_TOKENS) /
+            ESTIMATED_TOKENS_PER_LINE)))
+        : configuredTargetLines;
     const graphEdges = collectGraphEdges(options.graphBundle);
     const groups = buildTaskGroups(tasks);
     const planningGraphEdges = buildPlanningGraphEdges(groups, graphEdges, options.graphBundle, options.lineIndex, targetPacketLines);

package/dist/quota/index.d.ts ADDED Viewed

@@ -0,0 +1,8 @@
+export { resolveLimits, lookupKnownModel, classifyProvider } from "./limits.js";
+export type { LimitResolutionResult, ResolveLimitsOptions, ProviderType } from "./limits.js";
+export { readQuotaState, writeQuotaState, computeMaxSafeConcurrency, recordWaveOutcome, getQuotaStatePath, decayWeight, applyDecayToEntry, } from "./state.js";
+export { scheduleWave, buildProviderModelKey } from "./scheduler.js";
+export type { ScheduleWaveOptions } from "./scheduler.js";
+export { probeProvider } from "./probe.js";
+export type { ProbeResult } from "./probe.js";
+export type { ResolvedLimits, LimitSource, LimitConfidence, QuotaState, QuotaStateEntry, ConcurrencyBucket, WaveSchedule, DispatchQuota, ObservedWaveOutcome, } from "./types.js";

package/dist/quota/index.js ADDED Viewed

@@ -0,0 +1,4 @@
+export { resolveLimits, lookupKnownModel, classifyProvider } from "./limits.js";
+export { readQuotaState, writeQuotaState, computeMaxSafeConcurrency, recordWaveOutcome, getQuotaStatePath, decayWeight, applyDecayToEntry, } from "./state.js";
+export { scheduleWave, buildProviderModelKey } from "./scheduler.js";
+export { probeProvider } from "./probe.js";

package/dist/quota/limits.d.ts ADDED Viewed

@@ -0,0 +1,16 @@
+import type { ResolvedProviderName, SessionConfig } from "../types/sessionConfig.js";
+import type { LimitConfidence, LimitSource, ResolvedLimits } from "./types.js";
+export type ProviderType = "hosted" | "local" | "unknown";
+export declare function classifyProvider(providerName: ResolvedProviderName): ProviderType;
+export declare function lookupKnownModel(modelKey: string): Pick<ResolvedLimits, "context_tokens" | "output_tokens"> | undefined;
+/** What `resolveLimits` returns: the limits plus where they came from. */
+export interface LimitResolutionResult {
+    /** The limits to apply. */
+    limits: ResolvedLimits;
+    /** Which resolution tier produced `limits`. */
+    source: LimitSource;
+    /** How much the caller should trust `limits`. */
+    confidence: LimitConfidence;
+}
+/** Inputs to `resolveLimits`. */
+export interface ResolveLimitsOptions {
+    /** Provider the limits are being resolved for. */
+    providerName: ResolvedProviderName;
+    /** Session configuration; its `quota` section supplies overrides and defaults. */
+    sessionConfig: SessionConfig;
+    /** Model identifier used to look up per-model limits; may be absent or null. */
+    hostModel?: string | null;
+}
+export declare function resolveLimits(options: ResolveLimitsOptions): LimitResolutionResult;

package/dist/quota/limits.js ADDED Viewed

@@ -0,0 +1,77 @@
+// Static table of context-window and output-token sizes, keyed by a
+// lower-case "vendor/model" string (lookupKnownModel lower-cases and trims
+// its argument before indexing this table).
+// RPM/TPM are omitted here — they are tier-dependent and must come from learning.
+const KNOWN_MODEL_LIMITS = {
+    "anthropic/claude-opus-4-7": { context_tokens: 200_000, output_tokens: 32_000 },
+    "anthropic/claude-sonnet-4-6": { context_tokens: 200_000, output_tokens: 8_192 },
+    "anthropic/claude-haiku-4-5": { context_tokens: 200_000, output_tokens: 8_192 },
+    "anthropic/claude-opus-4-5": { context_tokens: 200_000, output_tokens: 8_192 },
+    "anthropic/claude-sonnet-4-5": { context_tokens: 200_000, output_tokens: 8_192 },
+    "openai/gpt-4o": { context_tokens: 128_000, output_tokens: 16_384 },
+    "openai/gpt-4o-mini": { context_tokens: 128_000, output_tokens: 16_384 },
+    "google/gemini-2.0-flash": { context_tokens: 1_048_576, output_tokens: 8_192 },
+    "google/gemini-1.5-pro": { context_tokens: 2_097_152, output_tokens: 8_192 },
+};
+/**
+ * Classify a provider name as "hosted", "local" or "unknown".
+ * "claude-code" and "opencode" are hosted, "local-subprocess" is local,
+ * and every other name (including "subprocess-template" and "vscode-task")
+ * is unknown.
+ */
+export function classifyProvider(providerName) {
+    if (providerName === "claude-code" || providerName === "opencode") {
+        return "hosted";
+    }
+    if (providerName === "local-subprocess") {
+        return "local";
+    }
+    return "unknown";
+}
+/**
+ * Look up the static context/output sizes for a model key such as
+ * "openai/gpt-4o". The key is lower-cased and trimmed before the lookup.
+ *
+ * @param {string} modelKey - Model identifier in "vendor/model" form.
+ * @returns {{context_tokens: number, output_tokens: number} | undefined}
+ *   The table entry, or undefined when the model is not in the table.
+ */
+export function lookupKnownModel(modelKey) {
+    const normalized = modelKey.toLowerCase().trim();
+    // Own-property check: keys like "constructor" or "__proto__" must not
+    // resolve to members inherited from Object.prototype.
+    return Object.prototype.hasOwnProperty.call(KNOWN_MODEL_LIMITS, normalized)
+        ? KNOWN_MODEL_LIMITS[normalized]
+        : undefined;
+}
+/**
+ * Build the fallback limits: context and output sizes come from
+ * `sessionConfig.quota` (32_000 and 4_096 when unset) and all per-minute
+ * rate limits are null.
+ */
+function defaultLimits(sessionConfig) {
+    const quotaConfig = sessionConfig.quota ?? {};
+    const contextTokens = quotaConfig.default_context_tokens ?? 32_000;
+    const outputTokens = quotaConfig.reserved_output_tokens ?? 4_096;
+    return {
+        context_tokens: contextTokens,
+        output_tokens: outputTokens,
+        requests_per_minute: null,
+        input_tokens_per_minute: null,
+        output_tokens_per_minute: null,
+    };
+}
+/**
+ * Resolve the limits for a provider/model, trying three tiers in order:
+ *   1. an explicit per-model entry in `sessionConfig.quota.models`
+ *      (source "explicit_config", confidence "high");
+ *   2. the static known-model table (source "known_metadata",
+ *      confidence "medium"), which supplies context/output sizes only;
+ *   3. the conservative defaults (source "default", confidence "low").
+ *
+ * A tier-1 entry that omits `context_tokens` or `output_tokens` inherits the
+ * known-model value for that field when the model is in the static table,
+ * and the default value otherwise.
+ *
+ * @param {object} options
+ * @param {string} options.providerName - Accepted for interface compatibility; not used here.
+ * @param {object} options.sessionConfig - Session config; only its `quota` section is read.
+ * @param {string | null} [options.hostModel] - Model key; without it only tier 3 applies.
+ * @returns {{limits: object, source: string, confidence: string}}
+ */
+export function resolveLimits(options) {
+    const { sessionConfig, hostModel } = options;
+    const quota = sessionConfig.quota ?? {};
+    const defaults = defaultLimits(sessionConfig);
+    const known = hostModel ? lookupKnownModel(hostModel) : undefined;
+    // Own-property check so a model named e.g. "constructor" cannot pick up an
+    // inherited Object.prototype member and be reported as explicit config.
+    const override = hostModel && quota.models != null &&
+        Object.prototype.hasOwnProperty.call(quota.models, hostModel)
+        ? quota.models[hostModel]
+        : undefined;
+    // 1. Explicit per-model config overrides
+    if (override) {
+        return {
+            limits: {
+                context_tokens: override.context_tokens ?? known?.context_tokens ?? defaults.context_tokens,
+                output_tokens: override.output_tokens ?? known?.output_tokens ?? defaults.output_tokens,
+                requests_per_minute: override.requests_per_minute ?? null,
+                input_tokens_per_minute: override.input_tokens_per_minute ?? null,
+                output_tokens_per_minute: override.output_tokens_per_minute ?? null,
+            },
+            source: "explicit_config",
+            confidence: "high",
+        };
+    }
+    // 2. Static known-model database (context/output only; RPM/TPM from learning)
+    if (known) {
+        return {
+            limits: {
+                context_tokens: known.context_tokens,
+                output_tokens: known.output_tokens,
+                requests_per_minute: null,
+                input_tokens_per_minute: null,
+                output_tokens_per_minute: null,
+            },
+            source: "known_metadata",
+            confidence: "medium",
+        };
+    }
+    // 3. Conservative defaults for all provider types
+    return { limits: defaults, source: "default", confidence: "low" };
+}

package/dist/quota/probe.d.ts ADDED Viewed

@@ -0,0 +1,13 @@
+/** Outcome reported by `probeProvider`. */
+export interface ProbeResult {
+    /** False when no probe was performed for the provider. */
+    supported: boolean;
+    /** Human-readable explanation of the outcome. */
+    reason: string;
+}
+/**
+ * Probe a provider to discover its rate limits.
+ *
+ * Only subprocess-template supports direct probing since it is the only
+ * provider where the auditor controls the API call. IDE providers
+ * (claude-code, opencode) select the model internally; their limits come
+ * from known-model metadata or learned behavior.
+ */
+export declare function probeProvider(providerName: string, probeMode?: "auto" | "never" | "force"): Promise<ProbeResult>;

package/dist/quota/probe.js ADDED Viewed

@@ -0,0 +1,21 @@
+/**
+ * Attempt to discover a provider's rate limits by probing it.
+ *
+ * Direct probing is only meaningful for subprocess-template, the one
+ * provider where the auditor itself controls the API call. IDE providers
+ * (claude-code, opencode) pick the model internally, so their limits come
+ * from known-model metadata or learned behavior. The subprocess-template
+ * probe is not implemented yet, so every call currently reports
+ * `supported: false` together with the reason.
+ */
+export async function probeProvider(providerName, probeMode = "auto") {
+    const unsupported = (reason) => ({ supported: false, reason });
+    if (probeMode === "never") {
+        return unsupported("probe disabled by config");
+    }
+    if (providerName === "subprocess-template") {
+        // subprocess-template probe not yet implemented
+        return unsupported("subprocess-template probe not yet implemented");
+    }
+    return unsupported(`probe not applicable for ${providerName} — limits come from known-model metadata or learned behavior`);
+}

package/dist/quota/scheduler.d.ts ADDED Viewed

@@ -0,0 +1,14 @@
+import type { ResolvedProviderName, SessionConfig } from "../types/sessionConfig.js";
+import type { QuotaStateEntry, WaveSchedule } from "./types.js";
+/** Inputs to `scheduleWave`. */
+export interface ScheduleWaveOptions {
+    /** Provider the wave will be dispatched to. */
+    providerName: ResolvedProviderName;
+    /** Session configuration; its `quota` section tunes the scheduler. */
+    sessionConfig: SessionConfig;
+    /** Model identifier, or null when the host model is not known. */
+    hostModel: string | null;
+    /** Number of parallel workers the caller would like to run. */
+    requestedConcurrency: number;
+    /** Average estimated tokens per packet/worker. Used for TPM budget. */
+    estimatedPacketTokens?: number;
+    /** Persisted quota-state entry for this provider/model, if one exists. */
+    quotaStateEntry?: QuotaStateEntry | null;
+}
+export declare function scheduleWave(options: ScheduleWaveOptions): WaveSchedule;
+/** Build the state key used for indexing quota-state.json entries. */
+export declare function buildProviderModelKey(providerName: string, hostModel: string | null | undefined): string;

package/dist/quota/scheduler.js ADDED Viewed

@@ -0,0 +1,76 @@
+import { classifyProvider, resolveLimits } from "./limits.js";
+import { computeMaxSafeConcurrency } from "./state.js";
+/**
+ * Decide how many workers may be dispatched in the next wave.
+ *
+ * With `sessionConfig.quota.enabled === false` the requested concurrency is
+ * returned untouched apart from normalisation. Otherwise the wave size starts
+ * at the requested concurrency and is reduced by, in order: an active
+ * cooldown (forces 1), the requests-per-minute limit, the input
+ * tokens-per-minute limit, and then either the learned safe concurrency or,
+ * for hosted providers with only default limits and no learned data, the
+ * conservative `quota.unknown_hosted_concurrency` (default 1).
+ *
+ * @param {object} options
+ * @param {string} options.providerName
+ * @param {object} options.sessionConfig
+ * @param {string | null} options.hostModel
+ * @param {number} options.requestedConcurrency - Normalised to an integer >= 1;
+ *   non-finite values are treated as 1.
+ * @param {number} [options.estimatedPacketTokens=0] - Average tokens per packet.
+ * @param {object | null} [options.quotaStateEntry=null] - Learned state, if any.
+ * @returns {object} Wave schedule; `wave_size` is always an integer >= 1.
+ */
+export function scheduleWave(options) {
+    const { providerName, sessionConfig, hostModel, requestedConcurrency, estimatedPacketTokens = 0, quotaStateEntry = null, } = options;
+    const quota = sessionConfig.quota ?? {};
+    // Normalise once so every path yields a usable wave size: a wave of 0,
+    // a fractional wave or NaN would stall or corrupt dispatch downstream.
+    const requested = Number.isFinite(requestedConcurrency)
+        ? Math.max(1, Math.floor(requestedConcurrency))
+        : 1;
+    if (quota.enabled === false) {
+        const limits = {
+            context_tokens: quota.default_context_tokens ?? 32_000,
+            output_tokens: quota.reserved_output_tokens ?? 4_096,
+            requests_per_minute: null,
+            input_tokens_per_minute: null,
+            output_tokens_per_minute: null,
+        };
+        return {
+            wave_size: requested,
+            estimated_wave_tokens: requested * estimatedPacketTokens,
+            cooldown_until: null,
+            confidence: "high",
+            source: "default",
+            resolved_limits: limits,
+            model: hostModel,
+        };
+    }
+    const safetyMargin = quota.safety_margin ?? 0.8;
+    const halfLifeHours = quota.empirical_half_life_hours ?? 24;
+    const providerType = classifyProvider(providerName);
+    const { limits, source, confidence } = resolveLimits({ providerName, sessionConfig, hostModel });
+    let waveSize = requested;
+    let cooldownUntil = null;
+    // Respect an active cooldown period
+    if (quotaStateEntry?.cooldown_until) {
+        const cooldownExpiry = new Date(quotaStateEntry.cooldown_until).getTime();
+        if (cooldownExpiry > Date.now()) {
+            cooldownUntil = quotaStateEntry.cooldown_until;
+            waveSize = 1;
+        }
+    }
+    if (!cooldownUntil) {
+        // Cap by requests-per-minute
+        if (limits.requests_per_minute != null) {
+            const rpmCap = Math.max(1, Math.floor(limits.requests_per_minute * safetyMargin));
+            waveSize = Math.min(waveSize, rpmCap);
+        }
+        // Cap by input tokens-per-minute
+        if (limits.input_tokens_per_minute != null && estimatedPacketTokens > 0) {
+            const tpmCap = Math.max(1, Math.floor((limits.input_tokens_per_minute * safetyMargin) / estimatedPacketTokens));
+            waveSize = Math.min(waveSize, tpmCap);
+        }
+        if (quotaStateEntry) {
+            const learnedCap = computeMaxSafeConcurrency(quotaStateEntry, halfLifeHours);
+            waveSize = Math.min(waveSize, learnedCap);
+        }
+        else if (providerType === "hosted" && source === "default") {
+            // Unknown hosted provider with no learned data and no model-specific limits —
+            // be conservative. If the caller supplied RPM/TPM caps those already govern rate;
+            // this guard only triggers when we have no rate information at all.
+            const conservativeDefault = quota.unknown_hosted_concurrency ?? 1;
+            waveSize = Math.min(waveSize, conservativeDefault);
+        }
+        // Local providers with no learned data: use requestedConcurrency (no rate pressure)
+    }
+    // Math.max(1, NaN) is NaN, so guard non-finite results from the caps explicitly.
+    waveSize = Number.isFinite(waveSize) ? Math.max(1, Math.floor(waveSize)) : 1;
+    return {
+        wave_size: waveSize,
+        estimated_wave_tokens: waveSize * estimatedPacketTokens,
+        cooldown_until: cooldownUntil,
+        confidence,
+        source,
+        resolved_limits: limits,
+        model: hostModel,
+    };
+}
+/**
+ * Compose the key under which quota-state.json entries are indexed:
+ * "<provider>/<model>", with "*" standing in for a missing model.
+ */
+export function buildProviderModelKey(providerName, hostModel) {
+    const modelPart = hostModel ? hostModel : "*";
+    return `${providerName}/${modelPart}`;
+}

package/dist/quota/state.d.ts ADDED Viewed

@@ -0,0 +1,12 @@
+import type { ObservedWaveOutcome, QuotaState, QuotaStateEntry } from "./types.js";
+export declare function getQuotaStatePath(): string;
+export declare function decayWeight(weight: number, elapsedHours: number, halfLifeHours: number): number;
+export declare function applyDecayToEntry(entry: QuotaStateEntry, halfLifeHours: number): QuotaStateEntry;
+export declare function readQuotaState(): Promise<QuotaState>;
+export declare function writeQuotaState(state: QuotaState): Promise<void>;
+/**
+ * Returns the highest concurrency level for which decayed success evidence
+ * exceeds failure evidence, with a minimum of 1.
+ */
+export declare function computeMaxSafeConcurrency(entry: QuotaStateEntry, halfLifeHours: number, maxToCheck?: number): number;
+export declare function recordWaveOutcome(providerModelKey: string, outcome: ObservedWaveOutcome, halfLifeHours: number): Promise<void>;

package/dist/quota/state.js ADDED Viewed

@@ -0,0 +1,101 @@
+import { mkdir, readFile, writeFile } from "node:fs/promises";
+import { homedir } from "node:os";
+import { join } from "node:path";
+// Directory under the user's home directory that holds the quota state file.
+const STATE_DIR = join(homedir(), ".audit-code");
+// File that readQuotaState/writeQuotaState load and save.
+const STATE_PATH = join(STATE_DIR, "quota-state.json");
+// A bucket needs at least this much success weight before we trust it.
+const MIN_EVIDENCE_WEIGHT = 0.5;
+export function getQuotaStatePath() {
+    return STATE_PATH;
+}
+export function decayWeight(weight, elapsedHours, halfLifeHours) {
+    if (halfLifeHours <= 0 || weight <= 0)
+        return 0;
+    return weight * Math.pow(0.5, elapsedHours / halfLifeHours);
+}
+export function applyDecayToEntry(entry, halfLifeHours) {
+    const elapsedHours = (Date.now() - new Date(entry.updated_at).getTime()) / (1000 * 60 * 60);
+    if (elapsedHours < 0.001)
+        return entry;
+    const decayed = {};
+    for (const [key, bucket] of Object.entries(entry.buckets)) {
+        decayed[key] = {
+            success_weight: decayWeight(bucket.success_weight, elapsedHours, halfLifeHours),
+            failure_weight: decayWeight(bucket.failure_weight, elapsedHours, halfLifeHours),
+        };
+    }
+    return { ...entry, buckets: decayed };
+}
+function isQuotaState(value) {
+    return (value !== null &&
+        typeof value === "object" &&
+        !Array.isArray(value) &&
+        value["version"] === 1 &&
+        typeof value["entries"] === "object");
+}
+export async function readQuotaState() {
+    try {
+        const raw = await readFile(STATE_PATH, "utf8");
+        const parsed = JSON.parse(raw);
+        if (isQuotaState(parsed))
+            return parsed;
+    }
+    catch {
+        // File not found or malformed — start fresh
+    }
+    return { version: 1, entries: {} };
+}
+export async function writeQuotaState(state) {
+    await mkdir(STATE_DIR, { recursive: true });
+    await writeFile(STATE_PATH, JSON.stringify(state, null, 2) + "\n", "utf8");
+}
+/**
+ * Returns the highest concurrency level for which decayed success evidence
+ * exceeds failure evidence, with a minimum of 1.
+ */
+export function computeMaxSafeConcurrency(entry, halfLifeHours, maxToCheck = 32) {
+    const decayed = applyDecayToEntry(entry, halfLifeHours);
+    let maxSafe = 1;
+    for (let n = 1; n <= maxToCheck; n++) {
+        const bucket = decayed.buckets[String(n)];
+        if (!bucket)
+            break;
+        if (bucket.success_weight >= MIN_EVIDENCE_WEIGHT &&
+            bucket.success_weight > bucket.failure_weight) {
+            maxSafe = n;
+        }
+        else {
+            break;
+        }
+    }
+    return maxSafe;
+}
+function blankEntry() {
+    return { updated_at: new Date().toISOString(), buckets: {}, cooldown_until: null, last_429_at: null };
+}
+export async function recordWaveOutcome(providerModelKey, outcome, halfLifeHours) {
+    const state = await readQuotaState();
+    const entry = applyDecayToEntry(state.entries[providerModelKey] ?? blankEntry(), halfLifeHours);
+    if (outcome.outcome === "success") {
+        // Success at N proves 1..N are all safe
+        for (let n = 1; n <= outcome.concurrency; n++) {
+            const bucket = entry.buckets[String(n)] ?? { success_weight: 0, failure_weight: 0 };
+            bucket.success_weight += 1.0;
+            entry.buckets[String(n)] = bucket;
+        }
+    }
+    else {
+        entry.last_429_at = new Date().toISOString();
+        if (outcome.cooldown_until)
+            entry.cooldown_until = outcome.cooldown_until;
+        // Failure at N marks N and above as unsafe
+        for (let n = outcome.concurrency; n <= outcome.concurrency + 4; n++) {
+            const bucket = entry.buckets[String(n)] ?? { success_weight: 0, failure_weight: 0 };
+            bucket.failure_weight += 1.0;
+            entry.buckets[String(n)] = bucket;
+        }
+    }
+    entry.updated_at = new Date().toISOString();
+    state.entries[providerModelKey] = entry;
+    await writeQuotaState(state);
+}

package/dist/quota/types.d.ts ADDED Viewed

@@ -0,0 +1,50 @@
+/** Where a set of resolved limits came from. */
+export type LimitSource = "explicit_config" | "cli_flags" | "known_metadata" | "learned" | "default";
+/** How confident the scheduler is in a set of resolved limits. */
+export type LimitConfidence = "high" | "medium" | "low";
+/** Context and rate limits used to compute a wave size. The three per-minute rates may be null. */
+export interface ResolvedLimits {
+    context_tokens: number;
+    output_tokens: number;
+    requests_per_minute: number | null;
+    input_tokens_per_minute: number | null;
+    output_tokens_per_minute: number | null;
+}
+/**
+ * Evidence accumulated for one concurrency level. recordWaveOutcome adds 1.0
+ * per observed wave, and the weights are decayed over time by decayWeight.
+ */
+export interface ConcurrencyBucket {
+    success_weight: number;
+    failure_weight: number;
+}
+/** Persisted evidence for one provider/model key. */
+export interface QuotaStateEntry {
+    /** ISO timestamp of the last update; the reference point for decay. */
+    updated_at: string;
+    /** Buckets keyed by the concurrency level rendered as a string ("1", "2", ...). */
+    buckets: Record<string, ConcurrencyBucket>;
+    /** Copied from a non-success outcome's cooldown_until when that outcome carries one. */
+    cooldown_until: string | null;
+    /** ISO timestamp set whenever a non-success outcome is recorded. */
+    last_429_at: string | null;
+}
+/** Top-level shape of quota-state.json. */
+export interface QuotaState {
+    version: 1;
+    /** Entries keyed by the string produced by buildProviderModelKey. */
+    entries: Record<string, QuotaStateEntry>;
+}
+/** Schedule computed for one dispatch wave. */
+export interface WaveSchedule {
+    wave_size: number;
+    estimated_wave_tokens: number;
+    cooldown_until: string | null;
+    confidence: LimitConfidence;
+    source: LimitSource;
+    resolved_limits: ResolvedLimits;
+    model: string | null;
+}
+/**
+ * Quota schedule for a prepare-dispatch run, as described by
+ * schemas/dispatch_quota.schema.json (written beside dispatch-plan.json).
+ */
+export interface DispatchQuota {
+    contract_version: "audit-code-dispatch-quota/v1alpha1";
+    run_id: string;
+    model: string | null;
+    resolved_limits: ResolvedLimits;
+    confidence: LimitConfidence;
+    source: LimitSource;
+    wave_size: number;
+    estimated_wave_tokens: number;
+    cooldown_until: string | null;
+}
+/** One observed wave result, as passed to recordWaveOutcome. */
+export interface ObservedWaveOutcome {
+    /** Concurrency level the wave ran at; selects which buckets are updated. */
+    concurrency: number;
+    /** NOTE(review): not read by recordWaveOutcome in state.js — confirm intended use. */
+    estimated_tokens: number;
+    /** Only "success" adds success weight; every other value is recorded as a failure. */
+    outcome: "success" | "rate_limited" | "timeout";
+    cooldown_until?: string | null;
+    /** NOTE(review): not read by recordWaveOutcome in state.js — confirm intended use. */
+    reset_at?: string | null;
+}

package/dist/quota/types.js ADDED Viewed

	@@ -0,0 +1 @@
1	+ export {};

package/dist/types/sessionConfig.d.ts CHANGED Viewed

@@ -20,6 +20,33 @@ export interface VSCodeTaskConfig {
     command_template: string[];
     env?: Record<string, string>;
 }
+/**
+ * Limit overrides for a single model; used as the value type of
+ * QuotaConfig.models. Every field is optional.
+ */
+export interface QuotaModelLimits {
+    context_tokens?: number;
+    output_tokens?: number;
+    requests_per_minute?: number;
+    input_tokens_per_minute?: number;
+    output_tokens_per_minute?: number;
+}
+/** Quota-scheduling settings, exposed as the optional `quota` field of SessionConfig. */
+export interface QuotaConfig {
+    /** Set to false to disable all quota scheduling (default: true). */
+    enabled?: boolean;
+    /** Whether to probe the provider for live limits (default: "auto"). */
+    probe?: "auto" | "never" | "force";
+    /** Fraction of known limits to actually use (default: 0.8). */
+    safety_margin?: number;
+    /** Concurrency ceiling for hosted providers with no learned data (default: 1). */
+    unknown_hosted_concurrency?: number;
+    /** Concurrency for local providers with no learned data (default: "unlimited"). */
+    unknown_local_concurrency?: number | "unlimited";
+    /** Assumed context window when the model is not recognized (default: 32000). */
+    default_context_tokens?: number;
+    /** Tokens reserved for model output per request (default: 4096). */
+    reserved_output_tokens?: number;
+    /** Half-life of empirical success/failure evidence in hours (default: 24). */
+    empirical_half_life_hours?: number;
+    /** Per-model overrides keyed by "provider/model". */
+    models?: Record<string, QuotaModelLimits>;
+}
 export declare const PROVIDER_SECTION_KEYS: {
     readonly "subprocess-template": "subprocess_template";
     readonly "claude-code": "claude_code";
@@ -40,4 +67,5 @@ export interface SessionConfig {
     vscode_task?: VSCodeTaskConfig;
     agent_task_batch_size?: number;
     parallel_workers?: number;
+    quota?: QuotaConfig;
 }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "auditor-lambda",
-  "version": "0.3.21",
+  "version": "0.3.23",
   "private": false,
   "description": "Portable hybrid code-auditing framework for arbitrary repositories.",
   "type": "module",

package/schemas/dispatch_quota.schema.json ADDED Viewed

@@ -0,0 +1,77 @@
+{
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "$id": "audit-code-dispatch-quota/v1alpha1",
+  "title": "DispatchQuota",
+  "description": "Quota schedule for a prepare-dispatch run. Written beside dispatch-plan.json. Hosts must launch at most wave_size packets per wave, then re-read this file before the next wave to pick up any updated limits.",
+  "type": "object",
+  "required": [
+    "contract_version",
+    "run_id",
+    "model",
+    "resolved_limits",
+    "confidence",
+    "source",
+    "wave_size",
+    "estimated_wave_tokens",
+    "cooldown_until"
+  ],
+  "additionalProperties": false,
+  "properties": {
+    "contract_version": {
+      "type": "string",
+      "const": "audit-code-dispatch-quota/v1alpha1"
+    },
+    "run_id": {
+      "type": "string",
+      "description": "The dispatch run this quota schedule applies to."
+    },
+    "model": {
+      "type": ["string", "null"],
+      "description": "The host model this schedule was computed for, or null if unknown."
+    },
+    "resolved_limits": {
+      "type": "object",
+      "description": "The rate and context limits used to compute the wave size.",
+      "required": [
+        "context_tokens",
+        "output_tokens",
+        "requests_per_minute",
+        "input_tokens_per_minute",
+        "output_tokens_per_minute"
+      ],
+      "additionalProperties": false,
+      "properties": {
+        "context_tokens": { "type": "integer", "minimum": 1 },
+        "output_tokens": { "type": "integer", "minimum": 1 },
+        "requests_per_minute": { "type": ["integer", "null"], "minimum": 1 },
+        "input_tokens_per_minute": { "type": ["integer", "null"], "minimum": 1 },
+        "output_tokens_per_minute": { "type": ["integer", "null"], "minimum": 1 }
+      }
+    },
+    "confidence": {
+      "type": "string",
+      "enum": ["high", "medium", "low"],
+      "description": "How confident the scheduler is in the resolved limits."
+    },
+    "source": {
+      "type": "string",
+      "enum": ["explicit_config", "cli_flags", "known_metadata", "learned", "default"],
+      "description": "Where the resolved limits came from."
+    },
+    "wave_size": {
+      "type": "integer",
+      "minimum": 1,
+      "description": "Maximum number of packets to dispatch in a single wave."
+    },
+    "estimated_wave_tokens": {
+      "type": "integer",
+      "minimum": 0,
+      "description": "Estimated total input tokens for one wave at the recommended wave_size."
+    },
+    "cooldown_until": {
+      "type": ["string", "null"],
+      "format": "date-time",
+      "description": "If non-null, the host should wait until this timestamp before launching the next wave."
+    }
+  }
+}