auditor-lambda 0.3.37 → 0.3.38

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -32,7 +32,7 @@ import { buildReviewPackets, orderTasksForPacketReview, estimateTaskGroupTokens,
32
32
  import { buildFileAnchorSummary, } from "./orchestrator/fileAnchors.js";
33
33
  import { LOCAL_SUBPROCESS_PROVIDER_NAME } from "./providers/constants.js";
34
34
  import { runAuditCodeMcpServer } from "./mcp/server.js";
35
- import { scheduleWave, buildProviderModelKey, readQuotaState, recordWaveOutcome, resolveLimits, resolveHostActiveSubagentLimit, probeProvider, computeMaxSafeConcurrency, getQuotaStatePath, detectRateLimitError, computeCooldownUntil, runSlidingWindow, LearnedQuotaSource, CompositeQuotaSource, } from "./quota/index.js";
35
+ import { scheduleWave, buildProviderModelKey, readQuotaState, recordWaveOutcome, resolveLimits, resolveHostActiveSubagentLimit, probeProvider, computeMaxSafeConcurrency, getQuotaStatePath, detectRateLimitError, computeCooldownUntil, runSlidingWindow, LearnedQuotaSource, CompositeQuotaSource, lookupDiscoveredLimits, updateDiscoveredLimits, mergeDiscoveredLimits, getHeaderExtractorForProvider, } from "./quota/index.js";
36
36
  const packageRoot = resolve(dirname(fileURLToPath(import.meta.url)), "..");
37
37
  const ADVANCE_AUDIT_CONTRACT_VERSION = "audit-code/v1alpha1";
38
38
  const WORKER_RESULT_CONTRACT_VERSION = "audit-code-worker-result/v1alpha1";
@@ -1426,6 +1426,18 @@ async function cmdRunToCompletion(argv) {
1426
1426
  const allCandidateTasks = buildPendingAuditTasks(bundle);
1427
1427
  const candidateGroups = chunkArray(allCandidateTasks.slice(0, parallelWorkers * agentBatchSize), agentBatchSize);
1428
1428
  const slotTokenEstimates = candidateGroups.map((g) => estimateTaskGroupTokens(g));
1429
+ const providerLimits = await provider.queryLimits?.(hostModel)
1430
+ .then((r) => r ? { ...r, source: "provider_query" } : null)
1431
+ .catch(() => null)
1432
+ ?? null;
1433
+ const cachedLimits = await lookupDiscoveredLimits(providerModelKey).catch(() => null);
1434
+ const discoveredLimits = mergeDiscoveredLimits(providerLimits, cachedLimits);
1435
+ const halfLifeHours = sessionConfig.quota?.empirical_half_life_hours ?? 24;
1436
+ const quotaSource = new CompositeQuotaSource([new LearnedQuotaSource(halfLifeHours)]);
1437
+ const quotaSourceSnapshot = await quotaSource.queryCurrentUsage(providerModelKey).catch(() => null);
1438
+ const hostConcurrencyLimit = resolveHostActiveSubagentLimit({
1439
+ sessionConfig,
1440
+ });
1429
1441
  const waveSchedule = scheduleWave({
1430
1442
  providerName: resolveFreshSessionProviderName(getExplicitProvider(argv), sessionConfig),
1431
1443
  sessionConfig,
@@ -1433,6 +1445,9 @@ async function cmdRunToCompletion(argv) {
1433
1445
  requestedConcurrency: parallelWorkers,
1434
1446
  estimatedSlotTokens: slotTokenEstimates,
1435
1447
  quotaStateEntry,
1448
+ hostConcurrencyLimit,
1449
+ quotaSourceSnapshot,
1450
+ discoveredLimits,
1436
1451
  });
1437
1452
  const waveSize = waveSchedule.wave_size;
1438
1453
  if (waveSchedule.cooldown_until) {
@@ -1615,6 +1630,27 @@ async function cmdRunToCompletion(argv) {
1615
1630
  cooldown_until: rateLimitHit ? computeCooldownUntil(retryAfterMs) : null,
1616
1631
  }, sessionConfig.quota?.empirical_half_life_hours ?? 24).catch(() => undefined);
1617
1632
  }
1633
+ // Extract rate-limit headers from worker stderr (best-effort)
1634
+ {
1635
+ const extractor = getHeaderExtractorForProvider(provider.name);
1636
+ for (const slot of workerSlots) {
1637
+ try {
1638
+ const stderr = await readFile(slot.paths.stderrPath, "utf8");
1639
+ const extracted = extractor.extract(stderr);
1640
+ if (extracted && (extracted.requests_per_minute != null || extracted.input_tokens_per_minute != null)) {
1641
+ await updateDiscoveredLimits(providerModelKey, {
1642
+ requests_per_minute: extracted.requests_per_minute,
1643
+ input_tokens_per_minute: extracted.input_tokens_per_minute,
1644
+ source: "header_extraction",
1645
+ });
1646
+ break; // one successful extraction is enough
1647
+ }
1648
+ }
1649
+ catch {
1650
+ // stderr file missing or unreadable — skip
1651
+ }
1652
+ }
1653
+ }
1618
1654
  if (batchErrors.length > 0) {
1619
1655
  const bundleAfter = await loadArtifactBundle(artifactsDir);
1620
1656
  const blockedState = buildBlockedAuditState({
@@ -2470,6 +2506,7 @@ async function prepareDispatchArtifacts(params) {
2470
2506
  explicitLimit: params.hostActiveSubagentLimit,
2471
2507
  sessionConfig,
2472
2508
  });
2509
+ const dispatchCachedLimits = await lookupDiscoveredLimits(quotaProviderKey).catch(() => null);
2473
2510
  const waveSchedule = scheduleWave({
2474
2511
  providerName: quotaProviderName,
2475
2512
  sessionConfig,
@@ -2478,6 +2515,7 @@ async function prepareDispatchArtifacts(params) {
2478
2515
  estimatedSlotTokens: perPacketTokens,
2479
2516
  quotaStateEntry,
2480
2517
  hostConcurrencyLimit,
2518
+ discoveredLimits: dispatchCachedLimits,
2481
2519
  });
2482
2520
  const dispatchQuota = {
2483
2521
  contract_version: "audit-code-dispatch-quota/v1alpha2",
@@ -3227,6 +3265,7 @@ async function cmdQuota(argv) {
3227
3265
  });
3228
3266
  const quotaSource = new CompositeQuotaSource([new LearnedQuotaSource(halfLifeHours)]);
3229
3267
  const quotaSourceSnapshot = await quotaSource.queryCurrentUsage(providerModelKey).catch(() => null);
3268
+ const queryDiscoveredLimits = await lookupDiscoveredLimits(providerModelKey).catch(() => null);
3230
3269
  const waveSchedule = scheduleWave({
3231
3270
  providerName,
3232
3271
  sessionConfig,
@@ -3235,6 +3274,7 @@ async function cmdQuota(argv) {
3235
3274
  quotaStateEntry,
3236
3275
  hostConcurrencyLimit,
3237
3276
  quotaSourceSnapshot,
3277
+ discoveredLimits: queryDiscoveredLimits,
3238
3278
  });
3239
3279
  console.log(JSON.stringify({
3240
3280
  provider: providerName,
@@ -3253,6 +3293,7 @@ async function cmdQuota(argv) {
3253
3293
  }
3254
3294
  : null,
3255
3295
  quota_source_snapshot: quotaSourceSnapshot,
3296
+ discovered_limits: queryDiscoveredLimits,
3256
3297
  wave_schedule: waveSchedule,
3257
3298
  quota_state_path: getQuotaStatePath(),
3258
3299
  }, null, 2));
@@ -21,7 +21,13 @@ export interface LaunchFreshSessionResult {
21
21
  stderrPath?: string;
22
22
  error?: string;
23
23
  }
24
+ export interface ProviderRateLimits {
25
+ requests_per_minute?: number | null;
26
+ input_tokens_per_minute?: number | null;
27
+ output_tokens_per_minute?: number | null;
28
+ }
24
29
  export interface FreshSessionProvider {
25
30
  name: string;
26
31
  launch(input: LaunchFreshSessionInput): Promise<LaunchFreshSessionResult>;
32
+ queryLimits?(model: string | null): Promise<ProviderRateLimits | null>;
27
33
  }
@@ -0,0 +1,21 @@
1
+ export interface DiscoveredRateLimits {
2
+ requests_per_minute?: number | null;
3
+ input_tokens_per_minute?: number | null;
4
+ output_tokens_per_minute?: number | null;
5
+ source: string;
6
+ }
7
+ export interface DiscoveredLimitsCacheEntry {
8
+ requests_per_minute?: number;
9
+ input_tokens_per_minute?: number;
10
+ discovered_at: string;
11
+ source: string;
12
+ }
13
+ export interface DiscoveredLimitsCache {
14
+ version: 1;
15
+ entries: Record<string, DiscoveredLimitsCacheEntry>;
16
+ }
17
+ export declare function readDiscoveredLimitsCache(): Promise<DiscoveredLimitsCache>;
18
+ export declare function writeDiscoveredLimitsCache(cache: DiscoveredLimitsCache): Promise<void>;
19
+ export declare function updateDiscoveredLimits(providerModelKey: string, limits: DiscoveredRateLimits): Promise<void>;
20
+ export declare function lookupDiscoveredLimits(providerModelKey: string): Promise<DiscoveredRateLimits | null>;
21
+ export declare function mergeDiscoveredLimits(...sources: (DiscoveredRateLimits | null | undefined)[]): DiscoveredRateLimits | null;
@@ -0,0 +1,74 @@
1
+ import { mkdir, readFile, writeFile } from "node:fs/promises";
2
+ import { dirname } from "node:path";
3
+ import { getQuotaStatePath } from "./state.js";
4
/**
 * Resolve the on-disk location of the discovered-limits cache.
 *
 * A quota-state path ending in `quota-state.json` maps to a sibling
 * `discovered-limits.json` in the same directory.
 *
 * @returns {string} Path of the discovered-limits cache file; never equal to
 *   the quota-state path itself.
 */
function getCachePath() {
    const statePath = getQuotaStatePath();
    const cachePath = statePath.replace(/quota-state\.json$/, "discovered-limits.json");
    if (cachePath !== statePath) {
        return cachePath;
    }
    // The state file has some other name, so the substitution was a no-op.
    // Returning statePath unchanged would make cache writes overwrite the
    // quota state file; use a distinct name next to it instead.
    return `${statePath}.discovered-limits.json`;
}
7
/**
 * Load the discovered-limits cache from disk.
 *
 * Read and parse failures are caught here: a missing file is silent, any
 * other failure is reported on stderr. In every failure case, and whenever
 * the payload is not a well-formed version-1 cache, a fresh empty cache is
 * returned.
 *
 * @returns {Promise<{version: 1, entries: Record<string, object>}>} The
 *   parsed cache, or `{ version: 1, entries: {} }` when none is usable.
 */
export async function readDiscoveredLimitsCache() {
    const isRecord = (value) => value !== null && typeof value === "object" && !Array.isArray(value);
    try {
        const raw = await readFile(getCachePath(), "utf8");
        const parsed = JSON.parse(raw);
        // `entries` must be a plain object too: callers index into it
        // directly, so a payload without it would make them throw.
        if (isRecord(parsed) && parsed["version"] === 1 && isRecord(parsed["entries"])) {
            return parsed;
        }
    }
    catch (error) {
        if (error?.code !== "ENOENT") {
            process.stderr.write(`[quota] ignoring unreadable discovered-limits cache: ${error instanceof Error ? error.message : String(error)}\n`);
        }
    }
    return { version: 1, entries: {} };
}
25
/**
 * Persist the whole cache object to the discovered-limits cache file.
 *
 * The parent directory is created first (recursively). The cache is then
 * written as 2-space-indented JSON followed by a trailing newline.
 *
 * @param {object} cache Cache object to serialize.
 * @returns {Promise<void>}
 */
export async function writeDiscoveredLimitsCache(cache) {
    const target = getCachePath();
    const parentDir = dirname(target);
    await mkdir(parentDir, { recursive: true });
    const serialized = `${JSON.stringify(cache, null, 2)}\n`;
    await writeFile(target, serialized, "utf8");
}
30
/**
 * Merge newly discovered RPM/TPM limits into the cache entry for a
 * provider/model key and persist the cache.
 *
 * Only finite, positive numbers are stored; the same "> 0" rule the header
 * parsers apply. Fields that are absent or unusable keep the value already
 * cached. When no usable limit is supplied, nothing is read or written, so
 * the existing entry keeps its `source` and `discovered_at`.
 * `output_tokens_per_minute` is not persisted by this function.
 *
 * @param {string} providerModelKey Cache key for the provider/model pair.
 * @param {{requests_per_minute?: number|null, input_tokens_per_minute?: number|null, source: string}} limits
 *   Discovered limits and the label of where they came from.
 * @returns {Promise<void>}
 */
export async function updateDiscoveredLimits(providerModelKey, limits) {
    const isUsable = (value) => typeof value === "number" && Number.isFinite(value) && value > 0;
    const updates = {};
    if (isUsable(limits.requests_per_minute)) {
        updates.requests_per_minute = limits.requests_per_minute;
    }
    if (isUsable(limits.input_tokens_per_minute)) {
        updates.input_tokens_per_minute = limits.input_tokens_per_minute;
    }
    if (Object.keys(updates).length === 0) {
        // Nothing was discovered: do not restamp an entry that is still good.
        return;
    }
    const cache = await readDiscoveredLimitsCache();
    cache.entries[providerModelKey] = {
        ...cache.entries[providerModelKey],
        ...updates,
        discovered_at: new Date().toISOString(),
        source: limits.source,
    };
    await writeDiscoveredLimitsCache(cache);
}
47
/**
 * Look up the cached limits for a provider/model key.
 *
 * @param {string} providerModelKey Cache key for the provider/model pair.
 * @returns {Promise<{requests_per_minute: number|null, input_tokens_per_minute: number|null, source: string}|null>}
 *   The cached limits with missing fields set to null, or null when there is
 *   no entry or the entry carries neither an RPM nor a TPM value.
 */
export async function lookupDiscoveredLimits(providerModelKey) {
    const { entries } = await readDiscoveredLimitsCache();
    const cached = entries[providerModelKey];
    if (!cached) {
        return null;
    }
    const rpm = cached.requests_per_minute ?? null;
    const tpm = cached.input_tokens_per_minute ?? null;
    const hasAnyLimit = rpm !== null || tpm !== null;
    return hasAnyLimit
        ? { requests_per_minute: rpm, input_tokens_per_minute: tpm, source: cached.source }
        : null;
}
60
/**
 * Combine several limit records, earlier arguments taking precedence.
 *
 * The first non-empty source is shallow-copied, so its `source` label is the
 * one kept. Each later source only fills limit fields that are still null or
 * undefined in the merged result. Inputs are never mutated.
 *
 * @param {...(object|null|undefined)} sources Limit records in priority order.
 * @returns {object|null} The merged record, or null when every source is empty.
 */
export function mergeDiscoveredLimits(...sources) {
    const limitFields = [
        "requests_per_minute",
        "input_tokens_per_minute",
        "output_tokens_per_minute",
    ];
    return sources.reduce((merged, candidate) => {
        if (!candidate) {
            return merged;
        }
        if (!merged) {
            return { ...candidate };
        }
        for (const field of limitFields) {
            if (merged[field] == null) {
                merged[field] = candidate[field];
            }
        }
        return merged;
    }, null);
}
@@ -0,0 +1,8 @@
1
+ export interface ExtractedRateLimits {
2
+ requests_per_minute: number | null;
3
+ input_tokens_per_minute: number | null;
4
+ remaining_requests: number | null;
5
+ remaining_tokens: number | null;
6
+ reset_at: string | null;
7
+ }
8
+ export declare function extractRateLimitHeaders(text: string): ExtractedRateLimits | null;
@@ -0,0 +1,140 @@
1
// Raw "name: value" header patterns, tried in order; for each result field the
// first pattern that yields a value wins. Entries without `transform` capture
// digits and are parsed as positive integers; `reset_at` entries capture the
// rest of the line and are normalized by parseResetValue.
//
// Only spaces and tabs are allowed between the colon and the value. `\s*`
// would also cross a line break, so a header with an empty value would
// capture text from the following line.
const HEADER_PATTERNS = [
    // Standard x-ratelimit-* (OpenAI, Anthropic, and others)
    { pattern: /x-ratelimit-limit-requests:[ \t]*(\d+)/i, field: "requests_per_minute" },
    { pattern: /x-ratelimit-limit-tokens:[ \t]*(\d+)/i, field: "input_tokens_per_minute" },
    { pattern: /x-ratelimit-remaining-requests:[ \t]*(\d+)/i, field: "remaining_requests" },
    { pattern: /x-ratelimit-remaining-tokens:[ \t]*(\d+)/i, field: "remaining_tokens" },
    { pattern: /x-ratelimit-reset-requests:[ \t]*(.+)/i, field: "reset_at", transform: parseResetValue },
    { pattern: /x-ratelimit-reset-tokens:[ \t]*(.+)/i, field: "reset_at", transform: parseResetValue },
    // Anthropic-specific header naming
    { pattern: /anthropic-ratelimit-requests-limit:[ \t]*(\d+)/i, field: "requests_per_minute" },
    { pattern: /anthropic-ratelimit-tokens-limit:[ \t]*(\d+)/i, field: "input_tokens_per_minute" },
    { pattern: /anthropic-ratelimit-requests-remaining:[ \t]*(\d+)/i, field: "remaining_requests" },
    { pattern: /anthropic-ratelimit-tokens-remaining:[ \t]*(\d+)/i, field: "remaining_tokens" },
    { pattern: /anthropic-ratelimit-requests-reset:[ \t]*(.+)/i, field: "reset_at", transform: parseResetValue },
    { pattern: /anthropic-ratelimit-tokens-reset:[ \t]*(.+)/i, field: "reset_at", transform: parseResetValue },
];
17
/**
 * Normalize a rate-limit "reset" header value to an absolute timestamp.
 *
 * Accepted forms, checked in this order:
 *  - a value starting with `YYYY-MM-DD` is returned unchanged;
 *  - a compound duration such as `42s`, `250ms`, `6m0s` or `1h2m3.5s` (the
 *    form OpenAI documents for its x-ratelimit-reset-* headers) is added to
 *    the current time;
 *  - any other value with a positive numeric prefix is read as seconds.
 * Anything else is returned trimmed but otherwise untouched.
 *
 * @param {string} value Raw header value.
 * @returns {string|null} ISO-8601 timestamp, the trimmed input when it cannot
 *   be interpreted, or null for a blank value.
 */
function parseResetValue(value) {
    const trimmed = value.trim();
    if (!trimmed)
        return null;
    // ISO timestamp
    if (/^\d{4}-\d{2}-\d{2}/.test(trimmed))
        return trimmed;
    let delayMs = parseCompoundDurationMs(trimmed);
    if (delayMs == null) {
        // Relative seconds (e.g. "42", "42 seconds")
        const seconds = parseFloat(trimmed);
        if (Number.isFinite(seconds) && seconds > 0) {
            delayMs = seconds * 1000;
        }
    }
    if (delayMs != null) {
        const resetAt = new Date(Date.now() + delayMs);
        // A delay large enough to overflow Date would make toISOString throw.
        if (!Number.isNaN(resetAt.getTime())) {
            return resetAt.toISOString();
        }
    }
    return trimmed;
}
/**
 * Parse a duration made only of `<number><unit>` parts (units: h, m, s, ms).
 *
 * @param {string} text Trimmed duration text.
 * @returns {number|null} Total milliseconds, or null when the text is not
 *   entirely made of such parts.
 */
function parseCompoundDurationMs(text) {
    // `ms` is listed before `m` so "250ms" is not read as "250m" plus a stray "s".
    if (!/^(?:\d+(?:\.\d+)?(?:ms|h|m|s))+$/i.test(text))
        return null;
    const unitMs = { h: 3600000, m: 60000, s: 1000, ms: 1 };
    let total = 0;
    for (const [, amount, unit] of text.matchAll(/(\d+(?:\.\d+)?)(ms|h|m|s)/gi)) {
        total += Number(amount) * unitMs[unit.toLowerCase()];
    }
    return total;
}
31
/**
 * Parse a captured header value as a base-10 integer.
 *
 * @param {string} value Text to parse.
 * @returns {number|null} The integer when it is finite and greater than zero,
 *   otherwise null.
 */
function parseNumericValue(value) {
    const parsed = parseInt(value, 10);
    if (!Number.isFinite(parsed) || parsed <= 0) {
        return null;
    }
    return parsed;
}
35
/**
 * Extract rate-limit information from free-form text.
 *
 * Each entry of HEADER_PATTERNS is tried against the text; for every result
 * field the first pattern that produces a non-null value wins. If no raw
 * header line produced a value, the text is handed to extractFromJson.
 *
 * @param {string} text Text to scan.
 * @returns {object|null} The populated result object, the JSON-derived
 *   result, or null when nothing was found.
 */
export function extractRateLimitHeaders(text) {
    const limits = {
        requests_per_minute: null,
        input_tokens_per_minute: null,
        remaining_requests: null,
        remaining_tokens: null,
        reset_at: null,
    };
    let matchedAny = false;
    for (const { pattern, field, transform } of HEADER_PATTERNS) {
        const captured = pattern.exec(text)?.[1];
        if (!captured) {
            continue;
        }
        if (limits[field] != null) {
            continue; // an earlier pattern already filled this field
        }
        const parsed = transform ? transform(captured) : parseNumericValue(captured);
        if (parsed == null) {
            continue;
        }
        limits[field] = parsed;
        matchedAny = true;
    }
    if (matchedAny) {
        return limits;
    }
    // No raw header lines: fall back to JSON objects embedding header fields.
    return extractFromJson(text) || null;
}
73
/**
 * Find rate-limit limits inside JSON embedded in the text.
 *
 * Two passes are made:
 *  1. every brace-delimited fragment without nested braces that mentions a
 *     ratelimit-style key is parsed and treated as a header object;
 *  2. every line starting with `{` is parsed, and its `headers` or
 *     `response_headers` member is treated as a header object.
 * The first fragment that yields limits wins. Fragments that are not valid
 * JSON, or that carry no limit fields, are skipped.
 *
 * @param {string} text Text to scan.
 * @returns {object|null} Extracted limits, or null when none were found.
 */
function extractFromJson(text) {
    const jsonPattern = /\{[^{}]*"(?:x-ratelimit|anthropic-ratelimit|ratelimit)[^{}]*\}/gi;
    for (const match of text.matchAll(jsonPattern)) {
        try {
            const extracted = extractFromHeaderObject(JSON.parse(match[0]));
            // Keep scanning when this fragment has no limits; returning its
            // null would hide later fragments and the line-by-line pass.
            if (extracted)
                return extracted;
        }
        catch {
            // not valid JSON
        }
    }
    // Try line-by-line JSON (Claude Code stderr format)
    for (const line of text.split("\n")) {
        const trimmed = line.trim();
        if (!trimmed.startsWith("{"))
            continue;
        try {
            const obj = JSON.parse(trimmed);
            const headers = obj["headers"] ??
                obj["response_headers"];
            if (headers) {
                const extracted = extractFromHeaderObject(headers);
                if (extracted)
                    return extracted;
            }
        }
        catch {
            // not valid JSON
        }
    }
    return null;
}
105
/**
 * Read rate-limit values out of a header-name to value object.
 *
 * Header names are matched case-insensitively. Values may be numbers or
 * anything whose string form starts with a base-10 integer; only finite
 * values greater than zero are accepted. For each field the
 * `x-ratelimit-*` name is tried before the `anthropic-ratelimit-*` name.
 *
 * @param {object} headers Header object; non-objects yield null.
 * @returns {object|null} Extracted values (`reset_at` is always null here),
 *   or null when neither a request limit nor a token limit is present.
 */
function extractFromHeaderObject(headers) {
    if (headers === null || typeof headers !== "object")
        return null;
    // HTTP field names are case-insensitive, so index the values by
    // lower-cased name. An exact lower-case spelling wins over cased
    // duplicates; otherwise the first non-null spelling is kept.
    const byLowerName = new Map();
    for (const [name, value] of Object.entries(headers)) {
        if (value == null)
            continue;
        const lower = name.toLowerCase();
        if (name === lower || !byLowerName.has(lower)) {
            byLowerName.set(lower, value);
        }
    }
    const get = (keys) => {
        for (const key of keys) {
            const val = byLowerName.get(key);
            if (val != null) {
                const n = typeof val === "number" ? val : parseInt(String(val), 10);
                if (Number.isFinite(n) && n > 0)
                    return n;
            }
        }
        return null;
    };
    const rpm = get([
        "x-ratelimit-limit-requests",
        "anthropic-ratelimit-requests-limit",
    ]);
    const tpm = get([
        "x-ratelimit-limit-tokens",
        "anthropic-ratelimit-tokens-limit",
    ]);
    if (rpm == null && tpm == null)
        return null;
    return {
        requests_per_minute: rpm,
        input_tokens_per_minute: tpm,
        remaining_requests: get([
            "x-ratelimit-remaining-requests",
            "anthropic-ratelimit-requests-remaining",
        ]),
        remaining_tokens: get([
            "x-ratelimit-remaining-tokens",
            "anthropic-ratelimit-tokens-remaining",
        ]),
        reset_at: null,
    };
}
@@ -0,0 +1,6 @@
1
+ import type { ExtractedRateLimits } from "../headerExtraction.js";
2
+ import type { HeaderExtractor } from "./genericHeaderExtractor.js";
3
+ export declare class ClaudeCodeHeaderExtractor implements HeaderExtractor {
4
+ readonly name = "claude-code";
5
+ extract(stderr: string): ExtractedRateLimits | null;
6
+ }
@@ -0,0 +1,28 @@
1
+ import { extractRateLimitHeaders } from "../headerExtraction.js";
2
/**
 * Header extractor for the "claude-code" provider.
 *
 * JSON lines in stderr that carry a `headers` or `response_headers` member
 * are parsed first; the whole stderr text is scanned only when those lines
 * yield nothing.
 */
export class ClaudeCodeHeaderExtractor {
    name = "claude-code";
    /**
     * @param {string} stderr Captured stderr text of a worker.
     * @returns {object|null} Extracted rate limits, or null when none found.
     */
    extract(stderr) {
        // Claude Code emits structured JSON lines to stderr. Collect all lines
        // that might contain header data and feed them to the agnostic parser.
        const candidates = [];
        for (const line of stderr.split("\n")) {
            const trimmed = line.trim();
            if (!trimmed.startsWith("{"))
                continue;
            try {
                const obj = JSON.parse(trimmed);
                if (obj["headers"] || obj["response_headers"]) {
                    candidates.push(trimmed);
                }
            }
            catch {
                // not JSON
            }
        }
        if (candidates.length > 0) {
            const fromCandidates = extractRateLimitHeaders(candidates.join("\n"));
            // Candidate lines may carry headers without any rate-limit field;
            // in that case still give the raw-text scan below a chance.
            if (fromCandidates)
                return fromCandidates;
        }
        // Fall back to scanning the full text for raw header lines
        return extractRateLimitHeaders(stderr);
    }
}
@@ -0,0 +1,9 @@
1
+ import type { ExtractedRateLimits } from "../headerExtraction.js";
2
+ export interface HeaderExtractor {
3
+ readonly name: string;
4
+ extract(stderr: string): ExtractedRateLimits | null;
5
+ }
6
+ export declare class GenericHeaderExtractor implements HeaderExtractor {
7
+ readonly name = "generic";
8
+ extract(stderr: string): ExtractedRateLimits | null;
9
+ }
@@ -0,0 +1,7 @@
1
+ import { extractRateLimitHeaders } from "../headerExtraction.js";
2
/**
 * Provider-agnostic header extractor: hands the stderr text unchanged to
 * extractRateLimitHeaders.
 */
export class GenericHeaderExtractor {
    name = "generic";
    /**
     * @param {string} stderr Captured stderr text of a worker.
     * @returns {object|null} Whatever extractRateLimitHeaders returns for it.
     */
    extract(stderr) {
        const limits = extractRateLimitHeaders(stderr);
        return limits;
    }
}
@@ -0,0 +1,5 @@
1
+ export type { HeaderExtractor } from "./genericHeaderExtractor.js";
2
+ export { GenericHeaderExtractor } from "./genericHeaderExtractor.js";
3
+ export { ClaudeCodeHeaderExtractor } from "./claudeCodeHeaderExtractor.js";
4
+ import type { HeaderExtractor } from "./genericHeaderExtractor.js";
5
+ export declare function getHeaderExtractorForProvider(providerName: string): HeaderExtractor;
@@ -0,0 +1,12 @@
1
+ export { GenericHeaderExtractor } from "./genericHeaderExtractor.js";
2
+ export { ClaudeCodeHeaderExtractor } from "./claudeCodeHeaderExtractor.js";
3
+ import { GenericHeaderExtractor } from "./genericHeaderExtractor.js";
4
+ import { ClaudeCodeHeaderExtractor } from "./claudeCodeHeaderExtractor.js";
5
// Provider-specific extractor factories keyed by provider name. A Map is used
// instead of an object literal so that a name such as "constructor" or
// "toString" cannot resolve to an inherited Object.prototype member and be
// invoked as if it were a factory.
const PROVIDER_EXTRACTORS = new Map([
    ["claude-code", () => new ClaudeCodeHeaderExtractor()],
]);
// Shared instance returned for every provider without a dedicated extractor.
const genericExtractor = new GenericHeaderExtractor();
/**
 * Pick the header extractor for a provider.
 *
 * @param {string} providerName Provider name to look up.
 * @returns {object} A new provider-specific extractor when one is registered
 *   for the name, otherwise the shared generic extractor.
 */
export function getHeaderExtractorForProvider(providerName) {
    const factory = PROVIDER_EXTRACTORS.get(providerName);
    return factory ? factory() : genericExtractor;
}
@@ -16,4 +16,10 @@ export type { ErrorParser } from "./errorParsers/index.js";
16
16
  export { GenericErrorParser, ClaudeCodeErrorParser, getErrorParserForProvider } from "./errorParsers/index.js";
17
17
  export { LearnedQuotaSource } from "./learnedQuotaSource.js";
18
18
  export { CompositeQuotaSource } from "./compositeQuotaSource.js";
19
+ export { lookupDiscoveredLimits, updateDiscoveredLimits, mergeDiscoveredLimits, readDiscoveredLimitsCache, writeDiscoveredLimitsCache, } from "./discoveredLimits.js";
20
+ export type { DiscoveredRateLimits, DiscoveredLimitsCache, DiscoveredLimitsCacheEntry } from "./discoveredLimits.js";
21
+ export { extractRateLimitHeaders } from "./headerExtraction.js";
22
+ export type { ExtractedRateLimits } from "./headerExtraction.js";
23
+ export type { HeaderExtractor } from "./headerExtractors/index.js";
24
+ export { GenericHeaderExtractor, ClaudeCodeHeaderExtractor, getHeaderExtractorForProvider } from "./headerExtractors/index.js";
19
25
  export type { ResolvedLimits, LimitSource, LimitConfidence, HostConcurrencyLimit, HostConcurrencyLimitSource, QuotaState, QuotaStateEntry, ConcurrencyBucket, WaveSchedule, DispatchQuota, ObservedWaveOutcome, } from "./types.js";
@@ -9,3 +9,6 @@ export { probeProvider } from "./probe.js";
9
9
  export { GenericErrorParser, ClaudeCodeErrorParser, getErrorParserForProvider } from "./errorParsers/index.js";
10
10
  export { LearnedQuotaSource } from "./learnedQuotaSource.js";
11
11
  export { CompositeQuotaSource } from "./compositeQuotaSource.js";
12
+ export { lookupDiscoveredLimits, updateDiscoveredLimits, mergeDiscoveredLimits, readDiscoveredLimitsCache, writeDiscoveredLimitsCache, } from "./discoveredLimits.js";
13
+ export { extractRateLimitHeaders } from "./headerExtraction.js";
14
+ export { GenericHeaderExtractor, ClaudeCodeHeaderExtractor, getHeaderExtractorForProvider } from "./headerExtractors/index.js";
@@ -1,6 +1,7 @@
1
1
  import type { ResolvedProviderName, SessionConfig } from "../types/sessionConfig.js";
2
2
  import type { HostConcurrencyLimit, QuotaStateEntry, WaveSchedule } from "./types.js";
3
3
  import type { QuotaUsageSnapshot } from "./quotaSource.js";
4
+ import type { DiscoveredRateLimits } from "./discoveredLimits.js";
4
5
  export interface ScheduleWaveOptions {
5
6
  providerName: ResolvedProviderName;
6
7
  sessionConfig: SessionConfig;
@@ -13,6 +14,8 @@ export interface ScheduleWaveOptions {
13
14
  quotaStateEntry?: QuotaStateEntry | null;
14
15
  hostConcurrencyLimit?: HostConcurrencyLimit | null;
15
16
  quotaSourceSnapshot?: QuotaUsageSnapshot | null;
17
+ /** RPM/TPM discovered from provider queries or response header extraction. */
18
+ discoveredLimits?: DiscoveredRateLimits | null;
16
19
  }
17
20
  export declare function scheduleWave(options: ScheduleWaveOptions): WaveSchedule;
18
21
  /** Build the state key used for indexing quota-state.json entries. */
@@ -7,7 +7,7 @@ function sumTopN(sorted, n) {
7
7
  return sum;
8
8
  }
9
9
  export function scheduleWave(options) {
10
- const { providerName, sessionConfig, hostModel, requestedConcurrency, estimatedSlotTokens, estimatedPacketTokens = 0, quotaStateEntry = null, hostConcurrencyLimit = null, quotaSourceSnapshot = null, } = options;
10
+ const { providerName, sessionConfig, hostModel, requestedConcurrency, estimatedSlotTokens, estimatedPacketTokens = 0, quotaStateEntry = null, hostConcurrencyLimit = null, quotaSourceSnapshot = null, discoveredLimits = null, } = options;
11
11
  // Descending sort so sumTopN picks the largest slots
12
12
  const slotsSorted = estimatedSlotTokens
13
13
  ? [...estimatedSlotTokens].sort((a, b) => b - a)
@@ -44,6 +44,12 @@ export function scheduleWave(options) {
44
44
  const safetyMargin = quota.safety_margin ?? 0.8;
45
45
  const halfLifeHours = quota.empirical_half_life_hours ?? 24;
46
46
  const { limits, source, confidence } = resolveLimits({ providerName, sessionConfig, hostModel });
47
+ // Fill null RPM/TPM from discovered limits (provider query or header extraction)
48
+ if (discoveredLimits) {
49
+ limits.requests_per_minute ??= discoveredLimits.requests_per_minute ?? null;
50
+ limits.input_tokens_per_minute ??= discoveredLimits.input_tokens_per_minute ?? null;
51
+ limits.output_tokens_per_minute ??= discoveredLimits.output_tokens_per_minute ?? null;
52
+ }
47
53
  let waveSize = requestedConcurrency;
48
54
  let cooldownUntil = null;
49
55
  // Respect an active cooldown period
@@ -93,6 +99,17 @@ export function scheduleWave(options) {
93
99
  else if (typeof fallbackCap === "number" && Number.isFinite(fallbackCap)) {
94
100
  waveSize = Math.min(waveSize, Math.max(1, Math.floor(fallbackCap)));
95
101
  }
102
+ // First-contact cap: when no learned history, no configured fallback, AND
103
+ // no RPM/TPM limits from any source, apply a conservative ceiling.
104
+ // This triggers only for unconfigured local providers (fallbackCap is
105
+ // undefined). Hosted providers default to 1 via unknown_hosted_concurrency,
106
+ // and "unlimited" is an explicit opt-out.
107
+ if (fallbackCap == null &&
108
+ limits.requests_per_minute == null &&
109
+ limits.input_tokens_per_minute == null) {
110
+ const firstContactCap = quota.first_contact_concurrency ?? 3;
111
+ waveSize = Math.min(waveSize, Math.max(1, firstContactCap));
112
+ }
96
113
  }
97
114
  }
98
115
  // Apply real-time quota source data if available
@@ -46,6 +46,9 @@ export interface QuotaConfig {
46
46
  empirical_half_life_hours?: number;
47
47
  /** Allow the scheduler to try concurrency maxSafe+1 after consecutive successes (default: true). */
48
48
  ramp_up_enabled?: boolean;
49
+ /** Conservative concurrency cap for the first wave when no learned history
50
+ * and no discovered RPM/TPM limits exist (default: 3). */
51
+ first_contact_concurrency?: number;
49
52
  /** Hard host ceiling for simultaneously active conversation subagents. */
50
53
  host_active_subagent_limit?: number;
51
54
  /** Per-model overrides keyed by "provider/model". */
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "auditor-lambda",
3
- "version": "0.3.37",
3
+ "version": "0.3.38",
4
4
  "private": false,
5
5
  "description": "Portable hybrid code-auditing framework for arbitrary repositories.",
6
6
  "type": "module",