auditor-lambda 0.3.32 → 0.3.34

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. package/README.md +2 -1
  2. package/audit-code-wrapper-lib.mjs +30 -28
  3. package/dist/cli.d.ts +5 -0
  4. package/dist/cli.js +55 -123
  5. package/dist/mcp/server.js +11 -11
  6. package/dist/orchestrator/reviewPackets.d.ts +3 -0
  7. package/dist/orchestrator/reviewPackets.js +13 -2
  8. package/dist/quota/compositeQuotaSource.d.ts +7 -0
  9. package/dist/quota/compositeQuotaSource.js +20 -0
  10. package/dist/quota/errorParsers/claudeCodeErrorParser.d.ts +6 -0
  11. package/dist/quota/errorParsers/claudeCodeErrorParser.js +39 -0
  12. package/dist/quota/errorParsers/genericErrorParser.d.ts +9 -0
  13. package/dist/quota/errorParsers/genericErrorParser.js +7 -0
  14. package/dist/quota/errorParsers/index.d.ts +5 -0
  15. package/dist/quota/errorParsers/index.js +12 -0
  16. package/dist/quota/errorParsing.d.ts +7 -0
  17. package/dist/quota/errorParsing.js +69 -0
  18. package/dist/quota/fileLock.d.ts +6 -0
  19. package/dist/quota/fileLock.js +64 -0
  20. package/dist/quota/index.d.ts +11 -1
  21. package/dist/quota/index.js +7 -1
  22. package/dist/quota/learnedQuotaSource.d.ts +7 -0
  23. package/dist/quota/learnedQuotaSource.js +25 -0
  24. package/dist/quota/probe.d.ts +1 -4
  25. package/dist/quota/probe.js +1 -4
  26. package/dist/quota/quotaSource.d.ts +12 -0
  27. package/dist/quota/quotaSource.js +1 -0
  28. package/dist/quota/scheduler.d.ts +5 -1
  29. package/dist/quota/scheduler.js +51 -9
  30. package/dist/quota/slidingWindow.d.ts +4 -0
  31. package/dist/quota/slidingWindow.js +28 -0
  32. package/dist/quota/state.d.ts +3 -0
  33. package/dist/quota/state.js +57 -14
  34. package/dist/quota/types.d.ts +11 -2
  35. package/dist/supervisor/operatorHandoff.js +1 -1
  36. package/dist/types/sessionConfig.d.ts +3 -0
  37. package/dist/validation/sessionConfig.js +4 -0
  38. package/package.json +1 -1
  39. package/schemas/dispatch_quota.schema.json +23 -2
  40. package/skills/audit-code/audit-code.prompt.md +5 -0
@@ -2,8 +2,19 @@ import { createHash } from "node:crypto";
2
2
  import { LENS_ORDER } from "./unitBuilder.js";
3
3
  const DEFAULT_MAX_TASKS_PER_PACKET = 0;
4
4
  const DEFAULT_TARGET_PACKET_LINES = 8000;
5
- const ESTIMATED_TOKENS_PER_LINE = 4;
6
- const ESTIMATED_PACKET_PROMPT_TOKENS = 900;
5
+ export const ESTIMATED_TOKENS_PER_LINE = 4;
6
+ export const ESTIMATED_PACKET_PROMPT_TOKENS = 900;
7
+ export function estimateTaskGroupTokens(tasks) {
8
+ let totalLines = 0;
9
+ for (const task of tasks) {
10
+ if (task.file_line_counts) {
11
+ for (const count of Object.values(task.file_line_counts)) {
12
+ totalLines += count;
13
+ }
14
+ }
15
+ }
16
+ return ESTIMATED_PACKET_PROMPT_TOKENS + totalLines * ESTIMATED_TOKENS_PER_LINE;
17
+ }
7
18
  const PACKET_EXPANSION_MIN_CONFIDENCE = 0.65;
8
19
  const HIGH_FAN_DEGREE_THRESHOLD = 12;
9
20
  const HIGH_FAN_EXPANSION_CONFIDENCE = 0.99;
@@ -0,0 +1,7 @@
1
+ import type { QuotaSource, QuotaUsageSnapshot } from "./quotaSource.js";
2
+ export declare class CompositeQuotaSource implements QuotaSource {
3
+ readonly name = "composite";
4
+ private sources;
5
+ constructor(sources: QuotaSource[]);
6
+ queryCurrentUsage(providerModelKey: string): Promise<QuotaUsageSnapshot | null>;
7
+ }
@@ -0,0 +1,20 @@
1
+ export class CompositeQuotaSource {
2
+ name = "composite";
3
+ sources;
4
+ constructor(sources) {
5
+ this.sources = sources;
6
+ }
7
+ async queryCurrentUsage(providerModelKey) {
8
+ for (const source of this.sources) {
9
+ try {
10
+ const snapshot = await source.queryCurrentUsage(providerModelKey);
11
+ if (snapshot)
12
+ return snapshot;
13
+ }
14
+ catch {
15
+ // Skip failing sources, try next
16
+ }
17
+ }
18
+ return null;
19
+ }
20
+ }
@@ -0,0 +1,6 @@
1
+ import type { RateLimitDetectionResult } from "../errorParsing.js";
2
+ import type { ErrorParser } from "./genericErrorParser.js";
3
+ export declare class ClaudeCodeErrorParser implements ErrorParser {
4
+ readonly name = "claude-code";
5
+ parse(text: string): RateLimitDetectionResult;
6
+ }
@@ -0,0 +1,39 @@
1
+ export class ClaudeCodeErrorParser {
2
+ name = "claude-code";
3
+ parse(text) {
4
+ for (const line of text.split("\n")) {
5
+ const trimmed = line.trim();
6
+ if (!trimmed.startsWith("{"))
7
+ continue;
8
+ try {
9
+ const obj = JSON.parse(trimmed);
10
+ const level = obj["level"];
11
+ const type = obj["type"];
12
+ const message = obj["message"] ?? "";
13
+ const statusCode = obj["status_code"];
14
+ if (statusCode === 429 ||
15
+ type === "rate_limit_error" ||
16
+ (level === "error" && /\brate.?limit/i.test(message))) {
17
+ const retryAfter = obj["retry_after"];
18
+ const retryAfterMs = obj["retry_after_ms"];
19
+ let extractedMs = null;
20
+ if (retryAfterMs != null && retryAfterMs > 0) {
21
+ extractedMs = retryAfterMs;
22
+ }
23
+ else if (retryAfter != null && retryAfter > 0) {
24
+ extractedMs = retryAfter < 600 ? retryAfter * 1000 : retryAfter;
25
+ }
26
+ return {
27
+ isRateLimited: true,
28
+ retryAfterMs: extractedMs,
29
+ rawMatch: `claude-code-stderr:${statusCode ?? type ?? "rate_limit"}`,
30
+ };
31
+ }
32
+ }
33
+ catch {
34
+ // Not valid JSON, skip
35
+ }
36
+ }
37
+ return { isRateLimited: false, retryAfterMs: null, rawMatch: null };
38
+ }
39
+ }
@@ -0,0 +1,9 @@
1
+ import type { RateLimitDetectionResult } from "../errorParsing.js";
2
+ export interface ErrorParser {
3
+ readonly name: string;
4
+ parse(text: string): RateLimitDetectionResult;
5
+ }
6
+ export declare class GenericErrorParser implements ErrorParser {
7
+ readonly name = "generic";
8
+ parse(text: string): RateLimitDetectionResult;
9
+ }
@@ -0,0 +1,7 @@
1
+ import { detectRateLimitError } from "../errorParsing.js";
2
+ export class GenericErrorParser {
3
+ name = "generic";
4
+ parse(text) {
5
+ return detectRateLimitError(text);
6
+ }
7
+ }
@@ -0,0 +1,5 @@
1
+ export type { ErrorParser } from "./genericErrorParser.js";
2
+ export { GenericErrorParser } from "./genericErrorParser.js";
3
+ export { ClaudeCodeErrorParser } from "./claudeCodeErrorParser.js";
4
+ import type { ErrorParser } from "./genericErrorParser.js";
5
+ export declare function getErrorParserForProvider(providerName: string): ErrorParser;
@@ -0,0 +1,12 @@
1
+ export { GenericErrorParser } from "./genericErrorParser.js";
2
+ export { ClaudeCodeErrorParser } from "./claudeCodeErrorParser.js";
3
+ import { GenericErrorParser } from "./genericErrorParser.js";
4
+ import { ClaudeCodeErrorParser } from "./claudeCodeErrorParser.js";
5
+ const PROVIDER_PARSERS = {
6
+ "claude-code": () => new ClaudeCodeErrorParser(),
7
+ };
8
+ const genericParser = new GenericErrorParser();
9
+ export function getErrorParserForProvider(providerName) {
10
+ const factory = PROVIDER_PARSERS[providerName];
11
+ return factory ? factory() : genericParser;
12
+ }
@@ -0,0 +1,7 @@
1
+ export interface RateLimitDetectionResult {
2
+ isRateLimited: boolean;
3
+ retryAfterMs: number | null;
4
+ rawMatch: string | null;
5
+ }
6
+ export declare function detectRateLimitError(text: string): RateLimitDetectionResult;
7
+ export declare function computeCooldownUntil(retryAfterMs: number | null, defaultMs?: number): string;
@@ -0,0 +1,69 @@
1
+ const RATE_LIMIT_PATTERNS = [
2
+ /\b429\b/i,
3
+ /\btoo many requests\b/i,
4
+ /\brate.?limit/i,
5
+ /\boverloaded\b/i,
6
+ /\bresource.?exhausted\b/i,
7
+ /\bquota.?exceeded\b/i,
8
+ ];
9
+ function tryParseJson(text) {
10
+ const jsonStart = text.indexOf("{");
11
+ if (jsonStart === -1)
12
+ return null;
13
+ try {
14
+ return JSON.parse(text.slice(jsonStart));
15
+ }
16
+ catch {
17
+ return null;
18
+ }
19
+ }
20
+ function extractRetryAfterMs(obj) {
21
+ const headers = obj["headers"];
22
+ const retryAfter = headers?.["retry-after"] ??
23
+ headers?.["Retry-After"] ??
24
+ obj["retry_after"] ??
25
+ obj["retry_after_ms"];
26
+ if (retryAfter == null)
27
+ return null;
28
+ const val = typeof retryAfter === "string" ? Number(retryAfter) : retryAfter;
29
+ if (!Number.isFinite(val) || val <= 0)
30
+ return null;
31
+ // If the value looks like seconds (< 600), convert to ms
32
+ return val < 600 ? val * 1000 : val;
33
+ }
34
+ function detectFromJson(text) {
35
+ const obj = tryParseJson(text);
36
+ if (!obj)
37
+ return null;
38
+ const status = obj["status"];
39
+ const type = obj["type"];
40
+ const errorObj = obj["error"];
41
+ const errorType = errorObj?.["type"];
42
+ const isRateLimited = status === 429 ||
43
+ type === "rate_limit_error" ||
44
+ errorType === "rate_limit_error";
45
+ if (!isRateLimited)
46
+ return null;
47
+ return {
48
+ isRateLimited: true,
49
+ retryAfterMs: extractRetryAfterMs(obj),
50
+ rawMatch: `json:${status === 429 ? "status=429" : `type=${type ?? errorType}`}`,
51
+ };
52
+ }
53
+ export function detectRateLimitError(text) {
54
+ const jsonResult = detectFromJson(text);
55
+ if (jsonResult)
56
+ return jsonResult;
57
+ for (const pattern of RATE_LIMIT_PATTERNS) {
58
+ const match = pattern.exec(text);
59
+ if (match) {
60
+ return { isRateLimited: true, retryAfterMs: null, rawMatch: match[0] };
61
+ }
62
+ }
63
+ return { isRateLimited: false, retryAfterMs: null, rawMatch: null };
64
+ }
65
+ const DEFAULT_COOLDOWN_MS = 60_000;
66
+ export function computeCooldownUntil(retryAfterMs, defaultMs = DEFAULT_COOLDOWN_MS) {
67
+ const ms = retryAfterMs != null && retryAfterMs > 0 ? retryAfterMs : defaultMs;
68
+ return new Date(Date.now() + ms).toISOString();
69
+ }
@@ -0,0 +1,6 @@
1
+ export declare class FileLockTimeoutError extends Error {
2
+ constructor(lockPath: string);
3
+ }
4
+ export declare function acquireLock(lockPath: string, timeoutMs?: number): Promise<void>;
5
+ export declare function releaseLock(lockPath: string): Promise<void>;
6
+ export declare function withFileLock<T>(lockPath: string, fn: () => Promise<T>, timeoutMs?: number): Promise<T>;
@@ -0,0 +1,64 @@
1
+ import { open, unlink, stat } from "node:fs/promises";
2
+ const STALE_LOCK_MS = 30_000;
3
+ const RETRY_INTERVAL_MS = 50;
4
+ const DEFAULT_TIMEOUT_MS = 10_000;
5
+ export class FileLockTimeoutError extends Error {
6
+ constructor(lockPath) {
7
+ super(`Timed out acquiring lock: ${lockPath}`);
8
+ this.name = "FileLockTimeoutError";
9
+ }
10
+ }
11
+ async function isLockStale(lockPath) {
12
+ try {
13
+ const info = await stat(lockPath);
14
+ return Date.now() - info.mtimeMs > STALE_LOCK_MS;
15
+ }
16
+ catch {
17
+ return false;
18
+ }
19
+ }
20
+ export async function acquireLock(lockPath, timeoutMs = DEFAULT_TIMEOUT_MS) {
21
+ const deadline = Date.now() + timeoutMs;
22
+ while (true) {
23
+ try {
24
+ const fd = await open(lockPath, "wx");
25
+ await fd.close();
26
+ return;
27
+ }
28
+ catch (err) {
29
+ if (err.code !== "EEXIST")
30
+ throw err;
31
+ }
32
+ if (await isLockStale(lockPath)) {
33
+ try {
34
+ await unlink(lockPath);
35
+ continue;
36
+ }
37
+ catch {
38
+ // Another process may have already cleaned it up
39
+ }
40
+ }
41
+ if (Date.now() >= deadline) {
42
+ throw new FileLockTimeoutError(lockPath);
43
+ }
44
+ await new Promise((r) => setTimeout(r, RETRY_INTERVAL_MS));
45
+ }
46
+ }
47
+ export async function releaseLock(lockPath) {
48
+ try {
49
+ await unlink(lockPath);
50
+ }
51
+ catch (err) {
52
+ if (err.code !== "ENOENT")
53
+ throw err;
54
+ }
55
+ }
56
+ export async function withFileLock(lockPath, fn, timeoutMs) {
57
+ await acquireLock(lockPath, timeoutMs);
58
+ try {
59
+ return await fn();
60
+ }
61
+ finally {
62
+ await releaseLock(lockPath);
63
+ }
64
+ }
@@ -1,9 +1,19 @@
1
1
  export { resolveLimits, lookupKnownModel, classifyProvider } from "./limits.js";
2
2
  export type { LimitResolutionResult, ResolveLimitsOptions, ProviderType } from "./limits.js";
3
3
  export { detectHostActiveSubagentLimit, resolveHostActiveSubagentLimit, } from "./hostLimits.js";
4
- export { readQuotaState, writeQuotaState, computeMaxSafeConcurrency, recordWaveOutcome, getQuotaStatePath, decayWeight, applyDecayToEntry, } from "./state.js";
4
+ export { readQuotaState, writeQuotaState, computeMaxSafeConcurrency, recordWaveOutcome, getQuotaStatePath, decayWeight, applyDecayToEntry, computeBackoffCooldownMs, computeBackoffFailureWeight, computeRampUpConcurrency, } from "./state.js";
5
5
  export { scheduleWave, buildProviderModelKey } from "./scheduler.js";
6
6
  export type { ScheduleWaveOptions } from "./scheduler.js";
7
+ export { detectRateLimitError, computeCooldownUntil } from "./errorParsing.js";
8
+ export { acquireLock, releaseLock, withFileLock, FileLockTimeoutError } from "./fileLock.js";
9
+ export { runSlidingWindow } from "./slidingWindow.js";
10
+ export type { SlidingWindowResult } from "./slidingWindow.js";
11
+ export type { RateLimitDetectionResult } from "./errorParsing.js";
7
12
  export { probeProvider } from "./probe.js";
8
13
  export type { ProbeResult } from "./probe.js";
14
+ export type { QuotaSource, QuotaUsageSnapshot } from "./quotaSource.js";
15
+ export type { ErrorParser } from "./errorParsers/index.js";
16
+ export { GenericErrorParser, ClaudeCodeErrorParser, getErrorParserForProvider } from "./errorParsers/index.js";
17
+ export { LearnedQuotaSource } from "./learnedQuotaSource.js";
18
+ export { CompositeQuotaSource } from "./compositeQuotaSource.js";
9
19
  export type { ResolvedLimits, LimitSource, LimitConfidence, HostConcurrencyLimit, HostConcurrencyLimitSource, QuotaState, QuotaStateEntry, ConcurrencyBucket, WaveSchedule, DispatchQuota, ObservedWaveOutcome, } from "./types.js";
@@ -1,5 +1,11 @@
1
1
  export { resolveLimits, lookupKnownModel, classifyProvider } from "./limits.js";
2
2
  export { detectHostActiveSubagentLimit, resolveHostActiveSubagentLimit, } from "./hostLimits.js";
3
- export { readQuotaState, writeQuotaState, computeMaxSafeConcurrency, recordWaveOutcome, getQuotaStatePath, decayWeight, applyDecayToEntry, } from "./state.js";
3
+ export { readQuotaState, writeQuotaState, computeMaxSafeConcurrency, recordWaveOutcome, getQuotaStatePath, decayWeight, applyDecayToEntry, computeBackoffCooldownMs, computeBackoffFailureWeight, computeRampUpConcurrency, } from "./state.js";
4
4
  export { scheduleWave, buildProviderModelKey } from "./scheduler.js";
5
+ export { detectRateLimitError, computeCooldownUntil } from "./errorParsing.js";
6
+ export { acquireLock, releaseLock, withFileLock, FileLockTimeoutError } from "./fileLock.js";
7
+ export { runSlidingWindow } from "./slidingWindow.js";
5
8
  export { probeProvider } from "./probe.js";
9
+ export { GenericErrorParser, ClaudeCodeErrorParser, getErrorParserForProvider } from "./errorParsers/index.js";
10
+ export { LearnedQuotaSource } from "./learnedQuotaSource.js";
11
+ export { CompositeQuotaSource } from "./compositeQuotaSource.js";
@@ -0,0 +1,7 @@
1
+ import type { QuotaSource, QuotaUsageSnapshot } from "./quotaSource.js";
2
+ export declare class LearnedQuotaSource implements QuotaSource {
3
+ readonly name = "learned";
4
+ private halfLifeHours;
5
+ constructor(halfLifeHours?: number);
6
+ queryCurrentUsage(providerModelKey: string): Promise<QuotaUsageSnapshot | null>;
7
+ }
@@ -0,0 +1,25 @@
1
+ import { readQuotaState, computeMaxSafeConcurrency } from "./state.js";
2
+ export class LearnedQuotaSource {
3
+ name = "learned";
4
+ halfLifeHours;
5
+ constructor(halfLifeHours = 24) {
6
+ this.halfLifeHours = halfLifeHours;
7
+ }
8
+ async queryCurrentUsage(providerModelKey) {
9
+ const state = await readQuotaState();
10
+ const entry = state.entries[providerModelKey];
11
+ if (!entry)
12
+ return null;
13
+ const maxSafe = computeMaxSafeConcurrency(entry, this.halfLifeHours);
14
+ const isInCooldown = entry.cooldown_until != null &&
15
+ new Date(entry.cooldown_until).getTime() > Date.now();
16
+ return {
17
+ remaining_pct: isInCooldown ? 0 : null,
18
+ reset_at: isInCooldown ? entry.cooldown_until : null,
19
+ requests_remaining: maxSafe,
20
+ tokens_remaining: null,
21
+ captured_at: entry.updated_at,
22
+ source: "learned",
23
+ };
24
+ }
25
+ }
@@ -5,9 +5,6 @@ export interface ProbeResult {
5
5
  /**
6
6
  * Probe a provider to discover its rate limits.
7
7
  *
8
- * Only subprocess-template supports direct probing since it is the only
9
- * provider where the auditor controls the API call. IDE providers
10
- * (claude-code, opencode) select the model internally; their limits come
11
- * from known-model metadata or learned behavior.
8
+ * @deprecated Phase 3A replaces this with the QuotaSource abstraction.
12
9
  */
13
10
  export declare function probeProvider(providerName: string, probeMode?: "auto" | "never" | "force"): Promise<ProbeResult>;
@@ -1,10 +1,7 @@
1
1
  /**
2
2
  * Probe a provider to discover its rate limits.
3
3
  *
4
- * Only subprocess-template supports direct probing since it is the only
5
- * provider where the auditor controls the API call. IDE providers
6
- * (claude-code, opencode) select the model internally; their limits come
7
- * from known-model metadata or learned behavior.
4
+ * @deprecated Phase 3A replaces this with the QuotaSource abstraction.
8
5
  */
9
6
  export async function probeProvider(providerName, probeMode = "auto") {
10
7
  if (probeMode === "never") {
@@ -0,0 +1,12 @@
1
+ export interface QuotaUsageSnapshot {
2
+ remaining_pct: number | null;
3
+ reset_at: string | null;
4
+ requests_remaining: number | null;
5
+ tokens_remaining: number | null;
6
+ captured_at: string;
7
+ source: string;
8
+ }
9
+ export interface QuotaSource {
10
+ readonly name: string;
11
+ queryCurrentUsage(providerModelKey: string): Promise<QuotaUsageSnapshot | null>;
12
+ }
@@ -0,0 +1 @@
1
+ export {};
@@ -1,14 +1,18 @@
1
1
  import type { ResolvedProviderName, SessionConfig } from "../types/sessionConfig.js";
2
2
  import type { HostConcurrencyLimit, QuotaStateEntry, WaveSchedule } from "./types.js";
3
+ import type { QuotaUsageSnapshot } from "./quotaSource.js";
3
4
  export interface ScheduleWaveOptions {
4
5
  providerName: ResolvedProviderName;
5
6
  sessionConfig: SessionConfig;
6
7
  hostModel: string | null;
7
8
  requestedConcurrency: number;
8
- /** Average estimated tokens per packet/worker. Used for TPM budget. */
9
+ /** Per-slot estimated tokens (one entry per worker slot). Used for TPM budget. */
10
+ estimatedSlotTokens?: number[];
11
+ /** @deprecated Use estimatedSlotTokens instead. Average tokens per slot — used as fallback. */
9
12
  estimatedPacketTokens?: number;
10
13
  quotaStateEntry?: QuotaStateEntry | null;
11
14
  hostConcurrencyLimit?: HostConcurrencyLimit | null;
15
+ quotaSourceSnapshot?: QuotaUsageSnapshot | null;
12
16
  }
13
17
  export declare function scheduleWave(options: ScheduleWaveOptions): WaveSchedule;
14
18
  /** Build the state key used for indexing quota-state.json entries. */
@@ -1,7 +1,20 @@
1
1
  import { classifyProvider, resolveLimits } from "./limits.js";
2
- import { computeMaxSafeConcurrency } from "./state.js";
2
+ import { computeMaxSafeConcurrency, computeRampUpConcurrency } from "./state.js";
3
+ function sumTopN(sorted, n) {
4
+ let sum = 0;
5
+ for (let i = 0; i < Math.min(n, sorted.length); i++)
6
+ sum += sorted[i];
7
+ return sum;
8
+ }
3
9
  export function scheduleWave(options) {
4
- const { providerName, sessionConfig, hostModel, requestedConcurrency, estimatedPacketTokens = 0, quotaStateEntry = null, hostConcurrencyLimit = null, } = options;
10
+ const { providerName, sessionConfig, hostModel, requestedConcurrency, estimatedSlotTokens, estimatedPacketTokens = 0, quotaStateEntry = null, hostConcurrencyLimit = null, quotaSourceSnapshot = null, } = options;
11
+ // Descending sort so sumTopN picks the largest slots
12
+ const slotsSorted = estimatedSlotTokens
13
+ ? [...estimatedSlotTokens].sort((a, b) => b - a)
14
+ : null;
15
+ const avgTokens = slotsSorted && slotsSorted.length > 0
16
+ ? Math.floor(slotsSorted.reduce((a, b) => a + b, 0) / slotsSorted.length)
17
+ : estimatedPacketTokens;
5
18
  const quota = sessionConfig.quota ?? {};
6
19
  const applyHostConcurrencyLimit = (waveSize) => {
7
20
  if (hostConcurrencyLimit === null)
@@ -19,7 +32,7 @@ export function scheduleWave(options) {
19
32
  };
20
33
  return {
21
34
  wave_size: waveSize,
22
- estimated_wave_tokens: waveSize * estimatedPacketTokens,
35
+ estimated_wave_tokens: slotsSorted ? sumTopN(slotsSorted, waveSize) : waveSize * avgTokens,
23
36
  cooldown_until: null,
24
37
  confidence: "high",
25
38
  source: "default",
@@ -48,12 +61,25 @@ export function scheduleWave(options) {
48
61
  waveSize = Math.min(waveSize, rpmCap);
49
62
  }
50
63
  // Cap by input tokens-per-minute
51
- if (limits.input_tokens_per_minute != null && estimatedPacketTokens > 0) {
52
- const tpmCap = Math.max(1, Math.floor((limits.input_tokens_per_minute * safetyMargin) / estimatedPacketTokens));
53
- waveSize = Math.min(waveSize, tpmCap);
64
+ if (limits.input_tokens_per_minute != null && avgTokens > 0) {
65
+ const tpmBudget = limits.input_tokens_per_minute * safetyMargin;
66
+ if (slotsSorted && slotsSorted.length > 0) {
67
+ let candidateSize = waveSize;
68
+ while (candidateSize > 1 && sumTopN(slotsSorted, candidateSize) > tpmBudget) {
69
+ candidateSize--;
70
+ }
71
+ waveSize = Math.max(1, candidateSize);
72
+ }
73
+ else {
74
+ const tpmCap = Math.max(1, Math.floor(tpmBudget / avgTokens));
75
+ waveSize = Math.min(waveSize, tpmCap);
76
+ }
54
77
  }
55
78
  if (quotaStateEntry) {
56
- const learnedCap = computeMaxSafeConcurrency(quotaStateEntry, halfLifeHours);
79
+ const rampUp = quota.ramp_up_enabled !== false;
80
+ const learnedCap = rampUp
81
+ ? computeRampUpConcurrency(quotaStateEntry, halfLifeHours)
82
+ : computeMaxSafeConcurrency(quotaStateEntry, halfLifeHours);
57
83
  waveSize = Math.min(waveSize, learnedCap);
58
84
  }
59
85
  else {
@@ -61,22 +87,38 @@ export function scheduleWave(options) {
61
87
  const fallbackCap = providerType === "local"
62
88
  ? quota.unknown_local_concurrency
63
89
  : (quota.unknown_hosted_concurrency ?? 1);
64
- if (typeof fallbackCap === "number" && Number.isFinite(fallbackCap)) {
90
+ if (fallbackCap === "unlimited") {
91
+ // no cap — "unlimited" intentionally skips clamping
92
+ }
93
+ else if (typeof fallbackCap === "number" && Number.isFinite(fallbackCap)) {
65
94
  waveSize = Math.min(waveSize, Math.max(1, Math.floor(fallbackCap)));
66
95
  }
67
96
  }
68
97
  }
98
+ // Apply real-time quota source data if available
99
+ if (quotaSourceSnapshot && !cooldownUntil) {
100
+ if (quotaSourceSnapshot.remaining_pct != null && quotaSourceSnapshot.remaining_pct < 0.1) {
101
+ waveSize = 1;
102
+ if (quotaSourceSnapshot.reset_at) {
103
+ cooldownUntil = quotaSourceSnapshot.reset_at;
104
+ }
105
+ }
106
+ else if (quotaSourceSnapshot.remaining_pct != null && quotaSourceSnapshot.remaining_pct < 0.3) {
107
+ waveSize = Math.min(waveSize, Math.max(1, Math.floor(waveSize * 0.5)));
108
+ }
109
+ }
69
110
  waveSize = applyHostConcurrencyLimit(waveSize);
70
111
  waveSize = Math.max(1, waveSize);
71
112
  return {
72
113
  wave_size: waveSize,
73
- estimated_wave_tokens: waveSize * estimatedPacketTokens,
114
+ estimated_wave_tokens: slotsSorted ? sumTopN(slotsSorted, waveSize) : waveSize * avgTokens,
74
115
  cooldown_until: cooldownUntil,
75
116
  confidence,
76
117
  source,
77
118
  resolved_limits: limits,
78
119
  host_concurrency_limit: hostConcurrencyLimit,
79
120
  model: hostModel,
121
+ quota_source_snapshot: quotaSourceSnapshot,
80
122
  };
81
123
  }
82
124
  /** Build the state key used for indexing quota-state.json entries. */
@@ -0,0 +1,4 @@
1
+ export interface SlidingWindowResult<T> {
2
+ results: PromiseSettledResult<T>[];
3
+ }
4
+ export declare function runSlidingWindow<T>(tasks: Array<() => Promise<T>>, concurrency: number, onComplete?: (index: number, result: PromiseSettledResult<T>) => void): Promise<SlidingWindowResult<T>>;
@@ -0,0 +1,28 @@
1
+ export async function runSlidingWindow(tasks, concurrency, onComplete) {
2
+ const results = new Array(tasks.length);
3
+ let nextIndex = 0;
4
+ async function runOne(index) {
5
+ let result;
6
+ try {
7
+ const value = await tasks[index]();
8
+ result = { status: "fulfilled", value };
9
+ }
10
+ catch (reason) {
11
+ result = { status: "rejected", reason };
12
+ }
13
+ results[index] = result;
14
+ onComplete?.(index, result);
15
+ if (nextIndex < tasks.length) {
16
+ const next = nextIndex++;
17
+ await runOne(next);
18
+ }
19
+ }
20
+ const initialBatch = Math.min(concurrency, tasks.length);
21
+ const runners = [];
22
+ for (let i = 0; i < initialBatch; i++) {
23
+ const idx = nextIndex++;
24
+ runners.push(runOne(idx));
25
+ }
26
+ await Promise.all(runners);
27
+ return { results };
28
+ }
@@ -9,4 +9,7 @@ export declare function writeQuotaState(state: QuotaState): Promise<void>;
9
9
  * exceeds failure evidence, with a minimum of 1.
10
10
  */
11
11
  export declare function computeMaxSafeConcurrency(entry: QuotaStateEntry, halfLifeHours: number, maxToCheck?: number): number;
12
+ export declare function computeRampUpConcurrency(entry: QuotaStateEntry, halfLifeHours: number, maxToCheck?: number): number;
13
+ export declare function computeBackoffCooldownMs(consecutive429Count: number): number;
14
+ export declare function computeBackoffFailureWeight(consecutive429Count: number): number;
12
15
  export declare function recordWaveOutcome(providerModelKey: string, outcome: ObservedWaveOutcome, halfLifeHours: number): Promise<void>;