auditor-lambda 0.3.32 → 0.3.34
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -1
- package/audit-code-wrapper-lib.mjs +30 -28
- package/dist/cli.d.ts +5 -0
- package/dist/cli.js +55 -123
- package/dist/mcp/server.js +11 -11
- package/dist/orchestrator/reviewPackets.d.ts +3 -0
- package/dist/orchestrator/reviewPackets.js +13 -2
- package/dist/quota/compositeQuotaSource.d.ts +7 -0
- package/dist/quota/compositeQuotaSource.js +20 -0
- package/dist/quota/errorParsers/claudeCodeErrorParser.d.ts +6 -0
- package/dist/quota/errorParsers/claudeCodeErrorParser.js +39 -0
- package/dist/quota/errorParsers/genericErrorParser.d.ts +9 -0
- package/dist/quota/errorParsers/genericErrorParser.js +7 -0
- package/dist/quota/errorParsers/index.d.ts +5 -0
- package/dist/quota/errorParsers/index.js +12 -0
- package/dist/quota/errorParsing.d.ts +7 -0
- package/dist/quota/errorParsing.js +69 -0
- package/dist/quota/fileLock.d.ts +6 -0
- package/dist/quota/fileLock.js +64 -0
- package/dist/quota/index.d.ts +11 -1
- package/dist/quota/index.js +7 -1
- package/dist/quota/learnedQuotaSource.d.ts +7 -0
- package/dist/quota/learnedQuotaSource.js +25 -0
- package/dist/quota/probe.d.ts +1 -4
- package/dist/quota/probe.js +1 -4
- package/dist/quota/quotaSource.d.ts +12 -0
- package/dist/quota/quotaSource.js +1 -0
- package/dist/quota/scheduler.d.ts +5 -1
- package/dist/quota/scheduler.js +51 -9
- package/dist/quota/slidingWindow.d.ts +4 -0
- package/dist/quota/slidingWindow.js +28 -0
- package/dist/quota/state.d.ts +3 -0
- package/dist/quota/state.js +57 -14
- package/dist/quota/types.d.ts +11 -2
- package/dist/supervisor/operatorHandoff.js +1 -1
- package/dist/types/sessionConfig.d.ts +3 -0
- package/dist/validation/sessionConfig.js +4 -0
- package/package.json +1 -1
- package/schemas/dispatch_quota.schema.json +23 -2
- package/skills/audit-code/audit-code.prompt.md +5 -0
|
@@ -2,8 +2,19 @@ import { createHash } from "node:crypto";
|
|
|
2
2
|
import { LENS_ORDER } from "./unitBuilder.js";
|
|
3
3
|
const DEFAULT_MAX_TASKS_PER_PACKET = 0;
|
|
4
4
|
const DEFAULT_TARGET_PACKET_LINES = 8000;
|
|
5
|
-
const ESTIMATED_TOKENS_PER_LINE = 4;
|
|
6
|
-
const ESTIMATED_PACKET_PROMPT_TOKENS = 900;
|
|
5
|
+
/** Number of tokens attributed to each counted source line. */
export const ESTIMATED_TOKENS_PER_LINE = 4;
/** Constant token amount added once to every estimate. */
export const ESTIMATED_PACKET_PROMPT_TOKENS = 900;
/**
 * Estimate the token size of a group of tasks.
 *
 * Every value found in each task's `file_line_counts` map is summed, the
 * total is multiplied by ESTIMATED_TOKENS_PER_LINE, and
 * ESTIMATED_PACKET_PROMPT_TOKENS is added once. Tasks without a
 * `file_line_counts` map contribute nothing.
 *
 * Only finite, positive numbers are counted. A stray string, `null`, `NaN`
 * or negative entry is ignored instead of turning the running total into a
 * concatenated string or `NaN`.
 *
 * @param tasks Iterable of task objects, each optionally carrying a
 *   `file_line_counts` map of per-file line counts.
 * @returns The estimated token count for the whole group.
 */
export function estimateTaskGroupTokens(tasks) {
    let totalLines = 0;
    for (const task of tasks) {
        const lineCounts = task.file_line_counts;
        if (!lineCounts)
            continue;
        for (const count of Object.values(lineCounts)) {
            // Guard against malformed entries so one bad value cannot poison the sum.
            if (typeof count === "number" && Number.isFinite(count) && count > 0) {
                totalLines += count;
            }
        }
    }
    return ESTIMATED_PACKET_PROMPT_TOKENS + totalLines * ESTIMATED_TOKENS_PER_LINE;
}
|
|
7
18
|
const PACKET_EXPANSION_MIN_CONFIDENCE = 0.65;
|
|
8
19
|
const HIGH_FAN_DEGREE_THRESHOLD = 12;
|
|
9
20
|
const HIGH_FAN_EXPANSION_CONFIDENCE = 0.99;
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import type { QuotaSource, QuotaUsageSnapshot } from "./quotaSource.js";
|
|
2
|
+
export declare class CompositeQuotaSource implements QuotaSource {
|
|
3
|
+
readonly name = "composite";
|
|
4
|
+
private sources;
|
|
5
|
+
constructor(sources: QuotaSource[]);
|
|
6
|
+
queryCurrentUsage(providerModelKey: string): Promise<QuotaUsageSnapshot | null>;
|
|
7
|
+
}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
/**
 * Ask one source for its usage snapshot, converting any failure
 * (synchronous throw or rejected promise) into `null`.
 */
async function queryOrNull(source, providerModelKey) {
    try {
        return await source.queryCurrentUsage(providerModelKey);
    }
    catch {
        // A failing source is treated exactly like one that has no data.
        return null;
    }
}
/**
 * Quota source that delegates to a list of other sources.
 *
 * Sources are consulted one at a time, in the order given to the
 * constructor. The first truthy snapshot is returned; sources that throw
 * or return nothing are skipped.
 */
export class CompositeQuotaSource {
    name = "composite";
    sources;
    /**
     * @param sources Sources to consult, highest priority first.
     */
    constructor(sources) {
        this.sources = sources;
    }
    /**
     * @param providerModelKey Key forwarded unchanged to every source.
     * @returns The first truthy snapshot, or `null` when no source yields one.
     */
    async queryCurrentUsage(providerModelKey) {
        for (const candidate of this.sources) {
            const usage = await queryOrNull(candidate, providerModelKey);
            if (usage) {
                return usage;
            }
        }
        return null;
    }
}
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
import type { RateLimitDetectionResult } from "../errorParsing.js";
|
|
2
|
+
import type { ErrorParser } from "./genericErrorParser.js";
|
|
3
|
+
export declare class ClaudeCodeErrorParser implements ErrorParser {
|
|
4
|
+
readonly name = "claude-code";
|
|
5
|
+
parse(text: string): RateLimitDetectionResult;
|
|
6
|
+
}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
/**
 * Coerce a retry hint taken from parsed JSON into a positive finite number.
 * Numeric strings are converted; every other value (booleans, objects,
 * empty strings, zero, negatives) yields `null`.
 */
function coerceRetryValue(value) {
    const numeric = typeof value === "string" ? Number(value) : value;
    return typeof numeric === "number" && Number.isFinite(numeric) && numeric > 0
        ? numeric
        : null;
}
/**
 * Rate-limit detector for line-delimited JSON output, identified by the
 * name "claude-code".
 */
export class ClaudeCodeErrorParser {
    name = "claude-code";
    /**
     * Scan `text` line by line and report the first JSON line that signals a
     * rate limit.
     *
     * A line counts as a rate-limit signal when its parsed object has
     * `status_code === 429`, `type === "rate_limit_error"`, or
     * `level === "error"` together with a `message` matching /rate.?limit/i.
     * Lines that do not start with `{` or are not valid JSON are skipped.
     *
     * The retry hint prefers `retry_after_ms` (taken as milliseconds) and
     * falls back to `retry_after`, where values below 600 are read as seconds
     * and converted to milliseconds. Both fields are normalised to numbers, so
     * the returned `retryAfterMs` is always a number or `null`, never a string
     * or boolean copied from the payload.
     *
     * @param text Raw output to inspect.
     * @returns `{ isRateLimited, retryAfterMs, rawMatch }`; when nothing
     *   matches, `isRateLimited` is false and the other fields are null.
     */
    parse(text) {
        for (const line of text.split("\n")) {
            const trimmed = line.trim();
            if (!trimmed.startsWith("{"))
                continue;
            let obj;
            try {
                obj = JSON.parse(trimmed);
            }
            catch {
                // Not valid JSON, skip
                continue;
            }
            const level = obj["level"];
            const type = obj["type"];
            const message = obj["message"] ?? "";
            const statusCode = obj["status_code"];
            const signalsRateLimit = statusCode === 429 ||
                type === "rate_limit_error" ||
                (level === "error" && /\brate.?limit/i.test(message));
            if (!signalsRateLimit)
                continue;
            const explicitMs = coerceRetryValue(obj["retry_after_ms"]);
            const ambiguous = coerceRetryValue(obj["retry_after"]);
            let extractedMs = null;
            if (explicitMs !== null) {
                extractedMs = explicitMs;
            }
            else if (ambiguous !== null) {
                // Small values are assumed to be seconds, larger ones milliseconds.
                extractedMs = ambiguous < 600 ? ambiguous * 1000 : ambiguous;
            }
            return {
                isRateLimited: true,
                retryAfterMs: extractedMs,
                rawMatch: `claude-code-stderr:${statusCode ?? type ?? "rate_limit"}`,
            };
        }
        return { isRateLimited: false, retryAfterMs: null, rawMatch: null };
    }
}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import type { RateLimitDetectionResult } from "../errorParsing.js";
|
|
2
|
+
export interface ErrorParser {
|
|
3
|
+
readonly name: string;
|
|
4
|
+
parse(text: string): RateLimitDetectionResult;
|
|
5
|
+
}
|
|
6
|
+
export declare class GenericErrorParser implements ErrorParser {
|
|
7
|
+
readonly name = "generic";
|
|
8
|
+
parse(text: string): RateLimitDetectionResult;
|
|
9
|
+
}
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
export type { ErrorParser } from "./genericErrorParser.js";
|
|
2
|
+
export { GenericErrorParser } from "./genericErrorParser.js";
|
|
3
|
+
export { ClaudeCodeErrorParser } from "./claudeCodeErrorParser.js";
|
|
4
|
+
import type { ErrorParser } from "./genericErrorParser.js";
|
|
5
|
+
export declare function getErrorParserForProvider(providerName: string): ErrorParser;
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
export { GenericErrorParser } from "./genericErrorParser.js";
|
|
2
|
+
export { ClaudeCodeErrorParser } from "./claudeCodeErrorParser.js";
|
|
3
|
+
import { GenericErrorParser } from "./genericErrorParser.js";
|
|
4
|
+
import { ClaudeCodeErrorParser } from "./claudeCodeErrorParser.js";
|
|
5
|
+
/**
 * Factories for provider-specific parsers, keyed by provider name.
 * A Map is used instead of an object literal so that names such as
 * "constructor" or "toString" cannot resolve to inherited Object.prototype
 * members and be invoked as if they were parser factories.
 */
const PROVIDER_PARSERS = new Map([
    ["claude-code", () => new ClaudeCodeErrorParser()],
]);
/** Shared fallback instance returned for every provider without a dedicated parser. */
const genericParser = new GenericErrorParser();
/**
 * Pick the error parser for a provider.
 *
 * @param providerName Provider identifier to look up.
 * @returns A freshly constructed provider-specific parser when one is
 *   registered, otherwise the shared generic parser.
 */
export function getErrorParserForProvider(providerName) {
    const factory = PROVIDER_PARSERS.get(providerName);
    return factory ? factory() : genericParser;
}
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
export interface RateLimitDetectionResult {
|
|
2
|
+
isRateLimited: boolean;
|
|
3
|
+
retryAfterMs: number | null;
|
|
4
|
+
rawMatch: string | null;
|
|
5
|
+
}
|
|
6
|
+
export declare function detectRateLimitError(text: string): RateLimitDetectionResult;
|
|
7
|
+
export declare function computeCooldownUntil(retryAfterMs: number | null, defaultMs?: number): string;
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
/** Free-text patterns checked, in order, when no structured JSON signal is found. */
const RATE_LIMIT_PATTERNS = [
    /\b429\b/i,
    /\btoo many requests\b/i,
    /\brate.?limit/i,
    /\boverloaded\b/i,
    /\bresource.?exhausted\b/i,
    /\bquota.?exceeded\b/i,
];
/**
 * Parse everything from the first `{` in `text` to its end as JSON.
 * Returns the parsed value, or `null` when there is no `{` or the slice is
 * not valid JSON (for example because text follows the JSON document).
 */
function tryParseJson(text) {
    const jsonStart = text.indexOf("{");
    if (jsonStart === -1)
        return null;
    try {
        return JSON.parse(text.slice(jsonStart));
    }
    catch {
        return null;
    }
}
/**
 * Coerce a JSON value to a positive finite number; numeric strings are
 * converted, anything else yields `null`.
 */
function toPositiveFinite(value) {
    const numeric = typeof value === "string" ? Number(value) : value;
    return typeof numeric === "number" && Number.isFinite(numeric) && numeric > 0
        ? numeric
        : null;
}
/**
 * Extract a retry delay in milliseconds from a parsed error object.
 *
 * Candidates are checked in this order: `headers["retry-after"]`,
 * `headers["Retry-After"]`, `retry_after`, `retry_after_ms`. The first one
 * holding a positive finite number (or numeric string) wins; an unusable
 * earlier value, such as a date string, no longer hides a valid later one.
 *
 * `retry_after_ms` is returned verbatim because the field is already in
 * milliseconds. For the other fields the unit is ambiguous: values below
 * 600 are treated as seconds and multiplied by 1000, larger values are
 * assumed to be milliseconds already.
 *
 * @returns The delay in milliseconds, or `null` when no usable hint exists.
 */
function extractRetryAfterMs(obj) {
    const headers = obj["headers"];
    const candidates = [
        { raw: headers?.["retry-after"], alreadyMs: false },
        { raw: headers?.["Retry-After"], alreadyMs: false },
        { raw: obj["retry_after"], alreadyMs: false },
        { raw: obj["retry_after_ms"], alreadyMs: true },
    ];
    for (const { raw, alreadyMs } of candidates) {
        const val = toPositiveFinite(raw);
        if (val === null)
            continue;
        if (alreadyMs)
            return val;
        // If the value looks like seconds (< 600), convert to ms
        return val < 600 ? val * 1000 : val;
    }
    return null;
}
/**
 * Look for a structured rate-limit signal in JSON embedded in `text`.
 *
 * The object is rate limited when `status === 429`, `type` is
 * "rate_limit_error", or `error.type` is "rate_limit_error".
 *
 * @returns A detection result, or `null` when no JSON could be parsed or it
 *   carries none of those signals. `rawMatch` is "json:status=429" for the
 *   status case and "json:type=rate_limit_error" otherwise, naming the value
 *   that matched rather than an unrelated outer `type`.
 */
function detectFromJson(text) {
    const obj = tryParseJson(text);
    if (!obj)
        return null;
    const status = obj["status"];
    const type = obj["type"];
    const errorObj = obj["error"];
    const errorType = errorObj?.["type"];
    const byStatus = status === 429;
    const byType = type === "rate_limit_error" || errorType === "rate_limit_error";
    if (!byStatus && !byType)
        return null;
    return {
        isRateLimited: true,
        retryAfterMs: extractRetryAfterMs(obj),
        rawMatch: `json:${byStatus ? "status=429" : "type=rate_limit_error"}`,
    };
}
/**
 * Decide whether `text` describes a rate-limit failure.
 *
 * Structured JSON signals are tried first; otherwise the text is matched
 * against RATE_LIMIT_PATTERNS and the first hit is reported with
 * `retryAfterMs: null` and the matched substring as `rawMatch`.
 *
 * @param text Error output to inspect.
 * @returns `{ isRateLimited, retryAfterMs, rawMatch }`.
 */
export function detectRateLimitError(text) {
    const jsonResult = detectFromJson(text);
    if (jsonResult)
        return jsonResult;
    for (const pattern of RATE_LIMIT_PATTERNS) {
        const match = pattern.exec(text);
        if (match) {
            return { isRateLimited: true, retryAfterMs: null, rawMatch: match[0] };
        }
    }
    return { isRateLimited: false, retryAfterMs: null, rawMatch: null };
}
/** Cooldown applied when no usable retry delay is supplied. */
const DEFAULT_COOLDOWN_MS = 60_000;
/** Largest absolute timestamp (ms since epoch) a JavaScript Date can represent. */
const MAX_DATE_MS = 8.64e15;
/**
 * Compute the ISO-8601 timestamp at which a cooldown ends.
 *
 * @param retryAfterMs Delay in milliseconds. Used only when it is a positive
 *   finite number; `null`, non-numbers, NaN, Infinity, zero and negatives
 *   fall back to `defaultMs`.
 * @param defaultMs Fallback delay in milliseconds; a non-finite value is
 *   replaced by DEFAULT_COOLDOWN_MS.
 * @returns `Date.now()` plus the chosen delay as an ISO string. The
 *   timestamp is clamped to the representable Date range so the call never
 *   throws a RangeError.
 */
export function computeCooldownUntil(retryAfterMs, defaultMs = DEFAULT_COOLDOWN_MS) {
    const fallbackMs = Number.isFinite(defaultMs) ? defaultMs : DEFAULT_COOLDOWN_MS;
    const ms = typeof retryAfterMs === "number" && Number.isFinite(retryAfterMs) && retryAfterMs > 0
        ? retryAfterMs
        : fallbackMs;
    const until = Math.max(-MAX_DATE_MS, Math.min(MAX_DATE_MS, Date.now() + ms));
    return new Date(until).toISOString();
}
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
export declare class FileLockTimeoutError extends Error {
|
|
2
|
+
constructor(lockPath: string);
|
|
3
|
+
}
|
|
4
|
+
export declare function acquireLock(lockPath: string, timeoutMs?: number): Promise<void>;
|
|
5
|
+
export declare function releaseLock(lockPath: string): Promise<void>;
|
|
6
|
+
export declare function withFileLock<T>(lockPath: string, fn: () => Promise<T>, timeoutMs?: number): Promise<T>;
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
import { open, unlink, stat } from "node:fs/promises";
|
|
2
|
+
/** A lock file whose mtime is older than this many milliseconds is considered abandoned. */
const STALE_LOCK_MS = 30_000;
/** Pause between acquisition attempts, in milliseconds. */
const RETRY_INTERVAL_MS = 50;
/** Default time budget for acquireLock, in milliseconds. */
const DEFAULT_TIMEOUT_MS = 10_000;
/** Thrown by acquireLock when the lock could not be obtained before the deadline. */
export class FileLockTimeoutError extends Error {
    /**
     * @param lockPath Path of the lock file that could not be acquired.
     */
    constructor(lockPath) {
        super(`Timed out acquiring lock: ${lockPath}`);
        this.name = "FileLockTimeoutError";
    }
}
/**
 * Report whether the lock file's mtime is more than STALE_LOCK_MS in the
 * past. Any stat failure (including a missing file) counts as "not stale".
 */
async function isLockStale(lockPath) {
    try {
        const info = await stat(lockPath);
        return Date.now() - info.mtimeMs > STALE_LOCK_MS;
    }
    catch {
        return false;
    }
}
/**
 * Acquire an advisory lock by exclusively creating `lockPath`.
 *
 * The file is opened with the "wx" flag, which fails with EEXIST when it
 * already exists. On EEXIST a stale lock is deleted and the attempt is
 * repeated immediately; otherwise the call sleeps RETRY_INTERVAL_MS and
 * tries again until the deadline passes. At least one attempt is always
 * made, even with `timeoutMs` of 0.
 *
 * NOTE(review): the staleness check and the unlink are separate steps, so
 * two contenders could presumably both remove a "stale" lock; confirm
 * whether callers need a stronger guarantee.
 *
 * @param lockPath Path of the lock file.
 * @param timeoutMs Maximum time to keep retrying, in milliseconds.
 * @throws FileLockTimeoutError when the deadline passes without success.
 * @throws Any error from `open` other than EEXIST.
 */
export async function acquireLock(lockPath, timeoutMs = DEFAULT_TIMEOUT_MS) {
    const deadline = Date.now() + timeoutMs;
    while (true) {
        try {
            const fd = await open(lockPath, "wx");
            await fd.close();
            return;
        }
        catch (err) {
            if (err.code !== "EEXIST")
                throw err;
        }
        if (await isLockStale(lockPath)) {
            try {
                await unlink(lockPath);
                continue;
            }
            catch {
                // Another process may have already cleaned it up
            }
        }
        if (Date.now() >= deadline) {
            throw new FileLockTimeoutError(lockPath);
        }
        await new Promise((r) => setTimeout(r, RETRY_INTERVAL_MS));
    }
}
/**
 * Release the lock by deleting `lockPath`. A lock file that is already gone
 * (ENOENT) is not an error; every other unlink failure is rethrown.
 */
export async function releaseLock(lockPath) {
    try {
        await unlink(lockPath);
    }
    catch (err) {
        if (err.code !== "ENOENT")
            throw err;
    }
}
/**
 * Run `fn` while holding the lock at `lockPath`, releasing it afterwards
 * whether `fn` resolves or rejects.
 *
 * When `fn` fails, its error is the one propagated even if releasing the
 * lock fails too. With a plain `finally`, a release failure would replace
 * the callback's error and hide the real cause. When `fn` succeeds, a
 * release failure is still reported to the caller.
 *
 * @param lockPath Path of the lock file.
 * @param fn Async callback executed while the lock is held.
 * @param timeoutMs Optional acquisition timeout forwarded to acquireLock.
 * @returns Whatever `fn` resolves to.
 */
export async function withFileLock(lockPath, fn, timeoutMs) {
    await acquireLock(lockPath, timeoutMs);
    let value;
    try {
        value = await fn();
    }
    catch (fnError) {
        try {
            await releaseLock(lockPath);
        }
        catch {
            // The callback's failure is the primary error; do not mask it.
        }
        throw fnError;
    }
    await releaseLock(lockPath);
    return value;
}
|
package/dist/quota/index.d.ts
CHANGED
|
@@ -1,9 +1,19 @@
|
|
|
1
1
|
export { resolveLimits, lookupKnownModel, classifyProvider } from "./limits.js";
|
|
2
2
|
export type { LimitResolutionResult, ResolveLimitsOptions, ProviderType } from "./limits.js";
|
|
3
3
|
export { detectHostActiveSubagentLimit, resolveHostActiveSubagentLimit, } from "./hostLimits.js";
|
|
4
|
-
export { readQuotaState, writeQuotaState, computeMaxSafeConcurrency, recordWaveOutcome, getQuotaStatePath, decayWeight, applyDecayToEntry, } from "./state.js";
|
|
4
|
+
export { readQuotaState, writeQuotaState, computeMaxSafeConcurrency, recordWaveOutcome, getQuotaStatePath, decayWeight, applyDecayToEntry, computeBackoffCooldownMs, computeBackoffFailureWeight, computeRampUpConcurrency, } from "./state.js";
|
|
5
5
|
export { scheduleWave, buildProviderModelKey } from "./scheduler.js";
|
|
6
6
|
export type { ScheduleWaveOptions } from "./scheduler.js";
|
|
7
|
+
export { detectRateLimitError, computeCooldownUntil } from "./errorParsing.js";
|
|
8
|
+
export { acquireLock, releaseLock, withFileLock, FileLockTimeoutError } from "./fileLock.js";
|
|
9
|
+
export { runSlidingWindow } from "./slidingWindow.js";
|
|
10
|
+
export type { SlidingWindowResult } from "./slidingWindow.js";
|
|
11
|
+
export type { RateLimitDetectionResult } from "./errorParsing.js";
|
|
7
12
|
export { probeProvider } from "./probe.js";
|
|
8
13
|
export type { ProbeResult } from "./probe.js";
|
|
14
|
+
export type { QuotaSource, QuotaUsageSnapshot } from "./quotaSource.js";
|
|
15
|
+
export type { ErrorParser } from "./errorParsers/index.js";
|
|
16
|
+
export { GenericErrorParser, ClaudeCodeErrorParser, getErrorParserForProvider } from "./errorParsers/index.js";
|
|
17
|
+
export { LearnedQuotaSource } from "./learnedQuotaSource.js";
|
|
18
|
+
export { CompositeQuotaSource } from "./compositeQuotaSource.js";
|
|
9
19
|
export type { ResolvedLimits, LimitSource, LimitConfidence, HostConcurrencyLimit, HostConcurrencyLimitSource, QuotaState, QuotaStateEntry, ConcurrencyBucket, WaveSchedule, DispatchQuota, ObservedWaveOutcome, } from "./types.js";
|
package/dist/quota/index.js
CHANGED
|
@@ -1,5 +1,11 @@
|
|
|
1
1
|
export { resolveLimits, lookupKnownModel, classifyProvider } from "./limits.js";
|
|
2
2
|
export { detectHostActiveSubagentLimit, resolveHostActiveSubagentLimit, } from "./hostLimits.js";
|
|
3
|
-
export { readQuotaState, writeQuotaState, computeMaxSafeConcurrency, recordWaveOutcome, getQuotaStatePath, decayWeight, applyDecayToEntry, } from "./state.js";
|
|
3
|
+
export { readQuotaState, writeQuotaState, computeMaxSafeConcurrency, recordWaveOutcome, getQuotaStatePath, decayWeight, applyDecayToEntry, computeBackoffCooldownMs, computeBackoffFailureWeight, computeRampUpConcurrency, } from "./state.js";
|
|
4
4
|
export { scheduleWave, buildProviderModelKey } from "./scheduler.js";
|
|
5
|
+
export { detectRateLimitError, computeCooldownUntil } from "./errorParsing.js";
|
|
6
|
+
export { acquireLock, releaseLock, withFileLock, FileLockTimeoutError } from "./fileLock.js";
|
|
7
|
+
export { runSlidingWindow } from "./slidingWindow.js";
|
|
5
8
|
export { probeProvider } from "./probe.js";
|
|
9
|
+
export { GenericErrorParser, ClaudeCodeErrorParser, getErrorParserForProvider } from "./errorParsers/index.js";
|
|
10
|
+
export { LearnedQuotaSource } from "./learnedQuotaSource.js";
|
|
11
|
+
export { CompositeQuotaSource } from "./compositeQuotaSource.js";
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import type { QuotaSource, QuotaUsageSnapshot } from "./quotaSource.js";
|
|
2
|
+
export declare class LearnedQuotaSource implements QuotaSource {
|
|
3
|
+
readonly name = "learned";
|
|
4
|
+
private halfLifeHours;
|
|
5
|
+
constructor(halfLifeHours?: number);
|
|
6
|
+
queryCurrentUsage(providerModelKey: string): Promise<QuotaUsageSnapshot | null>;
|
|
7
|
+
}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import { readQuotaState, computeMaxSafeConcurrency } from "./state.js";
|
|
2
|
+
/**
 * Quota source derived from the quota state returned by readQuotaState().
 */
export class LearnedQuotaSource {
    name = "learned";
    halfLifeHours;
    /**
     * @param halfLifeHours Half-life, in hours, passed to
     *   computeMaxSafeConcurrency; defaults to 24.
     */
    constructor(halfLifeHours = 24) {
        this.halfLifeHours = halfLifeHours;
    }
    /**
     * Build a usage snapshot for one provider/model key.
     *
     * @param providerModelKey Key into the state's `entries` map.
     * @returns `null` when the state has no entry for the key. Otherwise a
     *   snapshot whose `requests_remaining` is the computed max safe
     *   concurrency and whose `captured_at` is the entry's `updated_at`.
     *   While the entry's `cooldown_until` lies in the future,
     *   `remaining_pct` is 0 and `reset_at` is that timestamp; otherwise
     *   both are null. `tokens_remaining` is always null.
     */
    async queryCurrentUsage(providerModelKey) {
        const { entries } = await readQuotaState();
        const learned = entries[providerModelKey];
        if (!learned) {
            return null;
        }
        const requestsRemaining = computeMaxSafeConcurrency(learned, this.halfLifeHours);
        const cooldownUntil = learned.cooldown_until;
        let coolingDown = false;
        if (cooldownUntil != null) {
            coolingDown = new Date(cooldownUntil).getTime() > Date.now();
        }
        const snapshot = {
            remaining_pct: null,
            reset_at: null,
            requests_remaining: requestsRemaining,
            tokens_remaining: null,
            captured_at: learned.updated_at,
            source: "learned",
        };
        if (coolingDown) {
            snapshot.remaining_pct = 0;
            snapshot.reset_at = cooldownUntil;
        }
        return snapshot;
    }
}
|
package/dist/quota/probe.d.ts
CHANGED
|
@@ -5,9 +5,6 @@ export interface ProbeResult {
|
|
|
5
5
|
/**
|
|
6
6
|
* Probe a provider to discover its rate limits.
|
|
7
7
|
*
|
|
8
|
-
*
|
|
9
|
-
* provider where the auditor controls the API call. IDE providers
|
|
10
|
-
* (claude-code, opencode) select the model internally; their limits come
|
|
11
|
-
* from known-model metadata or learned behavior.
|
|
8
|
+
* @deprecated Phase 3A replaces this with the QuotaSource abstraction.
|
|
12
9
|
*/
|
|
13
10
|
export declare function probeProvider(providerName: string, probeMode?: "auto" | "never" | "force"): Promise<ProbeResult>;
|
package/dist/quota/probe.js
CHANGED
|
@@ -1,10 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Probe a provider to discover its rate limits.
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
* provider where the auditor controls the API call. IDE providers
|
|
6
|
-
* (claude-code, opencode) select the model internally; their limits come
|
|
7
|
-
* from known-model metadata or learned behavior.
|
|
4
|
+
* @deprecated Phase 3A replaces this with the QuotaSource abstraction.
|
|
8
5
|
*/
|
|
9
6
|
export async function probeProvider(providerName, probeMode = "auto") {
|
|
10
7
|
if (probeMode === "never") {
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
export interface QuotaUsageSnapshot {
|
|
2
|
+
remaining_pct: number | null;
|
|
3
|
+
reset_at: string | null;
|
|
4
|
+
requests_remaining: number | null;
|
|
5
|
+
tokens_remaining: number | null;
|
|
6
|
+
captured_at: string;
|
|
7
|
+
source: string;
|
|
8
|
+
}
|
|
9
|
+
export interface QuotaSource {
|
|
10
|
+
readonly name: string;
|
|
11
|
+
queryCurrentUsage(providerModelKey: string): Promise<QuotaUsageSnapshot | null>;
|
|
12
|
+
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -1,14 +1,18 @@
|
|
|
1
1
|
import type { ResolvedProviderName, SessionConfig } from "../types/sessionConfig.js";
|
|
2
2
|
import type { HostConcurrencyLimit, QuotaStateEntry, WaveSchedule } from "./types.js";
|
|
3
|
+
import type { QuotaUsageSnapshot } from "./quotaSource.js";
|
|
3
4
|
export interface ScheduleWaveOptions {
|
|
4
5
|
providerName: ResolvedProviderName;
|
|
5
6
|
sessionConfig: SessionConfig;
|
|
6
7
|
hostModel: string | null;
|
|
7
8
|
requestedConcurrency: number;
|
|
8
|
-
/**
|
|
9
|
+
/** Per-slot estimated tokens (one entry per worker slot). Used for TPM budget. */
|
|
10
|
+
estimatedSlotTokens?: number[];
|
|
11
|
+
/** @deprecated Use estimatedSlotTokens instead. Average tokens per slot — used as fallback. */
|
|
9
12
|
estimatedPacketTokens?: number;
|
|
10
13
|
quotaStateEntry?: QuotaStateEntry | null;
|
|
11
14
|
hostConcurrencyLimit?: HostConcurrencyLimit | null;
|
|
15
|
+
quotaSourceSnapshot?: QuotaUsageSnapshot | null;
|
|
12
16
|
}
|
|
13
17
|
export declare function scheduleWave(options: ScheduleWaveOptions): WaveSchedule;
|
|
14
18
|
/** Build the state key used for indexing quota-state.json entries. */
|
package/dist/quota/scheduler.js
CHANGED
|
@@ -1,7 +1,20 @@
|
|
|
1
1
|
import { classifyProvider, resolveLimits } from "./limits.js";
|
|
2
|
-
import { computeMaxSafeConcurrency } from "./state.js";
|
|
2
|
+
import { computeMaxSafeConcurrency, computeRampUpConcurrency } from "./state.js";
|
|
3
|
+
/**
 * Add up the first `n` entries of `sorted`.
 *
 * The caller decides the ordering; this function only walks the array from
 * the front. When `n` exceeds the array length every entry is summed, and
 * when `n` is zero or negative the result is 0.
 *
 * @param sorted Array of numbers.
 * @param n How many leading entries to include.
 * @returns The sum of the included entries.
 */
function sumTopN(sorted, n) {
    const limit = Math.min(n, sorted.length);
    let total = 0;
    let index = 0;
    while (index < limit) {
        total += sorted[index];
        index += 1;
    }
    return total;
}
|
|
3
9
|
export function scheduleWave(options) {
|
|
4
|
-
const { providerName, sessionConfig, hostModel, requestedConcurrency, estimatedPacketTokens = 0, quotaStateEntry = null, hostConcurrencyLimit = null, } = options;
|
|
10
|
+
const { providerName, sessionConfig, hostModel, requestedConcurrency, estimatedSlotTokens, estimatedPacketTokens = 0, quotaStateEntry = null, hostConcurrencyLimit = null, quotaSourceSnapshot = null, } = options;
|
|
11
|
+
// Descending sort so sumTopN picks the largest slots
|
|
12
|
+
const slotsSorted = estimatedSlotTokens
|
|
13
|
+
? [...estimatedSlotTokens].sort((a, b) => b - a)
|
|
14
|
+
: null;
|
|
15
|
+
const avgTokens = slotsSorted && slotsSorted.length > 0
|
|
16
|
+
? Math.floor(slotsSorted.reduce((a, b) => a + b, 0) / slotsSorted.length)
|
|
17
|
+
: estimatedPacketTokens;
|
|
5
18
|
const quota = sessionConfig.quota ?? {};
|
|
6
19
|
const applyHostConcurrencyLimit = (waveSize) => {
|
|
7
20
|
if (hostConcurrencyLimit === null)
|
|
@@ -19,7 +32,7 @@ export function scheduleWave(options) {
|
|
|
19
32
|
};
|
|
20
33
|
return {
|
|
21
34
|
wave_size: waveSize,
|
|
22
|
-
estimated_wave_tokens: waveSize *
|
|
35
|
+
estimated_wave_tokens: slotsSorted ? sumTopN(slotsSorted, waveSize) : waveSize * avgTokens,
|
|
23
36
|
cooldown_until: null,
|
|
24
37
|
confidence: "high",
|
|
25
38
|
source: "default",
|
|
@@ -48,12 +61,25 @@ export function scheduleWave(options) {
|
|
|
48
61
|
waveSize = Math.min(waveSize, rpmCap);
|
|
49
62
|
}
|
|
50
63
|
// Cap by input tokens-per-minute
|
|
51
|
-
if (limits.input_tokens_per_minute != null &&
|
|
52
|
-
const
|
|
53
|
-
|
|
64
|
+
if (limits.input_tokens_per_minute != null && avgTokens > 0) {
|
|
65
|
+
const tpmBudget = limits.input_tokens_per_minute * safetyMargin;
|
|
66
|
+
if (slotsSorted && slotsSorted.length > 0) {
|
|
67
|
+
let candidateSize = waveSize;
|
|
68
|
+
while (candidateSize > 1 && sumTopN(slotsSorted, candidateSize) > tpmBudget) {
|
|
69
|
+
candidateSize--;
|
|
70
|
+
}
|
|
71
|
+
waveSize = Math.max(1, candidateSize);
|
|
72
|
+
}
|
|
73
|
+
else {
|
|
74
|
+
const tpmCap = Math.max(1, Math.floor(tpmBudget / avgTokens));
|
|
75
|
+
waveSize = Math.min(waveSize, tpmCap);
|
|
76
|
+
}
|
|
54
77
|
}
|
|
55
78
|
if (quotaStateEntry) {
|
|
56
|
-
const
|
|
79
|
+
const rampUp = quota.ramp_up_enabled !== false;
|
|
80
|
+
const learnedCap = rampUp
|
|
81
|
+
? computeRampUpConcurrency(quotaStateEntry, halfLifeHours)
|
|
82
|
+
: computeMaxSafeConcurrency(quotaStateEntry, halfLifeHours);
|
|
57
83
|
waveSize = Math.min(waveSize, learnedCap);
|
|
58
84
|
}
|
|
59
85
|
else {
|
|
@@ -61,22 +87,38 @@ export function scheduleWave(options) {
|
|
|
61
87
|
const fallbackCap = providerType === "local"
|
|
62
88
|
? quota.unknown_local_concurrency
|
|
63
89
|
: (quota.unknown_hosted_concurrency ?? 1);
|
|
64
|
-
if (
|
|
90
|
+
if (fallbackCap === "unlimited") {
|
|
91
|
+
// no cap — "unlimited" intentionally skips clamping
|
|
92
|
+
}
|
|
93
|
+
else if (typeof fallbackCap === "number" && Number.isFinite(fallbackCap)) {
|
|
65
94
|
waveSize = Math.min(waveSize, Math.max(1, Math.floor(fallbackCap)));
|
|
66
95
|
}
|
|
67
96
|
}
|
|
68
97
|
}
|
|
98
|
+
// Apply real-time quota source data if available
|
|
99
|
+
if (quotaSourceSnapshot && !cooldownUntil) {
|
|
100
|
+
if (quotaSourceSnapshot.remaining_pct != null && quotaSourceSnapshot.remaining_pct < 0.1) {
|
|
101
|
+
waveSize = 1;
|
|
102
|
+
if (quotaSourceSnapshot.reset_at) {
|
|
103
|
+
cooldownUntil = quotaSourceSnapshot.reset_at;
|
|
104
|
+
}
|
|
105
|
+
}
|
|
106
|
+
else if (quotaSourceSnapshot.remaining_pct != null && quotaSourceSnapshot.remaining_pct < 0.3) {
|
|
107
|
+
waveSize = Math.min(waveSize, Math.max(1, Math.floor(waveSize * 0.5)));
|
|
108
|
+
}
|
|
109
|
+
}
|
|
69
110
|
waveSize = applyHostConcurrencyLimit(waveSize);
|
|
70
111
|
waveSize = Math.max(1, waveSize);
|
|
71
112
|
return {
|
|
72
113
|
wave_size: waveSize,
|
|
73
|
-
estimated_wave_tokens: waveSize *
|
|
114
|
+
estimated_wave_tokens: slotsSorted ? sumTopN(slotsSorted, waveSize) : waveSize * avgTokens,
|
|
74
115
|
cooldown_until: cooldownUntil,
|
|
75
116
|
confidence,
|
|
76
117
|
source,
|
|
77
118
|
resolved_limits: limits,
|
|
78
119
|
host_concurrency_limit: hostConcurrencyLimit,
|
|
79
120
|
model: hostModel,
|
|
121
|
+
quota_source_snapshot: quotaSourceSnapshot,
|
|
80
122
|
};
|
|
81
123
|
}
|
|
82
124
|
/** Build the state key used for indexing quota-state.json entries. */
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
export interface SlidingWindowResult<T> {
|
|
2
|
+
results: PromiseSettledResult<T>[];
|
|
3
|
+
}
|
|
4
|
+
export declare function runSlidingWindow<T>(tasks: Array<() => Promise<T>>, concurrency: number, onComplete?: (index: number, result: PromiseSettledResult<T>) => void): Promise<SlidingWindowResult<T>>;
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
/**
 * Run task factories with a bounded number in flight, starting the next
 * task as soon as any running one settles.
 *
 * Tasks are started in index order. Each outcome is stored at the task's
 * own index in `results` using the same `{ status, value | reason }` shape
 * as `Promise.allSettled`, so a failing task never rejects the returned
 * promise.
 *
 * A `concurrency` that is zero, negative or NaN is treated as 1. Without
 * that clamp no worker would start and the function would resolve with an
 * array of empty slots without having run anything.
 *
 * Workers pull indices in a loop rather than by chained recursion, so the
 * number of pending promises stays proportional to the worker count rather
 * than to the number of tasks.
 *
 * @param tasks Array of zero-argument functions, each returning a promise.
 * @param concurrency Maximum number of tasks in flight at once.
 * @param onComplete Optional callback invoked with `(index, result)` right
 *   after each task settles. An exception thrown by the callback rejects
 *   the returned promise.
 * @returns `{ results }` with one settled result per task, in task order.
 */
export async function runSlidingWindow(tasks, concurrency, onComplete) {
    const results = new Array(tasks.length);
    let nextIndex = 0;
    async function worker() {
        // nextIndex is read and incremented synchronously, so no two workers claim the same task.
        while (nextIndex < tasks.length) {
            const index = nextIndex++;
            let result;
            try {
                const value = await tasks[index]();
                result = { status: "fulfilled", value };
            }
            catch (reason) {
                result = { status: "rejected", reason };
            }
            results[index] = result;
            onComplete?.(index, result);
        }
    }
    // `concurrency >= 1` is false for 0, negatives and NaN, which all fall back to 1.
    const limit = concurrency >= 1 ? concurrency : 1;
    const workerCount = Math.min(limit, tasks.length);
    const workers = [];
    for (let i = 0; i < workerCount; i++) {
        workers.push(worker());
    }
    await Promise.all(workers);
    return { results };
}
|
package/dist/quota/state.d.ts
CHANGED
|
@@ -9,4 +9,7 @@ export declare function writeQuotaState(state: QuotaState): Promise<void>;
|
|
|
9
9
|
* exceeds failure evidence, with a minimum of 1.
|
|
10
10
|
*/
|
|
11
11
|
export declare function computeMaxSafeConcurrency(entry: QuotaStateEntry, halfLifeHours: number, maxToCheck?: number): number;
|
|
12
|
+
export declare function computeRampUpConcurrency(entry: QuotaStateEntry, halfLifeHours: number, maxToCheck?: number): number;
|
|
13
|
+
export declare function computeBackoffCooldownMs(consecutive429Count: number): number;
|
|
14
|
+
export declare function computeBackoffFailureWeight(consecutive429Count: number): number;
|
|
12
15
|
export declare function recordWaveOutcome(providerModelKey: string, outcome: ObservedWaveOutcome, halfLifeHours: number): Promise<void>;
|