auditor-lambda 0.3.33 → 0.3.34
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +27 -30
- package/dist/orchestrator/reviewPackets.d.ts +3 -0
- package/dist/orchestrator/reviewPackets.js +13 -2
- package/dist/quota/compositeQuotaSource.d.ts +7 -0
- package/dist/quota/compositeQuotaSource.js +20 -0
- package/dist/quota/errorParsers/claudeCodeErrorParser.d.ts +6 -0
- package/dist/quota/errorParsers/claudeCodeErrorParser.js +39 -0
- package/dist/quota/errorParsers/genericErrorParser.d.ts +9 -0
- package/dist/quota/errorParsers/genericErrorParser.js +7 -0
- package/dist/quota/errorParsers/index.d.ts +5 -0
- package/dist/quota/errorParsers/index.js +12 -0
- package/dist/quota/errorParsing.d.ts +7 -0
- package/dist/quota/errorParsing.js +69 -0
- package/dist/quota/fileLock.d.ts +6 -0
- package/dist/quota/fileLock.js +64 -0
- package/dist/quota/index.d.ts +11 -1
- package/dist/quota/index.js +7 -1
- package/dist/quota/learnedQuotaSource.d.ts +7 -0
- package/dist/quota/learnedQuotaSource.js +25 -0
- package/dist/quota/probe.d.ts +1 -4
- package/dist/quota/probe.js +1 -4
- package/dist/quota/quotaSource.d.ts +12 -0
- package/dist/quota/quotaSource.js +1 -0
- package/dist/quota/scheduler.d.ts +5 -1
- package/dist/quota/scheduler.js +51 -9
- package/dist/quota/slidingWindow.d.ts +4 -0
- package/dist/quota/slidingWindow.js +28 -0
- package/dist/quota/state.d.ts +3 -0
- package/dist/quota/state.js +57 -14
- package/dist/quota/types.d.ts +11 -2
- package/dist/types/sessionConfig.d.ts +2 -0
- package/package.json +1 -1
- package/schemas/dispatch_quota.schema.json +23 -2
package/dist/cli.js
CHANGED
|
@@ -28,11 +28,11 @@ import { buildAuditCodeHandoff, writeAuditCodeHandoffArtifacts, } from "./superv
|
|
|
28
28
|
import { getSessionConfigPath, loadSessionConfig, readSessionConfigFile, } from "./supervisor/sessionConfig.js";
|
|
29
29
|
import { clearDispatchFiles, buildRunId, ensureSupervisorDirs, getRunPaths, writeDispatchBatchFiles, writeWorkerTaskFiles, } from "./io/runArtifacts.js";
|
|
30
30
|
import { renderWorkerPrompt } from "./prompts/renderWorkerPrompt.js";
|
|
31
|
-
import { buildReviewPackets, orderTasksForPacketReview, } from "./orchestrator/reviewPackets.js";
|
|
31
|
+
import { buildReviewPackets, orderTasksForPacketReview, estimateTaskGroupTokens, } from "./orchestrator/reviewPackets.js";
|
|
32
32
|
import { buildFileAnchorSummary, } from "./orchestrator/fileAnchors.js";
|
|
33
33
|
import { LOCAL_SUBPROCESS_PROVIDER_NAME } from "./providers/constants.js";
|
|
34
34
|
import { runAuditCodeMcpServer } from "./mcp/server.js";
|
|
35
|
-
import { scheduleWave, buildProviderModelKey, readQuotaState, recordWaveOutcome, resolveLimits, resolveHostActiveSubagentLimit, probeProvider, computeMaxSafeConcurrency, getQuotaStatePath, } from "./quota/index.js";
|
|
35
|
+
import { scheduleWave, buildProviderModelKey, readQuotaState, recordWaveOutcome, resolveLimits, resolveHostActiveSubagentLimit, probeProvider, computeMaxSafeConcurrency, getQuotaStatePath, detectRateLimitError, computeCooldownUntil, runSlidingWindow, LearnedQuotaSource, CompositeQuotaSource, } from "./quota/index.js";
|
|
36
36
|
const packageRoot = resolve(dirname(fileURLToPath(import.meta.url)), "..");
|
|
37
37
|
const ADVANCE_AUDIT_CONTRACT_VERSION = "audit-code/v1alpha1";
|
|
38
38
|
const WORKER_RESULT_CONTRACT_VERSION = "audit-code-worker-result/v1alpha1";
|
|
@@ -101,7 +101,7 @@ export function resolveHostDispatchCapability(options) {
|
|
|
101
101
|
if (options.sessionConfig.host_can_dispatch_subagents !== undefined) {
|
|
102
102
|
return options.sessionConfig.host_can_dispatch_subagents;
|
|
103
103
|
}
|
|
104
|
-
return optionalBooleanEnv((options.env ?? process.env).AUDIT_CODE_HOST_CAN_DISPATCH) ??
|
|
104
|
+
return optionalBooleanEnv((options.env ?? process.env).AUDIT_CODE_HOST_CAN_DISPATCH) ?? true;
|
|
105
105
|
}
|
|
106
106
|
function toBase64Url(value) {
|
|
107
107
|
return Buffer.from(value, "utf8").toString("base64url");
|
|
@@ -228,18 +228,6 @@ function getQuotaProbeMode(argv, sessionConfig) {
|
|
|
228
228
|
return raw;
|
|
229
229
|
return "auto";
|
|
230
230
|
}
|
|
231
|
-
function detectRateLimitError(errorText) {
|
|
232
|
-
const lower = errorText.toLowerCase();
|
|
233
|
-
return lower.includes("429") || lower.includes("rate limit") || lower.includes("rate_limit");
|
|
234
|
-
}
|
|
235
|
-
function defaultCooldownUntil(resetAtHeader) {
|
|
236
|
-
if (resetAtHeader) {
|
|
237
|
-
const t = new Date(resetAtHeader).getTime();
|
|
238
|
-
if (!Number.isNaN(t))
|
|
239
|
-
return new Date(t).toISOString();
|
|
240
|
-
}
|
|
241
|
-
return new Date(Date.now() + 60_000).toISOString();
|
|
242
|
-
}
|
|
243
231
|
function resolveRunProviderName(argv, sessionConfig) {
|
|
244
232
|
return resolveFreshSessionProviderName(getExplicitProvider(argv), sessionConfig);
|
|
245
233
|
}
|
|
@@ -1422,11 +1410,15 @@ async function cmdRunToCompletion(argv) {
|
|
|
1422
1410
|
const quotaState = await readQuotaState();
|
|
1423
1411
|
const providerModelKey = buildProviderModelKey(provider.name, hostModel);
|
|
1424
1412
|
const quotaStateEntry = quotaState.entries[providerModelKey] ?? null;
|
|
1413
|
+
const allCandidateTasks = buildPendingAuditTasks(bundle);
|
|
1414
|
+
const candidateGroups = chunkArray(allCandidateTasks.slice(0, parallelWorkers * agentBatchSize), agentBatchSize);
|
|
1415
|
+
const slotTokenEstimates = candidateGroups.map((g) => estimateTaskGroupTokens(g));
|
|
1425
1416
|
const waveSchedule = scheduleWave({
|
|
1426
1417
|
providerName: resolveFreshSessionProviderName(getExplicitProvider(argv), sessionConfig),
|
|
1427
1418
|
sessionConfig,
|
|
1428
1419
|
hostModel,
|
|
1429
1420
|
requestedConcurrency: parallelWorkers,
|
|
1421
|
+
estimatedSlotTokens: slotTokenEstimates,
|
|
1430
1422
|
quotaStateEntry,
|
|
1431
1423
|
});
|
|
1432
1424
|
const waveSize = waveSchedule.wave_size;
|
|
@@ -1438,8 +1430,7 @@ async function cmdRunToCompletion(argv) {
|
|
|
1438
1430
|
await new Promise((r) => setTimeout(r, cappedWait));
|
|
1439
1431
|
}
|
|
1440
1432
|
}
|
|
1441
|
-
const
|
|
1442
|
-
const taskGroups = chunkArray(allPendingTasks.slice(0, waveSize * agentBatchSize), agentBatchSize);
|
|
1433
|
+
const taskGroups = candidateGroups.slice(0, waveSize);
|
|
1443
1434
|
const workerSlots = [];
|
|
1444
1435
|
for (const rawGroup of taskGroups) {
|
|
1445
1436
|
const group = await addFileLineCountHints(root, rawGroup);
|
|
@@ -1478,7 +1469,7 @@ async function cmdRunToCompletion(argv) {
|
|
|
1478
1469
|
pending_audit_tasks_path: slot.pendingTasksPath,
|
|
1479
1470
|
})), workerSlots.flatMap((slot) => slot.group));
|
|
1480
1471
|
const parallelStartedAt = new Date().toISOString();
|
|
1481
|
-
const launchResults = await
|
|
1472
|
+
const { results: launchResults } = await runSlidingWindow(workerSlots.map((slot) => () => provider.launch({
|
|
1482
1473
|
repoRoot: root,
|
|
1483
1474
|
runId: slot.runId,
|
|
1484
1475
|
obligationId,
|
|
@@ -1489,7 +1480,7 @@ async function cmdRunToCompletion(argv) {
|
|
|
1489
1480
|
stderrPath: slot.paths.stderrPath,
|
|
1490
1481
|
uiMode,
|
|
1491
1482
|
timeoutMs,
|
|
1492
|
-
})));
|
|
1483
|
+
})), waveSize);
|
|
1493
1484
|
const launchErrorsByRunId = new Map();
|
|
1494
1485
|
for (let index = 0; index < launchResults.length; index++) {
|
|
1495
1486
|
const outcome = launchResults[index];
|
|
@@ -1601,12 +1592,14 @@ async function cmdRunToCompletion(argv) {
|
|
|
1601
1592
|
}
|
|
1602
1593
|
// Record outcome for adaptive learning (best-effort — never blocks dispatch)
|
|
1603
1594
|
{
|
|
1604
|
-
const
|
|
1595
|
+
const rateLimitResults = batchErrors.map((e) => detectRateLimitError(e));
|
|
1596
|
+
const rateLimitHit = rateLimitResults.find((r) => r.isRateLimited);
|
|
1597
|
+
const retryAfterMs = rateLimitHit?.retryAfterMs ?? null;
|
|
1605
1598
|
await recordWaveOutcome(providerModelKey, {
|
|
1606
1599
|
concurrency: workerSlots.length,
|
|
1607
|
-
estimated_tokens:
|
|
1608
|
-
outcome:
|
|
1609
|
-
cooldown_until:
|
|
1600
|
+
estimated_tokens: slotTokenEstimates.slice(0, workerSlots.length).reduce((a, b) => a + b, 0),
|
|
1601
|
+
outcome: rateLimitHit ? "rate_limited" : batchErrors.length > 0 ? "timeout" : "success",
|
|
1602
|
+
cooldown_until: rateLimitHit ? computeCooldownUntil(retryAfterMs) : null,
|
|
1610
1603
|
}, sessionConfig.quota?.empirical_half_life_hours ?? 24).catch(() => undefined);
|
|
1611
1604
|
}
|
|
1612
1605
|
if (batchErrors.length > 0) {
|
|
@@ -2455,12 +2448,10 @@ async function prepareDispatchArtifacts(params) {
|
|
|
2455
2448
|
});
|
|
2456
2449
|
// Compute and write dispatch-quota.json
|
|
2457
2450
|
const hostModel = params.hostModel ?? null;
|
|
2458
|
-
const
|
|
2459
|
-
? Math.floor(plan.reduce((s, p) => s + p.complexity.estimated_tokens, 0) / plan.length)
|
|
2460
|
-
: 0;
|
|
2451
|
+
const perPacketTokens = plan.map((p) => p.complexity.estimated_tokens);
|
|
2461
2452
|
const quotaProviderName = resolveFreshSessionProviderName(undefined, sessionConfig);
|
|
2462
2453
|
const quotaProviderKey = buildProviderModelKey(quotaProviderName, hostModel);
|
|
2463
|
-
const quotaState = await readQuotaState().catch(() => ({ version:
|
|
2454
|
+
const quotaState = await readQuotaState().catch(() => ({ version: 2, entries: {} }));
|
|
2464
2455
|
const quotaStateEntry = quotaState.entries[quotaProviderKey] ?? null;
|
|
2465
2456
|
const hostConcurrencyLimit = resolveHostActiveSubagentLimit({
|
|
2466
2457
|
explicitLimit: params.hostActiveSubagentLimit,
|
|
@@ -2471,12 +2462,12 @@ async function prepareDispatchArtifacts(params) {
|
|
|
2471
2462
|
sessionConfig,
|
|
2472
2463
|
hostModel,
|
|
2473
2464
|
requestedConcurrency: sessionConfig.parallel_workers ?? plan.length,
|
|
2474
|
-
|
|
2465
|
+
estimatedSlotTokens: perPacketTokens,
|
|
2475
2466
|
quotaStateEntry,
|
|
2476
2467
|
hostConcurrencyLimit,
|
|
2477
2468
|
});
|
|
2478
2469
|
const dispatchQuota = {
|
|
2479
|
-
contract_version: "audit-code-dispatch-quota/
|
|
2470
|
+
contract_version: "audit-code-dispatch-quota/v1alpha2",
|
|
2480
2471
|
run_id: runId,
|
|
2481
2472
|
model: hostModel,
|
|
2482
2473
|
resolved_limits: waveSchedule.resolved_limits,
|
|
@@ -2486,6 +2477,8 @@ async function prepareDispatchArtifacts(params) {
|
|
|
2486
2477
|
wave_size: waveSchedule.wave_size,
|
|
2487
2478
|
estimated_wave_tokens: waveSchedule.estimated_wave_tokens,
|
|
2488
2479
|
cooldown_until: waveSchedule.cooldown_until,
|
|
2480
|
+
quota_source_snapshot: waveSchedule.quota_source_snapshot ?? null,
|
|
2481
|
+
backoff_state: null,
|
|
2489
2482
|
};
|
|
2490
2483
|
const dispatchQuotaPath = join(runDir, "dispatch-quota.json");
|
|
2491
2484
|
await writeJsonFile(dispatchQuotaPath, dispatchQuota);
|
|
@@ -3212,13 +3205,15 @@ async function cmdQuota(argv) {
|
|
|
3212
3205
|
const providerModelKey = buildProviderModelKey(providerName, hostModel);
|
|
3213
3206
|
const { limits, source, confidence } = resolveLimits({ providerName, sessionConfig, hostModel });
|
|
3214
3207
|
const probeResult = await probeProvider(providerName, probeMode);
|
|
3215
|
-
const quotaState = await readQuotaState().catch(() => ({ version:
|
|
3208
|
+
const quotaState = await readQuotaState().catch(() => ({ version: 2, entries: {} }));
|
|
3216
3209
|
const quotaStateEntry = quotaState.entries[providerModelKey] ?? null;
|
|
3217
3210
|
const halfLifeHours = sessionConfig.quota?.empirical_half_life_hours ?? 24;
|
|
3218
3211
|
const hostConcurrencyLimit = resolveHostActiveSubagentLimit({
|
|
3219
3212
|
explicitLimit: getHostMaxActiveSubagents(argv),
|
|
3220
3213
|
sessionConfig,
|
|
3221
3214
|
});
|
|
3215
|
+
const quotaSource = new CompositeQuotaSource([new LearnedQuotaSource(halfLifeHours)]);
|
|
3216
|
+
const quotaSourceSnapshot = await quotaSource.queryCurrentUsage(providerModelKey).catch(() => null);
|
|
3222
3217
|
const waveSchedule = scheduleWave({
|
|
3223
3218
|
providerName,
|
|
3224
3219
|
sessionConfig,
|
|
@@ -3226,6 +3221,7 @@ async function cmdQuota(argv) {
|
|
|
3226
3221
|
requestedConcurrency: sessionConfig.parallel_workers ?? 1,
|
|
3227
3222
|
quotaStateEntry,
|
|
3228
3223
|
hostConcurrencyLimit,
|
|
3224
|
+
quotaSourceSnapshot,
|
|
3229
3225
|
});
|
|
3230
3226
|
console.log(JSON.stringify({
|
|
3231
3227
|
provider: providerName,
|
|
@@ -3243,6 +3239,7 @@ async function cmdQuota(argv) {
|
|
|
3243
3239
|
last_429_at: quotaStateEntry.last_429_at,
|
|
3244
3240
|
}
|
|
3245
3241
|
: null,
|
|
3242
|
+
quota_source_snapshot: quotaSourceSnapshot,
|
|
3246
3243
|
wave_schedule: waveSchedule,
|
|
3247
3244
|
quota_state_path: getQuotaStatePath(),
|
|
3248
3245
|
}, null, 2));
|
|
@@ -1,6 +1,9 @@
|
|
|
1
1
|
import type { AuditTask } from "../types.js";
|
|
2
2
|
import type { AuditPlanMetrics, ReviewPacket } from "../types/reviewPlanning.js";
|
|
3
3
|
import type { GraphBundle } from "../types/graph.js";
|
|
4
|
+
export declare const ESTIMATED_TOKENS_PER_LINE = 4;
|
|
5
|
+
export declare const ESTIMATED_PACKET_PROMPT_TOKENS = 900;
|
|
6
|
+
export declare function estimateTaskGroupTokens(tasks: AuditTask[]): number;
|
|
4
7
|
export interface BuildReviewPacketOptions {
|
|
5
8
|
graphBundle?: GraphBundle;
|
|
6
9
|
lineIndex?: Record<string, number>;
|
|
@@ -2,8 +2,19 @@ import { createHash } from "node:crypto";
|
|
|
2
2
|
import { LENS_ORDER } from "./unitBuilder.js";
|
|
3
3
|
const DEFAULT_MAX_TASKS_PER_PACKET = 0;
|
|
4
4
|
const DEFAULT_TARGET_PACKET_LINES = 8000;
|
|
5
|
-
const ESTIMATED_TOKENS_PER_LINE = 4;
|
|
6
|
-
const ESTIMATED_PACKET_PROMPT_TOKENS = 900;
|
|
5
|
+
/** Multiplier applied to every counted source line by estimateTaskGroupTokens. */
export const ESTIMATED_TOKENS_PER_LINE = 4;
/** Flat amount added once per task group by estimateTaskGroupTokens. */
export const ESTIMATED_PACKET_PROMPT_TOKENS = 900;
/**
 * Estimate the token cost of one group of audit tasks.
 *
 * Every value found in a task's `file_line_counts` map is added up; tasks
 * without that map contribute no lines. The estimate is the flat prompt
 * overhead plus the line total times the per-line multiplier.
 *
 * @param {Array<{file_line_counts?: Record<string, number>}>} tasks - Tasks in the group.
 * @returns {number} Estimated tokens for the whole group.
 */
export function estimateTaskGroupTokens(tasks) {
    let lineTotal = 0;
    for (const { file_line_counts: counts } of tasks) {
        if (!counts) {
            continue;
        }
        lineTotal = Object.values(counts).reduce((sum, lines) => sum + lines, lineTotal);
    }
    return ESTIMATED_PACKET_PROMPT_TOKENS + lineTotal * ESTIMATED_TOKENS_PER_LINE;
}
|
|
7
18
|
const PACKET_EXPANSION_MIN_CONFIDENCE = 0.65;
|
|
8
19
|
const HIGH_FAN_DEGREE_THRESHOLD = 12;
|
|
9
20
|
const HIGH_FAN_EXPANSION_CONFIDENCE = 0.99;
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import type { QuotaSource, QuotaUsageSnapshot } from "./quotaSource.js";
|
|
2
|
+
export declare class CompositeQuotaSource implements QuotaSource {
|
|
3
|
+
readonly name = "composite";
|
|
4
|
+
private sources;
|
|
5
|
+
constructor(sources: QuotaSource[]);
|
|
6
|
+
queryCurrentUsage(providerModelKey: string): Promise<QuotaUsageSnapshot | null>;
|
|
7
|
+
}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
/**
 * Quota source that delegates to an ordered list of other sources and
 * returns the first truthy snapshot any of them produces.
 */
export class CompositeQuotaSource {
    name = "composite";
    sources;
    /**
     * @param {Array<{queryCurrentUsage(providerModelKey: string): Promise<object|null>}>} sources
     *   Sources to consult, in priority order.
     */
    constructor(sources) {
        this.sources = sources;
    }
    /**
     * Ask each source in turn for a usage snapshot.
     *
     * A source that throws or rejects is skipped (best-effort lookup), as is
     * one that resolves to a falsy value.
     *
     * @param {string} providerModelKey - Key passed through to every source.
     * @returns {Promise<object|null>} First truthy snapshot, or null if none.
     */
    async queryCurrentUsage(providerModelKey) {
        for (const candidate of this.sources) {
            let usage = null;
            try {
                usage = await candidate.queryCurrentUsage(providerModelKey);
            }
            catch {
                // This source failed; fall through to the next one.
                usage = null;
            }
            if (usage) {
                return usage;
            }
        }
        return null;
    }
}
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
import type { RateLimitDetectionResult } from "../errorParsing.js";
|
|
2
|
+
import type { ErrorParser } from "./genericErrorParser.js";
|
|
3
|
+
export declare class ClaudeCodeErrorParser implements ErrorParser {
|
|
4
|
+
readonly name = "claude-code";
|
|
5
|
+
parse(text: string): RateLimitDetectionResult;
|
|
6
|
+
}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
/**
 * Rate-limit detector for line-delimited JSON log output.
 *
 * Each line that starts with "{" is parsed as JSON; the first object that
 * carries `status_code === 429`, `type === "rate_limit_error"`, or an
 * error-level message mentioning "rate limit" produces a positive result.
 */
export class ClaudeCodeErrorParser {
    name = "claude-code";
    /**
     * @param {string} text - Raw multi-line output to scan.
     * @returns {{isRateLimited: boolean, retryAfterMs: number|null, rawMatch: string|null}}
     *   Detection result; `retryAfterMs` is always a finite positive number or null.
     */
    parse(text) {
        // Accept numbers and numeric strings only, so the returned delay can
        // never be a string (which would concatenate instead of add later on).
        const toPositiveNumber = (raw) => {
            const value = typeof raw === "string" && raw.trim() !== "" ? Number(raw) : raw;
            return typeof value === "number" && Number.isFinite(value) && value > 0 ? value : null;
        };
        for (const line of text.split("\n")) {
            const trimmed = line.trim();
            if (!trimmed.startsWith("{")) {
                continue;
            }
            let obj;
            try {
                obj = JSON.parse(trimmed);
            }
            catch {
                // Not valid JSON, skip
                continue;
            }
            const level = obj["level"];
            const type = obj["type"];
            const message = obj["message"] ?? "";
            const statusCode = obj["status_code"];
            const isRateLimited = statusCode === 429 ||
                type === "rate_limit_error" ||
                (level === "error" && /\brate.?limit/i.test(message));
            if (!isRateLimited) {
                continue;
            }
            const explicitMs = toPositiveNumber(obj["retry_after_ms"]);
            const retryAfter = toPositive(obj["retry_after"]);
            let extractedMs = null;
            if (explicitMs !== null) {
                extractedMs = explicitMs;
            }
            else if (retryAfter !== null) {
                // Values below 600 are treated as seconds, anything larger as milliseconds.
                extractedMs = retryAfter < 600 ? retryAfter * 1000 : retryAfter;
            }
            return {
                isRateLimited: true,
                retryAfterMs: extractedMs,
                rawMatch: `claude-code-stderr:${statusCode ?? type ?? "rate_limit"}`,
            };
            function toPositive(raw) {
                return toPositiveNumber(raw);
            }
        }
        return { isRateLimited: false, retryAfterMs: null, rawMatch: null };
    }
}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import type { RateLimitDetectionResult } from "../errorParsing.js";
|
|
2
|
+
export interface ErrorParser {
|
|
3
|
+
readonly name: string;
|
|
4
|
+
parse(text: string): RateLimitDetectionResult;
|
|
5
|
+
}
|
|
6
|
+
export declare class GenericErrorParser implements ErrorParser {
|
|
7
|
+
readonly name = "generic";
|
|
8
|
+
parse(text: string): RateLimitDetectionResult;
|
|
9
|
+
}
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
export type { ErrorParser } from "./genericErrorParser.js";
|
|
2
|
+
export { GenericErrorParser } from "./genericErrorParser.js";
|
|
3
|
+
export { ClaudeCodeErrorParser } from "./claudeCodeErrorParser.js";
|
|
4
|
+
import type { ErrorParser } from "./genericErrorParser.js";
|
|
5
|
+
export declare function getErrorParserForProvider(providerName: string): ErrorParser;
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
export { GenericErrorParser } from "./genericErrorParser.js";
|
|
2
|
+
export { ClaudeCodeErrorParser } from "./claudeCodeErrorParser.js";
|
|
3
|
+
import { GenericErrorParser } from "./genericErrorParser.js";
|
|
4
|
+
import { ClaudeCodeErrorParser } from "./claudeCodeErrorParser.js";
|
|
5
|
+
// Provider name -> factory for that provider's dedicated parser.
// A Map is used so that only explicitly registered names match; a plain
// object would also resolve inherited keys such as "constructor" or "toString".
const PROVIDER_PARSERS = new Map([
    ["claude-code", () => new ClaudeCodeErrorParser()],
]);
// Shared fallback instance returned for every unregistered provider.
const genericParser = new GenericErrorParser();
/**
 * Pick the error parser for a provider.
 *
 * @param {string} providerName - Provider identifier to look up.
 * @returns {object} A fresh provider-specific parser when one is registered,
 *   otherwise the shared generic parser.
 */
export function getErrorParserForProvider(providerName) {
    const factory = PROVIDER_PARSERS.get(providerName);
    return factory ? factory() : genericParser;
}
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
export interface RateLimitDetectionResult {
|
|
2
|
+
isRateLimited: boolean;
|
|
3
|
+
retryAfterMs: number | null;
|
|
4
|
+
rawMatch: string | null;
|
|
5
|
+
}
|
|
6
|
+
export declare function detectRateLimitError(text: string): RateLimitDetectionResult;
|
|
7
|
+
export declare function computeCooldownUntil(retryAfterMs: number | null, defaultMs?: number): string;
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
/**
 * Case-insensitive signatures tried, in this order, against raw error text
 * by detectRateLimitError; the first one that matches wins.
 */
const RATE_LIMIT_PATTERNS = [
    /\b429\b/i, // a standalone "429" token
    /\btoo many requests\b/i,
    /\brate.?limit/i, // "rate limit", "rate_limit", "ratelimit", "rate-limited", ...
    /\boverloaded\b/i,
    /\bresource.?exhausted\b/i,
    /\bquota.?exceeded\b/i,
];
|
|
9
|
+
/**
 * Best-effort extraction of a JSON object embedded in free-form text.
 *
 * Parsing starts at the first "{". The remainder of the text is tried first;
 * if that fails and there is trailing non-JSON text after the last "}", the
 * span from the first "{" through the last "}" is tried as well, so payloads
 * such as `Error: {...} (retrying)` are still recognised.
 *
 * @param {string} text - Text that may contain a JSON object.
 * @returns {object|null} The parsed value, or null when nothing parses.
 */
function tryParseJson(text) {
    const jsonStart = text.indexOf("{");
    if (jsonStart === -1) {
        return null;
    }
    const candidates = [text.slice(jsonStart)];
    const jsonEnd = text.lastIndexOf("}");
    if (jsonEnd > jsonStart && jsonEnd < text.length - 1) {
        candidates.push(text.slice(jsonStart, jsonEnd + 1));
    }
    for (const candidate of candidates) {
        try {
            return JSON.parse(candidate);
        }
        catch {
            // Not parseable in this form; try the next candidate span.
        }
    }
    return null;
}
|
|
20
|
+
/**
 * Derive a retry delay in milliseconds from a parsed error payload.
 *
 * Sources are consulted in priority order, each with its own unit:
 *   1. `headers["retry-after"]` / `headers["Retry-After"]` — delay in seconds,
 *      or an HTTP-date, as the HTTP Retry-After header is defined;
 *   2. `retry_after` — unit is not declared, so values below 600 are treated
 *      as seconds and larger values as milliseconds;
 *   3. `retry_after_ms` — already milliseconds, used as-is.
 * A source holding an unusable value is skipped in favour of the next one.
 *
 * @param {Record<string, unknown>} obj - Parsed JSON error object.
 * @returns {number|null} Positive delay in milliseconds, or null if none found.
 */
function extractRetryAfterMs(obj) {
    // Numbers and numeric strings only; everything else (and <= 0) is rejected.
    const toPositiveNumber = (raw) => {
        const value = typeof raw === "string" && raw.trim() !== "" ? Number(raw) : raw;
        return typeof value === "number" && Number.isFinite(value) && value > 0 ? value : null;
    };
    const headers = obj["headers"];
    const headerValue = headers?.["retry-after"] ?? headers?.["Retry-After"];
    if (headerValue != null) {
        const seconds = toPositiveNumber(headerValue);
        if (seconds !== null) {
            return seconds * 1000;
        }
        if (typeof headerValue === "string") {
            // HTTP-date form: convert the absolute time into a delay from now.
            const deltaMs = Date.parse(headerValue) - Date.now();
            if (Number.isFinite(deltaMs) && deltaMs > 0) {
                return deltaMs;
            }
        }
    }
    const retryAfter = toPositiveNumber(obj["retry_after"]);
    if (retryAfter !== null) {
        // If the value looks like seconds (< 600), convert to ms
        return retryAfter < 600 ? retryAfter * 1000 : retryAfter;
    }
    return toPositiveNumber(obj["retry_after_ms"]);
}
|
|
34
|
+
/**
 * Structured rate-limit detection from a JSON payload embedded in `text`.
 *
 * The payload counts as rate limited when `status` is 429, or when either the
 * top-level `type` or the nested `error.type` equals "rate_limit_error".
 *
 * @param {string} text - Raw error text, handed to tryParseJson.
 * @returns {{isRateLimited: true, retryAfterMs: number|null, rawMatch: string}|null}
 *   A positive result, or null when there is no JSON or it is not a rate-limit
 *   error (callers then fall back to other detection).
 */
function detectFromJson(text) {
    const obj = tryParseJson(text);
    if (!obj) {
        return null;
    }
    const status = obj["status"];
    const type = obj["type"];
    const errorType = obj["error"]?.["type"];
    let rawMatch = null;
    if (status === 429) {
        rawMatch = "json:status=429";
    }
    else if (type === "rate_limit_error" || errorType === "rate_limit_error") {
        // Report the type that actually matched. For payloads shaped like
        // {type: "error", error: {type: "rate_limit_error"}} the outer type
        // is not the reason for the match and must not be echoed.
        rawMatch = "json:type=rate_limit_error";
    }
    if (rawMatch === null) {
        return null;
    }
    return {
        isRateLimited: true,
        retryAfterMs: extractRetryAfterMs(obj),
        rawMatch,
    };
}
|
|
53
|
+
/**
 * Decide whether an error message describes a rate-limit condition.
 *
 * A structured verdict from detectFromJson takes precedence. Otherwise the
 * text is tested against RATE_LIMIT_PATTERNS in order and the first matching
 * substring is reported; pattern matches never carry a retry delay.
 *
 * @param {string} text - Raw error text.
 * @returns {{isRateLimited: boolean, retryAfterMs: number|null, rawMatch: string|null}}
 */
export function detectRateLimitError(text) {
    const structured = detectFromJson(text);
    if (structured) {
        return structured;
    }
    const verdict = (isRateLimited, rawMatch) => ({ isRateLimited, retryAfterMs: null, rawMatch });
    for (let i = 0; i < RATE_LIMIT_PATTERNS.length; i += 1) {
        const hit = RATE_LIMIT_PATTERNS[i].exec(text);
        if (hit) {
            return verdict(true, hit[0]);
        }
    }
    return verdict(false, null);
}
|
|
65
|
+
// Cooldown applied when no usable retry delay is supplied.
const DEFAULT_COOLDOWN_MS = 60_000;
/**
 * Turn a retry delay into an absolute ISO-8601 timestamp.
 *
 * `retryAfterMs` is used when it is a finite positive number (numeric strings
 * are converted); otherwise `defaultMs` applies. Inputs that would yield a
 * time outside the representable Date range fall back to the built-in default
 * instead of making `toISOString()` throw.
 *
 * @param {number|null} retryAfterMs - Delay from now in milliseconds, or null.
 * @param {number} [defaultMs=DEFAULT_COOLDOWN_MS] - Delay used when `retryAfterMs` is unusable.
 * @returns {string} ISO-8601 timestamp at which the cooldown ends.
 */
export function computeCooldownUntil(retryAfterMs, defaultMs = DEFAULT_COOLDOWN_MS) {
    const requested = typeof retryAfterMs === "string" && retryAfterMs.trim() !== ""
        ? Number(retryAfterMs)
        : retryAfterMs;
    const hasUsableDelay = typeof requested === "number" && Number.isFinite(requested) && requested > 0;
    const fallbackMs = typeof defaultMs === "number" && Number.isFinite(defaultMs)
        ? defaultMs
        : DEFAULT_COOLDOWN_MS;
    const ms = hasUsableDelay ? requested : fallbackMs;
    let until = new Date(Date.now() + ms);
    if (Number.isNaN(until.getTime())) {
        // Delay too large for a Date; use the built-in default rather than throwing.
        until = new Date(Date.now() + DEFAULT_COOLDOWN_MS);
    }
    return until.toISOString();
}
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
export declare class FileLockTimeoutError extends Error {
|
|
2
|
+
constructor(lockPath: string);
|
|
3
|
+
}
|
|
4
|
+
export declare function acquireLock(lockPath: string, timeoutMs?: number): Promise<void>;
|
|
5
|
+
export declare function releaseLock(lockPath: string): Promise<void>;
|
|
6
|
+
export declare function withFileLock<T>(lockPath: string, fn: () => Promise<T>, timeoutMs?: number): Promise<T>;
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
import { open, unlink, stat } from "node:fs/promises";
|
|
2
|
+
// A lock file whose mtime is older than this many milliseconds is treated as stale.
const STALE_LOCK_MS = 30 * 1000;
// Pause, in milliseconds, between attempts to create the lock file.
const RETRY_INTERVAL_MS = 50;
// Default time budget, in milliseconds, for acquiring a lock.
const DEFAULT_TIMEOUT_MS = 10 * 1000;
|
|
5
|
+
/**
 * Raised when a lock file could not be acquired before the deadline.
 */
export class FileLockTimeoutError extends Error {
    name = "FileLockTimeoutError";
    /**
     * @param {string} lockPath - Path of the lock file that could not be acquired.
     */
    constructor(lockPath) {
        super(`Timed out acquiring lock: ${lockPath}`);
    }
}
|
|
11
|
+
/**
 * Report whether the lock file's last modification is older than STALE_LOCK_MS.
 *
 * @param {string} lockPath - Path of the lock file.
 * @returns {Promise<boolean>} true when the file exists and is stale; false
 *   when it is fresh or cannot be stat'ed (for example, already removed).
 */
async function isLockStale(lockPath) {
    let mtimeMs;
    try {
        ({ mtimeMs } = await stat(lockPath));
    }
    catch {
        return false;
    }
    const ageMs = Date.now() - mtimeMs;
    return ageMs > STALE_LOCK_MS;
}
|
|
20
|
+
/**
 * Acquire an exclusive lock by creating `lockPath` with the "wx" flag,
 * retrying until it succeeds or the time budget runs out.
 *
 * When the file already exists and isLockStale reports it as stale, the file
 * is removed and creation is retried immediately. Otherwise the call waits
 * RETRY_INTERVAL_MS between attempts.
 *
 * NOTE(review): the stale check and the unlink are separate steps, so two
 * contenders could presumably both judge the same file stale — confirm
 * whether callers rely on strict mutual exclusion across processes.
 *
 * @param {string} lockPath - Path of the lock file to create.
 * @param {number} [timeoutMs=DEFAULT_TIMEOUT_MS] - Maximum time to keep trying.
 * @returns {Promise<void>} Resolves once the lock file has been created.
 * @throws {FileLockTimeoutError} When the deadline passes without acquiring.
 * @throws Any error from creating the file other than EEXIST.
 */
export async function acquireLock(lockPath, timeoutMs = DEFAULT_TIMEOUT_MS) {
    const expiresAt = Date.now() + timeoutMs;
    const pause = () => new Promise((wake) => setTimeout(wake, RETRY_INTERVAL_MS));
    for (;;) {
        try {
            const handle = await open(lockPath, "wx");
            await handle.close();
            return;
        }
        catch (openError) {
            // Only "already exists" means someone else holds the lock.
            if (openError.code !== "EEXIST") {
                throw openError;
            }
        }
        const stale = await isLockStale(lockPath);
        if (stale) {
            // A failed removal is fine: another process may have already cleaned it up.
            const removed = await unlink(lockPath).then(() => true, () => false);
            if (removed) {
                continue;
            }
        }
        if (Date.now() >= expiresAt) {
            throw new FileLockTimeoutError(lockPath);
        }
        await pause();
    }
}
|
|
47
|
+
/**
 * Release a lock by deleting its lock file.
 *
 * A lock file that is already gone (ENOENT) is not an error; any other
 * failure from the removal is propagated.
 *
 * @param {string} lockPath - Path of the lock file to remove.
 * @returns {Promise<void>}
 */
export async function releaseLock(lockPath) {
    await unlink(lockPath).catch((removeError) => {
        if (removeError.code !== "ENOENT") {
            throw removeError;
        }
    });
}
|
|
56
|
+
/**
 * Run `fn` while holding the lock at `lockPath`, releasing it afterwards.
 *
 * If `fn` fails, the lock is still released and the error from `fn` is what
 * the caller sees — a secondary failure while releasing does not replace it.
 * If `fn` succeeds but the release fails, the release error is propagated.
 *
 * @template T
 * @param {string} lockPath - Path of the lock file.
 * @param {() => Promise<T>} fn - Work to perform under the lock.
 * @param {number} [timeoutMs] - Passed through to acquireLock.
 * @returns {Promise<T>} Whatever `fn` resolves to.
 * @throws Errors from acquireLock, from `fn`, or from releaseLock (success path only).
 */
export async function withFileLock(lockPath, fn, timeoutMs) {
    await acquireLock(lockPath, timeoutMs);
    let result;
    try {
        result = await fn();
    }
    catch (workError) {
        try {
            await releaseLock(lockPath);
        }
        catch {
            // The failure inside fn is the primary error; do not let a
            // cleanup failure hide it.
        }
        throw workError;
    }
    await releaseLock(lockPath);
    return result;
}
|
package/dist/quota/index.d.ts
CHANGED
|
@@ -1,9 +1,19 @@
|
|
|
1
1
|
export { resolveLimits, lookupKnownModel, classifyProvider } from "./limits.js";
|
|
2
2
|
export type { LimitResolutionResult, ResolveLimitsOptions, ProviderType } from "./limits.js";
|
|
3
3
|
export { detectHostActiveSubagentLimit, resolveHostActiveSubagentLimit, } from "./hostLimits.js";
|
|
4
|
-
export { readQuotaState, writeQuotaState, computeMaxSafeConcurrency, recordWaveOutcome, getQuotaStatePath, decayWeight, applyDecayToEntry, } from "./state.js";
|
|
4
|
+
export { readQuotaState, writeQuotaState, computeMaxSafeConcurrency, recordWaveOutcome, getQuotaStatePath, decayWeight, applyDecayToEntry, computeBackoffCooldownMs, computeBackoffFailureWeight, computeRampUpConcurrency, } from "./state.js";
|
|
5
5
|
export { scheduleWave, buildProviderModelKey } from "./scheduler.js";
|
|
6
6
|
export type { ScheduleWaveOptions } from "./scheduler.js";
|
|
7
|
+
export { detectRateLimitError, computeCooldownUntil } from "./errorParsing.js";
|
|
8
|
+
export { acquireLock, releaseLock, withFileLock, FileLockTimeoutError } from "./fileLock.js";
|
|
9
|
+
export { runSlidingWindow } from "./slidingWindow.js";
|
|
10
|
+
export type { SlidingWindowResult } from "./slidingWindow.js";
|
|
11
|
+
export type { RateLimitDetectionResult } from "./errorParsing.js";
|
|
7
12
|
export { probeProvider } from "./probe.js";
|
|
8
13
|
export type { ProbeResult } from "./probe.js";
|
|
14
|
+
export type { QuotaSource, QuotaUsageSnapshot } from "./quotaSource.js";
|
|
15
|
+
export type { ErrorParser } from "./errorParsers/index.js";
|
|
16
|
+
export { GenericErrorParser, ClaudeCodeErrorParser, getErrorParserForProvider } from "./errorParsers/index.js";
|
|
17
|
+
export { LearnedQuotaSource } from "./learnedQuotaSource.js";
|
|
18
|
+
export { CompositeQuotaSource } from "./compositeQuotaSource.js";
|
|
9
19
|
export type { ResolvedLimits, LimitSource, LimitConfidence, HostConcurrencyLimit, HostConcurrencyLimitSource, QuotaState, QuotaStateEntry, ConcurrencyBucket, WaveSchedule, DispatchQuota, ObservedWaveOutcome, } from "./types.js";
|
package/dist/quota/index.js
CHANGED
|
@@ -1,5 +1,11 @@
|
|
|
1
1
|
export { resolveLimits, lookupKnownModel, classifyProvider } from "./limits.js";
|
|
2
2
|
export { detectHostActiveSubagentLimit, resolveHostActiveSubagentLimit, } from "./hostLimits.js";
|
|
3
|
-
export { readQuotaState, writeQuotaState, computeMaxSafeConcurrency, recordWaveOutcome, getQuotaStatePath, decayWeight, applyDecayToEntry, } from "./state.js";
|
|
3
|
+
export { readQuotaState, writeQuotaState, computeMaxSafeConcurrency, recordWaveOutcome, getQuotaStatePath, decayWeight, applyDecayToEntry, computeBackoffCooldownMs, computeBackoffFailureWeight, computeRampUpConcurrency, } from "./state.js";
|
|
4
4
|
export { scheduleWave, buildProviderModelKey } from "./scheduler.js";
|
|
5
|
+
export { detectRateLimitError, computeCooldownUntil } from "./errorParsing.js";
|
|
6
|
+
export { acquireLock, releaseLock, withFileLock, FileLockTimeoutError } from "./fileLock.js";
|
|
7
|
+
export { runSlidingWindow } from "./slidingWindow.js";
|
|
5
8
|
export { probeProvider } from "./probe.js";
|
|
9
|
+
export { GenericErrorParser, ClaudeCodeErrorParser, getErrorParserForProvider } from "./errorParsers/index.js";
|
|
10
|
+
export { LearnedQuotaSource } from "./learnedQuotaSource.js";
|
|
11
|
+
export { CompositeQuotaSource } from "./compositeQuotaSource.js";
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import type { QuotaSource, QuotaUsageSnapshot } from "./quotaSource.js";
|
|
2
|
+
export declare class LearnedQuotaSource implements QuotaSource {
|
|
3
|
+
readonly name = "learned";
|
|
4
|
+
private halfLifeHours;
|
|
5
|
+
constructor(halfLifeHours?: number);
|
|
6
|
+
queryCurrentUsage(providerModelKey: string): Promise<QuotaUsageSnapshot | null>;
|
|
7
|
+
}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import { readQuotaState, computeMaxSafeConcurrency } from "./state.js";
|
|
2
|
+
/**
 * Quota source backed by the persisted quota state: it reports a snapshot
 * derived from the stored entry for a provider/model key.
 */
export class LearnedQuotaSource {
    name = "learned";
    halfLifeHours;
    /**
     * @param {number} [halfLifeHours=24] - Forwarded to computeMaxSafeConcurrency.
     */
    constructor(halfLifeHours = 24) {
        this.halfLifeHours = halfLifeHours;
    }
    /**
     * Build a usage snapshot from the stored entry for `providerModelKey`.
     *
     * While the entry's `cooldown_until` lies in the future, `remaining_pct`
     * is 0 and `reset_at` carries that timestamp; otherwise both are null.
     * `requests_remaining` is whatever computeMaxSafeConcurrency returns.
     *
     * @param {string} providerModelKey - Key into the quota state entries.
     * @returns {Promise<object|null>} Snapshot, or null when no entry exists.
     */
    async queryCurrentUsage(providerModelKey) {
        const { entries } = await readQuotaState();
        const entry = entries[providerModelKey];
        if (!entry) {
            return null;
        }
        const maxSafe = computeMaxSafeConcurrency(entry, this.halfLifeHours);
        const cooldownUntil = entry.cooldown_until;
        let coolingDown = false;
        if (cooldownUntil != null) {
            coolingDown = new Date(cooldownUntil).getTime() > Date.now();
        }
        const cooldownFields = coolingDown
            ? { remaining_pct: 0, reset_at: cooldownUntil }
            : { remaining_pct: null, reset_at: null };
        return {
            ...cooldownFields,
            requests_remaining: maxSafe,
            tokens_remaining: null,
            captured_at: entry.updated_at,
            source: "learned",
        };
    }
}
|
package/dist/quota/probe.d.ts
CHANGED
|
@@ -5,9 +5,6 @@ export interface ProbeResult {
|
|
|
5
5
|
/**
|
|
6
6
|
* Probe a provider to discover its rate limits.
|
|
7
7
|
*
|
|
8
|
-
*
|
|
9
|
-
* provider where the auditor controls the API call. IDE providers
|
|
10
|
-
* (claude-code, opencode) select the model internally; their limits come
|
|
11
|
-
* from known-model metadata or learned behavior.
|
|
8
|
+
* @deprecated Phase 3A replaces this with the QuotaSource abstraction.
|
|
12
9
|
*/
|
|
13
10
|
export declare function probeProvider(providerName: string, probeMode?: "auto" | "never" | "force"): Promise<ProbeResult>;
|
package/dist/quota/probe.js
CHANGED
|
@@ -1,10 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Probe a provider to discover its rate limits.
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
* provider where the auditor controls the API call. IDE providers
|
|
6
|
-
* (claude-code, opencode) select the model internally; their limits come
|
|
7
|
-
* from known-model metadata or learned behavior.
|
|
4
|
+
* @deprecated Phase 3A replaces this with the QuotaSource abstraction.
|
|
8
5
|
*/
|
|
9
6
|
export async function probeProvider(providerName, probeMode = "auto") {
|
|
10
7
|
if (probeMode === "never") {
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
export interface QuotaUsageSnapshot {
|
|
2
|
+
remaining_pct: number | null;
|
|
3
|
+
reset_at: string | null;
|
|
4
|
+
requests_remaining: number | null;
|
|
5
|
+
tokens_remaining: number | null;
|
|
6
|
+
captured_at: string;
|
|
7
|
+
source: string;
|
|
8
|
+
}
|
|
9
|
+
export interface QuotaSource {
|
|
10
|
+
readonly name: string;
|
|
11
|
+
queryCurrentUsage(providerModelKey: string): Promise<QuotaUsageSnapshot | null>;
|
|
12
|
+
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -1,14 +1,18 @@
|
|
|
1
1
|
import type { ResolvedProviderName, SessionConfig } from "../types/sessionConfig.js";
|
|
2
2
|
import type { HostConcurrencyLimit, QuotaStateEntry, WaveSchedule } from "./types.js";
|
|
3
|
+
import type { QuotaUsageSnapshot } from "./quotaSource.js";
|
|
3
4
|
export interface ScheduleWaveOptions {
|
|
4
5
|
providerName: ResolvedProviderName;
|
|
5
6
|
sessionConfig: SessionConfig;
|
|
6
7
|
hostModel: string | null;
|
|
7
8
|
requestedConcurrency: number;
|
|
8
|
-
/**
|
|
9
|
+
/** Per-slot estimated tokens (one entry per worker slot). Used for TPM budget. */
|
|
10
|
+
estimatedSlotTokens?: number[];
|
|
11
|
+
/** @deprecated Use estimatedSlotTokens instead. Average tokens per slot — used as fallback. */
|
|
9
12
|
estimatedPacketTokens?: number;
|
|
10
13
|
quotaStateEntry?: QuotaStateEntry | null;
|
|
11
14
|
hostConcurrencyLimit?: HostConcurrencyLimit | null;
|
|
15
|
+
quotaSourceSnapshot?: QuotaUsageSnapshot | null;
|
|
12
16
|
}
|
|
13
17
|
export declare function scheduleWave(options: ScheduleWaveOptions): WaveSchedule;
|
|
14
18
|
/** Build the state key used for indexing quota-state.json entries. */
|
package/dist/quota/scheduler.js
CHANGED
|
@@ -1,7 +1,20 @@
|
|
|
1
1
|
import { classifyProvider, resolveLimits } from "./limits.js";
|
|
2
|
-
import { computeMaxSafeConcurrency } from "./state.js";
|
|
2
|
+
import { computeMaxSafeConcurrency, computeRampUpConcurrency } from "./state.js";
|
|
3
|
+
/**
 * Sum the leading `n` entries of `sorted`.
 *
 * The scheduler passes slot estimates sorted in descending order, so the
 * result is the total of the `n` largest values. Asking for more entries than
 * exist sums the whole array; a non-positive `n` yields 0.
 *
 * @param {number[]} sorted - Values to sum from the front of.
 * @param {number} n - How many leading entries to include.
 * @returns {number} Sum of the first `min(n, sorted.length)` entries.
 */
function sumTopN(sorted, n) {
    // The bound never changes while iterating, so compute it once.
    const limit = Math.min(n, sorted.length);
    let total = 0;
    for (let i = 0; i < limit; i++) {
        total += sorted[i];
    }
    return total;
}
|
|
3
9
|
export function scheduleWave(options) {
|
|
4
|
-
const { providerName, sessionConfig, hostModel, requestedConcurrency, estimatedPacketTokens = 0, quotaStateEntry = null, hostConcurrencyLimit = null, } = options;
|
|
10
|
+
const { providerName, sessionConfig, hostModel, requestedConcurrency, estimatedSlotTokens, estimatedPacketTokens = 0, quotaStateEntry = null, hostConcurrencyLimit = null, quotaSourceSnapshot = null, } = options;
|
|
11
|
+
// Descending sort so sumTopN picks the largest slots
|
|
12
|
+
const slotsSorted = estimatedSlotTokens
|
|
13
|
+
? [...estimatedSlotTokens].sort((a, b) => b - a)
|
|
14
|
+
: null;
|
|
15
|
+
const avgTokens = slotsSorted && slotsSorted.length > 0
|
|
16
|
+
? Math.floor(slotsSorted.reduce((a, b) => a + b, 0) / slotsSorted.length)
|
|
17
|
+
: estimatedPacketTokens;
|
|
5
18
|
const quota = sessionConfig.quota ?? {};
|
|
6
19
|
const applyHostConcurrencyLimit = (waveSize) => {
|
|
7
20
|
if (hostConcurrencyLimit === null)
|
|
@@ -19,7 +32,7 @@ export function scheduleWave(options) {
|
|
|
19
32
|
};
|
|
20
33
|
return {
|
|
21
34
|
wave_size: waveSize,
|
|
22
|
-
estimated_wave_tokens: waveSize *
|
|
35
|
+
estimated_wave_tokens: slotsSorted ? sumTopN(slotsSorted, waveSize) : waveSize * avgTokens,
|
|
23
36
|
cooldown_until: null,
|
|
24
37
|
confidence: "high",
|
|
25
38
|
source: "default",
|
|
@@ -48,12 +61,25 @@ export function scheduleWave(options) {
|
|
|
48
61
|
waveSize = Math.min(waveSize, rpmCap);
|
|
49
62
|
}
|
|
50
63
|
// Cap by input tokens-per-minute
|
|
51
|
-
if (limits.input_tokens_per_minute != null &&
|
|
52
|
-
const
|
|
53
|
-
|
|
64
|
+
if (limits.input_tokens_per_minute != null && avgTokens > 0) {
|
|
65
|
+
const tpmBudget = limits.input_tokens_per_minute * safetyMargin;
|
|
66
|
+
if (slotsSorted && slotsSorted.length > 0) {
|
|
67
|
+
let candidateSize = waveSize;
|
|
68
|
+
while (candidateSize > 1 && sumTopN(slotsSorted, candidateSize) > tpmBudget) {
|
|
69
|
+
candidateSize--;
|
|
70
|
+
}
|
|
71
|
+
waveSize = Math.max(1, candidateSize);
|
|
72
|
+
}
|
|
73
|
+
else {
|
|
74
|
+
const tpmCap = Math.max(1, Math.floor(tpmBudget / avgTokens));
|
|
75
|
+
waveSize = Math.min(waveSize, tpmCap);
|
|
76
|
+
}
|
|
54
77
|
}
|
|
55
78
|
if (quotaStateEntry) {
|
|
56
|
-
const
|
|
79
|
+
const rampUp = quota.ramp_up_enabled !== false;
|
|
80
|
+
const learnedCap = rampUp
|
|
81
|
+
? computeRampUpConcurrency(quotaStateEntry, halfLifeHours)
|
|
82
|
+
: computeMaxSafeConcurrency(quotaStateEntry, halfLifeHours);
|
|
57
83
|
waveSize = Math.min(waveSize, learnedCap);
|
|
58
84
|
}
|
|
59
85
|
else {
|
|
@@ -61,22 +87,38 @@ export function scheduleWave(options) {
|
|
|
61
87
|
const fallbackCap = providerType === "local"
|
|
62
88
|
? quota.unknown_local_concurrency
|
|
63
89
|
: (quota.unknown_hosted_concurrency ?? 1);
|
|
64
|
-
if (
|
|
90
|
+
if (fallbackCap === "unlimited") {
|
|
91
|
+
// no cap — "unlimited" intentionally skips clamping
|
|
92
|
+
}
|
|
93
|
+
else if (typeof fallbackCap === "number" && Number.isFinite(fallbackCap)) {
|
|
65
94
|
waveSize = Math.min(waveSize, Math.max(1, Math.floor(fallbackCap)));
|
|
66
95
|
}
|
|
67
96
|
}
|
|
68
97
|
}
|
|
98
|
+
// Apply real-time quota source data if available
|
|
99
|
+
if (quotaSourceSnapshot && !cooldownUntil) {
|
|
100
|
+
if (quotaSourceSnapshot.remaining_pct != null && quotaSourceSnapshot.remaining_pct < 0.1) {
|
|
101
|
+
waveSize = 1;
|
|
102
|
+
if (quotaSourceSnapshot.reset_at) {
|
|
103
|
+
cooldownUntil = quotaSourceSnapshot.reset_at;
|
|
104
|
+
}
|
|
105
|
+
}
|
|
106
|
+
else if (quotaSourceSnapshot.remaining_pct != null && quotaSourceSnapshot.remaining_pct < 0.3) {
|
|
107
|
+
waveSize = Math.min(waveSize, Math.max(1, Math.floor(waveSize * 0.5)));
|
|
108
|
+
}
|
|
109
|
+
}
|
|
69
110
|
waveSize = applyHostConcurrencyLimit(waveSize);
|
|
70
111
|
waveSize = Math.max(1, waveSize);
|
|
71
112
|
return {
|
|
72
113
|
wave_size: waveSize,
|
|
73
|
-
estimated_wave_tokens: waveSize *
|
|
114
|
+
estimated_wave_tokens: slotsSorted ? sumTopN(slotsSorted, waveSize) : waveSize * avgTokens,
|
|
74
115
|
cooldown_until: cooldownUntil,
|
|
75
116
|
confidence,
|
|
76
117
|
source,
|
|
77
118
|
resolved_limits: limits,
|
|
78
119
|
host_concurrency_limit: hostConcurrencyLimit,
|
|
79
120
|
model: hostModel,
|
|
121
|
+
quota_source_snapshot: quotaSourceSnapshot,
|
|
80
122
|
};
|
|
81
123
|
}
|
|
82
124
|
/** Build the state key used for indexing quota-state.json entries. */
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
export interface SlidingWindowResult<T> {
|
|
2
|
+
results: PromiseSettledResult<T>[];
|
|
3
|
+
}
|
|
4
|
+
export declare function runSlidingWindow<T>(tasks: Array<() => Promise<T>>, concurrency: number, onComplete?: (index: number, result: PromiseSettledResult<T>) => void): Promise<SlidingWindowResult<T>>;
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
/**
 * Run `tasks` with a bounded number in flight, starting the next task as soon
 * as any running one settles.
 *
 * Task failures never reject the returned promise: each outcome is recorded at
 * the task's own index as `{ status: "fulfilled", value }` or
 * `{ status: "rejected", reason }`.
 *
 * @param {Array<() => Promise<unknown>>} tasks - Task factories, invoked lazily.
 * @param {number} concurrency - Maximum tasks in flight. Fractions are floored;
 *   zero, negative or non-numeric values are treated as 1 so every task still
 *   runs (previously such values ran nothing and returned an array of holes).
 * @param {(index: number, result: object) => void} [onComplete] - Called after
 *   each task settles. An exception thrown here rejects the returned promise.
 * @returns {Promise<{ results: object[] }>} Outcomes in task order.
 */
export async function runSlidingWindow(tasks, concurrency, onComplete) {
    const results = new Array(tasks.length);
    // NaN fails the `>= 1` comparison, so it falls back to 1 like zero/negatives.
    const requested = Math.floor(Number(concurrency));
    const limit = requested >= 1 ? requested : 1;
    const workerCount = Math.min(limit, tasks.length);
    let nextIndex = 0;
    // Each worker keeps claiming the next unclaimed index until none remain.
    // Index claiming is synchronous, so two workers never take the same task.
    async function worker() {
        while (nextIndex < tasks.length) {
            const index = nextIndex++;
            let result;
            try {
                const value = await tasks[index]();
                result = { status: "fulfilled", value };
            }
            catch (reason) {
                result = { status: "rejected", reason };
            }
            results[index] = result;
            onComplete?.(index, result);
        }
    }
    await Promise.all(Array.from({ length: workerCount }, () => worker()));
    return { results };
}
|
package/dist/quota/state.d.ts
CHANGED
|
@@ -9,4 +9,7 @@ export declare function writeQuotaState(state: QuotaState): Promise<void>;
|
|
|
9
9
|
* exceeds failure evidence, with a minimum of 1.
|
|
10
10
|
*/
|
|
11
11
|
export declare function computeMaxSafeConcurrency(entry: QuotaStateEntry, halfLifeHours: number, maxToCheck?: number): number;
|
|
12
|
+
export declare function computeRampUpConcurrency(entry: QuotaStateEntry, halfLifeHours: number, maxToCheck?: number): number;
|
|
13
|
+
export declare function computeBackoffCooldownMs(consecutive429Count: number): number;
|
|
14
|
+
export declare function computeBackoffFailureWeight(consecutive429Count: number): number;
|
|
12
15
|
export declare function recordWaveOutcome(providerModelKey: string, outcome: ObservedWaveOutcome, halfLifeHours: number): Promise<void>;
|
package/dist/quota/state.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { mkdir, readFile, writeFile } from "node:fs/promises";
|
|
2
2
|
import { homedir } from "node:os";
|
|
3
3
|
import { join } from "node:path";
|
|
4
|
+
import { withFileLock } from "./fileLock.js";
|
|
4
5
|
const STATE_DIR = join(homedir(), ".audit-code");
|
|
5
6
|
const STATE_PATH = join(STATE_DIR, "quota-state.json");
|
|
6
7
|
// A bucket needs at least this much success weight before we trust it.
|
|
@@ -27,31 +28,38 @@ export function applyDecayToEntry(entry, halfLifeHours) {
|
|
|
27
28
|
return { ...entry, buckets: decayed };
|
|
28
29
|
}
|
|
29
30
|
/**
 * Structural check for a parsed quota-state file.
 *
 * Accepts only a non-array object whose `version` is 1 or 2 and whose
 * `entries` is itself a non-null, non-array object. The stricter `entries`
 * check matters because `typeof null` and `typeof []` are both "object";
 * letting those through would make later `state.entries[key]` lookups throw
 * or silently misbehave.
 *
 * @param {unknown} value - Result of JSON.parse on the state file.
 * @returns {boolean} True when `value` has the expected top-level shape.
 */
function isQuotaState(value) {
    const isRecord = (candidate) => candidate !== null && typeof candidate === "object" && !Array.isArray(candidate);
    if (!isRecord(value))
        return false;
    const version = value["version"];
    return (version === 1 || version === 2) && isRecord(value["entries"]);
}
|
|
36
37
|
/**
 * Load the persisted quota state from STATE_PATH.
 *
 * Never throws for a missing, unreadable or malformed file: a missing file
 * silently yields an empty version-2 state, and any other problem is reported
 * on stderr before the same empty state is returned. Version-1 files are
 * upgraded in memory by defaulting each entry's `consecutive_429_count` to 0.
 *
 * @returns {Promise<object>} The parsed state, or `{ version: 2, entries: {} }`.
 */
export async function readQuotaState() {
    try {
        const state = JSON.parse(await readFile(STATE_PATH, "utf8"));
        if (!isQuotaState(state)) {
            process.stderr.write(`[quota] ignoring invalid quota state at ${STATE_PATH}: expected { version: 1|2, entries: object }\n`);
        }
        else {
            if (state.version === 1) {
                // v1 files predate the backoff counter; fill it in so readers can rely on it.
                Object.values(state.entries).forEach((item) => {
                    item.consecutive_429_count ??= 0;
                });
            }
            return state;
        }
    }
    catch (err) {
        // A missing file is the normal first-run case, so it is not logged.
        if (err.code === "ENOENT") {
            return { version: 2, entries: {} };
        }
        const detail = err instanceof Error ? err.message : String(err);
        process.stderr.write(`[quota] ignoring unreadable quota state at ${STATE_PATH}: ${detail}\n`);
    }
    return { version: 2, entries: {} };
}
|
|
52
59
|
/**
 * Persist `state` to STATE_PATH as pretty-printed JSON with a trailing newline.
 *
 * Creates STATE_DIR first if needed, and always stamps the written document as
 * version 2 regardless of the version carried by `state`.
 *
 * @param {object} state - Quota state to persist; not mutated.
 * @returns {Promise<void>}
 */
export async function writeQuotaState(state) {
    await mkdir(STATE_DIR, { recursive: true });
    const payload = `${JSON.stringify({ ...state, version: 2 }, null, 2)}\n`;
    await writeFile(STATE_PATH, payload, "utf8");
}
|
|
56
64
|
/**
|
|
57
65
|
* Returns the highest concurrency level for which decayed success evidence
|
|
@@ -74,14 +82,39 @@ export function computeMaxSafeConcurrency(entry, halfLifeHours, maxToCheck = 32)
|
|
|
74
82
|
}
|
|
75
83
|
return maxSafe;
|
|
76
84
|
}
|
|
85
|
+
// Decayed success weight the current safe level needs before probing one higher.
const RAMP_UP_MIN_SUCCESSES = 2;
/**
 * Like computeMaxSafeConcurrency, but allows probing one level above the
 * learned safe maximum once that maximum has proven itself.
 *
 * The step up is granted only when the decayed bucket for the safe level has
 * at least RAMP_UP_MIN_SUCCESSES success weight and exactly zero failure
 * weight; otherwise the safe level is returned unchanged.
 *
 * @param {object} entry - Quota state entry holding per-concurrency buckets.
 * @param {number} halfLifeHours - Half-life used to decay recorded evidence.
 * @param {number} [maxToCheck=32] - Forwarded to computeMaxSafeConcurrency.
 * @returns {number} The safe level, or the safe level plus one.
 */
export function computeRampUpConcurrency(entry, halfLifeHours, maxToCheck = 32) {
    const safeLevel = computeMaxSafeConcurrency(entry, halfLifeHours, maxToCheck);
    const { buckets } = applyDecayToEntry(entry, halfLifeHours);
    const evidence = buckets[String(safeLevel)];
    if (!evidence) {
        return safeLevel;
    }
    const provenClean = evidence.success_weight >= RAMP_UP_MIN_SUCCESSES &&
        evidence.failure_weight === 0;
    return provenClean ? safeLevel + 1 : safeLevel;
}
|
|
77
97
|
/**
 * Create a fresh quota-state entry with no recorded evidence.
 *
 * `consecutive_429_count` starts at 0 so new entries have the same shape as
 * entries upgraded from version-1 files, which are given that default on read.
 *
 * @returns {object} A new entry stamped with the current time.
 */
function blankEntry() {
    return {
        updated_at: new Date().toISOString(),
        buckets: {},
        cooldown_until: null,
        last_429_at: null,
        consecutive_429_count: 0,
    };
}
|
|
100
|
+
// Cooldown after the first rate-limit response.
const BASE_COOLDOWN_MS = 60_000;
// Upper bound on the cooldown, however many rate limits occur in a row.
const MAX_COOLDOWN_MS = 15 * 60_000;
/**
 * Exponential backoff: the cooldown doubles with each consecutive rate-limit
 * response, starting at BASE_COOLDOWN_MS and capped at MAX_COOLDOWN_MS.
 *
 * @param {number} consecutive429Count - Rate limits seen in a row. Counts
 *   below 1 behave like 1. A value that is not a number (for example from a
 *   hand-edited state file) is treated as 0 rather than yielding NaN, which
 *   would make the caller's `new Date(...).toISOString()` throw.
 * @returns {number} Cooldown in milliseconds.
 */
export function computeBackoffCooldownMs(consecutive429Count) {
    const numeric = Number(consecutive429Count);
    const count = Number.isNaN(numeric) ? 0 : numeric;
    const ms = BASE_COOLDOWN_MS * Math.pow(2, Math.max(0, count - 1));
    return Math.min(ms, MAX_COOLDOWN_MS);
}
|
|
106
|
+
/**
 * Failure weight to record for a rate-limited wave: 1.0 for the first rate
 * limit, plus 0.5 for each additional consecutive one.
 *
 * @param {number} consecutive429Count - Rate limits seen in a row. Counts
 *   below 1 behave like 1. A value that is not a number is treated as 0 so
 *   NaN is never added to a bucket's failure weight.
 * @returns {number} Weight to add to each affected bucket.
 */
export function computeBackoffFailureWeight(consecutive429Count) {
    const numeric = Number(consecutive429Count);
    const count = Number.isNaN(numeric) ? 0 : numeric;
    return 1.0 + 0.5 * Math.max(0, count - 1);
}
|
|
109
|
+
// Lock file that sits beside the quota state file.
const LOCK_PATH = `${STATE_PATH}.lock`;
/**
 * Record the outcome of one dispatch wave in the persisted quota state.
 *
 * The read-modify-write is delegated to recordWaveOutcomeUnsafe and run
 * through withFileLock on LOCK_PATH (presumably to serialise concurrent
 * updaters; see fileLock.js for the exact guarantees).
 *
 * @param {string} providerModelKey - Key of the state entry to update.
 * @param {object} outcome - Observed wave outcome.
 * @param {number} halfLifeHours - Half-life used to decay existing evidence.
 * @returns {Promise<void>}
 */
export async function recordWaveOutcome(providerModelKey, outcome, halfLifeHours) {
    const applyUpdate = () => recordWaveOutcomeUnsafe(providerModelKey, outcome, halfLifeHours);
    await withFileLock(LOCK_PATH, applyUpdate);
}
|
|
113
|
+
async function recordWaveOutcomeUnsafe(providerModelKey, outcome, halfLifeHours) {
|
|
81
114
|
const state = await readQuotaState();
|
|
82
115
|
const entry = applyDecayToEntry(state.entries[providerModelKey] ?? blankEntry(), halfLifeHours);
|
|
83
116
|
if (outcome.outcome === "success") {
|
|
84
|
-
|
|
117
|
+
entry.consecutive_429_count = 0;
|
|
85
118
|
for (let n = 1; n <= outcome.concurrency; n++) {
|
|
86
119
|
const bucket = entry.buckets[String(n)] ?? { success_weight: 0, failure_weight: 0 };
|
|
87
120
|
bucket.success_weight += 1.0;
|
|
@@ -89,13 +122,23 @@ export async function recordWaveOutcome(providerModelKey, outcome, halfLifeHours
|
|
|
89
122
|
}
|
|
90
123
|
}
|
|
91
124
|
else {
|
|
125
|
+
const prev429Count = entry.consecutive_429_count ?? 0;
|
|
126
|
+
const new429Count = outcome.outcome === "rate_limited" ? prev429Count + 1 : prev429Count;
|
|
127
|
+
entry.consecutive_429_count = new429Count;
|
|
92
128
|
entry.last_429_at = new Date().toISOString();
|
|
93
|
-
if (outcome.
|
|
129
|
+
if (outcome.outcome === "rate_limited" && new429Count > 0) {
|
|
130
|
+
const backoffMs = computeBackoffCooldownMs(new429Count);
|
|
131
|
+
entry.cooldown_until = new Date(Date.now() + backoffMs).toISOString();
|
|
132
|
+
}
|
|
133
|
+
else if (outcome.cooldown_until) {
|
|
94
134
|
entry.cooldown_until = outcome.cooldown_until;
|
|
95
|
-
|
|
135
|
+
}
|
|
136
|
+
const failureWeight = outcome.outcome === "rate_limited"
|
|
137
|
+
? computeBackoffFailureWeight(new429Count)
|
|
138
|
+
: 1.0;
|
|
96
139
|
for (let n = outcome.concurrency; n <= outcome.concurrency + 4; n++) {
|
|
97
140
|
const bucket = entry.buckets[String(n)] ?? { success_weight: 0, failure_weight: 0 };
|
|
98
|
-
bucket.failure_weight +=
|
|
141
|
+
bucket.failure_weight += failureWeight;
|
|
99
142
|
entry.buckets[String(n)] = bucket;
|
|
100
143
|
}
|
|
101
144
|
}
|
package/dist/quota/types.d.ts
CHANGED
|
@@ -22,9 +22,10 @@ export interface QuotaStateEntry {
|
|
|
22
22
|
buckets: Record<string, ConcurrencyBucket>;
|
|
23
23
|
cooldown_until: string | null;
|
|
24
24
|
last_429_at: string | null;
|
|
25
|
+
consecutive_429_count?: number;
|
|
25
26
|
}
|
|
26
27
|
export interface QuotaState {
|
|
27
|
-
version: 1;
|
|
28
|
+
version: 1 | 2;
|
|
28
29
|
entries: Record<string, QuotaStateEntry>;
|
|
29
30
|
}
|
|
30
31
|
export interface WaveSchedule {
|
|
@@ -36,9 +37,15 @@ export interface WaveSchedule {
|
|
|
36
37
|
resolved_limits: ResolvedLimits;
|
|
37
38
|
host_concurrency_limit: HostConcurrencyLimit | null;
|
|
38
39
|
model: string | null;
|
|
40
|
+
quota_source_snapshot?: import("./quotaSource.js").QuotaUsageSnapshot | null;
|
|
41
|
+
}
|
|
42
|
+
export interface BackoffState {
|
|
43
|
+
consecutive_429_count: number;
|
|
44
|
+
current_cooldown_ms: number;
|
|
45
|
+
current_failure_weight: number;
|
|
39
46
|
}
|
|
40
47
|
export interface DispatchQuota {
|
|
41
|
-
contract_version: "audit-code-dispatch-quota/v1alpha1";
|
|
48
|
+
contract_version: "audit-code-dispatch-quota/v1alpha1" | "audit-code-dispatch-quota/v1alpha2";
|
|
42
49
|
run_id: string;
|
|
43
50
|
model: string | null;
|
|
44
51
|
resolved_limits: ResolvedLimits;
|
|
@@ -48,6 +55,8 @@ export interface DispatchQuota {
|
|
|
48
55
|
wave_size: number;
|
|
49
56
|
estimated_wave_tokens: number;
|
|
50
57
|
cooldown_until: string | null;
|
|
58
|
+
quota_source_snapshot?: import("./quotaSource.js").QuotaUsageSnapshot | null;
|
|
59
|
+
backoff_state?: BackoffState | null;
|
|
51
60
|
}
|
|
52
61
|
export interface ObservedWaveOutcome {
|
|
53
62
|
concurrency: number;
|
|
@@ -44,6 +44,8 @@ export interface QuotaConfig {
|
|
|
44
44
|
reserved_output_tokens?: number;
|
|
45
45
|
/** Half-life of empirical success/failure evidence in hours (default: 24). */
|
|
46
46
|
empirical_half_life_hours?: number;
|
|
47
|
+
/** Allow the scheduler to try concurrency maxSafe+1 after consecutive successes (default: true). */
|
|
48
|
+
ramp_up_enabled?: boolean;
|
|
47
49
|
/** Hard host ceiling for simultaneously active conversation subagents. */
|
|
48
50
|
host_active_subagent_limit?: number;
|
|
49
51
|
/** Per-model overrides keyed by "provider/model". */
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"$schema": "http://json-schema.org/draft-07/schema#",
|
|
3
|
-
"$id": "audit-code-dispatch-quota/
|
|
3
|
+
"$id": "audit-code-dispatch-quota/v1alpha2",
|
|
4
4
|
"title": "DispatchQuota",
|
|
5
5
|
"description": "Quota schedule for a prepare-dispatch run. Written beside dispatch-plan.json. Hosts must launch at most wave_size packets per wave, then re-read this file before the next wave to pick up any updated limits.",
|
|
6
6
|
"type": "object",
|
|
@@ -20,7 +20,7 @@
|
|
|
20
20
|
"properties": {
|
|
21
21
|
"contract_version": {
|
|
22
22
|
"type": "string",
|
|
23
|
-
"
|
|
23
|
+
"enum": ["audit-code-dispatch-quota/v1alpha1", "audit-code-dispatch-quota/v1alpha2"]
|
|
24
24
|
},
|
|
25
25
|
"run_id": {
|
|
26
26
|
"type": "string",
|
|
@@ -97,6 +97,27 @@
|
|
|
97
97
|
"type": ["string", "null"],
|
|
98
98
|
"format": "date-time",
|
|
99
99
|
"description": "If non-null, the host should wait until this timestamp before launching the next wave."
|
|
100
|
+
},
|
|
101
|
+
"quota_source_snapshot": {
|
|
102
|
+
"type": ["object", "null"],
|
|
103
|
+
"description": "Real-time usage snapshot from a QuotaSource, if available.",
|
|
104
|
+
"properties": {
|
|
105
|
+
"remaining_pct": { "type": ["number", "null"] },
|
|
106
|
+
"reset_at": { "type": ["string", "null"], "format": "date-time" },
|
|
107
|
+
"requests_remaining": { "type": ["integer", "null"] },
|
|
108
|
+
"tokens_remaining": { "type": ["integer", "null"] },
|
|
109
|
+
"captured_at": { "type": "string", "format": "date-time" },
|
|
110
|
+
"source": { "type": "string" }
|
|
111
|
+
}
|
|
112
|
+
},
|
|
113
|
+
"backoff_state": {
|
|
114
|
+
"type": ["object", "null"],
|
|
115
|
+
"description": "Exponential backoff state for repeated rate-limit errors.",
|
|
116
|
+
"properties": {
|
|
117
|
+
"consecutive_429_count": { "type": "integer", "minimum": 0 },
|
|
118
|
+
"current_cooldown_ms": { "type": "integer", "minimum": 0 },
|
|
119
|
+
"current_failure_weight": { "type": "number", "minimum": 0 }
|
|
120
|
+
}
|
|
100
121
|
}
|
|
101
122
|
}
|
|
102
123
|
}
|