auditor-lambda 0.3.21 → 0.3.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/audit-code-wrapper-lib.mjs +5 -0
- package/dist/cli.js +169 -8
- package/dist/orchestrator/reviewPackets.d.ts +5 -0
- package/dist/orchestrator/reviewPackets.js +5 -1
- package/dist/quota/index.d.ts +8 -0
- package/dist/quota/index.js +4 -0
- package/dist/quota/limits.d.ts +16 -0
- package/dist/quota/limits.js +77 -0
- package/dist/quota/probe.d.ts +13 -0
- package/dist/quota/probe.js +21 -0
- package/dist/quota/scheduler.d.ts +14 -0
- package/dist/quota/scheduler.js +76 -0
- package/dist/quota/state.d.ts +12 -0
- package/dist/quota/state.js +101 -0
- package/dist/quota/types.d.ts +50 -0
- package/dist/quota/types.js +1 -0
- package/dist/types/sessionConfig.d.ts +28 -0
- package/package.json +1 -1
- package/schemas/dispatch_quota.schema.json +77 -0
|
@@ -2685,6 +2685,11 @@ export async function runAuditCodeWrapper({
|
|
|
2685
2685
|
return;
|
|
2686
2686
|
}
|
|
2687
2687
|
|
|
2688
|
+
if (argv[0] === 'quota') {
|
|
2689
|
+
await runDistCommand('quota', argv.slice(1), { ensureArtifactsDir: true });
|
|
2690
|
+
return;
|
|
2691
|
+
}
|
|
2692
|
+
|
|
2688
2693
|
if (argv[0] === 'submit-packet') {
|
|
2689
2694
|
await runDistCommand('submit-packet', argv.slice(1));
|
|
2690
2695
|
return;
|
package/dist/cli.js
CHANGED
|
@@ -32,6 +32,7 @@ import { buildReviewPackets, orderTasksForPacketReview, } from "./orchestrator/r
|
|
|
32
32
|
import { buildFileAnchorSummary, } from "./orchestrator/fileAnchors.js";
|
|
33
33
|
import { LOCAL_SUBPROCESS_PROVIDER_NAME } from "./providers/constants.js";
|
|
34
34
|
import { runAuditCodeMcpServer } from "./mcp/server.js";
|
|
35
|
+
import { scheduleWave, buildProviderModelKey, readQuotaState, recordWaveOutcome, resolveLimits, probeProvider, computeMaxSafeConcurrency, getQuotaStatePath, } from "./quota/index.js";
|
|
35
36
|
const packageRoot = resolve(dirname(fileURLToPath(import.meta.url)), "..");
|
|
36
37
|
const ADVANCE_AUDIT_CONTRACT_VERSION = "audit-code/v1alpha1";
|
|
37
38
|
const WORKER_RESULT_CONTRACT_VERSION = "audit-code-worker-result/v1alpha1";
|
|
@@ -178,6 +179,27 @@ function getTimeoutMs(argv, sessionConfig) {
|
|
|
178
179
|
/**
 * Read the value of the `--provider` command-line flag.
 *
 * @param {string[]} argv - Raw CLI arguments.
 * @returns Whatever `getFlag` yields for `--provider`.
 */
function getExplicitProvider(argv) {
    const explicitProvider = getFlag(argv, "--provider");
    return explicitProvider;
}
|
|
182
|
+
/**
 * Read the value of the `--host-model` command-line flag.
 *
 * @param {string[]} argv - Raw CLI arguments.
 * @returns The flag value, or `null` when `getFlag` yields null/undefined.
 */
function getHostModel(argv) {
    const hostModelFlag = getFlag(argv, "--host-model");
    if (hostModelFlag === undefined || hostModelFlag === null) {
        return null;
    }
    return hostModelFlag;
}
|
|
185
|
+
/**
 * Resolve the quota probe mode.
 *
 * Precedence: the `--quota-probe` flag, then `sessionConfig.quota.probe`,
 * then "auto". Any value other than "auto", "never" or "force" is
 * normalised to "auto".
 *
 * @param {string[]} argv - Raw CLI arguments.
 * @param {object} sessionConfig - Loaded session configuration.
 * @returns {"auto" | "never" | "force"}
 */
function getQuotaProbeMode(argv, sessionConfig) {
    const requested = getFlag(argv, "--quota-probe") ?? sessionConfig.quota?.probe ?? "auto";
    switch (requested) {
        case "never":
        case "force":
            return requested;
        default:
            // Covers both an explicit "auto" and unrecognised values.
            return "auto";
    }
}
|
|
191
|
+
/**
 * Heuristically decide whether an error message reports provider rate limiting.
 *
 * Matches a standalone "429" status code (not digits embedded in a longer
 * number such as "14290ms"), the phrases "rate limit" / "rate_limit" /
 * "rate-limit" / "ratelimit", and the HTTP 429 reason phrase
 * "too many requests". Matching is case-insensitive.
 *
 * @param {unknown} errorText - Error message; Error-like objects contribute
 *   their `message`, and null/undefined are treated as an empty message.
 * @returns {boolean} true when the text looks like a rate-limit failure.
 */
function detectRateLimitError(errorText) {
    // This function is used directly as an Array#some predicate, so an entry
    // that is not a string must not throw here.
    const text = typeof errorText === "string"
        ? errorText
        : String(errorText?.message ?? errorText ?? "");
    return /(?<!\d)429(?!\d)|rate[ _-]?limit|too many requests/i.test(text);
}
|
|
195
|
+
/**
 * Compute the ISO-8601 timestamp until which dispatch should cool down.
 *
 * @param {string | null | undefined} resetAtHeader - Optional reset time in a
 *   form `new Date()` can parse.
 * @returns {string} The parsed reset time when it is usable; otherwise a
 *   timestamp 60 seconds from now.
 */
function defaultCooldownUntil(resetAtHeader) {
    const parsedMs = resetAtHeader ? new Date(resetAtHeader).getTime() : Number.NaN;
    // An absent or unparseable header falls back to a fixed one-minute cooldown.
    const cooldownMs = Number.isNaN(parsedMs) ? Date.now() + 60_000 : parsedMs;
    return new Date(cooldownMs).toISOString();
}
|
|
181
203
|
/**
 * Resolve the provider name for a run from the `--provider` flag and the
 * session configuration, delegating to `resolveFreshSessionProviderName`.
 *
 * @param {string[]} argv - Raw CLI arguments.
 * @param {object} sessionConfig - Loaded session configuration.
 */
function resolveRunProviderName(argv, sessionConfig) {
    const explicitProvider = getExplicitProvider(argv);
    return resolveFreshSessionProviderName(explicitProvider, sessionConfig);
}
|
|
@@ -528,14 +550,30 @@ function renderDispatchReviewPrompt(params) {
|
|
|
528
550
|
const toolsLine = params.hostCanRestrictSubagentTools
|
|
529
551
|
? "Restrict review subagents to read/search plus the packet submit command named in their prompt. Do not give them source edit/write tools."
|
|
530
552
|
: "Do not ask the user about per-subagent tool restrictions; this host did not report a callable restriction facility.";
|
|
553
|
+
const fileLines = params.dispatchQuotaPath
|
|
554
|
+
? [
|
|
555
|
+
"Dispatch is prepared. Read both of these files:",
|
|
556
|
+
"",
|
|
557
|
+
` Dispatch plan: ${params.dispatchPlanPath}`,
|
|
558
|
+
` Dispatch quota: ${params.dispatchQuotaPath}`,
|
|
559
|
+
"",
|
|
560
|
+
"The quota file contains a `wave_size` field. Dispatch at most `wave_size` subagents at a time. If `cooldown_until` is non-null, wait until that timestamp before starting the first wave.",
|
|
561
|
+
"",
|
|
562
|
+
"For each wave: launch up to `wave_size` subagents in parallel (one per plan entry), wait for all of them to finish, then start the next wave. Repeat until all entries are dispatched.",
|
|
563
|
+
]
|
|
564
|
+
: [
|
|
565
|
+
"Dispatch is prepared. Read only this dispatch plan JSON:",
|
|
566
|
+
"",
|
|
567
|
+
` ${params.dispatchPlanPath}`,
|
|
568
|
+
"",
|
|
569
|
+
"Launch one host subagent for each entry in the plan.",
|
|
570
|
+
];
|
|
531
571
|
return [
|
|
532
572
|
"# audit-code dispatch review",
|
|
533
573
|
"",
|
|
534
|
-
|
|
535
|
-
"",
|
|
536
|
-
` ${params.dispatchPlanPath}`,
|
|
574
|
+
...fileLines,
|
|
537
575
|
"",
|
|
538
|
-
"
|
|
576
|
+
"Pass each packet prompt path literally to its subagent; do not load packet prompt files into this orchestrator context.",
|
|
539
577
|
"",
|
|
540
578
|
"Subagent prompt shape:",
|
|
541
579
|
"",
|
|
@@ -544,9 +582,9 @@ function renderDispatchReviewPrompt(params) {
|
|
|
544
582
|
modelLine,
|
|
545
583
|
toolsLine,
|
|
546
584
|
"",
|
|
547
|
-
"
|
|
585
|
+
"Each subagent must submit its packet through the submit command printed in its packet prompt and stop after successful submission.",
|
|
548
586
|
"",
|
|
549
|
-
"
|
|
587
|
+
"After all waves complete, run exactly:",
|
|
550
588
|
"",
|
|
551
589
|
` ${mergeCommand}`,
|
|
552
590
|
"",
|
|
@@ -1198,6 +1236,7 @@ async function cmdNextStep(argv) {
|
|
|
1198
1236
|
repoRoot: root,
|
|
1199
1237
|
artifactPaths: {
|
|
1200
1238
|
dispatch_plan: dispatch.dispatch_plan_path,
|
|
1239
|
+
dispatch_quota: dispatch.dispatch_quota_path,
|
|
1201
1240
|
dispatch_warnings: dispatch.dispatch_warnings_path,
|
|
1202
1241
|
active_review_task: result.activeReviewRun.task_path,
|
|
1203
1242
|
pending_audit_tasks: result.activeReviewRun.pending_audit_tasks_path ?? null,
|
|
@@ -1207,6 +1246,7 @@ async function cmdNextStep(argv) {
|
|
|
1207
1246
|
artifactsDir,
|
|
1208
1247
|
activeReviewRun: result.activeReviewRun,
|
|
1209
1248
|
dispatchPlanPath: dispatch.dispatch_plan_path,
|
|
1249
|
+
dispatchQuotaPath: dispatch.dispatch_quota_path,
|
|
1210
1250
|
hostCanRestrictSubagentTools,
|
|
1211
1251
|
hostCanSelectSubagentModel,
|
|
1212
1252
|
}),
|
|
@@ -1238,6 +1278,7 @@ async function cmdRunToCompletion(argv) {
|
|
|
1238
1278
|
const agentBatchSize = getAgentBatchSize(argv, sessionConfig);
|
|
1239
1279
|
const parallelWorkers = getParallelWorkers(argv, sessionConfig);
|
|
1240
1280
|
const timeoutMs = getTimeoutMs(argv, sessionConfig);
|
|
1281
|
+
const hostModel = getHostModel(argv);
|
|
1241
1282
|
const selfCliPath = resolve(argv[1] ?? process.argv[1] ?? "");
|
|
1242
1283
|
const batchResultsDir = getBatchResultsDir(argv);
|
|
1243
1284
|
if (batchResultsDir && getFlag(argv, "--results")) {
|
|
@@ -1375,8 +1416,27 @@ async function cmdRunToCompletion(argv) {
|
|
|
1375
1416
|
return;
|
|
1376
1417
|
}
|
|
1377
1418
|
if (preferredExecutor === "agent" && parallelWorkers > 1) {
|
|
1419
|
+
const quotaState = await readQuotaState();
|
|
1420
|
+
const providerModelKey = buildProviderModelKey(provider.name, hostModel);
|
|
1421
|
+
const quotaStateEntry = quotaState.entries[providerModelKey] ?? null;
|
|
1422
|
+
const waveSchedule = scheduleWave({
|
|
1423
|
+
providerName: resolveFreshSessionProviderName(getExplicitProvider(argv), sessionConfig),
|
|
1424
|
+
sessionConfig,
|
|
1425
|
+
hostModel,
|
|
1426
|
+
requestedConcurrency: parallelWorkers,
|
|
1427
|
+
quotaStateEntry,
|
|
1428
|
+
});
|
|
1429
|
+
const waveSize = waveSchedule.wave_size;
|
|
1430
|
+
if (waveSchedule.cooldown_until) {
|
|
1431
|
+
const waitMs = new Date(waveSchedule.cooldown_until).getTime() - Date.now();
|
|
1432
|
+
if (waitMs > 0) {
|
|
1433
|
+
const cappedWait = Math.min(waitMs, 120_000);
|
|
1434
|
+
process.stderr.write(`[quota] Cooldown active — waiting ${Math.ceil(cappedWait / 1000)}s before next wave.\n`);
|
|
1435
|
+
await new Promise((r) => setTimeout(r, cappedWait));
|
|
1436
|
+
}
|
|
1437
|
+
}
|
|
1378
1438
|
const allPendingTasks = buildPendingAuditTasks(bundle);
|
|
1379
|
-
const taskGroups = chunkArray(allPendingTasks.slice(0,
|
|
1439
|
+
const taskGroups = chunkArray(allPendingTasks.slice(0, waveSize * agentBatchSize), agentBatchSize);
|
|
1380
1440
|
const workerSlots = [];
|
|
1381
1441
|
for (const rawGroup of taskGroups) {
|
|
1382
1442
|
const group = await addFileLineCountHints(root, rawGroup);
|
|
@@ -1530,6 +1590,16 @@ async function cmdRunToCompletion(argv) {
|
|
|
1530
1590
|
});
|
|
1531
1591
|
artifactsWritten.add("run-ledger.json");
|
|
1532
1592
|
}
|
|
1593
|
+
// Record outcome for adaptive learning (best-effort — never blocks dispatch)
|
|
1594
|
+
{
|
|
1595
|
+
const hasRateLimit = batchErrors.some(detectRateLimitError);
|
|
1596
|
+
await recordWaveOutcome(providerModelKey, {
|
|
1597
|
+
concurrency: workerSlots.length,
|
|
1598
|
+
estimated_tokens: waveSize * agentBatchSize * 900,
|
|
1599
|
+
outcome: hasRateLimit ? "rate_limited" : batchErrors.length > 0 ? "timeout" : "success",
|
|
1600
|
+
cooldown_until: hasRateLimit ? defaultCooldownUntil(null) : null,
|
|
1601
|
+
}, sessionConfig.quota?.empirical_half_life_hours ?? 24).catch(() => undefined);
|
|
1602
|
+
}
|
|
1533
1603
|
if (batchErrors.length > 0) {
|
|
1534
1604
|
const bundleAfter = await loadArtifactBundle(artifactsDir);
|
|
1535
1605
|
const blockedState = buildBlockedAuditState({
|
|
@@ -2117,6 +2187,7 @@ async function prepareDispatchArtifacts(params) {
|
|
|
2117
2187
|
}
|
|
2118
2188
|
const tasks = await readJsonFile(tasksPath);
|
|
2119
2189
|
const bundle = await loadArtifactBundle(artifactsDir);
|
|
2190
|
+
const sessionConfig = params.sessionConfig ?? (await loadSessionConfig(artifactsDir).catch(() => ({})));
|
|
2120
2191
|
const lensDefsPath = join(packageRoot, "dispatch", "lens-definitions.json");
|
|
2121
2192
|
const lensDefs = await readJsonFile(lensDefsPath);
|
|
2122
2193
|
await mkdir(taskResultsDir, { recursive: true });
|
|
@@ -2342,6 +2413,52 @@ async function prepareDispatchArtifacts(params) {
|
|
|
2342
2413
|
run_id: runId,
|
|
2343
2414
|
entries: resultMapEntries,
|
|
2344
2415
|
});
|
|
2416
|
+
// Compute and write dispatch-quota.json
|
|
2417
|
+
const hostModel = params.hostModel ?? null;
|
|
2418
|
+
const avgPacketTokens = plan.length > 0
|
|
2419
|
+
? Math.floor(plan.reduce((s, p) => s + p.complexity.estimated_tokens, 0) / plan.length)
|
|
2420
|
+
: 0;
|
|
2421
|
+
const quotaProviderName = resolveFreshSessionProviderName(undefined, sessionConfig);
|
|
2422
|
+
const quotaProviderKey = buildProviderModelKey(quotaProviderName, hostModel);
|
|
2423
|
+
const quotaState = await readQuotaState().catch(() => ({ version: 1, entries: {} }));
|
|
2424
|
+
const quotaStateEntry = quotaState.entries[quotaProviderKey] ?? null;
|
|
2425
|
+
const waveSchedule = scheduleWave({
|
|
2426
|
+
providerName: quotaProviderName,
|
|
2427
|
+
sessionConfig,
|
|
2428
|
+
hostModel,
|
|
2429
|
+
requestedConcurrency: sessionConfig.parallel_workers ?? 1,
|
|
2430
|
+
estimatedPacketTokens: avgPacketTokens,
|
|
2431
|
+
quotaStateEntry,
|
|
2432
|
+
});
|
|
2433
|
+
const dispatchQuota = {
|
|
2434
|
+
contract_version: "audit-code-dispatch-quota/v1alpha1",
|
|
2435
|
+
run_id: runId,
|
|
2436
|
+
model: hostModel,
|
|
2437
|
+
resolved_limits: waveSchedule.resolved_limits,
|
|
2438
|
+
confidence: waveSchedule.confidence,
|
|
2439
|
+
source: waveSchedule.source,
|
|
2440
|
+
wave_size: waveSchedule.wave_size,
|
|
2441
|
+
estimated_wave_tokens: waveSchedule.estimated_wave_tokens,
|
|
2442
|
+
cooldown_until: waveSchedule.cooldown_until,
|
|
2443
|
+
};
|
|
2444
|
+
const dispatchQuotaPath = join(runDir, "dispatch-quota.json");
|
|
2445
|
+
await writeJsonFile(dispatchQuotaPath, dispatchQuota);
|
|
2446
|
+
// Warn about packets that exceed the context budget only when we have reliable limit
|
|
2447
|
+
// information (confidence medium/high). Low-confidence limits are conservative defaults
|
|
2448
|
+
// and would produce misleading warnings since the real context window is unknown.
|
|
2449
|
+
if (waveSchedule.confidence !== "low") {
|
|
2450
|
+
const contextBudget = waveSchedule.resolved_limits.context_tokens - waveSchedule.resolved_limits.output_tokens;
|
|
2451
|
+
for (const p of plan) {
|
|
2452
|
+
if (p.complexity.estimated_tokens > contextBudget) {
|
|
2453
|
+
warnings.push({
|
|
2454
|
+
code: "oversized_packet",
|
|
2455
|
+
message: `Packet ${p.packet_id} estimated tokens (${p.complexity.estimated_tokens}) exceed ` +
|
|
2456
|
+
`context budget (${contextBudget}). This packet may fail at dispatch. ` +
|
|
2457
|
+
`Set quota.default_context_tokens or quota.models in session-config.json to override.`,
|
|
2458
|
+
});
|
|
2459
|
+
}
|
|
2460
|
+
}
|
|
2461
|
+
}
|
|
2345
2462
|
const warningsPath = warnings.length > 0
|
|
2346
2463
|
? join(runDir, "dispatch-warnings.json")
|
|
2347
2464
|
: null;
|
|
@@ -2351,6 +2468,7 @@ async function prepareDispatchArtifacts(params) {
|
|
|
2351
2468
|
return {
|
|
2352
2469
|
run_id: runId,
|
|
2353
2470
|
dispatch_plan_path: dispatchPlanPath,
|
|
2471
|
+
dispatch_quota_path: dispatchQuotaPath,
|
|
2354
2472
|
packet_count: plan.length,
|
|
2355
2473
|
task_count: orderedTasks.length,
|
|
2356
2474
|
largest_packet: largestPacketId
|
|
@@ -2372,6 +2490,7 @@ async function cmdPrepareDispatch(argv) {
|
|
|
2372
2490
|
runId,
|
|
2373
2491
|
artifactsDir: getArtifactsDir(argv),
|
|
2374
2492
|
root: getFlag(argv, "--root") ? getRootDir(argv) : undefined,
|
|
2493
|
+
hostModel: getHostModel(argv),
|
|
2375
2494
|
});
|
|
2376
2495
|
console.log(JSON.stringify(result, null, 2));
|
|
2377
2496
|
}
|
|
@@ -2923,6 +3042,45 @@ async function cmdCleanup(argv) {
|
|
|
2923
3042
|
/**
 * `mcp` subcommand: start the audit-code MCP server.
 *
 * @param {string[]} argv - Full process argv; the first three entries are
 *   dropped and the remainder is forwarded to the server.
 */
async function cmdMcp(argv) {
    const serverArgs = argv.slice(3);
    await runAuditCodeMcpServer(serverArgs);
}
|
|
3045
|
+
/**
 * `quota` subcommand: print a JSON report of the quota picture for the
 * selected provider/model.
 *
 * The report contains the resolved limits with their source and confidence,
 * the probe result, any learned caps from the persisted quota state, the wave
 * schedule computed for the configured `parallel_workers`, and the path of the
 * quota state file. A session config or quota state that fails to load is
 * replaced by an empty one rather than failing the command.
 *
 * @param {string[]} argv - Raw CLI arguments.
 */
async function cmdQuota(argv) {
    const sessionConfig = await loadSessionConfig(getArtifactsDir(argv)).catch(() => ({}));
    const explicitProvider = getExplicitProvider(argv);
    const hostModel = getHostModel(argv);
    const probeMode = getQuotaProbeMode(argv, sessionConfig);
    const providerName = resolveFreshSessionProviderName(explicitProvider, sessionConfig);
    const providerModelKey = buildProviderModelKey(providerName, hostModel);
    const resolution = resolveLimits({ providerName, sessionConfig, hostModel });
    const probe = await probeProvider(providerName, probeMode);
    const { entries } = await readQuotaState().catch(() => ({ version: 1, entries: {} }));
    const learnedEntry = entries[providerModelKey] ?? null;
    const halfLifeHours = sessionConfig.quota?.empirical_half_life_hours ?? 24;
    const waveSchedule = scheduleWave({
        providerName,
        sessionConfig,
        hostModel,
        requestedConcurrency: sessionConfig.parallel_workers ?? 1,
        quotaStateEntry: learnedEntry,
    });
    // Learned caps are reported only when there is a persisted entry for this key.
    let learnedCaps = null;
    if (learnedEntry) {
        learnedCaps = {
            max_safe_concurrency: computeMaxSafeConcurrency(learnedEntry, halfLifeHours),
            cooldown_until: learnedEntry.cooldown_until,
            last_429_at: learnedEntry.last_429_at,
        };
    }
    const report = {
        provider: providerName,
        model: hostModel,
        provider_model_key: providerModelKey,
        resolved_limits: resolution.limits,
        confidence: resolution.confidence,
        source: resolution.source,
        probe,
        learned_caps: learnedCaps,
        wave_schedule: waveSchedule,
        quota_state_path: getQuotaStatePath(),
    };
    console.log(JSON.stringify(report, null, 2));
}
|
|
2926
3084
|
async function main(argv) {
|
|
2927
3085
|
const command = argv[2] ?? "sample-run";
|
|
2928
3086
|
switch (command) {
|
|
@@ -2989,9 +3147,12 @@ async function main(argv) {
|
|
|
2989
3147
|
case "validate-result":
|
|
2990
3148
|
await cmdValidateResult(argv);
|
|
2991
3149
|
return;
|
|
3150
|
+
case "quota":
|
|
3151
|
+
await cmdQuota(argv);
|
|
3152
|
+
return;
|
|
2992
3153
|
default:
|
|
2993
3154
|
console.error(`Unknown command: ${command}`);
|
|
2994
|
-
console.error("Available commands: sample-run, advance-audit, next-step, run-to-completion, worker-run, import-external-analyzer, intake, plan, ingest-results, explain-task, update-runtime-validation, validate, validate-results, requeue, synthesize, cleanup, mcp, prepare-dispatch, merge-and-ingest, submit-packet, validate-result");
|
|
3155
|
+
console.error("Available commands: sample-run, advance-audit, next-step, run-to-completion, worker-run, import-external-analyzer, intake, plan, ingest-results, explain-task, update-runtime-validation, validate, validate-results, requeue, synthesize, cleanup, mcp, prepare-dispatch, merge-and-ingest, submit-packet, validate-result, quota");
|
|
2995
3156
|
process.exitCode = 1;
|
|
2996
3157
|
}
|
|
2997
3158
|
}
|
|
@@ -6,6 +6,11 @@ export interface BuildReviewPacketOptions {
|
|
|
6
6
|
lineIndex?: Record<string, number>;
|
|
7
7
|
maxTasksPerPacket?: number;
|
|
8
8
|
targetPacketLines?: number;
|
|
9
|
+
/**
|
|
10
|
+
* Available context budget in tokens (context_tokens − reserved_output_tokens).
|
|
11
|
+
* When provided, targetPacketLines is capped to fit within this budget.
|
|
12
|
+
*/
|
|
13
|
+
maxContextTokens?: number;
|
|
9
14
|
}
|
|
10
15
|
export declare function buildReviewPackets(tasks: AuditTask[], options?: BuildReviewPacketOptions): ReviewPacket[];
|
|
11
16
|
export declare function orderTasksForPacketReview(tasks: AuditTask[], options?: BuildReviewPacketOptions): AuditTask[];
|
|
@@ -949,7 +949,11 @@ function buildPacket(tasks, packetIndex, lineIndex, graphEdges = [], graphBundle
|
|
|
949
949
|
}
|
|
950
950
|
function buildReviewPacketPlanningData(tasks, options = {}) {
|
|
951
951
|
const maxTasksPerPacket = options.maxTasksPerPacket ?? DEFAULT_MAX_TASKS_PER_PACKET;
|
|
952
|
-
const
|
|
952
|
+
const configuredTargetLines = options.targetPacketLines ?? DEFAULT_TARGET_PACKET_LINES;
|
|
953
|
+
const targetPacketLines = options.maxContextTokens != null
|
|
954
|
+
? Math.min(configuredTargetLines, Math.max(1, Math.floor((options.maxContextTokens - ESTIMATED_PACKET_PROMPT_TOKENS) /
|
|
955
|
+
ESTIMATED_TOKENS_PER_LINE)))
|
|
956
|
+
: configuredTargetLines;
|
|
953
957
|
const graphEdges = collectGraphEdges(options.graphBundle);
|
|
954
958
|
const groups = buildTaskGroups(tasks);
|
|
955
959
|
const planningGraphEdges = buildPlanningGraphEdges(groups, graphEdges, options.graphBundle, options.lineIndex, targetPacketLines);
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
export { resolveLimits, lookupKnownModel, classifyProvider } from "./limits.js";
|
|
2
|
+
export type { LimitResolutionResult, ResolveLimitsOptions, ProviderType } from "./limits.js";
|
|
3
|
+
export { readQuotaState, writeQuotaState, computeMaxSafeConcurrency, recordWaveOutcome, getQuotaStatePath, decayWeight, applyDecayToEntry, } from "./state.js";
|
|
4
|
+
export { scheduleWave, buildProviderModelKey } from "./scheduler.js";
|
|
5
|
+
export type { ScheduleWaveOptions } from "./scheduler.js";
|
|
6
|
+
export { probeProvider } from "./probe.js";
|
|
7
|
+
export type { ProbeResult } from "./probe.js";
|
|
8
|
+
export type { ResolvedLimits, LimitSource, LimitConfidence, QuotaState, QuotaStateEntry, ConcurrencyBucket, WaveSchedule, DispatchQuota, ObservedWaveOutcome, } from "./types.js";
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
export { resolveLimits, lookupKnownModel, classifyProvider } from "./limits.js";
|
|
2
|
+
export { readQuotaState, writeQuotaState, computeMaxSafeConcurrency, recordWaveOutcome, getQuotaStatePath, decayWeight, applyDecayToEntry, } from "./state.js";
|
|
3
|
+
export { scheduleWave, buildProviderModelKey } from "./scheduler.js";
|
|
4
|
+
export { probeProvider } from "./probe.js";
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import type { ResolvedProviderName, SessionConfig } from "../types/sessionConfig.js";
|
|
2
|
+
import type { LimitConfidence, LimitSource, ResolvedLimits } from "./types.js";
|
|
3
|
+
export type ProviderType = "hosted" | "local" | "unknown";
|
|
4
|
+
export declare function classifyProvider(providerName: ResolvedProviderName): ProviderType;
|
|
5
|
+
export declare function lookupKnownModel(modelKey: string): Pick<ResolvedLimits, "context_tokens" | "output_tokens"> | undefined;
|
|
6
|
+
export interface LimitResolutionResult {
|
|
7
|
+
limits: ResolvedLimits;
|
|
8
|
+
source: LimitSource;
|
|
9
|
+
confidence: LimitConfidence;
|
|
10
|
+
}
|
|
11
|
+
export interface ResolveLimitsOptions {
|
|
12
|
+
providerName: ResolvedProviderName;
|
|
13
|
+
sessionConfig: SessionConfig;
|
|
14
|
+
hostModel?: string | null;
|
|
15
|
+
}
|
|
16
|
+
export declare function resolveLimits(options: ResolveLimitsOptions): LimitResolutionResult;
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
// RPM/TPM are omitted here — they are tier-dependent and must come from learning.
|
|
2
|
+
const KNOWN_MODEL_LIMITS = {
|
|
3
|
+
"anthropic/claude-opus-4-7": { context_tokens: 200_000, output_tokens: 32_000 },
|
|
4
|
+
"anthropic/claude-sonnet-4-6": { context_tokens: 200_000, output_tokens: 8_192 },
|
|
5
|
+
"anthropic/claude-haiku-4-5": { context_tokens: 200_000, output_tokens: 8_192 },
|
|
6
|
+
"anthropic/claude-opus-4-5": { context_tokens: 200_000, output_tokens: 8_192 },
|
|
7
|
+
"anthropic/claude-sonnet-4-5": { context_tokens: 200_000, output_tokens: 8_192 },
|
|
8
|
+
"openai/gpt-4o": { context_tokens: 128_000, output_tokens: 16_384 },
|
|
9
|
+
"openai/gpt-4o-mini": { context_tokens: 128_000, output_tokens: 16_384 },
|
|
10
|
+
"google/gemini-2.0-flash": { context_tokens: 1_048_576, output_tokens: 8_192 },
|
|
11
|
+
"google/gemini-1.5-pro": { context_tokens: 2_097_152, output_tokens: 8_192 },
|
|
12
|
+
};
|
|
13
|
+
/**
 * Classify a provider name.
 *
 * @param {string} providerName - Resolved provider name.
 * @returns {"hosted" | "local" | "unknown"} "hosted" for claude-code and
 *   opencode, "local" for local-subprocess, and "unknown" for everything else
 *   (including subprocess-template and vscode-task).
 */
export function classifyProvider(providerName) {
    if (providerName === "claude-code" || providerName === "opencode") {
        return "hosted";
    }
    if (providerName === "local-subprocess") {
        return "local";
    }
    return "unknown";
}
|
|
26
|
+
/**
 * Look up static context/output token limits for a model.
 *
 * The key is lower-cased and trimmed before the lookup. Only the table's own
 * entries are considered, so keys such as "constructor" or "__proto__" do not
 * resolve to members inherited from Object.prototype.
 *
 * @param {string} modelKey - Model identifier, e.g. "openai/gpt-4o".
 * @returns The matching limits entry, or `undefined` when the model is not in
 *   the table.
 */
export function lookupKnownModel(modelKey) {
    const normalizedKey = modelKey.toLowerCase().trim();
    return Object.prototype.hasOwnProperty.call(KNOWN_MODEL_LIMITS, normalizedKey)
        ? KNOWN_MODEL_LIMITS[normalizedKey]
        : undefined;
}
|
|
29
|
+
/**
 * Build the fallback limits used when nothing model-specific is known.
 *
 * Context and output sizes come from `quota.default_context_tokens` and
 * `quota.reserved_output_tokens` (32,000 and 4,096 when unset); all
 * per-minute rate fields are null.
 *
 * @param {object} sessionConfig - Loaded session configuration.
 */
function defaultLimits(sessionConfig) {
    const contextTokens = sessionConfig.quota?.default_context_tokens ?? 32_000;
    const outputTokens = sessionConfig.quota?.reserved_output_tokens ?? 4_096;
    return {
        context_tokens: contextTokens,
        output_tokens: outputTokens,
        requests_per_minute: null,
        input_tokens_per_minute: null,
        output_tokens_per_minute: null,
    };
}
|
|
39
|
+
/**
 * Resolve the token/rate limits to assume for a provider + model.
 *
 * Resolution order:
 *   1. `sessionConfig.quota.models[hostModel]` — source "explicit_config",
 *      confidence "high". Missing context/output values fall back to the
 *      defaults; missing rate values become null.
 *   2. The static known-model table — source "known_metadata", confidence
 *      "medium". Only context/output sizes are known; rate fields are null.
 *   3. Conservative defaults — source "default", confidence "low".
 *
 * @param {object} options
 * @param {string} options.providerName - Accepted for interface
 *   compatibility; not consulted here.
 * @param {object} options.sessionConfig - Loaded session configuration.
 * @param {string | null} [options.hostModel] - Model identifier, if known.
 * @returns {{ limits: object, source: string, confidence: string }}
 */
export function resolveLimits(options) {
    const { sessionConfig, hostModel } = options;
    const quota = sessionConfig.quota ?? {};
    const defaults = defaultLimits(sessionConfig);
    if (hostModel) {
        // 1. Explicit per-model config overrides. Only own keys count, so a
        //    model id like "constructor" cannot match an inherited member.
        const models = quota.models;
        const override = models != null && Object.prototype.hasOwnProperty.call(models, hostModel)
            ? models[hostModel]
            : undefined;
        if (override) {
            return {
                limits: {
                    context_tokens: override.context_tokens ?? defaults.context_tokens,
                    output_tokens: override.output_tokens ?? defaults.output_tokens,
                    requests_per_minute: override.requests_per_minute ?? null,
                    input_tokens_per_minute: override.input_tokens_per_minute ?? null,
                    output_tokens_per_minute: override.output_tokens_per_minute ?? null,
                },
                source: "explicit_config",
                confidence: "high",
            };
        }
        // 2. Static known-model database (context/output only; RPM/TPM from learning)
        const known = lookupKnownModel(hostModel);
        if (known) {
            return {
                limits: {
                    context_tokens: known.context_tokens,
                    output_tokens: known.output_tokens,
                    requests_per_minute: null,
                    input_tokens_per_minute: null,
                    output_tokens_per_minute: null,
                },
                source: "known_metadata",
                confidence: "medium",
            };
        }
    }
    // 3. Conservative defaults for all provider types
    return { limits: defaults, source: "default", confidence: "low" };
}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
export interface ProbeResult {
|
|
2
|
+
supported: boolean;
|
|
3
|
+
reason: string;
|
|
4
|
+
}
|
|
5
|
+
/**
|
|
6
|
+
* Probe a provider to discover its rate limits.
|
|
7
|
+
*
|
|
8
|
+
* Only subprocess-template supports direct probing since it is the only
|
|
9
|
+
* provider where the auditor controls the API call. IDE providers
|
|
10
|
+
* (claude-code, opencode) select the model internally; their limits come
|
|
11
|
+
* from known-model metadata or learned behavior.
|
|
12
|
+
*/
|
|
13
|
+
export declare function probeProvider(providerName: string, probeMode?: "auto" | "never" | "force"): Promise<ProbeResult>;
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
/**
 * Probe a provider for its rate limits.
 *
 * No provider can be probed yet, so every call resolves to
 * `{ supported: false, reason }`; only the reason differs:
 *   - probe mode "never": probing is disabled by configuration;
 *   - any provider other than subprocess-template: probing does not apply,
 *     limits come from known-model metadata or learned behavior;
 *   - subprocess-template: the probe is not implemented yet.
 *
 * @param {string} providerName - Resolved provider name.
 * @param {"auto" | "never" | "force"} [probeMode="auto"] - Requested probe mode.
 * @returns {Promise<{ supported: boolean, reason: string }>}
 */
export async function probeProvider(providerName, probeMode = "auto") {
    const unsupported = (reason) => ({ supported: false, reason });
    if (probeMode === "never") {
        return unsupported("probe disabled by config");
    }
    if (providerName === "subprocess-template") {
        // subprocess-template probe not yet implemented
        return unsupported("subprocess-template probe not yet implemented");
    }
    return unsupported(`probe not applicable for ${providerName} — limits come from known-model metadata or learned behavior`);
}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import type { ResolvedProviderName, SessionConfig } from "../types/sessionConfig.js";
|
|
2
|
+
import type { QuotaStateEntry, WaveSchedule } from "./types.js";
|
|
3
|
+
export interface ScheduleWaveOptions {
|
|
4
|
+
providerName: ResolvedProviderName;
|
|
5
|
+
sessionConfig: SessionConfig;
|
|
6
|
+
hostModel: string | null;
|
|
7
|
+
requestedConcurrency: number;
|
|
8
|
+
/** Average estimated tokens per packet/worker. Used for TPM budget. */
|
|
9
|
+
estimatedPacketTokens?: number;
|
|
10
|
+
quotaStateEntry?: QuotaStateEntry | null;
|
|
11
|
+
}
|
|
12
|
+
export declare function scheduleWave(options: ScheduleWaveOptions): WaveSchedule;
|
|
13
|
+
/** Build the state key used for indexing quota-state.json entries. */
|
|
14
|
+
export declare function buildProviderModelKey(providerName: string, hostModel: string | null | undefined): string;
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
import { classifyProvider, resolveLimits } from "./limits.js";
|
|
2
|
+
import { computeMaxSafeConcurrency } from "./state.js";
|
|
3
|
+
/**
 * Decide how many workers may be dispatched in the next wave.
 *
 * With `quota.enabled === false` the requested concurrency is returned
 * unchanged, with default limits. Otherwise:
 *   - an unexpired `cooldown_until` on the state entry forces a wave of 1 and
 *     is echoed back in the schedule;
 *   - failing that, the requested concurrency is capped in turn by the
 *     requests-per-minute limit, by the input-tokens-per-minute limit (only
 *     when a per-packet estimate is given), and then either by the learned
 *     safe concurrency (when a state entry exists) or, for hosted providers
 *     whose limits are plain defaults, by `quota.unknown_hosted_concurrency`
 *     (default 1).
 * The resulting wave size is never below 1.
 *
 * @param {object} options - See ScheduleWaveOptions.
 * @returns {object} Wave schedule: wave_size, estimated_wave_tokens,
 *   cooldown_until, confidence, source, resolved_limits, model.
 */
export function scheduleWave(options) {
    const { providerName, sessionConfig, hostModel, requestedConcurrency, estimatedPacketTokens: tokensPerPacket = 0, quotaStateEntry: learnedEntry = null, } = options;
    const quotaConfig = sessionConfig.quota ?? {};
    // Assembles the result in the fixed field order shared by both return paths.
    const buildSchedule = (size, fields) => ({
        wave_size: size,
        estimated_wave_tokens: size * tokensPerPacket,
        ...fields,
        model: hostModel,
    });
    if (quotaConfig.enabled === false) {
        return buildSchedule(requestedConcurrency, {
            cooldown_until: null,
            confidence: "high",
            source: "default",
            resolved_limits: {
                context_tokens: quotaConfig.default_context_tokens ?? 32_000,
                output_tokens: quotaConfig.reserved_output_tokens ?? 4_096,
                requests_per_minute: null,
                input_tokens_per_minute: null,
                output_tokens_per_minute: null,
            },
        });
    }
    const safetyMargin = quotaConfig.safety_margin ?? 0.8;
    const halfLifeHours = quotaConfig.empirical_half_life_hours ?? 24;
    const providerType = classifyProvider(providerName);
    const { limits, source, confidence } = resolveLimits({ providerName, sessionConfig, hostModel });
    // Respect an active cooldown period
    const recordedCooldown = learnedEntry?.cooldown_until;
    const coolingDown = Boolean(recordedCooldown) && new Date(recordedCooldown).getTime() > Date.now();
    let waveSize = coolingDown ? 1 : requestedConcurrency;
    if (!coolingDown) {
        const { requests_per_minute: rpm, input_tokens_per_minute: inputTpm } = limits;
        // Cap by requests-per-minute
        if (rpm != null) {
            waveSize = Math.min(waveSize, Math.max(1, Math.floor(rpm * safetyMargin)));
        }
        // Cap by input tokens-per-minute
        if (inputTpm != null && tokensPerPacket > 0) {
            waveSize = Math.min(waveSize, Math.max(1, Math.floor((inputTpm * safetyMargin) / tokensPerPacket)));
        }
        if (learnedEntry) {
            waveSize = Math.min(waveSize, computeMaxSafeConcurrency(learnedEntry, halfLifeHours));
        }
        else if (providerType === "hosted" && source === "default") {
            // Hosted provider with no learned data and no model-specific limits:
            // stay conservative. Local providers with no learned data keep the
            // requested concurrency.
            waveSize = Math.min(waveSize, quotaConfig.unknown_hosted_concurrency ?? 1);
        }
    }
    return buildSchedule(Math.max(1, waveSize), {
        cooldown_until: coolingDown ? recordedCooldown : null,
        confidence,
        source,
        resolved_limits: limits,
    });
}
|
|
73
|
+
/**
 * Build the key under which a provider/model pair is stored in
 * quota-state.json: `<provider>/<model>`, or `<provider>/*` when no model is
 * given.
 */
export function buildProviderModelKey(providerName, hostModel) {
    const modelSegment = hostModel || "*";
    return `${providerName}/${modelSegment}`;
}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import type { ObservedWaveOutcome, QuotaState, QuotaStateEntry } from "./types.js";
|
|
2
|
+
/** Path of the persisted quota-state JSON file. */
export declare function getQuotaStatePath(): string;
/**
 * Exponentially decay `weight`: it halves once per `halfLifeHours` of
 * `elapsedHours`. Yields 0 when the weight or the half-life is not positive.
 */
export declare function decayWeight(weight: number, elapsedHours: number, halfLifeHours: number): number;
/**
 * Return `entry` with every bucket's success/failure weight decayed by the
 * time elapsed since `entry.updated_at`.
 */
export declare function applyDecayToEntry(entry: QuotaStateEntry, halfLifeHours: number): QuotaStateEntry;
/**
 * Load the persisted quota state. Resolves to an empty version-1 state when
 * the file is missing or malformed.
 */
export declare function readQuotaState(): Promise<QuotaState>;
/** Persist `state` as pretty-printed JSON, creating the state directory if needed. */
export declare function writeQuotaState(state: QuotaState): Promise<void>;
/**
 * Returns the highest concurrency level N (scanning upward from 1, up to
 * `maxToCheck`) such that every level 1..N has a bucket whose decayed success
 * evidence meets the minimum trust threshold and exceeds its failure evidence.
 * The scan stops at the first missing or untrusted level; the result is
 * never below 1.
 */
export declare function computeMaxSafeConcurrency(entry: QuotaStateEntry, halfLifeHours: number, maxToCheck?: number): number;
/**
 * Fold one observed wave outcome into the stored entry for
 * `providerModelKey` (after decaying existing evidence) and write the
 * updated state back to disk.
 */
export declare function recordWaveOutcome(providerModelKey: string, outcome: ObservedWaveOutcome, halfLifeHours: number): Promise<void>;
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
import { mkdir, readFile, writeFile } from "node:fs/promises";
|
|
2
|
+
import { homedir } from "node:os";
|
|
3
|
+
import { join } from "node:path";
|
|
4
|
+
// Minimum decayed success weight a bucket must hold before it is trusted.
const MIN_EVIDENCE_WEIGHT = 0.5;
// Quota state lives in a per-user directory under the home directory.
const STATE_DIR = join(homedir(), ".audit-code");
const STATE_PATH = join(STATE_DIR, "quota-state.json");
/**
 * Location of the persisted quota-state file.
 *
 * @returns {string} Path to quota-state.json inside the state directory.
 */
export function getQuotaStatePath() {
    return STATE_PATH;
}
|
|
11
|
+
/**
 * Exponentially decay an evidence weight: the weight halves once per
 * `halfLifeHours` of elapsed time.
 *
 * Decay never increases a weight. Unusable inputs (non-numbers or NaN, e.g.
 * from a bucket with a missing field or an unparsable timestamp) yield 0 so
 * that NaN cannot leak into stored quota state.
 *
 * @param {number} weight - Current evidence weight.
 * @param {number} elapsedHours - Hours since the weight was last updated.
 * @param {number} halfLifeHours - Hours after which the weight halves.
 * @returns {number} The decayed weight, or 0 for non-positive/unusable inputs.
 */
export function decayWeight(weight, elapsedHours, halfLifeHours) {
    const isUsable = (value) => typeof value === "number" && !Number.isNaN(value);
    if (!isUsable(weight) || !isUsable(elapsedHours) || !isUsable(halfLifeHours)) {
        return 0;
    }
    if (halfLifeHours <= 0 || weight <= 0) {
        return 0;
    }
    // A negative elapsed time (clock skew, future timestamp) must not
    // amplify evidence, so treat it as "no time has passed".
    const effectiveElapsed = Math.max(0, elapsedHours);
    return weight * Math.pow(0.5, effectiveElapsed / halfLifeHours);
}
|
|
16
|
+
/**
 * Decay every bucket of a quota-state entry by the time elapsed since
 * `entry.updated_at`.
 *
 * When essentially no time has passed (under 0.001 h) the entry itself is
 * returned unchanged; otherwise a shallow copy with freshly built buckets is
 * returned and the input is left untouched.
 *
 * @param {object} entry - Entry with `updated_at` and `buckets`.
 * @param {number} halfLifeHours - Half-life used for the decay.
 * @returns {object} The same entry, or a copy with decayed bucket weights.
 */
export function applyDecayToEntry(entry, halfLifeHours) {
    const MS_PER_HOUR = 1000 * 60 * 60;
    const ageMs = Date.now() - new Date(entry.updated_at).getTime();
    const elapsedHours = ageMs / MS_PER_HOUR;
    if (elapsedHours < 0.001) {
        return entry;
    }
    const agedBuckets = {};
    for (const level of Object.keys(entry.buckets)) {
        const { success_weight, failure_weight } = entry.buckets[level];
        agedBuckets[level] = {
            success_weight: decayWeight(success_weight, elapsedHours, halfLifeHours),
            failure_weight: decayWeight(failure_weight, elapsedHours, halfLifeHours),
        };
    }
    return { ...entry, buckets: agedBuckets };
}
|
|
29
|
+
/**
 * Structural guard for parsed quota-state JSON.
 *
 * Accepts only a plain object with `version === 1` and an `entries` member
 * that is itself a plain object. `typeof null` and `typeof []` are both
 * "object", so null and array `entries` are rejected explicitly; otherwise
 * callers indexing `state.entries[key]` would throw or misbehave.
 *
 * @param {unknown} value - Result of JSON.parse on the state file.
 * @returns {boolean} True when `value` has the version-1 quota-state shape.
 */
function isQuotaState(value) {
    const isPlainRecord = (candidate) => candidate !== null &&
        typeof candidate === "object" &&
        !Array.isArray(candidate);
    return (isPlainRecord(value) &&
        value["version"] === 1 &&
        isPlainRecord(value["entries"]));
}
|
|
36
|
+
/**
 * Load the persisted quota state from disk.
 *
 * This is deliberately best-effort: a missing, unreadable, or unparsable
 * file, or one that fails the shape check, all result in a fresh empty state.
 *
 * @returns {Promise<object>} The stored state, or `{ version: 1, entries: {} }`.
 */
export async function readQuotaState() {
    const freshState = () => ({ version: 1, entries: {} });
    let candidate;
    try {
        const text = await readFile(STATE_PATH, "utf8");
        candidate = JSON.parse(text);
    }
    catch {
        // File not found or malformed — start fresh
        return freshState();
    }
    return isQuotaState(candidate) ? candidate : freshState();
}
|
|
48
|
+
/**
 * Persist the quota state as pretty-printed JSON (2-space indent, trailing
 * newline), creating the state directory first when it does not exist.
 *
 * @param {object} state - The quota state to store.
 * @returns {Promise<void>}
 */
export async function writeQuotaState(state) {
    const serialized = `${JSON.stringify(state, null, 2)}\n`;
    await mkdir(STATE_DIR, { recursive: true });
    await writeFile(STATE_PATH, serialized, "utf8");
}
|
|
52
|
+
/**
 * Determine the highest concurrency level that learned evidence supports.
 *
 * Levels are scanned upward from 1. A level counts as safe when its decayed
 * bucket exists, holds at least MIN_EVIDENCE_WEIGHT of success weight, and
 * its success weight exceeds its failure weight. The scan stops at the first
 * level that is missing or not safe, so the result is the top of the
 * contiguous safe run starting at 1, and never less than 1.
 *
 * @param {object} entry - Quota-state entry for one provider/model key.
 * @param {number} halfLifeHours - Half-life used to decay the evidence.
 * @param {number} [maxToCheck=32] - Highest level to consider.
 * @returns {number} Highest safe concurrency level (>= 1).
 */
export function computeMaxSafeConcurrency(entry, halfLifeHours, maxToCheck = 32) {
    const { buckets } = applyDecayToEntry(entry, halfLifeHours);
    let highestSafe = 1;
    for (let level = 1; level <= maxToCheck; level++) {
        const evidence = buckets[String(level)];
        const isTrusted = Boolean(evidence) &&
            evidence.success_weight >= MIN_EVIDENCE_WEIGHT &&
            evidence.success_weight > evidence.failure_weight;
        if (!isTrusted) {
            return highestSafe;
        }
        highestSafe = level;
    }
    return highestSafe;
}
|
|
73
|
+
/**
 * Create an empty quota-state entry stamped with the current time: no
 * buckets, no cooldown, and no recorded rate-limit timestamp.
 *
 * @returns {object} A fresh entry object (new `buckets` object per call).
 */
function blankEntry() {
    const createdAt = new Date().toISOString();
    const entry = {
        updated_at: createdAt,
        buckets: {},
        cooldown_until: null,
        last_429_at: null,
    };
    return entry;
}
|
|
76
|
+
/**
 * Fold one observed wave outcome into the persisted quota state.
 *
 * The stored entry for `providerModelKey` (or a blank one) is decayed first,
 * then updated:
 *  - "success" at concurrency N adds 1.0 success weight to levels 1..N;
 *  - any other outcome stamps `last_429_at`, copies a truthy
 *    `outcome.cooldown_until`, and adds 1.0 failure weight to levels
 *    N..N+4.
 * Finally `updated_at` is refreshed and the whole state is written back.
 *
 * @param {string} providerModelKey - Key of the entry to update.
 * @param {object} outcome - Observed outcome (`outcome`, `concurrency`, optional `cooldown_until`).
 * @param {number} halfLifeHours - Half-life used to decay existing evidence.
 * @returns {Promise<void>}
 */
export async function recordWaveOutcome(providerModelKey, outcome, halfLifeHours) {
    const state = await readQuotaState();
    const stored = state.entries[providerModelKey] ?? blankEntry();
    const entry = applyDecayToEntry(stored, halfLifeHours);
    // Add one unit of evidence to `field` for every level in [firstLevel, lastLevel].
    const addEvidence = (field, firstLevel, lastLevel) => {
        for (let level = firstLevel; level <= lastLevel; level++) {
            const key = String(level);
            const bucket = entry.buckets[key] ?? { success_weight: 0, failure_weight: 0 };
            bucket[field] += 1.0;
            entry.buckets[key] = bucket;
        }
    };
    if (outcome.outcome !== "success") {
        entry.last_429_at = new Date().toISOString();
        if (outcome.cooldown_until) {
            entry.cooldown_until = outcome.cooldown_until;
        }
        // Failure at N marks N and the next four levels as unsafe
        addEvidence("failure_weight", outcome.concurrency, outcome.concurrency + 4);
    }
    else {
        // Success at N proves 1..N are all safe
        addEvidence("success_weight", 1, outcome.concurrency);
    }
    entry.updated_at = new Date().toISOString();
    state.entries[providerModelKey] = entry;
    await writeQuotaState(state);
}
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
/** Where a set of resolved limits came from. */
export type LimitSource = "explicit_config" | "cli_flags" | "known_metadata" | "learned" | "default";
/** How confident the scheduler is in the resolved limits. */
export type LimitConfidence = "high" | "medium" | "low";
|
|
3
|
+
/**
 * The rate and context limits used to compute a wave size.
 * The per-minute limits are nullable; the scheduler only applies the
 * input-token cap when `input_tokens_per_minute` is non-null.
 */
export interface ResolvedLimits {
    context_tokens: number;
    output_tokens: number;
    requests_per_minute: number | null;
    input_tokens_per_minute: number | null;
    output_tokens_per_minute: number | null;
}
|
|
10
|
+
/**
 * Accumulated evidence for one concurrency level. Each recorded outcome adds
 * 1.0 to one of the weights; both weights decay over time by a half-life.
 */
export interface ConcurrencyBucket {
    /** Decaying count of waves that succeeded at (or above) this level. */
    success_weight: number;
    /** Decaying count of non-success outcomes attributed to this level. */
    failure_weight: number;
}
|
|
14
|
+
/** Learned quota evidence for a single provider/model key. */
export interface QuotaStateEntry {
    /** ISO timestamp of the last update; decay is measured from this instant. */
    updated_at: string;
    /** Evidence buckets keyed by concurrency level as a string ("1", "2", ...). */
    buckets: Record<string, ConcurrencyBucket>;
    /** Cooldown deadline copied from the most recent non-success outcome that supplied one, or null. */
    cooldown_until: string | null;
    /** ISO timestamp of the last recorded non-success outcome, or null. */
    last_429_at: string | null;
}
|
|
20
|
+
/** Root document persisted in quota-state.json. */
export interface QuotaState {
    /** Format version; only version 1 is accepted when reading. */
    version: 1;
    /** Entries keyed by the provider/model state key. */
    entries: Record<string, QuotaStateEntry>;
}
|
|
24
|
+
/** Result of scheduling one dispatch wave. */
export interface WaveSchedule {
    /** Number of packets to launch in the wave; the scheduler clamps this to at least 1. */
    wave_size: number;
    /** `wave_size` multiplied by the estimated tokens per packet. */
    estimated_wave_tokens: number;
    cooldown_until: string | null;
    confidence: LimitConfidence;
    source: LimitSource;
    /** The limits the wave size was computed from. */
    resolved_limits: ResolvedLimits;
    /** The host model the schedule was computed for, or null. */
    model: string | null;
}
|
|
33
|
+
/**
 * Quota schedule document for a prepare-dispatch run (contract
 * "audit-code-dispatch-quota/v1alpha1"); mirrors the dispatch_quota JSON schema.
 */
export interface DispatchQuota {
    contract_version: "audit-code-dispatch-quota/v1alpha1";
    /** The dispatch run this quota schedule applies to. */
    run_id: string;
    /** The host model this schedule was computed for, or null if unknown. */
    model: string | null;
    /** The rate and context limits used to compute the wave size. */
    resolved_limits: ResolvedLimits;
    /** How confident the scheduler is in the resolved limits. */
    confidence: LimitConfidence;
    /** Where the resolved limits came from. */
    source: LimitSource;
    /** Maximum number of packets to dispatch in a single wave. */
    wave_size: number;
    /** Estimated total input tokens for one wave at the recommended wave_size. */
    estimated_wave_tokens: number;
    /** If non-null, the host should wait until this timestamp before launching the next wave. */
    cooldown_until: string | null;
}
|
|
44
|
+
/** What happened when a wave was dispatched; input to recordWaveOutcome. */
export interface ObservedWaveOutcome {
    /** Concurrency level the wave ran at; selects which evidence buckets are updated. */
    concurrency: number;
    estimated_tokens: number;
    /** "success" adds success evidence; any other value is recorded as failure evidence. */
    outcome: "success" | "rate_limited" | "timeout";
    /** When truthy on a non-success outcome, stored as the entry's cooldown deadline. */
    cooldown_until?: string | null;
    // NOTE(review): presumably the provider-reported limit reset time — confirm against the caller.
    reset_at?: string | null;
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -20,6 +20,33 @@ export interface VSCodeTaskConfig {
|
|
|
20
20
|
command_template: string[];
|
|
21
21
|
env?: Record<string, string>;
|
|
22
22
|
}
|
|
23
|
+
/**
 * Optional limit overrides for a single model, used as the values of
 * `QuotaConfig.models`. Every field is optional.
 */
export interface QuotaModelLimits {
    context_tokens?: number;
    output_tokens?: number;
    requests_per_minute?: number;
    input_tokens_per_minute?: number;
    output_tokens_per_minute?: number;
}
|
|
30
|
+
/** Session-level settings for quota-aware wave scheduling; every field is optional. */
export interface QuotaConfig {
    /** Set to false to disable all quota scheduling (default: true). */
    enabled?: boolean;
    /** Whether to probe the provider for live limits (default: "auto"). */
    probe?: "auto" | "never" | "force";
    /** Fraction of known limits to actually use (default: 0.8). */
    safety_margin?: number;
    /** Concurrency ceiling for hosted providers with no learned data (default: 1). */
    unknown_hosted_concurrency?: number;
    /** Concurrency for local providers with no learned data (default: "unlimited"). */
    unknown_local_concurrency?: number | "unlimited";
    /** Assumed context window when the model is not recognized (default: 32000). */
    default_context_tokens?: number;
    /** Tokens reserved for model output per request (default: 4096). */
    reserved_output_tokens?: number;
    /** Half-life of empirical success/failure evidence in hours (default: 24). */
    empirical_half_life_hours?: number;
    /** Per-model overrides keyed by "provider/model". */
    models?: Record<string, QuotaModelLimits>;
}
|
|
23
50
|
export declare const PROVIDER_SECTION_KEYS: {
|
|
24
51
|
readonly "subprocess-template": "subprocess_template";
|
|
25
52
|
readonly "claude-code": "claude_code";
|
|
@@ -40,4 +67,5 @@ export interface SessionConfig {
|
|
|
40
67
|
vscode_task?: VSCodeTaskConfig;
|
|
41
68
|
agent_task_batch_size?: number;
|
|
42
69
|
parallel_workers?: number;
|
|
70
|
+
quota?: QuotaConfig;
|
|
43
71
|
}
|
package/package.json
CHANGED
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "http://json-schema.org/draft-07/schema#",
|
|
3
|
+
"$id": "audit-code-dispatch-quota/v1alpha1",
|
|
4
|
+
"title": "DispatchQuota",
|
|
5
|
+
"description": "Quota schedule for a prepare-dispatch run. Written beside dispatch-plan.json. Hosts must launch at most wave_size packets per wave, then re-read this file before the next wave to pick up any updated limits.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"required": [
|
|
8
|
+
"contract_version",
|
|
9
|
+
"run_id",
|
|
10
|
+
"model",
|
|
11
|
+
"resolved_limits",
|
|
12
|
+
"confidence",
|
|
13
|
+
"source",
|
|
14
|
+
"wave_size",
|
|
15
|
+
"estimated_wave_tokens",
|
|
16
|
+
"cooldown_until"
|
|
17
|
+
],
|
|
18
|
+
"additionalProperties": false,
|
|
19
|
+
"properties": {
|
|
20
|
+
"contract_version": {
|
|
21
|
+
"type": "string",
|
|
22
|
+
"const": "audit-code-dispatch-quota/v1alpha1"
|
|
23
|
+
},
|
|
24
|
+
"run_id": {
|
|
25
|
+
"type": "string",
|
|
26
|
+
"description": "The dispatch run this quota schedule applies to."
|
|
27
|
+
},
|
|
28
|
+
"model": {
|
|
29
|
+
"type": ["string", "null"],
|
|
30
|
+
"description": "The host model this schedule was computed for, or null if unknown."
|
|
31
|
+
},
|
|
32
|
+
"resolved_limits": {
|
|
33
|
+
"type": "object",
|
|
34
|
+
"description": "The rate and context limits used to compute the wave size.",
|
|
35
|
+
"required": [
|
|
36
|
+
"context_tokens",
|
|
37
|
+
"output_tokens",
|
|
38
|
+
"requests_per_minute",
|
|
39
|
+
"input_tokens_per_minute",
|
|
40
|
+
"output_tokens_per_minute"
|
|
41
|
+
],
|
|
42
|
+
"additionalProperties": false,
|
|
43
|
+
"properties": {
|
|
44
|
+
"context_tokens": { "type": "integer", "minimum": 1 },
|
|
45
|
+
"output_tokens": { "type": "integer", "minimum": 1 },
|
|
46
|
+
"requests_per_minute": { "type": ["integer", "null"], "minimum": 1 },
|
|
47
|
+
"input_tokens_per_minute": { "type": ["integer", "null"], "minimum": 1 },
|
|
48
|
+
"output_tokens_per_minute": { "type": ["integer", "null"], "minimum": 1 }
|
|
49
|
+
}
|
|
50
|
+
},
|
|
51
|
+
"confidence": {
|
|
52
|
+
"type": "string",
|
|
53
|
+
"enum": ["high", "medium", "low"],
|
|
54
|
+
"description": "How confident the scheduler is in the resolved limits."
|
|
55
|
+
},
|
|
56
|
+
"source": {
|
|
57
|
+
"type": "string",
|
|
58
|
+
"enum": ["explicit_config", "cli_flags", "known_metadata", "learned", "default"],
|
|
59
|
+
"description": "Where the resolved limits came from."
|
|
60
|
+
},
|
|
61
|
+
"wave_size": {
|
|
62
|
+
"type": "integer",
|
|
63
|
+
"minimum": 1,
|
|
64
|
+
"description": "Maximum number of packets to dispatch in a single wave."
|
|
65
|
+
},
|
|
66
|
+
"estimated_wave_tokens": {
|
|
67
|
+
"type": "integer",
|
|
68
|
+
"minimum": 0,
|
|
69
|
+
"description": "Estimated total input tokens for one wave at the recommended wave_size."
|
|
70
|
+
},
|
|
71
|
+
"cooldown_until": {
|
|
72
|
+
"type": ["string", "null"],
|
|
73
|
+
"format": "date-time",
|
|
74
|
+
"description": "If non-null, the host should wait until this timestamp before launching the next wave."
|
|
75
|
+
}
|
|
76
|
+
}
|
|
77
|
+
}
|