auditor-lambda 0.3.33 → 0.3.36
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +40 -30
- package/dist/orchestrator/reviewPackets.d.ts +3 -0
- package/dist/orchestrator/reviewPackets.js +13 -2
- package/dist/orchestrator/selectiveDeepening.d.ts +2 -0
- package/dist/orchestrator/selectiveDeepening.js +10 -1
- package/dist/orchestrator/state.js +2 -17
- package/dist/providers/opencodeProvider.js +23 -3
- package/dist/providers/spawnLoggedCommand.js +0 -5
- package/dist/quota/compositeQuotaSource.d.ts +7 -0
- package/dist/quota/compositeQuotaSource.js +20 -0
- package/dist/quota/errorParsers/claudeCodeErrorParser.d.ts +6 -0
- package/dist/quota/errorParsers/claudeCodeErrorParser.js +39 -0
- package/dist/quota/errorParsers/genericErrorParser.d.ts +9 -0
- package/dist/quota/errorParsers/genericErrorParser.js +7 -0
- package/dist/quota/errorParsers/index.d.ts +5 -0
- package/dist/quota/errorParsers/index.js +12 -0
- package/dist/quota/errorParsing.d.ts +7 -0
- package/dist/quota/errorParsing.js +69 -0
- package/dist/quota/fileLock.d.ts +6 -0
- package/dist/quota/fileLock.js +64 -0
- package/dist/quota/index.d.ts +11 -1
- package/dist/quota/index.js +7 -1
- package/dist/quota/learnedQuotaSource.d.ts +7 -0
- package/dist/quota/learnedQuotaSource.js +25 -0
- package/dist/quota/probe.d.ts +1 -4
- package/dist/quota/probe.js +1 -4
- package/dist/quota/quotaSource.d.ts +12 -0
- package/dist/quota/quotaSource.js +1 -0
- package/dist/quota/scheduler.d.ts +5 -1
- package/dist/quota/scheduler.js +51 -9
- package/dist/quota/slidingWindow.d.ts +4 -0
- package/dist/quota/slidingWindow.js +28 -0
- package/dist/quota/state.d.ts +3 -0
- package/dist/quota/state.js +57 -14
- package/dist/quota/types.d.ts +11 -2
- package/dist/reporting/mergeFindings.js +115 -23
- package/dist/types/sessionConfig.d.ts +2 -0
- package/package.json +1 -1
- package/schemas/dispatch_quota.schema.json +23 -2
package/dist/quota/index.js
CHANGED
|
@@ -1,5 +1,11 @@
|
|
|
1
1
|
export { resolveLimits, lookupKnownModel, classifyProvider } from "./limits.js";
|
|
2
2
|
export { detectHostActiveSubagentLimit, resolveHostActiveSubagentLimit, } from "./hostLimits.js";
|
|
3
|
-
export { readQuotaState, writeQuotaState, computeMaxSafeConcurrency, recordWaveOutcome, getQuotaStatePath, decayWeight, applyDecayToEntry, } from "./state.js";
|
|
3
|
+
export { readQuotaState, writeQuotaState, computeMaxSafeConcurrency, recordWaveOutcome, getQuotaStatePath, decayWeight, applyDecayToEntry, computeBackoffCooldownMs, computeBackoffFailureWeight, computeRampUpConcurrency, } from "./state.js";
|
|
4
4
|
export { scheduleWave, buildProviderModelKey } from "./scheduler.js";
|
|
5
|
+
export { detectRateLimitError, computeCooldownUntil } from "./errorParsing.js";
|
|
6
|
+
export { acquireLock, releaseLock, withFileLock, FileLockTimeoutError } from "./fileLock.js";
|
|
7
|
+
export { runSlidingWindow } from "./slidingWindow.js";
|
|
5
8
|
export { probeProvider } from "./probe.js";
|
|
9
|
+
export { GenericErrorParser, ClaudeCodeErrorParser, getErrorParserForProvider } from "./errorParsers/index.js";
|
|
10
|
+
export { LearnedQuotaSource } from "./learnedQuotaSource.js";
|
|
11
|
+
export { CompositeQuotaSource } from "./compositeQuotaSource.js";
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import type { QuotaSource, QuotaUsageSnapshot } from "./quotaSource.js";
|
|
2
|
+
export declare class LearnedQuotaSource implements QuotaSource {
|
|
3
|
+
readonly name = "learned";
|
|
4
|
+
private halfLifeHours;
|
|
5
|
+
constructor(halfLifeHours?: number);
|
|
6
|
+
queryCurrentUsage(providerModelKey: string): Promise<QuotaUsageSnapshot | null>;
|
|
7
|
+
}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import { readQuotaState, computeMaxSafeConcurrency } from "./state.js";
|
|
2
|
+
/**
 * Quota source that answers from the persisted quota state (loaded through
 * `readQuotaState`) instead of querying a provider.
 */
export class LearnedQuotaSource {
    name = "learned";
    halfLifeHours;
    /**
     * @param halfLifeHours Half-life, in hours, forwarded to
     *   `computeMaxSafeConcurrency` (defaults to 24).
     */
    constructor(halfLifeHours = 24) {
        this.halfLifeHours = halfLifeHours;
    }
    /**
     * Builds a usage snapshot for one provider/model key.
     *
     * @param providerModelKey Key used to look up the entry in the quota state.
     * @returns `null` when the state has no entry for the key; otherwise a
     *   snapshot whose `requests_remaining` is the learned safe concurrency.
     *   `remaining_pct` is 0 and `reset_at` is the stored cooldown timestamp
     *   only while that cooldown lies in the future; both are `null` otherwise.
     */
    async queryCurrentUsage(providerModelKey) {
        const { entries } = await readQuotaState();
        const learned = entries[providerModelKey];
        if (!learned) {
            return null;
        }
        const safeConcurrency = computeMaxSafeConcurrency(learned, this.halfLifeHours);
        const cooldownUntil = learned.cooldown_until;
        // An unparseable timestamp yields NaN, which compares false and is
        // therefore treated as "not cooling down".
        const coolingDown = cooldownUntil != null && new Date(cooldownUntil).getTime() > Date.now();
        const remainingPct = coolingDown ? 0 : null;
        const resetAt = coolingDown ? cooldownUntil : null;
        return {
            remaining_pct: remainingPct,
            reset_at: resetAt,
            requests_remaining: safeConcurrency,
            tokens_remaining: null,
            captured_at: learned.updated_at,
            source: "learned",
        };
    }
}
|
package/dist/quota/probe.d.ts
CHANGED
|
@@ -5,9 +5,6 @@ export interface ProbeResult {
|
|
|
5
5
|
/**
|
|
6
6
|
* Probe a provider to discover its rate limits.
|
|
7
7
|
*
|
|
8
|
-
*
|
|
9
|
-
* provider where the auditor controls the API call. IDE providers
|
|
10
|
-
* (claude-code, opencode) select the model internally; their limits come
|
|
11
|
-
* from known-model metadata or learned behavior.
|
|
8
|
+
* @deprecated Phase 3A replaces this with the QuotaSource abstraction.
|
|
12
9
|
*/
|
|
13
10
|
export declare function probeProvider(providerName: string, probeMode?: "auto" | "never" | "force"): Promise<ProbeResult>;
|
package/dist/quota/probe.js
CHANGED
|
@@ -1,10 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Probe a provider to discover its rate limits.
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
* provider where the auditor controls the API call. IDE providers
|
|
6
|
-
* (claude-code, opencode) select the model internally; their limits come
|
|
7
|
-
* from known-model metadata or learned behavior.
|
|
4
|
+
* @deprecated Phase 3A replaces this with the QuotaSource abstraction.
|
|
8
5
|
*/
|
|
9
6
|
export async function probeProvider(providerName, probeMode = "auto") {
|
|
10
7
|
if (probeMode === "never") {
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
export interface QuotaUsageSnapshot {
|
|
2
|
+
remaining_pct: number | null;
|
|
3
|
+
reset_at: string | null;
|
|
4
|
+
requests_remaining: number | null;
|
|
5
|
+
tokens_remaining: number | null;
|
|
6
|
+
captured_at: string;
|
|
7
|
+
source: string;
|
|
8
|
+
}
|
|
9
|
+
export interface QuotaSource {
|
|
10
|
+
readonly name: string;
|
|
11
|
+
queryCurrentUsage(providerModelKey: string): Promise<QuotaUsageSnapshot | null>;
|
|
12
|
+
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -1,14 +1,18 @@
|
|
|
1
1
|
import type { ResolvedProviderName, SessionConfig } from "../types/sessionConfig.js";
|
|
2
2
|
import type { HostConcurrencyLimit, QuotaStateEntry, WaveSchedule } from "./types.js";
|
|
3
|
+
import type { QuotaUsageSnapshot } from "./quotaSource.js";
|
|
3
4
|
export interface ScheduleWaveOptions {
|
|
4
5
|
providerName: ResolvedProviderName;
|
|
5
6
|
sessionConfig: SessionConfig;
|
|
6
7
|
hostModel: string | null;
|
|
7
8
|
requestedConcurrency: number;
|
|
8
|
-
/**
|
|
9
|
+
/** Per-slot estimated tokens (one entry per worker slot). Used for TPM budget. */
|
|
10
|
+
estimatedSlotTokens?: number[];
|
|
11
|
+
/** @deprecated Use estimatedSlotTokens instead. Average tokens per slot — used as fallback. */
|
|
9
12
|
estimatedPacketTokens?: number;
|
|
10
13
|
quotaStateEntry?: QuotaStateEntry | null;
|
|
11
14
|
hostConcurrencyLimit?: HostConcurrencyLimit | null;
|
|
15
|
+
quotaSourceSnapshot?: QuotaUsageSnapshot | null;
|
|
12
16
|
}
|
|
13
17
|
export declare function scheduleWave(options: ScheduleWaveOptions): WaveSchedule;
|
|
14
18
|
/** Build the state key used for indexing quota-state.json entries. */
|
package/dist/quota/scheduler.js
CHANGED
|
@@ -1,7 +1,20 @@
|
|
|
1
1
|
import { classifyProvider, resolveLimits } from "./limits.js";
|
|
2
|
-
import { computeMaxSafeConcurrency } from "./state.js";
|
|
2
|
+
import { computeMaxSafeConcurrency, computeRampUpConcurrency } from "./state.js";
|
|
3
|
+
/**
 * Adds up the leading elements of `sorted`.
 *
 * @param sorted Array of numbers; the caller is expected to have ordered it.
 * @param n How many leading elements to include. Values larger than the
 *   array length are capped at the length; zero or negative values give 0.
 * @returns The sum of the selected elements.
 */
function sumTopN(sorted, n) {
    const limit = Math.min(n, sorted.length);
    let total = 0;
    let position = 0;
    while (position < limit) {
        total += sorted[position];
        position += 1;
    }
    return total;
}
|
|
3
9
|
export function scheduleWave(options) {
|
|
4
|
-
const { providerName, sessionConfig, hostModel, requestedConcurrency, estimatedPacketTokens = 0, quotaStateEntry = null, hostConcurrencyLimit = null, } = options;
|
|
10
|
+
const { providerName, sessionConfig, hostModel, requestedConcurrency, estimatedSlotTokens, estimatedPacketTokens = 0, quotaStateEntry = null, hostConcurrencyLimit = null, quotaSourceSnapshot = null, } = options;
|
|
11
|
+
// Descending sort so sumTopN picks the largest slots
|
|
12
|
+
const slotsSorted = estimatedSlotTokens
|
|
13
|
+
? [...estimatedSlotTokens].sort((a, b) => b - a)
|
|
14
|
+
: null;
|
|
15
|
+
const avgTokens = slotsSorted && slotsSorted.length > 0
|
|
16
|
+
? Math.floor(slotsSorted.reduce((a, b) => a + b, 0) / slotsSorted.length)
|
|
17
|
+
: estimatedPacketTokens;
|
|
5
18
|
const quota = sessionConfig.quota ?? {};
|
|
6
19
|
const applyHostConcurrencyLimit = (waveSize) => {
|
|
7
20
|
if (hostConcurrencyLimit === null)
|
|
@@ -19,7 +32,7 @@ export function scheduleWave(options) {
|
|
|
19
32
|
};
|
|
20
33
|
return {
|
|
21
34
|
wave_size: waveSize,
|
|
22
|
-
estimated_wave_tokens: waveSize *
|
|
35
|
+
estimated_wave_tokens: slotsSorted ? sumTopN(slotsSorted, waveSize) : waveSize * avgTokens,
|
|
23
36
|
cooldown_until: null,
|
|
24
37
|
confidence: "high",
|
|
25
38
|
source: "default",
|
|
@@ -48,12 +61,25 @@ export function scheduleWave(options) {
|
|
|
48
61
|
waveSize = Math.min(waveSize, rpmCap);
|
|
49
62
|
}
|
|
50
63
|
// Cap by input tokens-per-minute
|
|
51
|
-
if (limits.input_tokens_per_minute != null &&
|
|
52
|
-
const
|
|
53
|
-
|
|
64
|
+
if (limits.input_tokens_per_minute != null && avgTokens > 0) {
|
|
65
|
+
const tpmBudget = limits.input_tokens_per_minute * safetyMargin;
|
|
66
|
+
if (slotsSorted && slotsSorted.length > 0) {
|
|
67
|
+
let candidateSize = waveSize;
|
|
68
|
+
while (candidateSize > 1 && sumTopN(slotsSorted, candidateSize) > tpmBudget) {
|
|
69
|
+
candidateSize--;
|
|
70
|
+
}
|
|
71
|
+
waveSize = Math.max(1, candidateSize);
|
|
72
|
+
}
|
|
73
|
+
else {
|
|
74
|
+
const tpmCap = Math.max(1, Math.floor(tpmBudget / avgTokens));
|
|
75
|
+
waveSize = Math.min(waveSize, tpmCap);
|
|
76
|
+
}
|
|
54
77
|
}
|
|
55
78
|
if (quotaStateEntry) {
|
|
56
|
-
const
|
|
79
|
+
const rampUp = quota.ramp_up_enabled !== false;
|
|
80
|
+
const learnedCap = rampUp
|
|
81
|
+
? computeRampUpConcurrency(quotaStateEntry, halfLifeHours)
|
|
82
|
+
: computeMaxSafeConcurrency(quotaStateEntry, halfLifeHours);
|
|
57
83
|
waveSize = Math.min(waveSize, learnedCap);
|
|
58
84
|
}
|
|
59
85
|
else {
|
|
@@ -61,22 +87,38 @@ export function scheduleWave(options) {
|
|
|
61
87
|
const fallbackCap = providerType === "local"
|
|
62
88
|
? quota.unknown_local_concurrency
|
|
63
89
|
: (quota.unknown_hosted_concurrency ?? 1);
|
|
64
|
-
if (
|
|
90
|
+
if (fallbackCap === "unlimited") {
|
|
91
|
+
// no cap — "unlimited" intentionally skips clamping
|
|
92
|
+
}
|
|
93
|
+
else if (typeof fallbackCap === "number" && Number.isFinite(fallbackCap)) {
|
|
65
94
|
waveSize = Math.min(waveSize, Math.max(1, Math.floor(fallbackCap)));
|
|
66
95
|
}
|
|
67
96
|
}
|
|
68
97
|
}
|
|
98
|
+
// Apply real-time quota source data if available
|
|
99
|
+
if (quotaSourceSnapshot && !cooldownUntil) {
|
|
100
|
+
if (quotaSourceSnapshot.remaining_pct != null && quotaSourceSnapshot.remaining_pct < 0.1) {
|
|
101
|
+
waveSize = 1;
|
|
102
|
+
if (quotaSourceSnapshot.reset_at) {
|
|
103
|
+
cooldownUntil = quotaSourceSnapshot.reset_at;
|
|
104
|
+
}
|
|
105
|
+
}
|
|
106
|
+
else if (quotaSourceSnapshot.remaining_pct != null && quotaSourceSnapshot.remaining_pct < 0.3) {
|
|
107
|
+
waveSize = Math.min(waveSize, Math.max(1, Math.floor(waveSize * 0.5)));
|
|
108
|
+
}
|
|
109
|
+
}
|
|
69
110
|
waveSize = applyHostConcurrencyLimit(waveSize);
|
|
70
111
|
waveSize = Math.max(1, waveSize);
|
|
71
112
|
return {
|
|
72
113
|
wave_size: waveSize,
|
|
73
|
-
estimated_wave_tokens: waveSize *
|
|
114
|
+
estimated_wave_tokens: slotsSorted ? sumTopN(slotsSorted, waveSize) : waveSize * avgTokens,
|
|
74
115
|
cooldown_until: cooldownUntil,
|
|
75
116
|
confidence,
|
|
76
117
|
source,
|
|
77
118
|
resolved_limits: limits,
|
|
78
119
|
host_concurrency_limit: hostConcurrencyLimit,
|
|
79
120
|
model: hostModel,
|
|
121
|
+
quota_source_snapshot: quotaSourceSnapshot,
|
|
80
122
|
};
|
|
81
123
|
}
|
|
82
124
|
/** Build the state key used for indexing quota-state.json entries. */
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
export interface SlidingWindowResult<T> {
|
|
2
|
+
results: PromiseSettledResult<T>[];
|
|
3
|
+
}
|
|
4
|
+
export declare function runSlidingWindow<T>(tasks: Array<() => Promise<T>>, concurrency: number, onComplete?: (index: number, result: PromiseSettledResult<T>) => void): Promise<SlidingWindowResult<T>>;
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
export async function runSlidingWindow(tasks, concurrency, onComplete) {
|
|
2
|
+
const results = new Array(tasks.length);
|
|
3
|
+
let nextIndex = 0;
|
|
4
|
+
async function runOne(index) {
|
|
5
|
+
let result;
|
|
6
|
+
try {
|
|
7
|
+
const value = await tasks[index]();
|
|
8
|
+
result = { status: "fulfilled", value };
|
|
9
|
+
}
|
|
10
|
+
catch (reason) {
|
|
11
|
+
result = { status: "rejected", reason };
|
|
12
|
+
}
|
|
13
|
+
results[index] = result;
|
|
14
|
+
onComplete?.(index, result);
|
|
15
|
+
if (nextIndex < tasks.length) {
|
|
16
|
+
const next = nextIndex++;
|
|
17
|
+
await runOne(next);
|
|
18
|
+
}
|
|
19
|
+
}
|
|
20
|
+
const initialBatch = Math.min(concurrency, tasks.length);
|
|
21
|
+
const runners = [];
|
|
22
|
+
for (let i = 0; i < initialBatch; i++) {
|
|
23
|
+
const idx = nextIndex++;
|
|
24
|
+
runners.push(runOne(idx));
|
|
25
|
+
}
|
|
26
|
+
await Promise.all(runners);
|
|
27
|
+
return { results };
|
|
28
|
+
}
|
package/dist/quota/state.d.ts
CHANGED
|
@@ -9,4 +9,7 @@ export declare function writeQuotaState(state: QuotaState): Promise<void>;
|
|
|
9
9
|
* exceeds failure evidence, with a minimum of 1.
|
|
10
10
|
*/
|
|
11
11
|
export declare function computeMaxSafeConcurrency(entry: QuotaStateEntry, halfLifeHours: number, maxToCheck?: number): number;
|
|
12
|
+
export declare function computeRampUpConcurrency(entry: QuotaStateEntry, halfLifeHours: number, maxToCheck?: number): number;
|
|
13
|
+
export declare function computeBackoffCooldownMs(consecutive429Count: number): number;
|
|
14
|
+
export declare function computeBackoffFailureWeight(consecutive429Count: number): number;
|
|
12
15
|
export declare function recordWaveOutcome(providerModelKey: string, outcome: ObservedWaveOutcome, halfLifeHours: number): Promise<void>;
|
package/dist/quota/state.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { mkdir, readFile, writeFile } from "node:fs/promises";
|
|
2
2
|
import { homedir } from "node:os";
|
|
3
3
|
import { join } from "node:path";
|
|
4
|
+
import { withFileLock } from "./fileLock.js";
|
|
4
5
|
const STATE_DIR = join(homedir(), ".audit-code");
|
|
5
6
|
const STATE_PATH = join(STATE_DIR, "quota-state.json");
|
|
6
7
|
// A bucket needs at least this much success weight before we trust it.
|
|
@@ -27,31 +28,38 @@ export function applyDecayToEntry(entry, halfLifeHours) {
|
|
|
27
28
|
return { ...entry, buckets: decayed };
|
|
28
29
|
}
|
|
29
30
|
/**
 * Structural check for a parsed quota-state document.
 *
 * @param value Any parsed JSON value.
 * @returns `true` only when `value` is a non-array object whose `version` is
 *   1 or 2 and whose `entries` is a non-null, non-array object.
 */
function isQuotaState(value) {
    if (value === null || typeof value !== "object" || Array.isArray(value))
        return false;
    const obj = value;
    const version = obj["version"];
    if (version !== 1 && version !== 2)
        return false;
    const entries = obj["entries"];
    // `typeof null` is "object" and arrays are objects too; neither can be
    // indexed as a key -> entry record, so both are rejected here.
    return entries !== null && typeof entries === "object" && !Array.isArray(entries);
}
|
|
36
37
|
export async function readQuotaState() {
|
|
37
38
|
try {
|
|
38
39
|
const raw = await readFile(STATE_PATH, "utf8");
|
|
39
40
|
const parsed = JSON.parse(raw);
|
|
40
|
-
if (isQuotaState(parsed))
|
|
41
|
+
if (isQuotaState(parsed)) {
|
|
42
|
+
if (parsed.version === 1) {
|
|
43
|
+
for (const entry of Object.values(parsed.entries)) {
|
|
44
|
+
entry.consecutive_429_count ??= 0;
|
|
45
|
+
}
|
|
46
|
+
}
|
|
41
47
|
return parsed;
|
|
42
|
-
|
|
48
|
+
}
|
|
49
|
+
process.stderr.write(`[quota] ignoring invalid quota state at ${STATE_PATH}: expected { version: 1|2, entries: object }\n`);
|
|
43
50
|
}
|
|
44
51
|
catch (error) {
|
|
45
52
|
if (error.code === "ENOENT") {
|
|
46
|
-
return { version:
|
|
53
|
+
return { version: 2, entries: {} };
|
|
47
54
|
}
|
|
48
55
|
process.stderr.write(`[quota] ignoring unreadable quota state at ${STATE_PATH}: ${error instanceof Error ? error.message : String(error)}\n`);
|
|
49
56
|
}
|
|
50
|
-
return { version:
|
|
57
|
+
return { version: 2, entries: {} };
|
|
51
58
|
}
|
|
52
59
|
export async function writeQuotaState(state) {
|
|
53
60
|
await mkdir(STATE_DIR, { recursive: true });
|
|
54
|
-
|
|
61
|
+
const normalized = { ...state, version: 2 };
|
|
62
|
+
await writeFile(STATE_PATH, JSON.stringify(normalized, null, 2) + "\n", "utf8");
|
|
55
63
|
}
|
|
56
64
|
/**
|
|
57
65
|
* Returns the highest concurrency level for which decayed success evidence
|
|
@@ -74,14 +82,39 @@ export function computeMaxSafeConcurrency(entry, halfLifeHours, maxToCheck = 32)
|
|
|
74
82
|
}
|
|
75
83
|
return maxSafe;
|
|
76
84
|
}
|
|
85
|
+
const RAMP_UP_MIN_SUCCESSES = 2;
|
|
86
|
+
/**
 * Like `computeMaxSafeConcurrency`, but allows probing one level higher.
 *
 * @param entry Quota state entry for one provider/model key.
 * @param halfLifeHours Half-life passed to the decay helpers.
 * @param maxToCheck Upper bound forwarded to `computeMaxSafeConcurrency`
 *   (defaults to 32).
 * @returns The learned safe level plus one when the decayed bucket at that
 *   level has at least `RAMP_UP_MIN_SUCCESSES` success weight and a failure
 *   weight of exactly 0; otherwise the learned safe level unchanged.
 */
export function computeRampUpConcurrency(entry, halfLifeHours, maxToCheck = 32) {
    const safeLevel = computeMaxSafeConcurrency(entry, halfLifeHours, maxToCheck);
    const evidence = applyDecayToEntry(entry, halfLifeHours).buckets[String(safeLevel)];
    if (!evidence) {
        return safeLevel;
    }
    // NOTE(review): this is an exact-zero comparison on a decayed weight; if
    // decay only scales the value, a bucket that ever saw a failure may never
    // qualify again — confirm that is intended.
    const provenClean = evidence.success_weight >= RAMP_UP_MIN_SUCCESSES &&
        evidence.failure_weight === 0;
    return provenClean ? safeLevel + 1 : safeLevel;
}
|
|
77
97
|
function blankEntry() {
|
|
78
98
|
return { updated_at: new Date().toISOString(), buckets: {}, cooldown_until: null, last_429_at: null };
|
|
79
99
|
}
|
|
100
|
+
// Cooldown applied after the first rate-limit hit, in milliseconds.
const BASE_COOLDOWN_MS = 60_000;
// Ceiling for the exponential backoff, in milliseconds (15 minutes).
const MAX_COOLDOWN_MS = 15 * 60_000;
/**
 * Exponential backoff: the cooldown doubles with every consecutive
 * rate-limit hit after the first and is capped at `MAX_COOLDOWN_MS`.
 *
 * @param consecutive429Count Number of consecutive rate-limit hits; values
 *   of 1 or less all yield the base cooldown.
 * @returns Cooldown duration in milliseconds.
 */
export function computeBackoffCooldownMs(consecutive429Count) {
    const doublings = Math.max(0, consecutive429Count - 1);
    const uncapped = BASE_COOLDOWN_MS * 2 ** doublings;
    return uncapped > MAX_COOLDOWN_MS ? MAX_COOLDOWN_MS : uncapped;
}
|
|
106
|
+
/**
 * Failure weight for a rate-limited wave: 1 for the first hit, plus 0.5 for
 * every additional consecutive hit.
 *
 * @param consecutive429Count Number of consecutive rate-limit hits; values
 *   of 1 or less all yield a weight of 1.
 * @returns The failure weight.
 */
export function computeBackoffFailureWeight(consecutive429Count) {
    const repeats = Math.max(0, consecutive429Count - 1);
    return 1 + repeats / 2;
}
|
|
109
|
+
const LOCK_PATH = STATE_PATH + ".lock";
|
|
80
110
|
export async function recordWaveOutcome(providerModelKey, outcome, halfLifeHours) {
|
|
111
|
+
await withFileLock(LOCK_PATH, () => recordWaveOutcomeUnsafe(providerModelKey, outcome, halfLifeHours));
|
|
112
|
+
}
|
|
113
|
+
async function recordWaveOutcomeUnsafe(providerModelKey, outcome, halfLifeHours) {
|
|
81
114
|
const state = await readQuotaState();
|
|
82
115
|
const entry = applyDecayToEntry(state.entries[providerModelKey] ?? blankEntry(), halfLifeHours);
|
|
83
116
|
if (outcome.outcome === "success") {
|
|
84
|
-
|
|
117
|
+
entry.consecutive_429_count = 0;
|
|
85
118
|
for (let n = 1; n <= outcome.concurrency; n++) {
|
|
86
119
|
const bucket = entry.buckets[String(n)] ?? { success_weight: 0, failure_weight: 0 };
|
|
87
120
|
bucket.success_weight += 1.0;
|
|
@@ -89,13 +122,23 @@ export async function recordWaveOutcome(providerModelKey, outcome, halfLifeHours
|
|
|
89
122
|
}
|
|
90
123
|
}
|
|
91
124
|
else {
|
|
125
|
+
const prev429Count = entry.consecutive_429_count ?? 0;
|
|
126
|
+
const new429Count = outcome.outcome === "rate_limited" ? prev429Count + 1 : prev429Count;
|
|
127
|
+
entry.consecutive_429_count = new429Count;
|
|
92
128
|
entry.last_429_at = new Date().toISOString();
|
|
93
|
-
if (outcome.
|
|
129
|
+
if (outcome.outcome === "rate_limited" && new429Count > 0) {
|
|
130
|
+
const backoffMs = computeBackoffCooldownMs(new429Count);
|
|
131
|
+
entry.cooldown_until = new Date(Date.now() + backoffMs).toISOString();
|
|
132
|
+
}
|
|
133
|
+
else if (outcome.cooldown_until) {
|
|
94
134
|
entry.cooldown_until = outcome.cooldown_until;
|
|
95
|
-
|
|
135
|
+
}
|
|
136
|
+
const failureWeight = outcome.outcome === "rate_limited"
|
|
137
|
+
? computeBackoffFailureWeight(new429Count)
|
|
138
|
+
: 1.0;
|
|
96
139
|
for (let n = outcome.concurrency; n <= outcome.concurrency + 4; n++) {
|
|
97
140
|
const bucket = entry.buckets[String(n)] ?? { success_weight: 0, failure_weight: 0 };
|
|
98
|
-
bucket.failure_weight +=
|
|
141
|
+
bucket.failure_weight += failureWeight;
|
|
99
142
|
entry.buckets[String(n)] = bucket;
|
|
100
143
|
}
|
|
101
144
|
}
|
package/dist/quota/types.d.ts
CHANGED
|
@@ -22,9 +22,10 @@ export interface QuotaStateEntry {
|
|
|
22
22
|
buckets: Record<string, ConcurrencyBucket>;
|
|
23
23
|
cooldown_until: string | null;
|
|
24
24
|
last_429_at: string | null;
|
|
25
|
+
consecutive_429_count?: number;
|
|
25
26
|
}
|
|
26
27
|
export interface QuotaState {
|
|
27
|
-
version: 1;
|
|
28
|
+
version: 1 | 2;
|
|
28
29
|
entries: Record<string, QuotaStateEntry>;
|
|
29
30
|
}
|
|
30
31
|
export interface WaveSchedule {
|
|
@@ -36,9 +37,15 @@ export interface WaveSchedule {
|
|
|
36
37
|
resolved_limits: ResolvedLimits;
|
|
37
38
|
host_concurrency_limit: HostConcurrencyLimit | null;
|
|
38
39
|
model: string | null;
|
|
40
|
+
quota_source_snapshot?: import("./quotaSource.js").QuotaUsageSnapshot | null;
|
|
41
|
+
}
|
|
42
|
+
export interface BackoffState {
|
|
43
|
+
consecutive_429_count: number;
|
|
44
|
+
current_cooldown_ms: number;
|
|
45
|
+
current_failure_weight: number;
|
|
39
46
|
}
|
|
40
47
|
export interface DispatchQuota {
|
|
41
|
-
contract_version: "audit-code-dispatch-quota/v1alpha1";
|
|
48
|
+
contract_version: "audit-code-dispatch-quota/v1alpha1" | "audit-code-dispatch-quota/v1alpha2";
|
|
42
49
|
run_id: string;
|
|
43
50
|
model: string | null;
|
|
44
51
|
resolved_limits: ResolvedLimits;
|
|
@@ -48,6 +55,8 @@ export interface DispatchQuota {
|
|
|
48
55
|
wave_size: number;
|
|
49
56
|
estimated_wave_tokens: number;
|
|
50
57
|
cooldown_until: string | null;
|
|
58
|
+
quota_source_snapshot?: import("./quotaSource.js").QuotaUsageSnapshot | null;
|
|
59
|
+
backoff_state?: BackoffState | null;
|
|
51
60
|
}
|
|
52
61
|
export interface ObservedWaveOutcome {
|
|
53
62
|
concurrency: number;
|
|
@@ -92,6 +92,78 @@ function mergeAffectedFiles(existing, incoming) {
|
|
|
92
92
|
}
|
|
93
93
|
existing.affected_files.sort((a, b) => a.path.localeCompare(b.path) || (a.line_start ?? 0) - (b.line_start ?? 0));
|
|
94
94
|
}
|
|
95
|
+
/**
 * Folds `absorbed` into `survivor`, mutating `survivor` in place.
 *
 * Affected files are merged via `mergeAffectedFiles`, evidence lists are
 * concatenated with duplicates dropped (first occurrence wins), `systemic`
 * becomes true if either finding had it set, and the longer of the two
 * summaries is kept.
 *
 * @param survivor Finding that stays in the result set.
 * @param absorbed Finding being merged away; it is only read here.
 */
function absorbFinding(survivor, absorbed) {
    mergeAffectedFiles(survivor, absorbed);
    const uniqueEvidence = new Set(survivor.evidence ?? []);
    for (const item of absorbed.evidence ?? []) {
        uniqueEvidence.add(item);
    }
    survivor.evidence = Array.from(uniqueEvidence);
    survivor.systemic = !!(survivor.systemic || absorbed.systemic);
    const absorbedIsLonger = absorbed.summary.length > survivor.summary.length;
    if (absorbedIsLonger) {
        survivor.summary = absorbed.summary;
    }
}
|
|
108
|
+
function lineRangeOverlaps(a, b) {
|
|
109
|
+
const aFile = a.affected_files[0];
|
|
110
|
+
const bFile = b.affected_files[0];
|
|
111
|
+
if (!aFile || !bFile)
|
|
112
|
+
return false;
|
|
113
|
+
if (aFile.path !== bFile.path)
|
|
114
|
+
return false;
|
|
115
|
+
const aStart = aFile.line_start ?? 0;
|
|
116
|
+
const aEnd = aFile.line_end ?? aStart;
|
|
117
|
+
const bStart = bFile.line_start ?? 0;
|
|
118
|
+
const bEnd = bFile.line_end ?? bStart;
|
|
119
|
+
if (aEnd === 0 && bEnd === 0)
|
|
120
|
+
return true;
|
|
121
|
+
return aStart <= bEnd && bStart <= aEnd;
|
|
122
|
+
}
|
|
123
|
+
/**
 * Collapses near-duplicate findings that share a lens and a primary path.
 *
 * Findings are grouped by normalized lens plus `primaryPath`. Within a
 * group, two findings are merged when their titles are similar enough
 * (word Jaccard >= 0.35 if the normalized categories match, >= 0.45
 * otherwise) and they either overlap by line range or have a file-path
 * overlap of at least 0.5. The finding with the higher severity survives;
 * on a severity tie the higher confidence wins, and on a full tie the
 * earlier finding is kept. The loser is folded into the survivor with
 * `absorbFinding`.
 *
 * @param findings Findings to deduplicate; survivors are mutated in place.
 * @returns The input findings, in their original order, minus the absorbed ones.
 */
function deduplicateSameLens(findings) {
    const groups = new Map();
    for (const finding of findings) {
        const key = `${normalizeText(finding.lens)}:${primaryPath(finding)}`;
        const group = groups.get(key);
        if (group) {
            group.push(finding);
        }
        else {
            groups.set(key, [finding]);
        }
    }
    const removed = new Set();
    for (const group of groups.values()) {
        if (group.length < 2)
            continue;
        for (let i = 0; i < group.length; i++) {
            if (removed.has(group[i]))
                continue;
            for (let j = i + 1; j < group.length; j++) {
                if (removed.has(group[j]))
                    continue;
                const a = group[i];
                const b = group[j];
                const titleSim = wordJaccard(a.title, b.title);
                const catMatch = normalizeText(a.category) === normalizeText(b.category);
                const threshold = catMatch ? 0.35 : 0.45;
                if (titleSim < threshold)
                    continue;
                if (!lineRangeOverlaps(a, b) && filePathOverlap(a, b) < 0.5)
                    continue;
                const aSev = severityRank(a.severity);
                const bSev = severityRank(b.severity);
                const aConf = confidenceRank(a.confidence);
                const bConf = confidenceRank(b.confidence);
                const keepA = aSev > bSev || (aSev === bSev && aConf >= bConf);
                const [survivor, absorbed] = keepA ? [a, b] : [b, a];
                absorbFinding(survivor, absorbed);
                removed.add(absorbed);
                // Once `a` itself has been absorbed it must not take part in
                // further comparisons: anything merged into it afterwards
                // would be dropped from the output together with `a`. The
                // survivor `b` is revisited later as its own `group[i]`.
                if (absorbed === a)
                    break;
            }
        }
    }
    return findings.filter((f) => !removed.has(f));
}
|
|
95
167
|
function deduplicateCrossLens(findings) {
|
|
96
168
|
const groups = new Map();
|
|
97
169
|
for (const finding of findings) {
|
|
@@ -131,27 +203,41 @@ function deduplicateCrossLens(findings) {
|
|
|
131
203
|
const bConf = confidenceRank(b.confidence);
|
|
132
204
|
const keepA = aSev > bSev || (aSev === bSev && aConf >= bConf);
|
|
133
205
|
const [survivor, absorbed] = keepA ? [a, b] : [b, a];
|
|
134
|
-
|
|
135
|
-
survivor.evidence = [
|
|
136
|
-
...new Set([
|
|
137
|
-
...(survivor.evidence ?? []),
|
|
138
|
-
...(absorbed.evidence ?? []),
|
|
139
|
-
]),
|
|
140
|
-
];
|
|
141
|
-
survivor.systemic = Boolean(survivor.systemic || absorbed.systemic);
|
|
142
|
-
if (absorbed.summary.length > survivor.summary.length) {
|
|
143
|
-
survivor.summary = absorbed.summary;
|
|
144
|
-
}
|
|
206
|
+
absorbFinding(survivor, absorbed);
|
|
145
207
|
removed.add(absorbed);
|
|
146
208
|
}
|
|
147
209
|
}
|
|
148
210
|
}
|
|
149
211
|
return findings.filter((f) => !removed.has(f));
|
|
150
212
|
}
|
|
213
|
+
function relevantRuntimeEvidence(finding, report) {
|
|
214
|
+
if (!report)
|
|
215
|
+
return [];
|
|
216
|
+
const findingPaths = new Set(finding.affected_files.map((f) => f.path));
|
|
217
|
+
return report.results
|
|
218
|
+
.filter((result) => result.status !== "pending")
|
|
219
|
+
.filter((result) => {
|
|
220
|
+
const taskPaths = result.notes
|
|
221
|
+
?.flatMap((note) => {
|
|
222
|
+
const match = note.match(/Target paths:\s*(.+)/);
|
|
223
|
+
return match ? match[1].split(",").map((p) => p.trim()) : [];
|
|
224
|
+
}) ?? [];
|
|
225
|
+
if (taskPaths.length === 0)
|
|
226
|
+
return true;
|
|
227
|
+
return taskPaths.some((p) => findingPaths.has(p));
|
|
228
|
+
})
|
|
229
|
+
.map((result) => `${result.task_id}: ${result.status} — ${result.summary}`);
|
|
230
|
+
}
|
|
231
|
+
/**
 * Renders the external-analyzer items whose path matches one of the
 * finding's affected files.
 *
 * @param finding Finding whose `affected_files` paths are matched.
 * @param results Analyzer output with `tool` and a `results` array, or a
 *   falsy value.
 * @returns One "external:tool:path:summary" string per matching item, in
 *   input order; an empty array when `results` is falsy.
 */
function relevantExternalEvidence(finding, results) {
    if (!results) {
        return [];
    }
    const touchedPaths = new Set(finding.affected_files.map((file) => file.path));
    const evidence = [];
    results.results.forEach((item) => {
        if (touchedPaths.has(item.path)) {
            evidence.push(`external:${results.tool}:${item.path}:${item.summary}`);
        }
    });
    return evidence;
}
|
|
151
239
|
export function mergeFindings(results, runtimeReport, externalAnalyzerResults) {
|
|
152
240
|
const merged = new Map();
|
|
153
|
-
const runtimeEvidence = runtimeSummary(runtimeReport);
|
|
154
|
-
const analyzerEvidence = externalSummary(externalAnalyzerResults);
|
|
155
241
|
for (const result of results) {
|
|
156
242
|
for (const finding of result.findings) {
|
|
157
243
|
const key = findingKey(finding);
|
|
@@ -160,13 +246,7 @@ export function mergeFindings(results, runtimeReport, externalAnalyzerResults) {
|
|
|
160
246
|
merged.set(key, {
|
|
161
247
|
...finding,
|
|
162
248
|
affected_files: [...finding.affected_files],
|
|
163
|
-
evidence: [
|
|
164
|
-
...new Set([
|
|
165
|
-
...(finding.evidence ?? []),
|
|
166
|
-
...runtimeEvidence,
|
|
167
|
-
...analyzerEvidence,
|
|
168
|
-
]),
|
|
169
|
-
],
|
|
249
|
+
evidence: [...(finding.evidence ?? [])],
|
|
170
250
|
});
|
|
171
251
|
continue;
|
|
172
252
|
}
|
|
@@ -188,13 +268,25 @@ export function mergeFindings(results, runtimeReport, externalAnalyzerResults) {
|
|
|
188
268
|
...new Set([
|
|
189
269
|
...(existing.evidence ?? []),
|
|
190
270
|
...(finding.evidence ?? []),
|
|
191
|
-
...runtimeEvidence,
|
|
192
|
-
...analyzerEvidence,
|
|
193
271
|
]),
|
|
194
272
|
];
|
|
195
273
|
}
|
|
196
274
|
}
|
|
197
|
-
|
|
275
|
+
for (const finding of merged.values()) {
|
|
276
|
+
const runtimeEv = relevantRuntimeEvidence(finding, runtimeReport);
|
|
277
|
+
const externalEv = relevantExternalEvidence(finding, externalAnalyzerResults);
|
|
278
|
+
if (runtimeEv.length > 0 || externalEv.length > 0) {
|
|
279
|
+
finding.evidence = [
|
|
280
|
+
...new Set([
|
|
281
|
+
...(finding.evidence ?? []),
|
|
282
|
+
...runtimeEv,
|
|
283
|
+
...externalEv,
|
|
284
|
+
]),
|
|
285
|
+
];
|
|
286
|
+
}
|
|
287
|
+
}
|
|
288
|
+
const dedupedSameLens = deduplicateSameLens([...merged.values()]);
|
|
289
|
+
return deduplicateCrossLens(dedupedSameLens).sort((a, b) => {
|
|
198
290
|
const severityDelta = severityRank(b.severity) - severityRank(a.severity);
|
|
199
291
|
if (severityDelta !== 0)
|
|
200
292
|
return severityDelta;
|
|
@@ -44,6 +44,8 @@ export interface QuotaConfig {
|
|
|
44
44
|
reserved_output_tokens?: number;
|
|
45
45
|
/** Half-life of empirical success/failure evidence in hours (default: 24). */
|
|
46
46
|
empirical_half_life_hours?: number;
|
|
47
|
+
/** Allow the scheduler to try concurrency maxSafe+1 after consecutive successes (default: true). */
|
|
48
|
+
ramp_up_enabled?: boolean;
|
|
47
49
|
/** Hard host ceiling for simultaneously active conversation subagents. */
|
|
48
50
|
host_active_subagent_limit?: number;
|
|
49
51
|
/** Per-model overrides keyed by "provider/model". */
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"$schema": "http://json-schema.org/draft-07/schema#",
|
|
3
|
-
"$id": "audit-code-dispatch-quota/
|
|
3
|
+
"$id": "audit-code-dispatch-quota/v1alpha2",
|
|
4
4
|
"title": "DispatchQuota",
|
|
5
5
|
"description": "Quota schedule for a prepare-dispatch run. Written beside dispatch-plan.json. Hosts must launch at most wave_size packets per wave, then re-read this file before the next wave to pick up any updated limits.",
|
|
6
6
|
"type": "object",
|
|
@@ -20,7 +20,7 @@
|
|
|
20
20
|
"properties": {
|
|
21
21
|
"contract_version": {
|
|
22
22
|
"type": "string",
|
|
23
|
-
"
|
|
23
|
+
"enum": ["audit-code-dispatch-quota/v1alpha1", "audit-code-dispatch-quota/v1alpha2"]
|
|
24
24
|
},
|
|
25
25
|
"run_id": {
|
|
26
26
|
"type": "string",
|
|
@@ -97,6 +97,27 @@
|
|
|
97
97
|
"type": ["string", "null"],
|
|
98
98
|
"format": "date-time",
|
|
99
99
|
"description": "If non-null, the host should wait until this timestamp before launching the next wave."
|
|
100
|
+
},
|
|
101
|
+
"quota_source_snapshot": {
|
|
102
|
+
"type": ["object", "null"],
|
|
103
|
+
"description": "Real-time usage snapshot from a QuotaSource, if available.",
|
|
104
|
+
"properties": {
|
|
105
|
+
"remaining_pct": { "type": ["number", "null"] },
|
|
106
|
+
"reset_at": { "type": ["string", "null"], "format": "date-time" },
|
|
107
|
+
"requests_remaining": { "type": ["integer", "null"] },
|
|
108
|
+
"tokens_remaining": { "type": ["integer", "null"] },
|
|
109
|
+
"captured_at": { "type": "string", "format": "date-time" },
|
|
110
|
+
"source": { "type": "string" }
|
|
111
|
+
}
|
|
112
|
+
},
|
|
113
|
+
"backoff_state": {
|
|
114
|
+
"type": ["object", "null"],
|
|
115
|
+
"description": "Exponential backoff state for repeated rate-limit errors.",
|
|
116
|
+
"properties": {
|
|
117
|
+
"consecutive_429_count": { "type": "integer", "minimum": 0 },
|
|
118
|
+
"current_cooldown_ms": { "type": "integer", "minimum": 0 },
|
|
119
|
+
"current_failure_weight": { "type": "number", "minimum": 0 }
|
|
120
|
+
}
|
|
100
121
|
}
|
|
101
122
|
}
|
|
102
123
|
}
|