auditor-lambda 0.3.20 → 0.3.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +12 -2
- package/audit-code-wrapper-lib.mjs +91 -32
- package/dist/cli.js +738 -11
- package/dist/orchestrator/reviewPackets.d.ts +5 -0
- package/dist/orchestrator/reviewPackets.js +5 -1
- package/dist/prompts/renderWorkerPrompt.js +1 -0
- package/dist/quota/index.d.ts +8 -0
- package/dist/quota/index.js +4 -0
- package/dist/quota/limits.d.ts +16 -0
- package/dist/quota/limits.js +77 -0
- package/dist/quota/probe.d.ts +13 -0
- package/dist/quota/probe.js +21 -0
- package/dist/quota/scheduler.d.ts +14 -0
- package/dist/quota/scheduler.js +76 -0
- package/dist/quota/state.d.ts +12 -0
- package/dist/quota/state.js +101 -0
- package/dist/quota/types.d.ts +50 -0
- package/dist/quota/types.js +1 -0
- package/dist/supervisor/operatorHandoff.js +3 -7
- package/dist/types/sessionConfig.d.ts +28 -0
- package/docs/contracts.md +23 -1
- package/docs/operator-guide.md +11 -4
- package/docs/product.md +4 -3
- package/package.json +1 -1
- package/schemas/dispatch_quota.schema.json +77 -0
- package/scripts/postinstall.mjs +33 -0
- package/skills/audit-code/audit-code.prompt.md +15 -170
|
@@ -6,6 +6,11 @@ export interface BuildReviewPacketOptions {
|
|
|
6
6
|
lineIndex?: Record<string, number>;
|
|
7
7
|
maxTasksPerPacket?: number;
|
|
8
8
|
targetPacketLines?: number;
|
|
9
|
+
/**
|
|
10
|
+
* Available context budget in tokens (context_tokens − reserved_output_tokens).
|
|
11
|
+
* When provided, targetPacketLines is capped to fit within this budget.
|
|
12
|
+
*/
|
|
13
|
+
maxContextTokens?: number;
|
|
9
14
|
}
|
|
10
15
|
export declare function buildReviewPackets(tasks: AuditTask[], options?: BuildReviewPacketOptions): ReviewPacket[];
|
|
11
16
|
export declare function orderTasksForPacketReview(tasks: AuditTask[], options?: BuildReviewPacketOptions): AuditTask[];
|
|
@@ -949,7 +949,11 @@ function buildPacket(tasks, packetIndex, lineIndex, graphEdges = [], graphBundle
|
|
|
949
949
|
}
|
|
950
950
|
function buildReviewPacketPlanningData(tasks, options = {}) {
|
|
951
951
|
const maxTasksPerPacket = options.maxTasksPerPacket ?? DEFAULT_MAX_TASKS_PER_PACKET;
|
|
952
|
-
const
|
|
952
|
+
const configuredTargetLines = options.targetPacketLines ?? DEFAULT_TARGET_PACKET_LINES;
|
|
953
|
+
const targetPacketLines = options.maxContextTokens != null
|
|
954
|
+
? Math.min(configuredTargetLines, Math.max(1, Math.floor((options.maxContextTokens - ESTIMATED_PACKET_PROMPT_TOKENS) /
|
|
955
|
+
ESTIMATED_TOKENS_PER_LINE)))
|
|
956
|
+
: configuredTargetLines;
|
|
953
957
|
const graphEdges = collectGraphEdges(options.graphBundle);
|
|
954
958
|
const groups = buildTaskGroups(tasks);
|
|
955
959
|
const planningGraphEdges = buildPlanningGraphEdges(groups, graphEdges, options.graphBundle, options.lineIndex, targetPacketLines);
|
|
@@ -12,6 +12,7 @@ export function renderWorkerPrompt(task) {
|
|
|
12
12
|
`Read: ${tasksPath}`,
|
|
13
13
|
"Scope: review only the tasks listed in the Read file. Do not add tasks,",
|
|
14
14
|
"edit source files, remediate findings, run unrelated audits, or write result_path.",
|
|
15
|
+
"Prefer host Read and Grep tools for source inspection. On native Windows, do not use Unix pipelines like `grep ... | head`; if shell search is unavoidable, use `Select-String` as a fallback.",
|
|
15
16
|
"For each listed task: read the assigned file_paths under the specified lens,",
|
|
16
17
|
"using targeted reads/searches where they give complete enough evidence without loading unrelated context,",
|
|
17
18
|
"and emit exactly one AuditResult object with:",
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
export { resolveLimits, lookupKnownModel, classifyProvider } from "./limits.js";
|
|
2
|
+
export type { LimitResolutionResult, ResolveLimitsOptions, ProviderType } from "./limits.js";
|
|
3
|
+
export { readQuotaState, writeQuotaState, computeMaxSafeConcurrency, recordWaveOutcome, getQuotaStatePath, decayWeight, applyDecayToEntry, } from "./state.js";
|
|
4
|
+
export { scheduleWave, buildProviderModelKey } from "./scheduler.js";
|
|
5
|
+
export type { ScheduleWaveOptions } from "./scheduler.js";
|
|
6
|
+
export { probeProvider } from "./probe.js";
|
|
7
|
+
export type { ProbeResult } from "./probe.js";
|
|
8
|
+
export type { ResolvedLimits, LimitSource, LimitConfidence, QuotaState, QuotaStateEntry, ConcurrencyBucket, WaveSchedule, DispatchQuota, ObservedWaveOutcome, } from "./types.js";
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
export { resolveLimits, lookupKnownModel, classifyProvider } from "./limits.js";
|
|
2
|
+
export { readQuotaState, writeQuotaState, computeMaxSafeConcurrency, recordWaveOutcome, getQuotaStatePath, decayWeight, applyDecayToEntry, } from "./state.js";
|
|
3
|
+
export { scheduleWave, buildProviderModelKey } from "./scheduler.js";
|
|
4
|
+
export { probeProvider } from "./probe.js";
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import type { ResolvedProviderName, SessionConfig } from "../types/sessionConfig.js";
|
|
2
|
+
import type { LimitConfidence, LimitSource, ResolvedLimits } from "./types.js";
|
|
3
|
+
export type ProviderType = "hosted" | "local" | "unknown";
|
|
4
|
+
export declare function classifyProvider(providerName: ResolvedProviderName): ProviderType;
|
|
5
|
+
export declare function lookupKnownModel(modelKey: string): Pick<ResolvedLimits, "context_tokens" | "output_tokens"> | undefined;
|
|
6
|
+
export interface LimitResolutionResult {
|
|
7
|
+
limits: ResolvedLimits;
|
|
8
|
+
source: LimitSource;
|
|
9
|
+
confidence: LimitConfidence;
|
|
10
|
+
}
|
|
11
|
+
export interface ResolveLimitsOptions {
|
|
12
|
+
providerName: ResolvedProviderName;
|
|
13
|
+
sessionConfig: SessionConfig;
|
|
14
|
+
hostModel?: string | null;
|
|
15
|
+
}
|
|
16
|
+
export declare function resolveLimits(options: ResolveLimitsOptions): LimitResolutionResult;
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
// Static context-window and max-output sizes for recognised models, keyed by
// lower-case "provider/model".
// RPM/TPM are omitted here — they are tier-dependent and must come from learning.
//
// The table is built on a null prototype: it is indexed with caller-supplied
// model names, and on an ordinary object literal a name such as "constructor",
// "toString" or "__proto__" would resolve to a member inherited from
// Object.prototype and be mistaken for a known model.
const KNOWN_MODEL_LIMITS = Object.assign(Object.create(null), {
    "anthropic/claude-opus-4-7": { context_tokens: 200_000, output_tokens: 32_000 },
    "anthropic/claude-sonnet-4-6": { context_tokens: 200_000, output_tokens: 8_192 },
    "anthropic/claude-haiku-4-5": { context_tokens: 200_000, output_tokens: 8_192 },
    "anthropic/claude-opus-4-5": { context_tokens: 200_000, output_tokens: 8_192 },
    "anthropic/claude-sonnet-4-5": { context_tokens: 200_000, output_tokens: 8_192 },
    "openai/gpt-4o": { context_tokens: 128_000, output_tokens: 16_384 },
    "openai/gpt-4o-mini": { context_tokens: 128_000, output_tokens: 16_384 },
    "google/gemini-2.0-flash": { context_tokens: 1_048_576, output_tokens: 8_192 },
    "google/gemini-1.5-pro": { context_tokens: 2_097_152, output_tokens: 8_192 },
});
|
|
13
|
+
/**
 * Buckets a resolved provider name into a coarse provider type.
 *
 * @param providerName Resolved provider name.
 * @returns "hosted" for "claude-code" and "opencode", "local" for
 *   "local-subprocess", and "unknown" for everything else (including
 *   "subprocess-template" and "vscode-task").
 */
export function classifyProvider(providerName) {
    if (providerName === "claude-code" || providerName === "opencode") {
        return "hosted";
    }
    if (providerName === "local-subprocess") {
        return "local";
    }
    // "subprocess-template", "vscode-task" and any unrecognised name.
    return "unknown";
}
|
|
26
|
+
/**
 * Looks a model up in the static KNOWN_MODEL_LIMITS table.
 *
 * @param modelKey Model identifier; it is lower-cased and trimmed before the lookup.
 * @returns The KNOWN_MODEL_LIMITS entry stored under the normalised key;
 *   unrecognised model names yield undefined.
 */
export function lookupKnownModel(modelKey) {
    const normalizedKey = modelKey.toLowerCase().trim();
    return KNOWN_MODEL_LIMITS[normalizedKey];
}
|
|
29
|
+
/**
 * Builds the fallback limits used when nothing model-specific is known.
 *
 * Context and output sizes come from `sessionConfig.quota` when set
 * (`default_context_tokens`, `reserved_output_tokens`) and otherwise default to
 * 32 000 and 4 096 tokens. All per-minute rate limits are reported as null.
 *
 * @param sessionConfig Session configuration; its `quota` section is optional.
 * @returns A fresh limits object.
 */
function defaultLimits(sessionConfig) {
    const { default_context_tokens, reserved_output_tokens } = sessionConfig.quota ?? {};
    return {
        context_tokens: default_context_tokens ?? 32_000,
        output_tokens: reserved_output_tokens ?? 4_096,
        requests_per_minute: null,
        input_tokens_per_minute: null,
        output_tokens_per_minute: null,
    };
}
|
|
39
|
+
/**
 * Resolves the token and rate limits to assume for a host model.
 *
 * Sources are tried in order and the first match wins:
 *   1. an explicit `sessionConfig.quota.models[hostModel]` override
 *      (source "explicit_config", confidence "high");
 *   2. the static known-model table via lookupKnownModel
 *      (source "known_metadata", confidence "medium"; rate limits stay null);
 *   3. the conservative defaults from defaultLimits
 *      (source "default", confidence "low").
 *
 * @param options `sessionConfig` and optional `hostModel`; `providerName` is
 *   accepted for interface compatibility but does not influence the result.
 * @returns `{ limits, source, confidence }`.
 */
export function resolveLimits(options) {
    const { sessionConfig, hostModel } = options;
    const quota = sessionConfig.quota ?? {};
    const defaults = defaultLimits(sessionConfig);
    // 1. Explicit per-model config overrides.
    // Only own properties count: without this guard a host model named like an
    // Object.prototype member ("constructor", "toString", ...) would be treated
    // as an explicit, high-confidence override.
    const modelOverrides = quota.models;
    if (hostModel &&
        modelOverrides != null &&
        Object.prototype.hasOwnProperty.call(modelOverrides, hostModel) &&
        modelOverrides[hostModel]) {
        const override = modelOverrides[hostModel];
        return {
            limits: {
                // Fields missing from the override fall back to the defaults / null.
                context_tokens: override.context_tokens ?? defaults.context_tokens,
                output_tokens: override.output_tokens ?? defaults.output_tokens,
                requests_per_minute: override.requests_per_minute ?? null,
                input_tokens_per_minute: override.input_tokens_per_minute ?? null,
                output_tokens_per_minute: override.output_tokens_per_minute ?? null,
            },
            source: "explicit_config",
            confidence: "high",
        };
    }
    // 2. Static known-model database (context/output only; RPM/TPM from learning)
    if (hostModel) {
        const known = lookupKnownModel(hostModel);
        if (known) {
            return {
                limits: {
                    context_tokens: known.context_tokens,
                    output_tokens: known.output_tokens,
                    requests_per_minute: null,
                    input_tokens_per_minute: null,
                    output_tokens_per_minute: null,
                },
                source: "known_metadata",
                confidence: "medium",
            };
        }
    }
    // 3. Conservative defaults for all provider types
    return { limits: defaults, source: "default", confidence: "low" };
}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
export interface ProbeResult {
|
|
2
|
+
supported: boolean;
|
|
3
|
+
reason: string;
|
|
4
|
+
}
|
|
5
|
+
/**
|
|
6
|
+
* Probe a provider to discover its rate limits.
|
|
7
|
+
*
|
|
8
|
+
* Only subprocess-template supports direct probing since it is the only
|
|
9
|
+
* provider where the auditor controls the API call. IDE providers
|
|
10
|
+
* (claude-code, opencode) select the model internally; their limits come
|
|
11
|
+
* from known-model metadata or learned behavior.
|
|
12
|
+
*/
|
|
13
|
+
export declare function probeProvider(providerName: string, probeMode?: "auto" | "never" | "force"): Promise<ProbeResult>;
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
/**
 * Probe a provider to discover its rate limits.
 *
 * Direct probing is only meaningful for subprocess-template, the one provider
 * where the auditor controls the API call; IDE providers (claude-code,
 * opencode) pick the model internally, so their limits come from known-model
 * metadata or learned behavior. The subprocess-template probe itself is not
 * implemented yet, so every call currently resolves to `supported: false`
 * with a reason explaining why.
 *
 * @param providerName Provider to probe.
 * @param probeMode "auto" (default), "never" or "force"; only "never" changes
 *   the outcome at present.
 * @returns A promise of `{ supported, reason }`.
 */
export async function probeProvider(providerName, probeMode = "auto") {
    const unsupported = (reason) => ({ supported: false, reason });
    if (probeMode === "never") {
        return unsupported("probe disabled by config");
    }
    if (providerName === "subprocess-template") {
        // subprocess-template probe not yet implemented
        return unsupported("subprocess-template probe not yet implemented");
    }
    return unsupported(`probe not applicable for ${providerName} — limits come from known-model metadata or learned behavior`);
}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import type { ResolvedProviderName, SessionConfig } from "../types/sessionConfig.js";
|
|
2
|
+
import type { QuotaStateEntry, WaveSchedule } from "./types.js";
|
|
3
|
+
export interface ScheduleWaveOptions {
|
|
4
|
+
providerName: ResolvedProviderName;
|
|
5
|
+
sessionConfig: SessionConfig;
|
|
6
|
+
hostModel: string | null;
|
|
7
|
+
requestedConcurrency: number;
|
|
8
|
+
/** Average estimated tokens per packet/worker. Used for TPM budget. */
|
|
9
|
+
estimatedPacketTokens?: number;
|
|
10
|
+
quotaStateEntry?: QuotaStateEntry | null;
|
|
11
|
+
}
|
|
12
|
+
export declare function scheduleWave(options: ScheduleWaveOptions): WaveSchedule;
|
|
13
|
+
/** Build the state key used for indexing quota-state.json entries. */
|
|
14
|
+
export declare function buildProviderModelKey(providerName: string, hostModel: string | null | undefined): string;
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
import { classifyProvider, resolveLimits } from "./limits.js";
|
|
2
|
+
import { computeMaxSafeConcurrency } from "./state.js";
|
|
3
|
+
/**
 * Decides how many workers to dispatch in the next wave.
 *
 * With `quota.enabled === false` the requested concurrency is passed through
 * unchanged. Otherwise the wave starts at `requestedConcurrency` and is reduced by:
 *   - an active cooldown on the quota-state entry (wave size forced to 1);
 *   - the requests-per-minute and input-tokens-per-minute limits, each scaled
 *     by the safety margin (the TPM cap needs a positive `estimatedPacketTokens`);
 *   - the learned cap from computeMaxSafeConcurrency when a state entry exists;
 *   - with no state entry: `unknown_hosted_concurrency` (default 1) for hosted
 *     providers that resolved to default limits, or a numeric
 *     `unknown_local_concurrency` for local providers.
 * The result is always a whole number of at least 1.
 *
 * @param options See ScheduleWaveOptions; `estimatedPacketTokens` defaults to 0
 *   and `quotaStateEntry` to null.
 * @returns The wave schedule: size, estimated tokens, cooldown, and the
 *   resolved limits with their source and confidence.
 */
export function scheduleWave(options) {
    const { providerName, sessionConfig, hostModel, requestedConcurrency, estimatedPacketTokens = 0, quotaStateEntry = null, } = options;
    const quota = sessionConfig.quota ?? {};
    if (quota.enabled === false) {
        // Quota scheduling is switched off: report default limits and let the
        // caller's requested concurrency through untouched.
        const limits = {
            context_tokens: quota.default_context_tokens ?? 32_000,
            output_tokens: quota.reserved_output_tokens ?? 4_096,
            requests_per_minute: null,
            input_tokens_per_minute: null,
            output_tokens_per_minute: null,
        };
        return {
            wave_size: requestedConcurrency,
            estimated_wave_tokens: requestedConcurrency * estimatedPacketTokens,
            cooldown_until: null,
            confidence: "high",
            source: "default",
            resolved_limits: limits,
            model: hostModel,
        };
    }
    // A missing or non-finite margin would turn every cap below into NaN, so
    // anything that is not a finite number falls back to the documented default.
    const safetyMargin = Number.isFinite(quota.safety_margin) ? quota.safety_margin : 0.8;
    const halfLifeHours = quota.empirical_half_life_hours ?? 24;
    const providerType = classifyProvider(providerName);
    const { limits, source, confidence } = resolveLimits({ providerName, sessionConfig, hostModel });
    let waveSize = requestedConcurrency;
    let cooldownUntil = null;
    // Respect an active cooldown period
    if (quotaStateEntry?.cooldown_until) {
        const cooldownExpiry = new Date(quotaStateEntry.cooldown_until).getTime();
        // An unparseable timestamp yields NaN, which never compares greater and
        // is therefore treated as "no cooldown".
        if (cooldownExpiry > Date.now()) {
            cooldownUntil = quotaStateEntry.cooldown_until;
            waveSize = 1;
        }
    }
    if (!cooldownUntil) {
        // Cap by requests-per-minute
        if (limits.requests_per_minute != null) {
            const rpmCap = Math.max(1, Math.floor(limits.requests_per_minute * safetyMargin));
            waveSize = Math.min(waveSize, rpmCap);
        }
        // Cap by input tokens-per-minute
        if (limits.input_tokens_per_minute != null && estimatedPacketTokens > 0) {
            const tpmCap = Math.max(1, Math.floor((limits.input_tokens_per_minute * safetyMargin) / estimatedPacketTokens));
            waveSize = Math.min(waveSize, tpmCap);
        }
        if (quotaStateEntry) {
            const learnedCap = computeMaxSafeConcurrency(quotaStateEntry, halfLifeHours);
            waveSize = Math.min(waveSize, learnedCap);
        }
        else if (providerType === "hosted" && source === "default") {
            // Unknown hosted provider with no learned data and no model-specific limits —
            // be conservative. If the caller supplied RPM/TPM caps those already govern rate;
            // this guard only triggers when we have no rate information at all.
            const conservativeDefault = quota.unknown_hosted_concurrency ?? 1;
            waveSize = Math.min(waveSize, conservativeDefault);
        }
        else if (providerType === "local" && Number.isFinite(quota.unknown_local_concurrency)) {
            // Local providers with no learned data: honour an explicit numeric
            // ceiling. "unlimited" (or an unset option) leaves requestedConcurrency as is.
            waveSize = Math.min(waveSize, quota.unknown_local_concurrency);
        }
    }
    // Normalise to a whole number >= 1. The comparison is false for NaN, so a
    // bad input can never leak out as wave_size: NaN.
    const wholeWaveSize = Math.floor(waveSize);
    waveSize = wholeWaveSize >= 1 ? wholeWaveSize : 1;
    return {
        wave_size: waveSize,
        estimated_wave_tokens: waveSize * estimatedPacketTokens,
        cooldown_until: cooldownUntil,
        confidence,
        source,
        resolved_limits: limits,
        model: hostModel,
    };
}
|
|
73
|
+
/**
 * Build the state key used for indexing quota-state.json entries.
 *
 * @param providerName Provider name, used as the key prefix.
 * @param hostModel Model name; any falsy value (null, undefined, "") is
 *   replaced by the wildcard "*".
 * @returns `"<providerName>/<hostModel>"` or `"<providerName>/*"`.
 */
export function buildProviderModelKey(providerName, hostModel) {
    const modelPart = hostModel || "*";
    return `${providerName}/${modelPart}`;
}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import type { ObservedWaveOutcome, QuotaState, QuotaStateEntry } from "./types.js";
|
|
2
|
+
export declare function getQuotaStatePath(): string;
|
|
3
|
+
export declare function decayWeight(weight: number, elapsedHours: number, halfLifeHours: number): number;
|
|
4
|
+
export declare function applyDecayToEntry(entry: QuotaStateEntry, halfLifeHours: number): QuotaStateEntry;
|
|
5
|
+
export declare function readQuotaState(): Promise<QuotaState>;
|
|
6
|
+
export declare function writeQuotaState(state: QuotaState): Promise<void>;
|
|
7
|
+
/**
|
|
8
|
+
* Returns the highest concurrency level for which decayed success evidence
|
|
9
|
+
* exceeds failure evidence, with a minimum of 1.
|
|
10
|
+
*/
|
|
11
|
+
export declare function computeMaxSafeConcurrency(entry: QuotaStateEntry, halfLifeHours: number, maxToCheck?: number): number;
|
|
12
|
+
export declare function recordWaveOutcome(providerModelKey: string, outcome: ObservedWaveOutcome, halfLifeHours: number): Promise<void>;
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
import { mkdir, readFile, writeFile } from "node:fs/promises";
|
|
2
|
+
import { homedir } from "node:os";
|
|
3
|
+
import { join } from "node:path";
|
|
4
|
+
/** Directory under the current user's home that holds the persisted state. */
const STATE_DIR = join(homedir(), ".audit-code");
/** Full path of the persisted quota-state file inside STATE_DIR. */
const STATE_PATH = join(STATE_DIR, "quota-state.json");
/**
 * Minimum decayed success weight a concurrency bucket must carry before it is
 * trusted as safe.
 */
const MIN_EVIDENCE_WEIGHT = 0.5;
/**
 * Exposes where quota state is stored.
 *
 * @returns The path of quota-state.json, computed once when the module loads.
 */
export function getQuotaStatePath() {
    return STATE_PATH;
}
|
|
11
|
+
/**
 * Applies exponential half-life decay to an evidence weight.
 *
 * @param weight Current weight; anything that is not a finite positive number decays to 0.
 * @param elapsedHours Hours since the weight was last updated. Zero, negative
 *   (clock skew) or NaN elapsed time leaves the weight unchanged, so evidence
 *   is never amplified.
 * @param halfLifeHours Hours after which the weight halves. A value that is
 *   not positive (including NaN) yields 0; an infinite half-life means no decay.
 * @returns `weight * 0.5 ** (elapsedHours / halfLifeHours)` for valid inputs.
 */
export function decayWeight(weight, elapsedHours, halfLifeHours) {
    // `!(x > 0)` is deliberately used instead of `x <= 0`: it is also true for NaN.
    if (!(weight > 0) || !Number.isFinite(weight) || !(halfLifeHours > 0)) {
        return 0;
    }
    if (!(elapsedHours > 0) || halfLifeHours === Infinity) {
        return weight;
    }
    return weight * Math.pow(0.5, elapsedHours / halfLifeHours);
}
|
|
16
|
+
/**
 * Returns a quota-state entry whose bucket weights are decayed to "now".
 *
 * The elapsed time is measured from `entry.updated_at`. When less than 0.001
 * hours have passed, when `updated_at` lies in the future, or when it cannot
 * be parsed as a date, the entry is returned as-is (same object). Otherwise a
 * shallow copy with freshly decayed buckets is returned; `updated_at` itself
 * is not modified here.
 *
 * @param entry Quota-state entry with `updated_at` and `buckets`.
 * @param halfLifeHours Half-life passed through to decayWeight.
 * @returns The original entry or a decayed copy.
 */
export function applyDecayToEntry(entry, halfLifeHours) {
    const MS_PER_HOUR = 1000 * 60 * 60;
    const elapsedHours = (Date.now() - new Date(entry.updated_at).getTime()) / MS_PER_HOUR;
    // Written as a negated `>=` so that NaN (unparseable updated_at) takes the
    // early return too; with `< 0.001` NaN fell through and turned every
    // weight into NaN, wiping the learned evidence.
    if (!(elapsedHours >= 0.001)) {
        return entry;
    }
    const decayed = {};
    for (const [key, bucket] of Object.entries(entry.buckets)) {
        decayed[key] = {
            success_weight: decayWeight(bucket.success_weight, elapsedHours, halfLifeHours),
            failure_weight: decayWeight(bucket.failure_weight, elapsedHours, halfLifeHours),
        };
    }
    return { ...entry, buckets: decayed };
}
|
|
29
|
+
/**
 * Structural check for a parsed quota-state document.
 *
 * Accepts only a non-array object with `version === 1` and an `entries`
 * member that is itself a non-null, non-array object. Individual entries are
 * not validated.
 *
 * @param value Arbitrary parsed JSON.
 * @returns true when `value` has the expected top-level shape.
 */
function isQuotaState(value) {
    if (value === null || typeof value !== "object" || Array.isArray(value)) {
        return false;
    }
    const entries = value["entries"];
    // `typeof null === "object"`, so null must be excluded explicitly; an array
    // would silently drop its string-keyed entries when serialised back to JSON.
    return (value["version"] === 1 &&
        entries !== null &&
        typeof entries === "object" &&
        !Array.isArray(entries));
}
|
|
36
|
+
/**
 * Loads the persisted quota state from STATE_PATH.
 *
 * This is best-effort: if the file cannot be read, is not valid JSON, or does
 * not pass isQuotaState, a fresh empty state is returned instead of throwing.
 *
 * @returns The stored state, or `{ version: 1, entries: {} }`.
 */
export async function readQuotaState() {
    const freshState = () => ({ version: 1, entries: {} });
    let parsed;
    try {
        parsed = JSON.parse(await readFile(STATE_PATH, "utf8"));
    }
    catch {
        // File not found or malformed — start fresh
        return freshState();
    }
    return isQuotaState(parsed) ? parsed : freshState();
}
|
|
48
|
+
/**
 * Persists the quota state to STATE_PATH as pretty-printed JSON (two-space
 * indent, trailing newline), creating STATE_DIR first if it does not exist.
 *
 * @param state State document to store.
 */
export async function writeQuotaState(state) {
    await mkdir(STATE_DIR, { recursive: true });
    const serialized = JSON.stringify(state, null, 2) + "\n";
    await writeFile(STATE_PATH, serialized, "utf8");
}
|
|
52
|
+
/**
 * Returns the highest concurrency level for which decayed success evidence
 * exceeds failure evidence, with a minimum of 1.
 *
 * Levels are examined upwards from 1 and the scan stops at the first level
 * that has no bucket, has less than MIN_EVIDENCE_WEIGHT of success weight, or
 * whose success weight does not exceed its failure weight. Only an unbroken
 * run of trusted levels starting at 1 therefore counts.
 *
 * @param entry Quota-state entry; its buckets are decayed before evaluation.
 * @param halfLifeHours Half-life used for the decay.
 * @param maxToCheck Highest level to examine (default 32).
 * @returns The last trusted level, or 1 when level 1 itself is not trusted.
 */
export function computeMaxSafeConcurrency(entry, halfLifeHours, maxToCheck = 32) {
    const { buckets } = applyDecayToEntry(entry, halfLifeHours);
    let level = 1;
    while (level <= maxToCheck) {
        const bucket = buckets[String(level)];
        const trusted = Boolean(bucket) &&
            bucket.success_weight >= MIN_EVIDENCE_WEIGHT &&
            bucket.success_weight > bucket.failure_weight;
        if (!trusted) {
            break;
        }
        level += 1;
    }
    // `level` is one past the last trusted level; never report less than 1.
    return Math.max(1, level - 1);
}
|
|
73
|
+
/**
 * Creates an empty quota-state entry stamped with the current time.
 *
 * @returns A new entry with no buckets, no cooldown and no recorded 429.
 */
function blankEntry() {
    const createdAt = new Date().toISOString();
    return {
        updated_at: createdAt,
        buckets: {},
        cooldown_until: null,
        last_429_at: null,
    };
}
|
|
76
|
+
/**
 * Folds the observed outcome of one dispatch wave into the persisted quota state.
 *
 * The entry for `providerModelKey` is loaded (or created), decayed to the
 * current time, updated, re-stamped and written back:
 *   - "success" adds one unit of success weight to every level 1..concurrency;
 *   - any other outcome adds one unit of failure weight to the failed level
 *     and the four levels above it, and stores `outcome.cooldown_until` when given;
 *   - `last_429_at` is stamped only for "rate_limited" outcomes, so a plain
 *     timeout is no longer recorded as an HTTP 429.
 *
 * @param providerModelKey Key of the entry (see buildProviderModelKey).
 * @param outcome Observed wave outcome (`concurrency`, `outcome`, optional `cooldown_until`).
 * @param halfLifeHours Half-life used to decay existing evidence before the update.
 */
export async function recordWaveOutcome(providerModelKey, outcome, halfLifeHours) {
    // Number of levels above a failed concurrency that are penalised as well.
    const FAILURE_SPREAD = 4;
    // Adds one unit of evidence to `field` of the bucket for `level`, creating it on demand.
    const addEvidence = (buckets, level, field) => {
        const bucket = buckets[String(level)] ?? { success_weight: 0, failure_weight: 0 };
        bucket[field] += 1.0;
        buckets[String(level)] = bucket;
    };
    const state = await readQuotaState();
    const entry = applyDecayToEntry(state.entries[providerModelKey] ?? blankEntry(), halfLifeHours);
    if (outcome.outcome === "success") {
        // Success at N proves 1..N are all safe
        for (let n = 1; n <= outcome.concurrency; n++) {
            addEvidence(entry.buckets, n, "success_weight");
        }
    }
    else {
        if (outcome.outcome === "rate_limited") {
            entry.last_429_at = new Date().toISOString();
        }
        if (outcome.cooldown_until) {
            entry.cooldown_until = outcome.cooldown_until;
        }
        // Failure at N marks N and above as unsafe
        for (let n = outcome.concurrency; n <= outcome.concurrency + FAILURE_SPREAD; n++) {
            addEvidence(entry.buckets, n, "failure_weight");
        }
    }
    entry.updated_at = new Date().toISOString();
    state.entries[providerModelKey] = entry;
    await writeQuotaState(state);
}
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
export type LimitSource = "explicit_config" | "cli_flags" | "known_metadata" | "learned" | "default";
|
|
2
|
+
export type LimitConfidence = "high" | "medium" | "low";
|
|
3
|
+
export interface ResolvedLimits {
|
|
4
|
+
context_tokens: number;
|
|
5
|
+
output_tokens: number;
|
|
6
|
+
requests_per_minute: number | null;
|
|
7
|
+
input_tokens_per_minute: number | null;
|
|
8
|
+
output_tokens_per_minute: number | null;
|
|
9
|
+
}
|
|
10
|
+
export interface ConcurrencyBucket {
|
|
11
|
+
success_weight: number;
|
|
12
|
+
failure_weight: number;
|
|
13
|
+
}
|
|
14
|
+
export interface QuotaStateEntry {
|
|
15
|
+
updated_at: string;
|
|
16
|
+
buckets: Record<string, ConcurrencyBucket>;
|
|
17
|
+
cooldown_until: string | null;
|
|
18
|
+
last_429_at: string | null;
|
|
19
|
+
}
|
|
20
|
+
export interface QuotaState {
|
|
21
|
+
version: 1;
|
|
22
|
+
entries: Record<string, QuotaStateEntry>;
|
|
23
|
+
}
|
|
24
|
+
export interface WaveSchedule {
|
|
25
|
+
wave_size: number;
|
|
26
|
+
estimated_wave_tokens: number;
|
|
27
|
+
cooldown_until: string | null;
|
|
28
|
+
confidence: LimitConfidence;
|
|
29
|
+
source: LimitSource;
|
|
30
|
+
resolved_limits: ResolvedLimits;
|
|
31
|
+
model: string | null;
|
|
32
|
+
}
|
|
33
|
+
export interface DispatchQuota {
|
|
34
|
+
contract_version: "audit-code-dispatch-quota/v1alpha1";
|
|
35
|
+
run_id: string;
|
|
36
|
+
model: string | null;
|
|
37
|
+
resolved_limits: ResolvedLimits;
|
|
38
|
+
confidence: LimitConfidence;
|
|
39
|
+
source: LimitSource;
|
|
40
|
+
wave_size: number;
|
|
41
|
+
estimated_wave_tokens: number;
|
|
42
|
+
cooldown_until: string | null;
|
|
43
|
+
}
|
|
44
|
+
export interface ObservedWaveOutcome {
|
|
45
|
+
concurrency: number;
|
|
46
|
+
estimated_tokens: number;
|
|
47
|
+
outcome: "success" | "rate_limited" | "timeout";
|
|
48
|
+
cooldown_until?: string | null;
|
|
49
|
+
reset_at?: string | null;
|
|
50
|
+
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -105,9 +105,7 @@ function buildSuggestedCommands(artifactsDir, suggestedInputs, status, activeRev
|
|
|
105
105
|
return [
|
|
106
106
|
renderShellCommand([
|
|
107
107
|
"audit-code",
|
|
108
|
-
"
|
|
109
|
-
"--run-id",
|
|
110
|
-
activeReviewRun.run_id,
|
|
108
|
+
"next-step",
|
|
111
109
|
"--artifacts-dir",
|
|
112
110
|
artifactsDir,
|
|
113
111
|
]),
|
|
@@ -170,7 +168,7 @@ function renderMarkdown(handoff) {
|
|
|
170
168
|
lines.push(`- ${command}`);
|
|
171
169
|
}
|
|
172
170
|
if (handoff.active_review_run) {
|
|
173
|
-
lines.push("- Use
|
|
171
|
+
lines.push("- Use next-step so the backend renders either packet dispatch or single-task fallback after the host reports capabilities.");
|
|
174
172
|
}
|
|
175
173
|
}
|
|
176
174
|
if (handoff.active_review_run) {
|
|
@@ -233,9 +231,7 @@ export function buildAuditCodeHandoff(params) {
|
|
|
233
231
|
if (params.state.status === BLOCKED_STATUS && params.activeReviewRun) {
|
|
234
232
|
handoff.quick_start = renderShellCommand([
|
|
235
233
|
"audit-code",
|
|
236
|
-
"
|
|
237
|
-
"--run-id",
|
|
238
|
-
params.activeReviewRun.run_id,
|
|
234
|
+
"next-step",
|
|
239
235
|
"--artifacts-dir",
|
|
240
236
|
params.artifactsDir,
|
|
241
237
|
]);
|
|
@@ -20,6 +20,33 @@ export interface VSCodeTaskConfig {
|
|
|
20
20
|
command_template: string[];
|
|
21
21
|
env?: Record<string, string>;
|
|
22
22
|
}
|
|
23
|
+
export interface QuotaModelLimits {
|
|
24
|
+
context_tokens?: number;
|
|
25
|
+
output_tokens?: number;
|
|
26
|
+
requests_per_minute?: number;
|
|
27
|
+
input_tokens_per_minute?: number;
|
|
28
|
+
output_tokens_per_minute?: number;
|
|
29
|
+
}
|
|
30
|
+
export interface QuotaConfig {
|
|
31
|
+
/** Set to false to disable all quota scheduling (default: true). */
|
|
32
|
+
enabled?: boolean;
|
|
33
|
+
/** Whether to probe the provider for live limits (default: "auto"). */
|
|
34
|
+
probe?: "auto" | "never" | "force";
|
|
35
|
+
/** Fraction of known limits to actually use (default: 0.8). */
|
|
36
|
+
safety_margin?: number;
|
|
37
|
+
/** Concurrency ceiling for hosted providers with no learned data (default: 1). */
|
|
38
|
+
unknown_hosted_concurrency?: number;
|
|
39
|
+
/** Concurrency for local providers with no learned data (default: "unlimited"). */
|
|
40
|
+
unknown_local_concurrency?: number | "unlimited";
|
|
41
|
+
/** Assumed context window when the model is not recognized (default: 32000). */
|
|
42
|
+
default_context_tokens?: number;
|
|
43
|
+
/** Tokens reserved for model output per request (default: 4096). */
|
|
44
|
+
reserved_output_tokens?: number;
|
|
45
|
+
/** Half-life of empirical success/failure evidence in hours (default: 24). */
|
|
46
|
+
empirical_half_life_hours?: number;
|
|
47
|
+
/** Per-model overrides keyed by "provider/model". */
|
|
48
|
+
models?: Record<string, QuotaModelLimits>;
|
|
49
|
+
}
|
|
23
50
|
export declare const PROVIDER_SECTION_KEYS: {
|
|
24
51
|
readonly "subprocess-template": "subprocess_template";
|
|
25
52
|
readonly "claude-code": "claude_code";
|
|
@@ -40,4 +67,5 @@ export interface SessionConfig {
|
|
|
40
67
|
vscode_task?: VSCodeTaskConfig;
|
|
41
68
|
agent_task_batch_size?: number;
|
|
42
69
|
parallel_workers?: number;
|
|
70
|
+
quota?: QuotaConfig;
|
|
43
71
|
}
|
package/docs/contracts.md
CHANGED
|
@@ -77,6 +77,23 @@ The backend stores resumable artifacts under `.audit-artifacts/`, including:
|
|
|
77
77
|
Consumers should treat these as versioned JSON artifacts and validate them with
|
|
78
78
|
`audit-code validate` rather than inferring state from filenames alone.
|
|
79
79
|
|
|
80
|
+
## Step artifacts
|
|
81
|
+
|
|
82
|
+
The conversation-first `/audit-code` prompt is a loader. It runs
|
|
83
|
+
`audit-code next-step` and then follows only the returned step prompt. The
|
|
84
|
+
backend writes the current step contract to:
|
|
85
|
+
|
|
86
|
+
- `<artifacts_dir>/steps/current-step.json`
|
|
87
|
+
- `<artifacts_dir>/steps/current-prompt.md`
|
|
88
|
+
|
|
89
|
+
`current-step.json` uses `contract_version: "audit-code-step/v1alpha1"` and
|
|
90
|
+
includes `step_kind`, `prompt_path`, `status`, `run_id`, `allowed_commands`,
|
|
91
|
+
`stop_condition`, `repo_root`, `artifacts_dir`, and relevant `artifact_paths`.
|
|
92
|
+
|
|
93
|
+
When semantic review is blocked, `next-step` first emits a `capability_check`.
|
|
94
|
+
After the host reports `--host-can-dispatch-subagents true|false`, the backend
|
|
95
|
+
renders exactly one review path: packet dispatch or the single-task fallback.
|
|
96
|
+
|
|
80
97
|
## Dispatch packets
|
|
81
98
|
|
|
82
99
|
Packet dispatch preserves the existing `AuditTask` and `AuditResult`
|
|
@@ -92,13 +109,18 @@ Planning artifacts are shaped by:
|
|
|
92
109
|
Normal packet flow:
|
|
93
110
|
|
|
94
111
|
```text
|
|
95
|
-
audit-code
|
|
112
|
+
audit-code next-step --host-can-dispatch-subagents true
|
|
113
|
+
backend prepares dispatch-plan.json
|
|
96
114
|
conversation launches one worker per dispatch-plan entry
|
|
97
115
|
worker reads entry.prompt_path
|
|
98
116
|
worker submits AuditResult[] through submit-packet
|
|
99
117
|
audit-code merge-and-ingest --run-id <run_id> --artifacts-dir <artifacts_dir>
|
|
100
118
|
```
|
|
101
119
|
|
|
120
|
+
`audit-code prepare-dispatch --run-id <run_id> --artifacts-dir
|
|
121
|
+
<artifacts_dir>` remains available for compatibility and tests, but generic
|
|
122
|
+
handoff fields point users and prompts to `next-step`.
|
|
123
|
+
|
|
102
124
|
Packet artifacts:
|
|
103
125
|
|
|
104
126
|
- `<artifacts_dir>/runs/<run_id>/dispatch-plan.json`
|
package/docs/operator-guide.md
CHANGED
|
@@ -43,7 +43,7 @@ Host-specific files may include:
|
|
|
43
43
|
|
|
44
44
|
- Codex: managed `AGENTS.md` fallback guidance
|
|
45
45
|
- Claude Desktop: project template, remote MCP connector, local MCP bundle
|
|
46
|
-
- OpenCode: `opencode.json` with `/audit-code`
|
|
46
|
+
- OpenCode: `opencode.json` with auditor MCP server and permission wiring; the `/audit-code` command itself is installed globally by npm rather than defined per project
|
|
47
47
|
- VS Code/Copilot: prompt, custom agent, instructions, and `.vscode/mcp.json`
|
|
48
48
|
- Antigravity: planning-mode and MCP-oriented guidance
|
|
49
49
|
|
|
@@ -64,9 +64,10 @@ with the canonical `/audit-code` spelling.
|
|
|
64
64
|
Claude Desktop is treated as an MCP-first host. Use the generated project
|
|
65
65
|
template and local bundle artifacts when installing the integration.
|
|
66
66
|
|
|
67
|
-
OpenCode uses
|
|
68
|
-
|
|
69
|
-
|
|
67
|
+
OpenCode uses the global command seeded by `npm install -g auditor-lambda`.
|
|
68
|
+
The generated project `opencode.json` should not define `command["audit-code"]`;
|
|
69
|
+
it only wires the auditor MCP server and project permissions. VS Code uses
|
|
70
|
+
repo-local prompt and MCP configuration files.
|
|
70
71
|
|
|
71
72
|
Antigravity should be treated as a workflow-and-artifacts host until it has a
|
|
72
73
|
stable project-local config surface. Use generated planning-mode guidance,
|
|
@@ -100,6 +101,7 @@ The wrapper:
|
|
|
100
101
|
Useful fallback commands:
|
|
101
102
|
|
|
102
103
|
```bash
|
|
104
|
+
audit-code next-step
|
|
103
105
|
audit-code --single-step
|
|
104
106
|
audit-code --results /path/to/audit_results.json
|
|
105
107
|
audit-code --batch-results /path/to/results-dir
|
|
@@ -111,6 +113,11 @@ audit-code cleanup
|
|
|
111
113
|
audit-code mcp
|
|
112
114
|
```
|
|
113
115
|
|
|
116
|
+
`audit-code next-step` is the backend-rendered step engine used by the
|
|
117
|
+
conversation prompt. It writes `.audit-artifacts/steps/current-step.json` and
|
|
118
|
+
`.audit-artifacts/steps/current-prompt.md`, then the host should follow only
|
|
119
|
+
that prompt.
|
|
120
|
+
|
|
114
121
|
`audit-code validate` checks artifact shape, cross-artifact consistency,
|
|
115
122
|
session config, and explicit provider readiness.
|
|
116
123
|
|
package/docs/product.md
CHANGED
|
@@ -148,9 +148,10 @@ Readiness should be judged through three checks:
|
|
|
148
148
|
- field-trial quality: run real repositories through planning, validate
|
|
149
149
|
artifacts, and use `audit_plan_metrics.json` to track packet count, weak
|
|
150
150
|
packet count, average cohesion, merge edge kinds, and weak-packet samples
|
|
151
|
-
- full-loop behavior: prove `
|
|
152
|
-
`submit-packet`, `merge-and-ingest`, selective deepening,
|
|
153
|
-
and final `audit-report.md` promotion in at least one
|
|
151
|
+
- full-loop behavior: prove `next-step` capability routing, packet dispatch,
|
|
152
|
+
worker review, `submit-packet`, `merge-and-ingest`, selective deepening,
|
|
153
|
+
runtime validation, and final `audit-report.md` promotion in at least one
|
|
154
|
+
real host flow
|
|
154
155
|
- release hygiene: keep `npm run verify:release`, linked smoke, packaged
|
|
155
156
|
smoke, tarball preview, and Trusted Publishing green from a clean checkout
|
|
156
157
|
|