auditor-lambda 0.3.20 → 0.3.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,6 +6,11 @@ export interface BuildReviewPacketOptions {
6
6
  lineIndex?: Record<string, number>;
7
7
  maxTasksPerPacket?: number;
8
8
  targetPacketLines?: number;
9
+ /**
10
+ * Available context budget in tokens (context_tokens − reserved_output_tokens).
11
+ * When provided, targetPacketLines is capped to fit within this budget.
12
+ */
13
+ maxContextTokens?: number;
9
14
  }
10
15
  export declare function buildReviewPackets(tasks: AuditTask[], options?: BuildReviewPacketOptions): ReviewPacket[];
11
16
  export declare function orderTasksForPacketReview(tasks: AuditTask[], options?: BuildReviewPacketOptions): AuditTask[];
@@ -949,7 +949,11 @@ function buildPacket(tasks, packetIndex, lineIndex, graphEdges = [], graphBundle
949
949
  }
950
950
  function buildReviewPacketPlanningData(tasks, options = {}) {
951
951
  const maxTasksPerPacket = options.maxTasksPerPacket ?? DEFAULT_MAX_TASKS_PER_PACKET;
952
- const targetPacketLines = options.targetPacketLines ?? DEFAULT_TARGET_PACKET_LINES;
952
+ const configuredTargetLines = options.targetPacketLines ?? DEFAULT_TARGET_PACKET_LINES;
953
+ const targetPacketLines = options.maxContextTokens != null
954
+ ? Math.min(configuredTargetLines, Math.max(1, Math.floor((options.maxContextTokens - ESTIMATED_PACKET_PROMPT_TOKENS) /
955
+ ESTIMATED_TOKENS_PER_LINE)))
956
+ : configuredTargetLines;
953
957
  const graphEdges = collectGraphEdges(options.graphBundle);
954
958
  const groups = buildTaskGroups(tasks);
955
959
  const planningGraphEdges = buildPlanningGraphEdges(groups, graphEdges, options.graphBundle, options.lineIndex, targetPacketLines);
@@ -12,6 +12,7 @@ export function renderWorkerPrompt(task) {
12
12
  `Read: ${tasksPath}`,
13
13
  "Scope: review only the tasks listed in the Read file. Do not add tasks,",
14
14
  "edit source files, remediate findings, run unrelated audits, or write result_path.",
15
+ "Prefer host Read and Grep tools for source inspection. On native Windows, do not use Unix pipelines like `grep ... | head`; if shell search is unavoidable, use `Select-String` as a fallback.",
15
16
  "For each listed task: read the assigned file_paths under the specified lens,",
16
17
  "using targeted reads/searches where they give complete enough evidence without loading unrelated context,",
17
18
  "and emit exactly one AuditResult object with:",
@@ -0,0 +1,8 @@
1
+ export { resolveLimits, lookupKnownModel, classifyProvider } from "./limits.js";
2
+ export type { LimitResolutionResult, ResolveLimitsOptions, ProviderType } from "./limits.js";
3
+ export { readQuotaState, writeQuotaState, computeMaxSafeConcurrency, recordWaveOutcome, getQuotaStatePath, decayWeight, applyDecayToEntry, } from "./state.js";
4
+ export { scheduleWave, buildProviderModelKey } from "./scheduler.js";
5
+ export type { ScheduleWaveOptions } from "./scheduler.js";
6
+ export { probeProvider } from "./probe.js";
7
+ export type { ProbeResult } from "./probe.js";
8
+ export type { ResolvedLimits, LimitSource, LimitConfidence, QuotaState, QuotaStateEntry, ConcurrencyBucket, WaveSchedule, DispatchQuota, ObservedWaveOutcome, } from "./types.js";
@@ -0,0 +1,4 @@
1
+ export { resolveLimits, lookupKnownModel, classifyProvider } from "./limits.js";
2
+ export { readQuotaState, writeQuotaState, computeMaxSafeConcurrency, recordWaveOutcome, getQuotaStatePath, decayWeight, applyDecayToEntry, } from "./state.js";
3
+ export { scheduleWave, buildProviderModelKey } from "./scheduler.js";
4
+ export { probeProvider } from "./probe.js";
@@ -0,0 +1,16 @@
1
+ import type { ResolvedProviderName, SessionConfig } from "../types/sessionConfig.js";
2
+ import type { LimitConfidence, LimitSource, ResolvedLimits } from "./types.js";
3
+ export type ProviderType = "hosted" | "local" | "unknown";
4
+ export declare function classifyProvider(providerName: ResolvedProviderName): ProviderType;
5
+ export declare function lookupKnownModel(modelKey: string): Pick<ResolvedLimits, "context_tokens" | "output_tokens"> | undefined;
6
+ export interface LimitResolutionResult {
7
+ limits: ResolvedLimits;
8
+ source: LimitSource;
9
+ confidence: LimitConfidence;
10
+ }
11
+ export interface ResolveLimitsOptions {
12
+ providerName: ResolvedProviderName;
13
+ sessionConfig: SessionConfig;
14
+ hostModel?: string | null;
15
+ }
16
+ export declare function resolveLimits(options: ResolveLimitsOptions): LimitResolutionResult;
@@ -0,0 +1,77 @@
1
+ // RPM/TPM are omitted here — they are tier-dependent and must come from learning.
2
+ const KNOWN_MODEL_LIMITS = {
3
+ "anthropic/claude-opus-4-7": { context_tokens: 200_000, output_tokens: 32_000 },
4
+ "anthropic/claude-sonnet-4-6": { context_tokens: 200_000, output_tokens: 8_192 },
5
+ "anthropic/claude-haiku-4-5": { context_tokens: 200_000, output_tokens: 8_192 },
6
+ "anthropic/claude-opus-4-5": { context_tokens: 200_000, output_tokens: 8_192 },
7
+ "anthropic/claude-sonnet-4-5": { context_tokens: 200_000, output_tokens: 8_192 },
8
+ "openai/gpt-4o": { context_tokens: 128_000, output_tokens: 16_384 },
9
+ "openai/gpt-4o-mini": { context_tokens: 128_000, output_tokens: 16_384 },
10
+ "google/gemini-2.0-flash": { context_tokens: 1_048_576, output_tokens: 8_192 },
11
+ "google/gemini-1.5-pro": { context_tokens: 2_097_152, output_tokens: 8_192 },
12
+ };
13
/**
 * Buckets a provider name into a coarse hosting category.
 *
 * @param providerName Provider identifier to classify.
 * @returns "hosted" for claude-code and opencode, "local" for
 *          local-subprocess, and "unknown" for everything else.
 */
export function classifyProvider(providerName) {
    if (providerName === "claude-code" || providerName === "opencode") {
        return "hosted";
    }
    if (providerName === "local-subprocess") {
        return "local";
    }
    // subprocess-template, vscode-task and any unrecognised name land here.
    return "unknown";
}
26
/**
 * Looks up static context/output token limits for a model key.
 *
 * The key is lower-cased and trimmed before the lookup.
 *
 * @param modelKey Model identifier in "provider/model" form.
 * @returns The known limits, or undefined when the model is not in the table.
 */
export function lookupKnownModel(modelKey) {
    const normalizedKey = modelKey.toLowerCase().trim();
    // Only own properties count: a plain-object lookup would otherwise return
    // inherited members (e.g. "constructor", "tostring") as if they were limits.
    return Object.prototype.hasOwnProperty.call(KNOWN_MODEL_LIMITS, normalizedKey)
        ? KNOWN_MODEL_LIMITS[normalizedKey]
        : undefined;
}
29
/**
 * Builds the fallback limits used when nothing more specific is known.
 *
 * Context and output sizes come from the session's quota section when set
 * (32,000 and 4,096 tokens otherwise); all rate limits are left as null.
 *
 * @param sessionConfig Session configuration; its `quota` section may be absent.
 * @returns A limits object with every rate-limit field set to null.
 */
function defaultLimits(sessionConfig) {
    const { default_context_tokens: contextTokens, reserved_output_tokens: outputTokens, } = sessionConfig.quota ?? {};
    return {
        context_tokens: contextTokens ?? 32_000,
        output_tokens: outputTokens ?? 4_096,
        requests_per_minute: null,
        input_tokens_per_minute: null,
        output_tokens_per_minute: null,
    };
}
39
/**
 * Resolves the token and rate limits to assume for a model.
 *
 * Resolution order:
 *   1. an explicit per-model override in `sessionConfig.quota.models`,
 *   2. the static known-model table (context/output only),
 *   3. the session defaults.
 *
 * @param options.sessionConfig Session configuration (its `quota` section is optional).
 * @param options.hostModel Model key, or null/undefined when unknown.
 * @returns The resolved limits with the source and confidence of the decision.
 */
export function resolveLimits(options) {
    const { sessionConfig, hostModel } = options;
    const quota = sessionConfig.quota ?? {};
    const defaults = defaultLimits(sessionConfig);
    // 1. Explicit per-model config overrides
    const override = hostModel ? quota.models?.[hostModel] : undefined;
    if (override) {
        // A partial override (e.g. only requests_per_minute) should not shrink a
        // known model's context window to the generic default, so fields missing
        // from the override fall back to known metadata before the defaults.
        const known = lookupKnownModel(hostModel);
        return {
            limits: {
                context_tokens: override.context_tokens ?? known?.context_tokens ?? defaults.context_tokens,
                output_tokens: override.output_tokens ?? known?.output_tokens ?? defaults.output_tokens,
                requests_per_minute: override.requests_per_minute ?? null,
                input_tokens_per_minute: override.input_tokens_per_minute ?? null,
                output_tokens_per_minute: override.output_tokens_per_minute ?? null,
            },
            source: "explicit_config",
            confidence: "high",
        };
    }
    // 2. Static known-model database (context/output only; RPM/TPM from learning)
    if (hostModel) {
        const known = lookupKnownModel(hostModel);
        if (known) {
            return {
                limits: {
                    context_tokens: known.context_tokens,
                    output_tokens: known.output_tokens,
                    requests_per_minute: null,
                    input_tokens_per_minute: null,
                    output_tokens_per_minute: null,
                },
                source: "known_metadata",
                confidence: "medium",
            };
        }
    }
    // 3. Conservative defaults for all provider types
    return { limits: defaults, source: "default", confidence: "low" };
}
@@ -0,0 +1,13 @@
1
+ export interface ProbeResult {
2
+ supported: boolean;
3
+ reason: string;
4
+ }
5
+ /**
6
+ * Probe a provider to discover its rate limits.
7
+ *
8
+ * Only subprocess-template supports direct probing since it is the only
9
+ * provider where the auditor controls the API call. IDE providers
10
+ * (claude-code, opencode) select the model internally; their limits come
11
+ * from known-model metadata or learned behavior.
12
+ */
13
+ export declare function probeProvider(providerName: string, probeMode?: "auto" | "never" | "force"): Promise<ProbeResult>;
@@ -0,0 +1,21 @@
1
/**
 * Reports whether a provider's rate limits can be discovered by probing.
 *
 * Every path currently resolves to `supported: false`; only the reason
 * differs. Probing is meant for subprocess-template alone, because that is
 * the one provider where the auditor controls the API call. IDE providers
 * (claude-code, opencode) pick the model internally, so their limits come
 * from known-model metadata or learned behavior.
 *
 * @param providerName Provider to probe.
 * @param probeMode "never" short-circuits before the provider is examined.
 * @returns A result whose `reason` explains why probing is unavailable.
 */
export async function probeProvider(providerName, probeMode = "auto") {
    const unsupported = (reason) => ({ supported: false, reason });
    if (probeMode === "never") {
        return unsupported("probe disabled by config");
    }
    if (providerName === "subprocess-template") {
        // subprocess-template probe not yet implemented
        return unsupported("subprocess-template probe not yet implemented");
    }
    return unsupported(`probe not applicable for ${providerName} — limits come from known-model metadata or learned behavior`);
}
@@ -0,0 +1,14 @@
1
+ import type { ResolvedProviderName, SessionConfig } from "../types/sessionConfig.js";
2
+ import type { QuotaStateEntry, WaveSchedule } from "./types.js";
3
+ export interface ScheduleWaveOptions {
4
+ providerName: ResolvedProviderName;
5
+ sessionConfig: SessionConfig;
6
+ hostModel: string | null;
7
+ requestedConcurrency: number;
8
+ /** Average estimated tokens per packet/worker. Used for TPM budget. */
9
+ estimatedPacketTokens?: number;
10
+ quotaStateEntry?: QuotaStateEntry | null;
11
+ }
12
+ export declare function scheduleWave(options: ScheduleWaveOptions): WaveSchedule;
13
+ /** Build the state key used for indexing quota-state.json entries. */
14
+ export declare function buildProviderModelKey(providerName: string, hostModel: string | null | undefined): string;
@@ -0,0 +1,76 @@
1
+ import { classifyProvider, resolveLimits } from "./limits.js";
2
+ import { computeMaxSafeConcurrency } from "./state.js";
3
/**
 * Decides how many workers to launch in the next wave.
 *
 * With `quota.enabled === false` the requested concurrency is passed through
 * untouched. Otherwise the wave starts at `requestedConcurrency` and is
 * narrowed by, in order: an active cooldown (forces 1), the RPM cap, the
 * input-TPM cap, and then either learned evidence or a no-evidence ceiling
 * for the provider type. The result is never below 1 on this path.
 *
 * @param options.providerName Provider used to pick the no-evidence ceiling.
 * @param options.sessionConfig Session configuration; `quota` is optional.
 * @param options.hostModel Model key, or null when unknown.
 * @param options.requestedConcurrency Upper bound requested by the caller.
 * @param options.estimatedPacketTokens Estimated tokens per worker (default 0,
 *        which disables the TPM cap).
 * @param options.quotaStateEntry Learned state for this provider/model, if any.
 * @returns The wave size plus the limits, source and confidence behind it.
 */
export function scheduleWave(options) {
    const { providerName, sessionConfig, hostModel, requestedConcurrency, estimatedPacketTokens = 0, quotaStateEntry = null, } = options;
    const quota = sessionConfig.quota ?? {};
    if (quota.enabled === false) {
        const limits = {
            context_tokens: quota.default_context_tokens ?? 32_000,
            output_tokens: quota.reserved_output_tokens ?? 4_096,
            requests_per_minute: null,
            input_tokens_per_minute: null,
            output_tokens_per_minute: null,
        };
        return {
            wave_size: requestedConcurrency,
            estimated_wave_tokens: requestedConcurrency * estimatedPacketTokens,
            cooldown_until: null,
            confidence: "high",
            source: "default",
            resolved_limits: limits,
            model: hostModel,
        };
    }
    const safetyMargin = quota.safety_margin ?? 0.8;
    const halfLifeHours = quota.empirical_half_life_hours ?? 24;
    const providerType = classifyProvider(providerName);
    const { limits, source, confidence } = resolveLimits({ providerName, sessionConfig, hostModel });
    let waveSize = requestedConcurrency;
    let cooldownUntil = null;
    // Respect an active cooldown period
    if (quotaStateEntry?.cooldown_until) {
        const cooldownExpiry = new Date(quotaStateEntry.cooldown_until).getTime();
        if (cooldownExpiry > Date.now()) {
            cooldownUntil = quotaStateEntry.cooldown_until;
            waveSize = 1;
        }
    }
    if (!cooldownUntil) {
        // Cap by requests-per-minute
        if (limits.requests_per_minute != null) {
            const rpmCap = Math.max(1, Math.floor(limits.requests_per_minute * safetyMargin));
            waveSize = Math.min(waveSize, rpmCap);
        }
        // Cap by input tokens-per-minute
        if (limits.input_tokens_per_minute != null && estimatedPacketTokens > 0) {
            const tpmCap = Math.max(1, Math.floor((limits.input_tokens_per_minute * safetyMargin) / estimatedPacketTokens));
            waveSize = Math.min(waveSize, tpmCap);
        }
        if (quotaStateEntry) {
            const learnedCap = computeMaxSafeConcurrency(quotaStateEntry, halfLifeHours);
            waveSize = Math.min(waveSize, learnedCap);
        }
        else if (providerType === "hosted" && source === "default") {
            // Unknown hosted provider with no learned data and no model-specific limits —
            // be conservative. If the caller supplied RPM/TPM caps those already govern rate;
            // this guard only triggers when we have no rate information at all.
            const conservativeDefault = quota.unknown_hosted_concurrency ?? 1;
            waveSize = Math.min(waveSize, conservativeDefault);
        }
        else if (providerType === "local") {
            // Local providers with no learned data: honour an explicit numeric
            // ceiling from config. When it is unset or "unlimited", keep
            // requestedConcurrency (no rate pressure).
            const localCeiling = quota.unknown_local_concurrency;
            if (typeof localCeiling === "number" && Number.isFinite(localCeiling)) {
                waveSize = Math.min(waveSize, localCeiling);
            }
        }
    }
    waveSize = Math.max(1, waveSize);
    return {
        wave_size: waveSize,
        estimated_wave_tokens: waveSize * estimatedPacketTokens,
        cooldown_until: cooldownUntil,
        confidence,
        source,
        resolved_limits: limits,
        model: hostModel,
    };
}
73
/**
 * Builds the key under which quota-state.json stores an entry.
 *
 * @param providerName Provider portion of the key.
 * @param hostModel Model portion; any falsy value (null, undefined, "") is
 *        replaced by the wildcard "*".
 * @returns "<providerName>/<hostModel>" or "<providerName>/*".
 */
export function buildProviderModelKey(providerName, hostModel) {
    const modelSegment = hostModel ? hostModel : "*";
    return `${providerName}/${modelSegment}`;
}
@@ -0,0 +1,12 @@
1
+ import type { ObservedWaveOutcome, QuotaState, QuotaStateEntry } from "./types.js";
2
+ export declare function getQuotaStatePath(): string;
3
+ export declare function decayWeight(weight: number, elapsedHours: number, halfLifeHours: number): number;
4
+ export declare function applyDecayToEntry(entry: QuotaStateEntry, halfLifeHours: number): QuotaStateEntry;
5
+ export declare function readQuotaState(): Promise<QuotaState>;
6
+ export declare function writeQuotaState(state: QuotaState): Promise<void>;
7
+ /**
8
+ * Returns the highest concurrency level for which decayed success evidence
9
+ * exceeds failure evidence, with a minimum of 1.
10
+ */
11
+ export declare function computeMaxSafeConcurrency(entry: QuotaStateEntry, halfLifeHours: number, maxToCheck?: number): number;
12
+ export declare function recordWaveOutcome(providerModelKey: string, outcome: ObservedWaveOutcome, halfLifeHours: number): Promise<void>;
@@ -0,0 +1,101 @@
1
+ import { mkdir, readFile, writeFile } from "node:fs/promises";
2
+ import { homedir } from "node:os";
3
+ import { join } from "node:path";
4
+ const STATE_DIR = join(homedir(), ".audit-code");
5
+ const STATE_PATH = join(STATE_DIR, "quota-state.json");
6
+ // A bucket needs at least this much success weight before we trust it.
7
+ const MIN_EVIDENCE_WEIGHT = 0.5;
8
/**
 * Exposes the location of the persisted quota state file.
 *
 * @returns The module-level STATE_PATH value.
 */
export function getQuotaStatePath() {
    const quotaStatePath = STATE_PATH;
    return quotaStatePath;
}
11
/**
 * Applies exponential half-life decay to an evidence weight.
 *
 * @param weight Evidence weight before decay.
 * @param elapsedHours Hours since the weight was recorded.
 * @param halfLifeHours Hours after which the weight is halved.
 * @returns weight * 0.5^(elapsedHours / halfLifeHours). Returns 0 when the
 *          weight or half-life is not positive, or when any input is NaN.
 *          Never returns more than `weight`.
 */
export function decayWeight(weight, elapsedHours, halfLifeHours) {
    // NaN would otherwise slip past the <= comparisons and poison the result.
    if (Number.isNaN(weight) || Number.isNaN(elapsedHours) || Number.isNaN(halfLifeHours)) {
        return 0;
    }
    if (halfLifeHours <= 0 || weight <= 0) {
        return 0;
    }
    // A negative elapsed time (clock skew, timestamp in the future) must not
    // amplify the evidence, so it is treated as "no time has passed".
    const nonNegativeElapsed = Math.max(0, elapsedHours);
    return weight * Math.pow(0.5, nonNegativeElapsed / halfLifeHours);
}
16
/**
 * Returns the entry with every bucket's weights decayed to the present.
 *
 * Elapsed time is measured from `entry.updated_at` to now. When less than
 * 0.001 hours have passed the same entry object is returned untouched;
 * otherwise a shallow copy with a freshly built `buckets` map is returned.
 *
 * @param entry Quota state entry holding `updated_at` and `buckets`.
 * @param halfLifeHours Half-life passed through to decayWeight.
 * @returns The original entry, or a copy with decayed bucket weights.
 */
export function applyDecayToEntry(entry, halfLifeHours) {
    const MS_PER_HOUR = 1000 * 60 * 60;
    const lastUpdateMs = new Date(entry.updated_at).getTime();
    const elapsedHours = (Date.now() - lastUpdateMs) / MS_PER_HOUR;
    if (elapsedHours < 0.001) {
        return entry;
    }
    const decayedBuckets = {};
    Object.entries(entry.buckets).forEach(([level, bucket]) => {
        const successWeight = decayWeight(bucket.success_weight, elapsedHours, halfLifeHours);
        const failureWeight = decayWeight(bucket.failure_weight, elapsedHours, halfLifeHours);
        decayedBuckets[level] = { success_weight: successWeight, failure_weight: failureWeight };
    });
    return { ...entry, buckets: decayedBuckets };
}
29
/**
 * Checks that a parsed JSON value has the shape of a version-1 quota state.
 *
 * @param value Arbitrary parsed value.
 * @returns true only when `value` is a non-array object with `version === 1`
 *          and an `entries` member that is itself a non-null, non-array object.
 */
function isQuotaState(value) {
    if (value === null || typeof value !== "object" || Array.isArray(value)) {
        return false;
    }
    if (value["version"] !== 1) {
        return false;
    }
    const entries = value["entries"];
    // typeof null is "object" and arrays are objects too; neither can be
    // indexed as an entries map, so both are rejected explicitly.
    return entries !== null && typeof entries === "object" && !Array.isArray(entries);
}
36
/**
 * Loads the persisted quota state from STATE_PATH.
 *
 * Any read or parse failure, and any payload that fails isQuotaState, is
 * treated as "no state yet" rather than surfaced as an error.
 *
 * @returns The parsed state, or a fresh `{ version: 1, entries: {} }`.
 */
export async function readQuotaState() {
    let parsed;
    try {
        const fileText = await readFile(STATE_PATH, "utf8");
        parsed = JSON.parse(fileText);
    }
    catch {
        // File not found or malformed — start fresh
        return { version: 1, entries: {} };
    }
    return isQuotaState(parsed) ? parsed : { version: 1, entries: {} };
}
48
/**
 * Persists the quota state as pretty-printed JSON at STATE_PATH.
 *
 * STATE_DIR is created first (recursively), then the file is written with
 * two-space indentation and a trailing newline.
 *
 * @param state Quota state to serialize.
 */
export async function writeQuotaState(state) {
    await mkdir(STATE_DIR, { recursive: true });
    const serialized = `${JSON.stringify(state, null, 2)}\n`;
    await writeFile(STATE_PATH, serialized, "utf8");
}
52
/**
 * Finds the top of the unbroken run of trusted concurrency levels.
 *
 * Levels are examined upward from 1 after decaying the entry. A level is
 * trusted when its bucket exists, its success weight reaches
 * MIN_EVIDENCE_WEIGHT, and its success weight exceeds its failure weight.
 * The scan stops at the first level that is missing or untrusted.
 *
 * @param entry Quota state entry to evaluate.
 * @param halfLifeHours Half-life used when decaying the entry.
 * @param maxToCheck Highest level examined (default 32).
 * @returns The last trusted level in the run, with a minimum of 1.
 */
export function computeMaxSafeConcurrency(entry, halfLifeHours, maxToCheck = 32) {
    const { buckets } = applyDecayToEntry(entry, halfLifeHours);
    let trustedLevels = 0;
    while (trustedLevels + 1 <= maxToCheck) {
        const candidate = buckets[String(trustedLevels + 1)];
        const isTrusted = candidate &&
            candidate.success_weight >= MIN_EVIDENCE_WEIGHT &&
            candidate.success_weight > candidate.failure_weight;
        if (!isTrusted) {
            break;
        }
        trustedLevels += 1;
    }
    return Math.max(1, trustedLevels);
}
73
/**
 * Creates an empty quota state entry stamped with the current time.
 *
 * @returns An entry with no buckets, no cooldown and no recorded 429.
 */
function blankEntry() {
    const createdAt = new Date().toISOString();
    return {
        updated_at: createdAt,
        buckets: {},
        cooldown_until: null,
        last_429_at: null,
    };
}
76
/**
 * Folds one observed wave outcome into the persisted quota state.
 *
 * The entry for `providerModelKey` is loaded (or created), decayed to the
 * present, updated with the new evidence, re-stamped and written back.
 *
 * - "success" at concurrency N adds success weight to levels 1..N.
 * - Any other outcome adds failure weight to levels N..N+4, stamps
 *   `last_429_at`, and stores `outcome.cooldown_until` when provided.
 *
 * @param providerModelKey Key of the entry to update.
 * @param outcome Observed outcome; `concurrency` is normalised to an integer >= 1.
 * @param halfLifeHours Half-life used to decay existing evidence first.
 */
export async function recordWaveOutcome(providerModelKey, outcome, halfLifeHours) {
    // How many levels above the failing one are also marked unsafe.
    const FAILURE_SPREAD = 4;
    // Bucket keys must be the positive integers computeMaxSafeConcurrency reads;
    // fractional or < 1 values would create keys that are never consulted, and
    // a non-finite value would keep the success loop from terminating.
    const rawConcurrency = Number(outcome.concurrency);
    const concurrency = Number.isFinite(rawConcurrency) ? Math.max(1, Math.floor(rawConcurrency)) : 1;
    const state = await readQuotaState();
    const entry = applyDecayToEntry(state.entries[providerModelKey] ?? blankEntry(), halfLifeHours);
    const addEvidence = (level, field) => {
        const key = String(level);
        const bucket = entry.buckets[key] ?? { success_weight: 0, failure_weight: 0 };
        bucket[field] += 1.0;
        entry.buckets[key] = bucket;
    };
    if (outcome.outcome === "success") {
        // Success at N proves 1..N are all safe
        for (let n = 1; n <= concurrency; n++) {
            addEvidence(n, "success_weight");
        }
    }
    else {
        // NOTE(review): this branch also runs for "timeout" outcomes, so
        // last_429_at is stamped for timeouts too — confirm that is intended.
        entry.last_429_at = new Date().toISOString();
        if (outcome.cooldown_until)
            entry.cooldown_until = outcome.cooldown_until;
        // Failure at N marks N and above as unsafe
        for (let n = concurrency; n <= concurrency + FAILURE_SPREAD; n++) {
            addEvidence(n, "failure_weight");
        }
    }
    entry.updated_at = new Date().toISOString();
    state.entries[providerModelKey] = entry;
    await writeQuotaState(state);
}
@@ -0,0 +1,50 @@
1
+ export type LimitSource = "explicit_config" | "cli_flags" | "known_metadata" | "learned" | "default";
2
+ export type LimitConfidence = "high" | "medium" | "low";
3
+ export interface ResolvedLimits {
4
+ context_tokens: number;
5
+ output_tokens: number;
6
+ requests_per_minute: number | null;
7
+ input_tokens_per_minute: number | null;
8
+ output_tokens_per_minute: number | null;
9
+ }
10
+ export interface ConcurrencyBucket {
11
+ success_weight: number;
12
+ failure_weight: number;
13
+ }
14
+ export interface QuotaStateEntry {
15
+ updated_at: string;
16
+ buckets: Record<string, ConcurrencyBucket>;
17
+ cooldown_until: string | null;
18
+ last_429_at: string | null;
19
+ }
20
+ export interface QuotaState {
21
+ version: 1;
22
+ entries: Record<string, QuotaStateEntry>;
23
+ }
24
+ export interface WaveSchedule {
25
+ wave_size: number;
26
+ estimated_wave_tokens: number;
27
+ cooldown_until: string | null;
28
+ confidence: LimitConfidence;
29
+ source: LimitSource;
30
+ resolved_limits: ResolvedLimits;
31
+ model: string | null;
32
+ }
33
+ export interface DispatchQuota {
34
+ contract_version: "audit-code-dispatch-quota/v1alpha1";
35
+ run_id: string;
36
+ model: string | null;
37
+ resolved_limits: ResolvedLimits;
38
+ confidence: LimitConfidence;
39
+ source: LimitSource;
40
+ wave_size: number;
41
+ estimated_wave_tokens: number;
42
+ cooldown_until: string | null;
43
+ }
44
+ export interface ObservedWaveOutcome {
45
+ concurrency: number;
46
+ estimated_tokens: number;
47
+ outcome: "success" | "rate_limited" | "timeout";
48
+ cooldown_until?: string | null;
49
+ reset_at?: string | null;
50
+ }
@@ -0,0 +1 @@
1
+ export {};
@@ -105,9 +105,7 @@ function buildSuggestedCommands(artifactsDir, suggestedInputs, status, activeRev
105
105
  return [
106
106
  renderShellCommand([
107
107
  "audit-code",
108
- "prepare-dispatch",
109
- "--run-id",
110
- activeReviewRun.run_id,
108
+ "next-step",
111
109
  "--artifacts-dir",
112
110
  artifactsDir,
113
111
  ]),
@@ -170,7 +168,7 @@ function renderMarkdown(handoff) {
170
168
  lines.push(`- ${command}`);
171
169
  }
172
170
  if (handoff.active_review_run) {
173
- lines.push("- Use packet dispatch commands only when the conversation host exposes a callable subagent tool; otherwise follow the single-task fallback.");
171
+ lines.push("- Use next-step so the backend renders either packet dispatch or single-task fallback after the host reports capabilities.");
174
172
  }
175
173
  }
176
174
  if (handoff.active_review_run) {
@@ -233,9 +231,7 @@ export function buildAuditCodeHandoff(params) {
233
231
  if (params.state.status === BLOCKED_STATUS && params.activeReviewRun) {
234
232
  handoff.quick_start = renderShellCommand([
235
233
  "audit-code",
236
- "prepare-dispatch",
237
- "--run-id",
238
- params.activeReviewRun.run_id,
234
+ "next-step",
239
235
  "--artifacts-dir",
240
236
  params.artifactsDir,
241
237
  ]);
@@ -20,6 +20,33 @@ export interface VSCodeTaskConfig {
20
20
  command_template: string[];
21
21
  env?: Record<string, string>;
22
22
  }
23
+ export interface QuotaModelLimits {
24
+ context_tokens?: number;
25
+ output_tokens?: number;
26
+ requests_per_minute?: number;
27
+ input_tokens_per_minute?: number;
28
+ output_tokens_per_minute?: number;
29
+ }
30
+ export interface QuotaConfig {
31
+ /** Set to false to disable all quota scheduling (default: true). */
32
+ enabled?: boolean;
33
+ /** Whether to probe the provider for live limits (default: "auto"). */
34
+ probe?: "auto" | "never" | "force";
35
+ /** Fraction of known limits to actually use (default: 0.8). */
36
+ safety_margin?: number;
37
+ /** Concurrency ceiling for hosted providers with no learned data (default: 1). */
38
+ unknown_hosted_concurrency?: number;
39
+ /** Concurrency for local providers with no learned data (default: "unlimited"). */
40
+ unknown_local_concurrency?: number | "unlimited";
41
+ /** Assumed context window when the model is not recognized (default: 32000). */
42
+ default_context_tokens?: number;
43
+ /** Tokens reserved for model output per request (default: 4096). */
44
+ reserved_output_tokens?: number;
45
+ /** Half-life of empirical success/failure evidence in hours (default: 24). */
46
+ empirical_half_life_hours?: number;
47
+ /** Per-model overrides keyed by "provider/model". */
48
+ models?: Record<string, QuotaModelLimits>;
49
+ }
23
50
  export declare const PROVIDER_SECTION_KEYS: {
24
51
  readonly "subprocess-template": "subprocess_template";
25
52
  readonly "claude-code": "claude_code";
@@ -40,4 +67,5 @@ export interface SessionConfig {
40
67
  vscode_task?: VSCodeTaskConfig;
41
68
  agent_task_batch_size?: number;
42
69
  parallel_workers?: number;
70
+ quota?: QuotaConfig;
43
71
  }
package/docs/contracts.md CHANGED
@@ -77,6 +77,23 @@ The backend stores resumable artifacts under `.audit-artifacts/`, including:
77
77
  Consumers should treat these as versioned JSON artifacts and validate them with
78
78
  `audit-code validate` rather than inferring state from filenames alone.
79
79
 
80
+ ## Step artifacts
81
+
82
+ The conversation-first `/audit-code` prompt is a loader. It runs
83
+ `audit-code next-step` and then follows only the returned step prompt. The
84
+ backend writes the current step contract to:
85
+
86
+ - `<artifacts_dir>/steps/current-step.json`
87
+ - `<artifacts_dir>/steps/current-prompt.md`
88
+
89
+ `current-step.json` uses `contract_version: "audit-code-step/v1alpha1"` and
90
+ includes `step_kind`, `prompt_path`, `status`, `run_id`, `allowed_commands`,
91
+ `stop_condition`, `repo_root`, `artifacts_dir`, and relevant `artifact_paths`.
92
+
93
+ When semantic review is blocked, `next-step` first emits a `capability_check`.
94
+ After the host reports `--host-can-dispatch-subagents true|false`, the backend
95
+ renders exactly one review path: packet dispatch or the single-task fallback.
96
+
80
97
  ## Dispatch packets
81
98
 
82
99
  Packet dispatch preserves the existing `AuditTask` and `AuditResult`
@@ -92,13 +109,18 @@ Planning artifacts are shaped by:
92
109
  Normal packet flow:
93
110
 
94
111
  ```text
95
- audit-code prepare-dispatch --run-id <run_id> --artifacts-dir <artifacts_dir>
112
+ audit-code next-step --host-can-dispatch-subagents true
113
+ backend prepares dispatch-plan.json
96
114
  conversation launches one worker per dispatch-plan entry
97
115
  worker reads entry.prompt_path
98
116
  worker submits AuditResult[] through submit-packet
99
117
  audit-code merge-and-ingest --run-id <run_id> --artifacts-dir <artifacts_dir>
100
118
  ```
101
119
 
120
+ `audit-code prepare-dispatch --run-id <run_id> --artifacts-dir
121
+ <artifacts_dir>` remains available for compatibility and tests, but generic
122
+ handoff fields point users and prompts to `next-step`.
123
+
102
124
  Packet artifacts:
103
125
 
104
126
  - `<artifacts_dir>/runs/<run_id>/dispatch-plan.json`
@@ -43,7 +43,7 @@ Host-specific files may include:
43
43
 
44
44
  - Codex: managed `AGENTS.md` fallback guidance
45
45
  - Claude Desktop: project template, remote MCP connector, local MCP bundle
46
- - OpenCode: `opencode.json` with `/audit-code` slash command and auditor MCP server
46
+ - OpenCode: `opencode.json` with auditor MCP server and permission wiring; the `/audit-code` command is global npm-installed state
47
47
  - VS Code/Copilot: prompt, custom agent, instructions, and `.vscode/mcp.json`
48
48
  - Antigravity: planning-mode and MCP-oriented guidance
49
49
 
@@ -64,9 +64,10 @@ with the canonical `/audit-code` spelling.
64
64
  Claude Desktop is treated as an MCP-first host. Use the generated project
65
65
  template and local bundle artifacts when installing the integration.
66
66
 
67
- OpenCode uses `opencode.json` (generated by `audit-code ensure` or `audit-code
68
- install`) which registers the `/audit-code` slash command and the auditor MCP
69
- server together. VS Code uses repo-local prompt and MCP configuration files.
67
+ OpenCode uses the global command seeded by `npm install -g auditor-lambda`.
68
+ The generated project `opencode.json` should not define `command["audit-code"]`;
69
+ it only wires the auditor MCP server and project permissions. VS Code uses
70
+ repo-local prompt and MCP configuration files.
70
71
 
71
72
  Antigravity should be treated as a workflow-and-artifacts host until it has a
72
73
  stable project-local config surface. Use generated planning-mode guidance,
@@ -100,6 +101,7 @@ The wrapper:
100
101
  Useful fallback commands:
101
102
 
102
103
  ```bash
104
+ audit-code next-step
103
105
  audit-code --single-step
104
106
  audit-code --results /path/to/audit_results.json
105
107
  audit-code --batch-results /path/to/results-dir
@@ -111,6 +113,11 @@ audit-code cleanup
111
113
  audit-code mcp
112
114
  ```
113
115
 
116
+ `audit-code next-step` is the backend-rendered step engine used by the
117
+ conversation prompt. It writes `.audit-artifacts/steps/current-step.json` and
118
+ `.audit-artifacts/steps/current-prompt.md`, then the host should follow only
119
+ that prompt.
120
+
114
121
  `audit-code validate` checks artifact shape, cross-artifact consistency,
115
122
  session config, and explicit provider readiness.
116
123
 
package/docs/product.md CHANGED
@@ -148,9 +148,10 @@ Readiness should be judged through three checks:
148
148
  - field-trial quality: run real repositories through planning, validate
149
149
  artifacts, and use `audit_plan_metrics.json` to track packet count, weak
150
150
  packet count, average cohesion, merge edge kinds, and weak-packet samples
151
- - full-loop behavior: prove `prepare-dispatch`, worker review,
152
- `submit-packet`, `merge-and-ingest`, selective deepening, runtime validation,
153
- and final `audit-report.md` promotion in at least one real host flow
151
+ - full-loop behavior: prove `next-step` capability routing, packet dispatch,
152
+ worker review, `submit-packet`, `merge-and-ingest`, selective deepening,
153
+ runtime validation, and final `audit-report.md` promotion in at least one
154
+ real host flow
154
155
  - release hygiene: keep `npm run verify:release`, linked smoke, packaged
155
156
  smoke, tarball preview, and Trusted Publishing green from a clean checkout
156
157
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "auditor-lambda",
3
- "version": "0.3.20",
3
+ "version": "0.3.22",
4
4
  "private": false,
5
5
  "description": "Portable hybrid code-auditing framework for arbitrary repositories.",
6
6
  "type": "module",