npm - auditor-lambda - Versions diffs - 0.3.20 → 0.3.22 - Mend

auditor-lambda 0.3.20 → 0.3.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27)

package/README.md +12 -2
package/audit-code-wrapper-lib.mjs +91 -32
package/dist/cli.js +738 -11
package/dist/orchestrator/reviewPackets.d.ts +5 -0
package/dist/orchestrator/reviewPackets.js +5 -1
package/dist/prompts/renderWorkerPrompt.js +1 -0
package/dist/quota/index.d.ts +8 -0
package/dist/quota/index.js +4 -0
package/dist/quota/limits.d.ts +16 -0
package/dist/quota/limits.js +77 -0
package/dist/quota/probe.d.ts +13 -0
package/dist/quota/probe.js +21 -0
package/dist/quota/scheduler.d.ts +14 -0
package/dist/quota/scheduler.js +76 -0
package/dist/quota/state.d.ts +12 -0
package/dist/quota/state.js +101 -0
package/dist/quota/types.d.ts +50 -0
package/dist/quota/types.js +1 -0
package/dist/supervisor/operatorHandoff.js +3 -7
package/dist/types/sessionConfig.d.ts +28 -0
package/docs/contracts.md +23 -1
package/docs/operator-guide.md +11 -4
package/docs/product.md +4 -3
package/package.json +1 -1
package/schemas/dispatch_quota.schema.json +77 -0
package/scripts/postinstall.mjs +33 -0
package/skills/audit-code/audit-code.prompt.md +15 -170

package/dist/orchestrator/reviewPackets.d.ts CHANGED Viewed

@@ -6,6 +6,11 @@ export interface BuildReviewPacketOptions {
     lineIndex?: Record<string, number>;
     maxTasksPerPacket?: number;
     targetPacketLines?: number;
+    /**
+     * Available context budget in tokens (context_tokens − reserved_output_tokens).
+     * When provided, targetPacketLines is capped to fit within this budget.
+     */
+    maxContextTokens?: number;
 }
 export declare function buildReviewPackets(tasks: AuditTask[], options?: BuildReviewPacketOptions): ReviewPacket[];
 export declare function orderTasksForPacketReview(tasks: AuditTask[], options?: BuildReviewPacketOptions): AuditTask[];

package/dist/orchestrator/reviewPackets.js CHANGED Viewed

@@ -949,7 +949,11 @@ function buildPacket(tasks, packetIndex, lineIndex, graphEdges = [], graphBundle
 }
 function buildReviewPacketPlanningData(tasks, options = {}) {
     const maxTasksPerPacket = options.maxTasksPerPacket ?? DEFAULT_MAX_TASKS_PER_PACKET;
-    const targetPacketLines = options.targetPacketLines ?? DEFAULT_TARGET_PACKET_LINES;
+    const configuredTargetLines = options.targetPacketLines ?? DEFAULT_TARGET_PACKET_LINES;
+    const targetPacketLines = options.maxContextTokens != null
+        ? Math.min(configuredTargetLines, Math.max(1, Math.floor((options.maxContextTokens - ESTIMATED_PACKET_PROMPT_TOKENS) /
+            ESTIMATED_TOKENS_PER_LINE)))
+        : configuredTargetLines;
     const graphEdges = collectGraphEdges(options.graphBundle);
     const groups = buildTaskGroups(tasks);
     const planningGraphEdges = buildPlanningGraphEdges(groups, graphEdges, options.graphBundle, options.lineIndex, targetPacketLines);

package/dist/prompts/renderWorkerPrompt.js CHANGED Viewed

@@ -12,6 +12,7 @@ export function renderWorkerPrompt(task) {
             `Read: ${tasksPath}`,
             "Scope: review only the tasks listed in the Read file. Do not add tasks,",
             "edit source files, remediate findings, run unrelated audits, or write result_path.",
+            "Prefer host Read and Grep tools for source inspection. On native Windows, do not use Unix pipelines like `grep ... | head`; if shell search is unavoidable, use `Select-String` as a fallback.",
             "For each listed task: read the assigned file_paths under the specified lens,",
             "using targeted reads/searches where they give complete enough evidence without loading unrelated context,",
             "and emit exactly one AuditResult object with:",

package/dist/quota/index.d.ts ADDED Viewed

@@ -0,0 +1,8 @@
+export { resolveLimits, lookupKnownModel, classifyProvider } from "./limits.js";
+export type { LimitResolutionResult, ResolveLimitsOptions, ProviderType } from "./limits.js";
+export { readQuotaState, writeQuotaState, computeMaxSafeConcurrency, recordWaveOutcome, getQuotaStatePath, decayWeight, applyDecayToEntry, } from "./state.js";
+export { scheduleWave, buildProviderModelKey } from "./scheduler.js";
+export type { ScheduleWaveOptions } from "./scheduler.js";
+export { probeProvider } from "./probe.js";
+export type { ProbeResult } from "./probe.js";
+export type { ResolvedLimits, LimitSource, LimitConfidence, QuotaState, QuotaStateEntry, ConcurrencyBucket, WaveSchedule, DispatchQuota, ObservedWaveOutcome, } from "./types.js";

package/dist/quota/index.js ADDED Viewed

@@ -0,0 +1,4 @@
+export { resolveLimits, lookupKnownModel, classifyProvider } from "./limits.js";
+export { readQuotaState, writeQuotaState, computeMaxSafeConcurrency, recordWaveOutcome, getQuotaStatePath, decayWeight, applyDecayToEntry, } from "./state.js";
+export { scheduleWave, buildProviderModelKey } from "./scheduler.js";
+export { probeProvider } from "./probe.js";

package/dist/quota/limits.d.ts ADDED Viewed

@@ -0,0 +1,16 @@
+import type { ResolvedProviderName, SessionConfig } from "../types/sessionConfig.js";
+import type { LimitConfidence, LimitSource, ResolvedLimits } from "./types.js";
+export type ProviderType = "hosted" | "local" | "unknown";
+export declare function classifyProvider(providerName: ResolvedProviderName): ProviderType;
+export declare function lookupKnownModel(modelKey: string): Pick<ResolvedLimits, "context_tokens" | "output_tokens"> | undefined;
+export interface LimitResolutionResult {
+    limits: ResolvedLimits;
+    source: LimitSource;
+    confidence: LimitConfidence;
+}
+export interface ResolveLimitsOptions {
+    providerName: ResolvedProviderName;
+    sessionConfig: SessionConfig;
+    hostModel?: string | null;
+}
+export declare function resolveLimits(options: ResolveLimitsOptions): LimitResolutionResult;

package/dist/quota/limits.js ADDED Viewed

@@ -0,0 +1,77 @@
+// RPM/TPM are omitted here — they are tier-dependent and must come from learning.
+const KNOWN_MODEL_LIMITS = {
+    "anthropic/claude-opus-4-7": { context_tokens: 200_000, output_tokens: 32_000 },
+    "anthropic/claude-sonnet-4-6": { context_tokens: 200_000, output_tokens: 8_192 },
+    "anthropic/claude-haiku-4-5": { context_tokens: 200_000, output_tokens: 8_192 },
+    "anthropic/claude-opus-4-5": { context_tokens: 200_000, output_tokens: 8_192 },
+    "anthropic/claude-sonnet-4-5": { context_tokens: 200_000, output_tokens: 8_192 },
+    "openai/gpt-4o": { context_tokens: 128_000, output_tokens: 16_384 },
+    "openai/gpt-4o-mini": { context_tokens: 128_000, output_tokens: 16_384 },
+    "google/gemini-2.0-flash": { context_tokens: 1_048_576, output_tokens: 8_192 },
+    "google/gemini-1.5-pro": { context_tokens: 2_097_152, output_tokens: 8_192 },
+};
+export function classifyProvider(providerName) {
+    switch (providerName) {
+        case "claude-code":
+        case "opencode":
+            return "hosted";
+        case "local-subprocess":
+            return "local";
+        case "subprocess-template":
+        case "vscode-task":
+        default:
+            return "unknown";
+    }
+}
+export function lookupKnownModel(modelKey) {
+    return KNOWN_MODEL_LIMITS[modelKey.toLowerCase().trim()];
+}
+function defaultLimits(sessionConfig) {
+    const quota = sessionConfig.quota ?? {};
+    return {
+        context_tokens: quota.default_context_tokens ?? 32_000,
+        output_tokens: quota.reserved_output_tokens ?? 4_096,
+        requests_per_minute: null,
+        input_tokens_per_minute: null,
+        output_tokens_per_minute: null,
+    };
+}
+export function resolveLimits(options) {
+    const { providerName: _providerName, sessionConfig, hostModel } = options;
+    const quota = sessionConfig.quota ?? {};
+    const defaults = defaultLimits(sessionConfig);
+    // 1. Explicit per-model config overrides
+    if (hostModel && quota.models?.[hostModel]) {
+        const override = quota.models[hostModel];
+        return {
+            limits: {
+                context_tokens: override.context_tokens ?? defaults.context_tokens,
+                output_tokens: override.output_tokens ?? defaults.output_tokens,
+                requests_per_minute: override.requests_per_minute ?? null,
+                input_tokens_per_minute: override.input_tokens_per_minute ?? null,
+                output_tokens_per_minute: override.output_tokens_per_minute ?? null,
+            },
+            source: "explicit_config",
+            confidence: "high",
+        };
+    }
+    // 2. Static known-model database (context/output only; RPM/TPM from learning)
+    if (hostModel) {
+        const known = lookupKnownModel(hostModel);
+        if (known) {
+            return {
+                limits: {
+                    context_tokens: known.context_tokens,
+                    output_tokens: known.output_tokens,
+                    requests_per_minute: null,
+                    input_tokens_per_minute: null,
+                    output_tokens_per_minute: null,
+                },
+                source: "known_metadata",
+                confidence: "medium",
+            };
+        }
+    }
+    // 3. Conservative defaults for all provider types
+    return { limits: defaults, source: "default", confidence: "low" };
+}

package/dist/quota/probe.d.ts ADDED Viewed

@@ -0,0 +1,13 @@
+export interface ProbeResult {
+    supported: boolean;
+    reason: string;
+}
+/**
+ * Probe a provider to discover its rate limits.
+ *
+ * Only subprocess-template supports direct probing since it is the only
+ * provider where the auditor controls the API call. IDE providers
+ * (claude-code, opencode) select the model internally; their limits come
+ * from known-model metadata or learned behavior.
+ */
+export declare function probeProvider(providerName: string, probeMode?: "auto" | "never" | "force"): Promise<ProbeResult>;

package/dist/quota/probe.js ADDED Viewed

@@ -0,0 +1,21 @@
+/**
+ * Probe a provider to discover its rate limits.
+ *
+ * Only subprocess-template supports direct probing since it is the only
+ * provider where the auditor controls the API call. IDE providers
+ * (claude-code, opencode) select the model internally; their limits come
+ * from known-model metadata or learned behavior.
+ */
+export async function probeProvider(providerName, probeMode = "auto") {
+    if (probeMode === "never") {
+        return { supported: false, reason: "probe disabled by config" };
+    }
+    if (providerName !== "subprocess-template") {
+        return {
+            supported: false,
+            reason: `probe not applicable for ${providerName} — limits come from known-model metadata or learned behavior`,
+        };
+    }
+    // subprocess-template probe not yet implemented
+    return { supported: false, reason: "subprocess-template probe not yet implemented" };
+}

package/dist/quota/scheduler.d.ts ADDED Viewed

@@ -0,0 +1,14 @@
+import type { ResolvedProviderName, SessionConfig } from "../types/sessionConfig.js";
+import type { QuotaStateEntry, WaveSchedule } from "./types.js";
+export interface ScheduleWaveOptions {
+    providerName: ResolvedProviderName;
+    sessionConfig: SessionConfig;
+    hostModel: string | null;
+    requestedConcurrency: number;
+    /** Average estimated tokens per packet/worker. Used for TPM budget. */
+    estimatedPacketTokens?: number;
+    quotaStateEntry?: QuotaStateEntry | null;
+}
+export declare function scheduleWave(options: ScheduleWaveOptions): WaveSchedule;
+/** Build the state key used for indexing quota-state.json entries. */
+export declare function buildProviderModelKey(providerName: string, hostModel: string | null | undefined): string;

package/dist/quota/scheduler.js ADDED Viewed

@@ -0,0 +1,76 @@
+import { classifyProvider, resolveLimits } from "./limits.js";
+import { computeMaxSafeConcurrency } from "./state.js";
+export function scheduleWave(options) {
+    const { providerName, sessionConfig, hostModel, requestedConcurrency, estimatedPacketTokens = 0, quotaStateEntry = null, } = options;
+    const quota = sessionConfig.quota ?? {};
+    if (quota.enabled === false) {
+        const limits = {
+            context_tokens: quota.default_context_tokens ?? 32_000,
+            output_tokens: quota.reserved_output_tokens ?? 4_096,
+            requests_per_minute: null,
+            input_tokens_per_minute: null,
+            output_tokens_per_minute: null,
+        };
+        return {
+            wave_size: requestedConcurrency,
+            estimated_wave_tokens: requestedConcurrency * estimatedPacketTokens,
+            cooldown_until: null,
+            confidence: "high",
+            source: "default",
+            resolved_limits: limits,
+            model: hostModel,
+        };
+    }
+    const safetyMargin = quota.safety_margin ?? 0.8;
+    const halfLifeHours = quota.empirical_half_life_hours ?? 24;
+    const providerType = classifyProvider(providerName);
+    const { limits, source, confidence } = resolveLimits({ providerName, sessionConfig, hostModel });
+    let waveSize = requestedConcurrency;
+    let cooldownUntil = null;
+    // Respect an active cooldown period
+    if (quotaStateEntry?.cooldown_until) {
+        const cooldownExpiry = new Date(quotaStateEntry.cooldown_until).getTime();
+        if (cooldownExpiry > Date.now()) {
+            cooldownUntil = quotaStateEntry.cooldown_until;
+            waveSize = 1;
+        }
+    }
+    if (!cooldownUntil) {
+        // Cap by requests-per-minute
+        if (limits.requests_per_minute != null) {
+            const rpmCap = Math.max(1, Math.floor(limits.requests_per_minute * safetyMargin));
+            waveSize = Math.min(waveSize, rpmCap);
+        }
+        // Cap by input tokens-per-minute
+        if (limits.input_tokens_per_minute != null && estimatedPacketTokens > 0) {
+            const tpmCap = Math.max(1, Math.floor((limits.input_tokens_per_minute * safetyMargin) / estimatedPacketTokens));
+            waveSize = Math.min(waveSize, tpmCap);
+        }
+        if (quotaStateEntry) {
+            const learnedCap = computeMaxSafeConcurrency(quotaStateEntry, halfLifeHours);
+            waveSize = Math.min(waveSize, learnedCap);
+        }
+        else if (providerType === "hosted" && source === "default") {
+            // Unknown hosted provider with no learned data and no model-specific limits —
+            // be conservative. If the caller supplied RPM/TPM caps those already govern rate;
+            // this guard only triggers when we have no rate information at all.
+            const conservativeDefault = quota.unknown_hosted_concurrency ?? 1;
+            waveSize = Math.min(waveSize, conservativeDefault);
+        }
+        // Local providers with no learned data: use requestedConcurrency (no rate pressure)
+    }
+    waveSize = Math.max(1, waveSize);
+    return {
+        wave_size: waveSize,
+        estimated_wave_tokens: waveSize * estimatedPacketTokens,
+        cooldown_until: cooldownUntil,
+        confidence,
+        source,
+        resolved_limits: limits,
+        model: hostModel,
+    };
+}
+/** Build the state key used for indexing quota-state.json entries. */
+export function buildProviderModelKey(providerName, hostModel) {
+    return hostModel ? `${providerName}/${hostModel}` : `${providerName}/*`;
+}

package/dist/quota/state.d.ts ADDED Viewed

@@ -0,0 +1,12 @@
+import type { ObservedWaveOutcome, QuotaState, QuotaStateEntry } from "./types.js";
+export declare function getQuotaStatePath(): string;
+export declare function decayWeight(weight: number, elapsedHours: number, halfLifeHours: number): number;
+export declare function applyDecayToEntry(entry: QuotaStateEntry, halfLifeHours: number): QuotaStateEntry;
+export declare function readQuotaState(): Promise<QuotaState>;
+export declare function writeQuotaState(state: QuotaState): Promise<void>;
+/**
+ * Returns the highest concurrency level for which decayed success evidence
+ * exceeds failure evidence, with a minimum of 1.
+ */
+export declare function computeMaxSafeConcurrency(entry: QuotaStateEntry, halfLifeHours: number, maxToCheck?: number): number;
+export declare function recordWaveOutcome(providerModelKey: string, outcome: ObservedWaveOutcome, halfLifeHours: number): Promise<void>;

package/dist/quota/state.js ADDED Viewed

@@ -0,0 +1,101 @@
+import { mkdir, readFile, writeFile } from "node:fs/promises";
+import { homedir } from "node:os";
+import { join } from "node:path";
+const STATE_DIR = join(homedir(), ".audit-code");
+const STATE_PATH = join(STATE_DIR, "quota-state.json");
+// A bucket needs at least this much success weight before we trust it.
+const MIN_EVIDENCE_WEIGHT = 0.5;
+export function getQuotaStatePath() {
+    return STATE_PATH;
+}
+export function decayWeight(weight, elapsedHours, halfLifeHours) {
+    if (halfLifeHours <= 0 || weight <= 0)
+        return 0;
+    return weight * Math.pow(0.5, elapsedHours / halfLifeHours);
+}
+export function applyDecayToEntry(entry, halfLifeHours) {
+    const elapsedHours = (Date.now() - new Date(entry.updated_at).getTime()) / (1000 * 60 * 60);
+    if (elapsedHours < 0.001)
+        return entry;
+    const decayed = {};
+    for (const [key, bucket] of Object.entries(entry.buckets)) {
+        decayed[key] = {
+            success_weight: decayWeight(bucket.success_weight, elapsedHours, halfLifeHours),
+            failure_weight: decayWeight(bucket.failure_weight, elapsedHours, halfLifeHours),
+        };
+    }
+    return { ...entry, buckets: decayed };
+}
+function isQuotaState(value) {
+    return (value !== null &&
+        typeof value === "object" &&
+        !Array.isArray(value) &&
+        value["version"] === 1 &&
+        typeof value["entries"] === "object");
+}
+export async function readQuotaState() {
+    try {
+        const raw = await readFile(STATE_PATH, "utf8");
+        const parsed = JSON.parse(raw);
+        if (isQuotaState(parsed))
+            return parsed;
+    }
+    catch {
+        // File not found or malformed — start fresh
+    }
+    return { version: 1, entries: {} };
+}
+export async function writeQuotaState(state) {
+    await mkdir(STATE_DIR, { recursive: true });
+    await writeFile(STATE_PATH, JSON.stringify(state, null, 2) + "\n", "utf8");
+}
+/**
+ * Returns the highest concurrency level for which decayed success evidence
+ * exceeds failure evidence, with a minimum of 1.
+ */
+export function computeMaxSafeConcurrency(entry, halfLifeHours, maxToCheck = 32) {
+    const decayed = applyDecayToEntry(entry, halfLifeHours);
+    let maxSafe = 1;
+    for (let n = 1; n <= maxToCheck; n++) {
+        const bucket = decayed.buckets[String(n)];
+        if (!bucket)
+            break;
+        if (bucket.success_weight >= MIN_EVIDENCE_WEIGHT &&
+            bucket.success_weight > bucket.failure_weight) {
+            maxSafe = n;
+        }
+        else {
+            break;
+        }
+    }
+    return maxSafe;
+}
+function blankEntry() {
+    return { updated_at: new Date().toISOString(), buckets: {}, cooldown_until: null, last_429_at: null };
+}
+export async function recordWaveOutcome(providerModelKey, outcome, halfLifeHours) {
+    const state = await readQuotaState();
+    const entry = applyDecayToEntry(state.entries[providerModelKey] ?? blankEntry(), halfLifeHours);
+    if (outcome.outcome === "success") {
+        // Success at N proves 1..N are all safe
+        for (let n = 1; n <= outcome.concurrency; n++) {
+            const bucket = entry.buckets[String(n)] ?? { success_weight: 0, failure_weight: 0 };
+            bucket.success_weight += 1.0;
+            entry.buckets[String(n)] = bucket;
+        }
+    }
+    else {
+        entry.last_429_at = new Date().toISOString();
+        if (outcome.cooldown_until)
+            entry.cooldown_until = outcome.cooldown_until;
+        // Failure at N marks N and above as unsafe
+        for (let n = outcome.concurrency; n <= outcome.concurrency + 4; n++) {
+            const bucket = entry.buckets[String(n)] ?? { success_weight: 0, failure_weight: 0 };
+            bucket.failure_weight += 1.0;
+            entry.buckets[String(n)] = bucket;
+        }
+    }
+    entry.updated_at = new Date().toISOString();
+    state.entries[providerModelKey] = entry;
+    await writeQuotaState(state);
+}

package/dist/quota/types.d.ts ADDED Viewed

@@ -0,0 +1,50 @@
+export type LimitSource = "explicit_config" | "cli_flags" | "known_metadata" | "learned" | "default";
+export type LimitConfidence = "high" | "medium" | "low";
+export interface ResolvedLimits {
+    context_tokens: number;
+    output_tokens: number;
+    requests_per_minute: number | null;
+    input_tokens_per_minute: number | null;
+    output_tokens_per_minute: number | null;
+}
+export interface ConcurrencyBucket {
+    success_weight: number;
+    failure_weight: number;
+}
+export interface QuotaStateEntry {
+    updated_at: string;
+    buckets: Record<string, ConcurrencyBucket>;
+    cooldown_until: string | null;
+    last_429_at: string | null;
+}
+export interface QuotaState {
+    version: 1;
+    entries: Record<string, QuotaStateEntry>;
+}
+export interface WaveSchedule {
+    wave_size: number;
+    estimated_wave_tokens: number;
+    cooldown_until: string | null;
+    confidence: LimitConfidence;
+    source: LimitSource;
+    resolved_limits: ResolvedLimits;
+    model: string | null;
+}
+export interface DispatchQuota {
+    contract_version: "audit-code-dispatch-quota/v1alpha1";
+    run_id: string;
+    model: string | null;
+    resolved_limits: ResolvedLimits;
+    confidence: LimitConfidence;
+    source: LimitSource;
+    wave_size: number;
+    estimated_wave_tokens: number;
+    cooldown_until: string | null;
+}
+export interface ObservedWaveOutcome {
+    concurrency: number;
+    estimated_tokens: number;
+    outcome: "success" | "rate_limited" | "timeout";
+    cooldown_until?: string | null;
+    reset_at?: string | null;
+}

package/dist/quota/types.js ADDED Viewed

@@ -0,0 +1 @@
+export {};

package/dist/supervisor/operatorHandoff.js CHANGED Viewed

@@ -105,9 +105,7 @@ function buildSuggestedCommands(artifactsDir, suggestedInputs, status, activeRev
         return [
             renderShellCommand([
                 "audit-code",
-                "prepare-dispatch",
-                "--run-id",
-                activeReviewRun.run_id,
+                "next-step",
                 "--artifacts-dir",
                 artifactsDir,
             ]),
@@ -170,7 +168,7 @@ function renderMarkdown(handoff) {
             lines.push(`- ${command}`);
         }
         if (handoff.active_review_run) {
-            lines.push("- Use packet dispatch commands only when the conversation host exposes a callable subagent tool; otherwise follow the single-task fallback.");
+            lines.push("- Use next-step so the backend renders either packet dispatch or single-task fallback after the host reports capabilities.");
         }
     }
     if (handoff.active_review_run) {
@@ -233,9 +231,7 @@ export function buildAuditCodeHandoff(params) {
     if (params.state.status === BLOCKED_STATUS && params.activeReviewRun) {
         handoff.quick_start = renderShellCommand([
             "audit-code",
-            "prepare-dispatch",
-            "--run-id",
-            params.activeReviewRun.run_id,
+            "next-step",
             "--artifacts-dir",
             params.artifactsDir,
         ]);

package/dist/types/sessionConfig.d.ts CHANGED Viewed

@@ -20,6 +20,33 @@ export interface VSCodeTaskConfig {
     command_template: string[];
     env?: Record<string, string>;
 }
+export interface QuotaModelLimits {
+    context_tokens?: number;
+    output_tokens?: number;
+    requests_per_minute?: number;
+    input_tokens_per_minute?: number;
+    output_tokens_per_minute?: number;
+}
+export interface QuotaConfig {
+    /** Set to false to disable all quota scheduling (default: true). */
+    enabled?: boolean;
+    /** Whether to probe the provider for live limits (default: "auto"). */
+    probe?: "auto" | "never" | "force";
+    /** Fraction of known limits to actually use (default: 0.8). */
+    safety_margin?: number;
+    /** Concurrency ceiling for hosted providers with no learned data (default: 1). */
+    unknown_hosted_concurrency?: number;
+    /** Concurrency for local providers with no learned data (default: "unlimited"). */
+    unknown_local_concurrency?: number | "unlimited";
+    /** Assumed context window when the model is not recognized (default: 32000). */
+    default_context_tokens?: number;
+    /** Tokens reserved for model output per request (default: 4096). */
+    reserved_output_tokens?: number;
+    /** Half-life of empirical success/failure evidence in hours (default: 24). */
+    empirical_half_life_hours?: number;
+    /** Per-model overrides keyed by "provider/model". */
+    models?: Record<string, QuotaModelLimits>;
+}
 export declare const PROVIDER_SECTION_KEYS: {
     readonly "subprocess-template": "subprocess_template";
     readonly "claude-code": "claude_code";
@@ -40,4 +67,5 @@ export interface SessionConfig {
     vscode_task?: VSCodeTaskConfig;
     agent_task_batch_size?: number;
     parallel_workers?: number;
+    quota?: QuotaConfig;
 }

package/docs/contracts.md CHANGED Viewed

@@ -77,6 +77,23 @@ The backend stores resumable artifacts under `.audit-artifacts/`, including:
 Consumers should treat these as versioned JSON artifacts and validate them with
 `audit-code validate` rather than inferring state from filenames alone.
+## Step artifacts
+The conversation-first `/audit-code` prompt is a loader. It runs
+`audit-code next-step` and then follows only the returned step prompt. The
+backend writes the current step contract to:
+- `<artifacts_dir>/steps/current-step.json`
+- `<artifacts_dir>/steps/current-prompt.md`
+`current-step.json` uses `contract_version: "audit-code-step/v1alpha1"` and
+includes `step_kind`, `prompt_path`, `status`, `run_id`, `allowed_commands`,
+`stop_condition`, `repo_root`, `artifacts_dir`, and relevant `artifact_paths`.
+When semantic review is blocked, `next-step` first emits a `capability_check`.
+After the host reports `--host-can-dispatch-subagents true|false`, the backend
+renders exactly one review path: packet dispatch or the single-task fallback.
 ## Dispatch packets
 Packet dispatch preserves the existing `AuditTask` and `AuditResult`
@@ -92,13 +109,18 @@ Planning artifacts are shaped by:
 Normal packet flow:
 ```text
-audit-code prepare-dispatch --run-id <run_id> --artifacts-dir <artifacts_dir>
+audit-code next-step --host-can-dispatch-subagents true
+backend prepares dispatch-plan.json
 conversation launches one worker per dispatch-plan entry
 worker reads entry.prompt_path
 worker submits AuditResult[] through submit-packet
 audit-code merge-and-ingest --run-id <run_id> --artifacts-dir <artifacts_dir>
 ```
+`audit-code prepare-dispatch --run-id <run_id> --artifacts-dir
+<artifacts_dir>` remains available for compatibility and tests, but generic
+handoff fields point users and prompts to `next-step`.
 Packet artifacts:
 - `<artifacts_dir>/runs/<run_id>/dispatch-plan.json`

package/docs/operator-guide.md CHANGED Viewed

@@ -43,7 +43,7 @@ Host-specific files may include:
 - Codex: managed `AGENTS.md` fallback guidance
 - Claude Desktop: project template, remote MCP connector, local MCP bundle
-- OpenCode: `opencode.json` with `/audit-code` slash command and auditor MCP server
+- OpenCode: `opencode.json` with auditor MCP server and permission wiring; the `/audit-code` command is global npm-installed state
 - VS Code/Copilot: prompt, custom agent, instructions, and `.vscode/mcp.json`
 - Antigravity: planning-mode and MCP-oriented guidance
@@ -64,9 +64,10 @@ with the canonical `/audit-code` spelling.
 Claude Desktop is treated as an MCP-first host. Use the generated project
 template and local bundle artifacts when installing the integration.
-OpenCode uses `opencode.json` (generated by `audit-code ensure` or `audit-code
-install`) which registers the `/audit-code` slash command and the auditor MCP
-server together. VS Code uses repo-local prompt and MCP configuration files.
+OpenCode uses the global command seeded by `npm install -g auditor-lambda`.
+The generated project `opencode.json` should not define `command["audit-code"]`;
+it only wires the auditor MCP server and project permissions. VS Code uses
+repo-local prompt and MCP configuration files.
 Antigravity should be treated as a workflow-and-artifacts host until it has a
 stable project-local config surface. Use generated planning-mode guidance,
@@ -100,6 +101,7 @@ The wrapper:
 Useful fallback commands:
 ```bash
+audit-code next-step
 audit-code --single-step
 audit-code --results /path/to/audit_results.json
 audit-code --batch-results /path/to/results-dir
@@ -111,6 +113,11 @@ audit-code cleanup
 audit-code mcp
 ```
+`audit-code next-step` is the backend-rendered step engine used by the
+conversation prompt. It writes `.audit-artifacts/steps/current-step.json` and
+`.audit-artifacts/steps/current-prompt.md`, then the host should follow only
+that prompt.
 `audit-code validate` checks artifact shape, cross-artifact consistency,
 session config, and explicit provider readiness.

package/docs/product.md CHANGED Viewed

@@ -148,9 +148,10 @@ Readiness should be judged through three checks:
 - field-trial quality: run real repositories through planning, validate
   artifacts, and use `audit_plan_metrics.json` to track packet count, weak
   packet count, average cohesion, merge edge kinds, and weak-packet samples
-- full-loop behavior: prove `prepare-dispatch`, worker review,
-  `submit-packet`, `merge-and-ingest`, selective deepening, runtime validation,
-  and final `audit-report.md` promotion in at least one real host flow
+- full-loop behavior: prove `next-step` capability routing, packet dispatch,
+  worker review, `submit-packet`, `merge-and-ingest`, selective deepening,
+  runtime validation, and final `audit-report.md` promotion in at least one
+  real host flow
 - release hygiene: keep `npm run verify:release`, linked smoke, packaged
   smoke, tarball preview, and Trusted Publishing green from a clean checkout

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "auditor-lambda",
-  "version": "0.3.20",
+  "version": "0.3.22",
   "private": false,
   "description": "Portable hybrid code-auditing framework for arbitrary repositories.",
   "type": "module",