npm - @tangle-network/agent-eval - Versions diffs - 0.48.0 → 0.50.0 - Mend

@tangle-network/agent-eval 0.48.0 → 0.50.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (54) hide show

package/README.md +7 -0
package/dist/adapters/http.d.ts +1 -1
package/dist/adapters/langchain.d.ts +1 -1
package/dist/adapters/{traceai.d.ts → otel.d.ts} +29 -29
package/dist/adapters/{traceai.js → otel.js} +9 -5
package/dist/adapters/otel.js.map +1 -0
package/dist/campaign/index.d.ts +3 -3
package/dist/{chunk-PD3MH6WU.js → chunk-5KSDYBYH.js} +2 -2
package/dist/{chunk-MNL6LXGQ.js → chunk-EGIPWXHL.js} +2 -98
package/dist/chunk-EGIPWXHL.js.map +1 -0
package/dist/{chunk-OYI6RZJK.js → chunk-FQK2CCIM.js} +1 -1
package/dist/chunk-FQK2CCIM.js.map +1 -0
package/dist/chunk-MAZ26DC7.js +99 -0
package/dist/chunk-MAZ26DC7.js.map +1 -0
package/dist/chunk-SHTXZ4O2.js +113 -0
package/dist/chunk-SHTXZ4O2.js.map +1 -0
package/dist/{chunk-KQ26DYTQ.js → chunk-UBQGWD3O.js} +2 -2
package/dist/contract/index.d.ts +206 -9
package/dist/contract/index.js +751 -3
package/dist/contract/index.js.map +1 -1
package/dist/governance/index.d.ts +1 -1
package/dist/hosted/index.d.ts +8 -192
package/dist/hosted/index.js +1 -1
package/dist/index-BRxz6qov.d.ts +409 -0
package/dist/index.d.ts +18 -462
package/dist/index.js +14 -106
package/dist/index.js.map +1 -1
package/dist/meta-eval/index.d.ts +3 -3
package/dist/openapi.json +1 -1
package/dist/{outcome-store-BxJ3DQKJ.d.ts → outcome-store-D6KWmYvj.d.ts} +1 -1
package/dist/registry-8KAs18kY.d.ts +457 -0
package/dist/{release-report-DBB8lB1P.d.ts → release-report-DSu0DWy8.d.ts} +3 -296
package/dist/reporting.d.ts +6 -4
package/dist/reporting.js +6 -4
package/dist/{researcher-CHMO56K0.d.ts → researcher-LZD0qHEa.d.ts} +1 -1
package/dist/rl.d.ts +9 -8
package/dist/rl.js +3 -2
package/dist/rl.js.map +1 -1
package/dist/{rubric-predictive-validity-CJ08tGwq.d.ts → rubric-predictive-validity-ByZEC3BX.d.ts} +1 -1
package/dist/{run-improvement-loop-B-L8GgpW.d.ts → run-improvement-loop-BPMjNKMJ.d.ts} +2 -2
package/dist/sequential-5iSVfzl2.d.ts +139 -0
package/dist/store-CJbzDxZ2.d.ts +220 -0
package/dist/{sequential-CbFH___X.d.ts → summary-report-B7gNRX-r.d.ts} +1 -139
package/dist/traces.d.ts +3 -220
package/dist/{types-8u72Gc76.d.ts → types-Dbj5gu8n.d.ts} +1 -1
package/dist/types-DhqpAi_z.d.ts +296 -0
package/docs/adapters-observability.md +3 -3
package/package.json +5 -5
package/dist/adapters/traceai.js.map +0 -1
package/dist/chunk-MNL6LXGQ.js.map +0 -1
package/dist/chunk-OYI6RZJK.js.map +0 -1
/package/dist/{chunk-PD3MH6WU.js.map → chunk-5KSDYBYH.js.map} +0 -0
/package/dist/{chunk-KQ26DYTQ.js.map → chunk-UBQGWD3O.js.map} +0 -0
/package/docs/design/{substrate-gaps-2026-05-27.md → substrate-gaps.md} +0 -0

package/dist/meta-eval/index.d.ts CHANGED Viewed

@@ -1,7 +1,7 @@
 import { R as Run, T as TraceStore } from '../store-Db2Bv8Cf.js';
-import { b as OutcomeFilter, O as OutcomeStore } from '../outcome-store-BxJ3DQKJ.js';
-export { D as DeploymentOutcome, F as FileSystemOutcomeStore, a as FileSystemOutcomeStoreOptions, I as InMemoryOutcomeStore } from '../outcome-store-BxJ3DQKJ.js';
-export { R as RubricOutcomePair, a as RubricPredictiveValidityInput, b as RubricPredictiveValidityReport, c as RubricRanking, r as rubricPredictiveValidity } from '../rubric-predictive-validity-CJ08tGwq.js';
+import { a as OutcomeFilter, O as OutcomeStore } from '../outcome-store-D6KWmYvj.js';
+export { D as DeploymentOutcome, F as FileSystemOutcomeStore, b as FileSystemOutcomeStoreOptions, I as InMemoryOutcomeStore } from '../outcome-store-D6KWmYvj.js';
+export { R as RubricOutcomePair, a as RubricPredictiveValidityInput, b as RubricPredictiveValidityReport, c as RubricRanking, r as rubricPredictiveValidity } from '../rubric-predictive-validity-ByZEC3BX.js';
 import '../run-record-BGY6bHRh.js';
 import '../errors-mje_cKOs.js';

package/dist/openapi.json CHANGED Viewed

@@ -2,7 +2,7 @@
   "openapi": "3.1.0",
   "info": {
     "title": "@tangle-network/agent-eval — wire protocol",
-    "version": "0.48.0",
+    "version": "0.50.0",
     "description": "HTTP and stdio RPC interface to agent-eval. The TypeScript runtime is the source of truth; this spec is the contract that cross-language clients (Python, Rust, Go) generate from.\n\nWire-protocol version: 1.0.0. Bumps on breaking changes to request/response schemas.",
     "contact": {
       "name": "Tangle Network",

package/dist/{outcome-store-BxJ3DQKJ.d.ts → outcome-store-D6KWmYvj.d.ts} RENAMED Viewed

@@ -60,4 +60,4 @@ declare class FileSystemOutcomeStore implements OutcomeStore {
     list(filter?: OutcomeFilter): Promise<DeploymentOutcome[]>;
 }
-export { type DeploymentOutcome as D, FileSystemOutcomeStore as F, InMemoryOutcomeStore as I, type OutcomeStore as O, type FileSystemOutcomeStoreOptions as a, type OutcomeFilter as b };
+export { type DeploymentOutcome as D, FileSystemOutcomeStore as F, InMemoryOutcomeStore as I, type OutcomeStore as O, type OutcomeFilter as a, type FileSystemOutcomeStoreOptions as b };

package/dist/registry-8KAs18kY.d.ts ADDED Viewed

@@ -0,0 +1,457 @@
+import { b as LlmCallRequest, c as LlmCallResult } from './llm-client-BXVRUZyX.js';
+import { R as RunRecord } from './run-record-BGY6bHRh.js';
+import { T as TraceAnalysisStore } from './store-CJbzDxZ2.js';
+import { J as JudgeInput } from './types-DhqpAi_z.js';
+/**
+ * ChatClient — the single LLM abstraction analysts call.
+ *
+ * agent-eval already ships an `LlmClient` (OpenAI-compatible, retry,
+ * graceful JSON-schema degrade) and judges that talk to `TCloud`. Two
+ * mixed patterns force every analyst author to pick a transport, which
+ * couples analyst code to runtime concerns (cli-bridge vs router vs
+ * sandbox-sdk) it shouldn't know about.
+ *
+ * `ChatClient` is one interface every analyst takes via `AnalystContext.chat`.
+ * The operator decides at the registry boundary which transport binds
+ * to it. Analyst code stays transport-agnostic; swapping production
+ * (sandbox-sdk) for local dev (cli-bridge) or tests (mock) is a one-
+ * line factory call.
+ *
+ * Designed to coexist: existing `LlmClient` callers and existing
+ * `TCloud`-based judges keep working untouched. New analyst code uses
+ * `ChatClient`. When old call sites migrate, they pick up budgeting,
+ * cancellation, and unified telemetry for free.
+ */
+/**
+ * Unified chat interface. Mirrors LlmCallRequest/Result so the OpenAI-
+ * compatible mental model stays. Two methods are planned: a one-shot `chat()`
+ * and a `streamChat()` for future agentic loops (not yet exposed).
+ */
+interface ChatClient {
+    /** Display name of the bound transport — included in telemetry. */
+    readonly transport: ChatTransport;
+    /** Default model when the caller omits one — operators bind this per environment. */
+    readonly defaultModel?: string;
+    chat(req: ChatRequest, opts?: ChatCallOpts): Promise<ChatResponse>;
+}
+type ChatTransport = 'router' | 'sandbox-sdk' | 'cli-bridge' | 'direct-provider' | 'mock';
+interface ChatRequest extends Omit<LlmCallRequest, 'model'> {
+    /** Optional — falls back to ChatClient.defaultModel. */
+    model?: string;
+}
+type ChatResponse = LlmCallResult;
+interface ChatCallOpts {
+    /** Cancel the in-flight request. */
+    signal?: AbortSignal;
+    /** Hard USD ceiling for this single call (informational; the underlying transport may not enforce it). */
+    maxCostUsd?: number;
+    /** Correlation tag carried into request headers when the transport allows. */
+    correlationId?: string;
+}
+type CreateChatClientOpts = RouterTransportOpts | CliBridgeTransportOpts | DirectProviderTransportOpts | SandboxSdkTransportOpts | MockTransportOpts;
+interface BaseTransportOpts {
+    defaultModel?: string;
+}
+interface RouterTransportOpts extends BaseTransportOpts {
+    transport: 'router';
+    baseUrl?: string;
+    apiKey: string;
+}
+interface CliBridgeTransportOpts extends BaseTransportOpts {
+    transport: 'cli-bridge';
+    baseUrl?: string;
+    bearer?: string;
+}
+interface DirectProviderTransportOpts extends BaseTransportOpts {
+    transport: 'direct-provider';
+    baseUrl: string;
+    apiKey: string;
+}
+/**
+ * Sandbox-SDK transport. Provided as a thin pass-through: the caller
+ * supplies a callable that mimics LlmClient.chat() against an already-
+ * configured Sandbox handle. We don't import the SDK here to keep
+ * agent-eval dep-free of @tangle-network/sandbox.
+ */
+interface SandboxSdkTransportOpts extends BaseTransportOpts {
+    transport: 'sandbox-sdk';
+    chat: (req: ChatRequest, opts?: ChatCallOpts) => Promise<ChatResponse>;
+}
+/**
+ * Mock transport for tests. The handler receives the request and returns
+ * whatever the test wants. No retries, no JSON-schema degrade.
+ */
+interface MockTransportOpts extends BaseTransportOpts {
+    transport: 'mock';
+    handler: (req: ChatRequest, opts?: ChatCallOpts) => Promise<ChatResponse>;
+}
+/**
+ * Build a ChatClient bound to a specific transport. The returned client
+ * is safe to share across analysts in a single registry run.
+ */
+declare function createChatClient(opts: CreateChatClientOpts): ChatClient;
+/**
+ * Analyst contract — the missing orchestration layer over agent-eval's
+ * existing analyzers (analyzeTraces, MultiLayerVerifier, RunCritic,
+ * SemanticConceptJudge, JudgeFn, ...).
+ *
+ * Each existing primitive returns its own output shape. The Analyst
+ * contract is the single envelope every primitive lifts into, so a
+ * registry can run N analysts against a run and a single renderer can
+ * compose findings without knowing which analyzer produced them.
+ *
+ * The contract is intentionally domain-agnostic: nothing here knows
+ * about code, voice, RAG, or any particular agent stack. Analysts
+ * declare what INPUT KIND they need (a trace store, an artifact dir,
+ * a RunRecord, a JudgeInput, or `custom`), and the registry routes
+ * the matching input from `AnalystRunInputs`.
+ */
+/**
+ * Unified envelope every analyst emits. Schema-versioned so renderers
+ * and time-series diffs survive future field additions.
+ */
+interface AnalystFinding {
+    schema_version: '1.0.0';
+    /**
+     * Stable hash over identity-defining fields (analyst_id + canonical
+     * claim + area + optional subject). Two findings from two runs that
+     * "are the same finding" share this id — that's what `diffFindings`
+     * uses to compute appeared/disappeared sets across runs.
+     */
+    finding_id: string;
+    analyst_id: string;
+    produced_at: string;
+    severity: AnalystSeverity;
+    /**
+     * Coarse classification. Renderers group by this. Free-form so
+     * domain-specific analysts can introduce categories without a
+     * schema change ('agent-reasoning', 'verification', 'cost',
+     * 'tool-use', 'safety', 'latency', 'data-quality', ...).
+     */
+    area: string;
+    claim: string;
+    rationale?: string;
+    evidence_refs: EvidenceRef[];
+    recommended_action?: string;
+    validation_plan?: string;
+    /** 0..1 — the analyst's own confidence. Not calibrated across analysts. */
+    confidence: number;
+    /**
+     * Optional subject the finding is about — leaf id, agent id, request
+     * id. Included in finding_id when present so per-subject findings
+     * diff cleanly across runs.
+     */
+    subject?: string;
+    /** Analyst-private extras; renderers ignore unless they know the analyst. */
+    metadata?: Record<string, unknown>;
+}
+type AnalystSeverity = 'critical' | 'high' | 'medium' | 'low' | 'info';
+interface EvidenceRef {
+    /**
+     * Where the evidence lives. `span` and `event` refer to OTLP trace
+     * elements; `artifact` to a file inside the run's artifact tree;
+     * `finding` to another AnalystFinding (cross-analyst chaining);
+     * `metric` to a named scalar reading the renderer knows how to read.
+     */
+    kind: 'span' | 'event' | 'artifact' | 'finding' | 'metric';
+    uri: string;
+    excerpt?: string;
+}
+/**
+ * The discriminator the registry uses to pass the right input.
+ * `custom` is the escape hatch — analysts that need something else
+ * (e.g. an embedding cache, a partner SDK handle) read it from
+ * `AnalystRunInputs.custom[<analyst id>]`.
+ */
+type AnalystInputKind = 'trace-store' | 'artifact-dir' | 'run-record' | 'judge-input' | 'custom';
+interface AnalystCost {
+    /** `deterministic` analysts MUST NOT call the LLM. */
+    kind: 'deterministic' | 'llm';
+    /** Optional declared upper bound; the registry can enforce a budget. */
+    est_usd_per_run?: number;
+    /** Models the analyst expects to use (informational). */
+    models?: string[];
+}
+interface AnalystRequirements {
+    /** Min number of shots / samples the analyst needs to produce signal. */
+    min_shots?: number;
+    /** Capabilities the runtime must supply (e.g. ['network', 'gpu']). */
+    capabilities?: string[];
+}
+/**
+ * What's passed to every analyst call. The registry resolves which
+ * field the analyst's `inputKind` selects and asserts it's present.
+ */
+interface AnalystRunInputs {
+    traceStore?: TraceAnalysisStore;
+    artifactDir?: string;
+    runRecord?: RunRecord;
+    judgeInput?: JudgeInput;
+    /** Keyed by analyst id; populated by callers that registered custom analysts. */
+    custom?: Record<string, unknown>;
+}
+interface AnalystContext {
+    runId: string;
+    /** Stable correlation id so logs from a single registry.run() share a tag. */
+    correlationId: string;
+    /** Wall-clock deadline (epoch ms). Analysts SHOULD honor it for graceful cancellation. */
+    deadlineMs?: number;
+    /** Per-analyst USD budget. Analysts MAY check before issuing LLM calls. */
+    budgetUsd?: number;
+    /**
+     * Shared chat client. Analysts that call an LLM go through this so
+     * the operator picks transport (sandbox-sdk | router | cli-bridge |
+     * direct-provider | mock) at the registry boundary without touching
+     * analyst code.
+     */
+    chat?: ChatClient;
+    /**
+     * Findings from a prior run the operator wants the analyst to see as
+     * retrieval context. Kinds that take advantage of cross-run memory
+     * (failure-mode "I saw this cluster last run", knowledge-gap "the wiki
+     * page I asked for is still missing") render these into the actor's
+     * working set. Filtering is the operator's job: pass the slice that
+     * matches the analyst's id, or pass everything and let the kind
+     * filter. Empty / absent means no cross-run context.
+     */
+    priorFindings?: ReadonlyArray<AnalystFinding>;
+    /** Free-form runtime tags (env, host, op). Findings can echo these into metadata. */
+    tags?: Record<string, string>;
+    /** Logger callback — analysts SHOULD prefer this over console.* for testability. */
+    log?: (msg: string, fields?: Record<string, unknown>) => void;
+    /** Optional abort signal. Analysts SHOULD pass it through to LLM calls. */
+    signal?: AbortSignal;
+}
+/**
+ * The minimal contract. Concrete analysts can refine `TInput` so
+ * implementations stay type-safe (e.g. a trace analyst's `TInput` is
+ * `TraceAnalysisStore`); the registry passes the right field from
+ * `AnalystRunInputs` based on `inputKind`.
+ */
+interface Analyst<TInput = unknown> {
+    /** Stable identifier — appears in finding_id, telemetry, and registry exclusion lists. */
+    readonly id: string;
+    /** Human-readable. One sentence. */
+    readonly description: string;
+    readonly inputKind: AnalystInputKind;
+    readonly cost: AnalystCost;
+    readonly requires?: AnalystRequirements;
+    /** Bump on breaking changes to claim wording or area so old finding_ids don't collide. */
+    readonly version: string;
+    analyze(input: TInput, ctx: AnalystContext): Promise<AnalystFinding[]>;
+}
+/**
+ * Compute the stable finding_id from the identity-defining fields.
+ * Default implementation hashes {analyst_id, area, subject, normalized claim}.
+ * Analysts that emit findings whose claim text varies per run (timestamps,
+ * counts) SHOULD either: (a) pass an explicit `id_basis` to fix the hash,
+ * or (b) move the variable part into `rationale`/`metadata` and keep the
+ * `claim` static.
+ */
+declare function computeFindingId(input: {
+    analyst_id: string;
+    area: string;
+    subject?: string;
+    claim: string;
+    /** Override the claim for hashing — use when the displayed claim has run-specific bits. */
+    id_basis?: string;
+}): string;
+/**
+ * Convenience factory: produce a fully-formed AnalystFinding with the
+ * id computed automatically. Analyst code stays terse.
+ */
+declare function makeFinding(init: Omit<AnalystFinding, 'schema_version' | 'finding_id' | 'produced_at'> & {
+    id_basis?: string;
+    produced_at?: string;
+}): AnalystFinding;
+interface AnalystRunSummary {
+    analyst_id: string;
+    status: 'ok' | 'skipped' | 'failed';
+    /** Why skipped — missing input, budget exceeded, capability unmet. */
+    reason?: string;
+    findings_count: number;
+    latency_ms: number;
+    cost_usd: number;
+    /** When `status='failed'`: the error class + message, never the full stack. */
+    error?: {
+        class: string;
+        message: string;
+    };
+}
+interface AnalystRunResult {
+    run_id: string;
+    correlation_id: string;
+    started_at: string;
+    ended_at: string;
+    findings: AnalystFinding[];
+    per_analyst: AnalystRunSummary[];
+    /** Total LLM cost in USD across all analysts in this registry.run(). */
+    total_cost_usd: number;
+}
+/**
+ * Events emitted by `AnalystRegistry.runStream(...)` in real time as
+ * the registry executes. UIs subscribe via `for await (const ev of
+ * registry.runStream(...))`; `registry.run(...)` is a thin collector
+ * over the same stream, so the two surfaces share their invariants.
+ *
+ * Per-finding events are intentionally omitted — analyzers are batch
+ * operations (an Ax actor returns the full `findings:json[]` at the
+ * end of the responder), so streaming inside one analyst would only
+ * emit partial JSON consumers can't render. The `analyst-completed` event
+ * is the right granularity; subscribers wanting per-finding rendering
+ * iterate `event.findings` themselves.
+ */
+type AnalystRunEvent = {
+    type: 'run-started';
+    run_id: string;
+    correlation_id: string;
+    started_at: string;
+    /** The ordered list of analyst ids the registry will run. */
+    analyst_ids: ReadonlyArray<string>;
+} | {
+    type: 'analyst-skipped';
+    summary: AnalystRunSummary;
+} | {
+    type: 'analyst-started';
+    analyst_id: string;
+    started_at: string;
+} | {
+    type: 'analyst-completed';
+    /** `summary.status` is `'ok'` for clean completion or `'failed'` for thrown analysts. */
+    summary: AnalystRunSummary;
+    findings: ReadonlyArray<AnalystFinding>;
+} | {
+    type: 'run-completed';
+    result: AnalystRunResult;
+};
+/**
+ * AnalystRegistry — orchestrate N analysts against one run.
+ *
+ * Owns three responsibilities and only three:
+ *   1. Registration — ids must be unique; bad registrations fail loudly
+ *      at register-time, not run-time.
+ *   2. Routing — each analyst declares its `inputKind`; the registry
+ *      picks the matching field from AnalystRunInputs and skips the
+ *      analyst with a logged reason if it's missing.
+ *   3. Isolation — one analyst's exception MUST NOT stop other analysts.
+ *      Failed analysts produce zero findings + a 'failed' summary row.
+ *
+ * Cross-cutting concerns (telemetry, error → finding conversion, cost
+ * ingestion, storage rotation) live in `AnalystHooks`. Budget shaping
+ * (equal split vs weighted vs custom) lives in `BudgetPolicy`. Both
+ * have sensible defaults; consumers override only what they need.
+ */
+interface AnalystHooks {
+    /** Before analyze() — last chance to mutate ctx (e.g. inject tags, override budget). */
+    onBeforeAnalyze?(args: {
+        analyst: Analyst;
+        ctx: AnalystContext;
+        runId: string;
+    }): void | Promise<void>;
+    /** After every analyst (ok | failed | skipped). Use for telemetry, ingestion, rotation. */
+    onAfterAnalyze?(args: {
+        analyst: Analyst;
+        summary: AnalystRunSummary;
+        findings: AnalystFinding[];
+        runId: string;
+    }): void | Promise<void>;
+    /**
+     * On analyst exception. Hook MAY return findings to convert the
+     * error into structured findings; the summary still reports 'failed'.
+     * Return void to keep the default empty-findings behavior.
+     */
+    onError?(args: {
+        analyst: Analyst;
+        error: Error;
+        runId: string;
+    }): AnalystFinding[] | undefined | Promise<AnalystFinding[] | undefined>;
+    /** Once after registry.run() completes. Use for final aggregation, persistence. */
+    onComplete?(args: {
+        result: AnalystRunResult;
+    }): void | Promise<void>;
+}
+interface BudgetPolicy {
+    /** Overall USD cap across the registry.run(). */
+    totalUsd?: number;
+    /** Per-analyst weight for the default allocator. Missing ids get weight 1. */
+    weights?: Record<string, number>;
+    /**
+     * Custom allocator — receives the analyst, remaining/total budget, and
+     * the count of analysts that will run. Returns the per-analyst budget
+     * (or undefined to leave it uncapped). Overrides weights when set.
+     */
+    allocate?: (args: {
+        analyst: Analyst;
+        totalUsd: number | undefined;
+        remainingUsd: number | undefined;
+        runningCount: number;
+    }) => number | undefined;
+}
+interface AnalystRegistryOptions {
+    /** Shared chat client passed to every LLM analyst via AnalystContext. */
+    chat?: ChatClient;
+    /** Logger callback. Defaults to a no-op. */
+    log?: (msg: string, fields?: Record<string, unknown>) => void;
+    /** Hooks invoked around analyze() — observability + customization seam. */
+    hooks?: AnalystHooks;
+    /** Default budget when run() doesn't override. */
+    defaultBudget?: BudgetPolicy;
+}
+interface RegistryRunOpts {
+    /** Restrict to a subset of registered analysts by id. */
+    only?: string[];
+    /** Skip these analysts even if registered. Useful for cheap iteration. */
+    skip?: string[];
+    /** Budget policy — totalUsd + optional weights/allocator. Falls back to options.defaultBudget. */
+    budget?: BudgetPolicy;
+    /** Wall-clock cap. Analysts SHOULD honor `ctx.deadlineMs`. */
+    timeoutMs?: number;
+    /** Abort signal — forwarded into every analyst's context. */
+    signal?: AbortSignal;
+    /** Tags echoed into AnalystContext.tags — useful for tracking environment/version in findings. */
+    tags?: Record<string, string>;
+    /**
+     * Prior-run findings made available as retrieval context to every
+     * analyst via `ctx.priorFindings`. The registry forwards the slice
+     * whose `analyst_id` matches each registered analyst so a kind sees
+     * only its own history. Pass `{ '*': findings }` to broadcast to
+     * every analyst (useful for cross-kind chaining where the improvement
+     * analyst consumes upstream failure findings).
+     */
+    priorFindings?: ReadonlyArray<AnalystFinding> | Record<string, ReadonlyArray<AnalystFinding>>;
+}
+declare class AnalystRegistry {
+    private readonly analysts;
+    private readonly options;
+    constructor(options?: AnalystRegistryOptions);
+    register(analyst: Analyst): void;
+    list(): ReadonlyArray<{
+        id: string;
+        description: string;
+        version: string;
+        cost: Analyst['cost'];
+    }>;
+    run(runId: string, inputs: AnalystRunInputs, runOpts?: RegistryRunOpts): Promise<AnalystRunResult>;
+    /**
+     * Streaming counterpart to `run()`. Emits `AnalystRunEvent` values
+     * in real time — `run-started`, then per-analyst `analyst-skipped` /
+     * `analyst-started` / `analyst-completed`, then a terminal `run-completed` whose
+     * payload is the full `AnalystRunResult`. UIs use this to render
+     * progress; persistence consumers use `run()` and read the result.
+     *
+     * Hooks (`onBeforeAnalyze` / `onAfterAnalyze` / `onError` /
+     * `onComplete`) fire as before — streaming is additive, not a hook
+     * replacement.
+     */
+    runStream(runId: string, inputs: AnalystRunInputs, runOpts?: RegistryRunOpts): AsyncGenerator<AnalystRunEvent, void, void>;
+    private selectAnalysts;
+    private routeInput;
+}
+export { AnalystRegistry as A, type BudgetPolicy as B, type ChatCallOpts as C, type DirectProviderTransportOpts as D, type EvidenceRef as E, type MockTransportOpts as M, type RegistryRunOpts as R, type SandboxSdkTransportOpts as S, type Analyst as a, type AnalystSeverity as b, type AnalystFinding as c, type AnalystCost as d, type AnalystContext as e, type AnalystHooks as f, type AnalystInputKind as g, type AnalystRegistryOptions as h, type AnalystRequirements as i, type AnalystRunEvent as j, type AnalystRunInputs as k, type AnalystRunResult as l, type AnalystRunSummary as m, type ChatClient as n, type ChatRequest as o, type ChatResponse as p, type ChatTransport as q, type CliBridgeTransportOpts as r, type CreateChatClientOpts as s, type RouterTransportOpts as t, computeFindingId as u, createChatClient as v, makeFinding as w };