npm - akribes - Versions diffs - 0.21.17 - Mend

akribes 0.21.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (100) hide show

package/CHANGELOG.md +30 -0
package/LICENSE +21 -0
package/README.md +160 -0
package/dist/client.d.ts +240 -0
package/dist/client.d.ts.map +1 -0
package/dist/client.js +272 -0
package/dist/client.js.map +1 -0
package/dist/errors.d.ts +196 -0
package/dist/errors.d.ts.map +1 -0
package/dist/errors.js +274 -0
package/dist/errors.js.map +1 -0
package/dist/execution/index.d.ts +3 -0
package/dist/execution/index.d.ts.map +1 -0
package/dist/execution/index.js +3 -0
package/dist/execution/index.js.map +1 -0
package/dist/execution/replay.d.ts +37 -0
package/dist/execution/replay.d.ts.map +1 -0
package/dist/execution/replay.js +59 -0
package/dist/execution/replay.js.map +1 -0
package/dist/execution/steps.d.ts +327 -0
package/dist/execution/steps.d.ts.map +1 -0
package/dist/execution/steps.js +1068 -0
package/dist/execution/steps.js.map +1 -0
package/dist/http.d.ts +53 -0
package/dist/http.d.ts.map +1 -0
package/dist/http.js +141 -0
package/dist/http.js.map +1 -0
package/dist/index.d.ts +36 -0
package/dist/index.d.ts.map +1 -0
package/dist/index.js +38 -0
package/dist/index.js.map +1 -0
package/dist/runStream.d.ts +176 -0
package/dist/runStream.d.ts.map +1 -0
package/dist/runStream.js +408 -0
package/dist/runStream.js.map +1 -0
package/dist/sse.d.ts +46 -0
package/dist/sse.d.ts.map +1 -0
package/dist/sse.js +218 -0
package/dist/sse.js.map +1 -0
package/dist/sub/bench.d.ts +182 -0
package/dist/sub/bench.d.ts.map +1 -0
package/dist/sub/bench.js +420 -0
package/dist/sub/bench.js.map +1 -0
package/dist/sub/channels.d.ts +22 -0
package/dist/sub/channels.d.ts.map +1 -0
package/dist/sub/channels.js +32 -0
package/dist/sub/channels.js.map +1 -0
package/dist/sub/clients.d.ts +79 -0
package/dist/sub/clients.d.ts.map +1 -0
package/dist/sub/clients.js +190 -0
package/dist/sub/clients.js.map +1 -0
package/dist/sub/documents.d.ts +113 -0
package/dist/sub/documents.d.ts.map +1 -0
package/dist/sub/documents.js +329 -0
package/dist/sub/documents.js.map +1 -0
package/dist/sub/evals.d.ts +71 -0
package/dist/sub/evals.d.ts.map +1 -0
package/dist/sub/evals.js +86 -0
package/dist/sub/evals.js.map +1 -0
package/dist/sub/events.d.ts +65 -0
package/dist/sub/events.d.ts.map +1 -0
package/dist/sub/events.js +154 -0
package/dist/sub/events.js.map +1 -0
package/dist/sub/executions.d.ts +255 -0
package/dist/sub/executions.d.ts.map +1 -0
package/dist/sub/executions.js +322 -0
package/dist/sub/executions.js.map +1 -0
package/dist/sub/mcp.d.ts +51 -0
package/dist/sub/mcp.d.ts.map +1 -0
package/dist/sub/mcp.js +42 -0
package/dist/sub/mcp.js.map +1 -0
package/dist/sub/projects.d.ts +73 -0
package/dist/sub/projects.d.ts.map +1 -0
package/dist/sub/projects.js +101 -0
package/dist/sub/projects.js.map +1 -0
package/dist/sub/scripts.d.ts +58 -0
package/dist/sub/scripts.d.ts.map +1 -0
package/dist/sub/scripts.js +82 -0
package/dist/sub/scripts.js.map +1 -0
package/dist/sub/tokens.d.ts +126 -0
package/dist/sub/tokens.d.ts.map +1 -0
package/dist/sub/tokens.js +105 -0
package/dist/sub/tokens.js.map +1 -0
package/dist/sub/versions.d.ts +29 -0
package/dist/sub/versions.d.ts.map +1 -0
package/dist/sub/versions.js +52 -0
package/dist/sub/versions.js.map +1 -0
package/dist/tokenSafety.d.ts +15 -0
package/dist/tokenSafety.d.ts.map +1 -0
package/dist/tokenSafety.js +24 -0
package/dist/tokenSafety.js.map +1 -0
package/dist/types.d.ts +1147 -0
package/dist/types.d.ts.map +1 -0
package/dist/types.js +132 -0
package/dist/types.js.map +1 -0
package/dist/workflowEvents.d.ts +297 -0
package/dist/workflowEvents.d.ts.map +1 -0
package/dist/workflowEvents.js +612 -0
package/dist/workflowEvents.js.map +1 -0
package/package.json +57 -0

package/dist/types.d.ts ADDED Viewed

@@ -0,0 +1,1147 @@
+export type EngineEvent = { // generic event envelope; refine it with the `is*` type guards declared in this file
+    type: string; // event discriminant, e.g. 'ToolCallStart' or 'LoopEnd'
+    payload: unknown; // shape depends on `type`; narrow before reading
+};
+export type RegistryEvent = { // discriminated union keyed on `type`: project and script lifecycle changes
+    type: 'ProjectCreated';
+    payload: {
+        project_id: number;
+        project: Project;
+    };
+} | {
+    type: 'ProjectUpdated';
+    payload: {
+        project_id: number;
+        project: Project;
+    };
+} | {
+    type: 'ProjectDeleted';
+    payload: number; // a bare number, not an object; presumably the deleted project's id (TODO confirm)
+} | {
+    type: 'ScriptCreated';
+    payload: {
+        project_id: number;
+        script: Script;
+    };
+} | {
+    type: 'ScriptUpdated';
+    payload: { // carries identifiers only, not a full `Script` object
+        project_id: number;
+        script_name: string;
+        version_id: number;
+        channel: string | null;
+    };
+} | {
+    type: 'ScriptDeleted';
+    payload: {
+        project_id: number;
+        script_name: string;
+    };
+};
+export type EvalEvent = { // discriminated union keyed on `type`: start, progress and finish of an eval run
+    type: 'RunStarted';
+    payload: {
+        project_id: number;
+        script_name: string;
+        run: EvalRun;
+    };
+} | {
+    type: 'RunProgress';
+    payload: { // counters only; the full `EvalRun` object is carried by RunStarted / RunFinished
+        project_id: number;
+        script_name: string;
+        run_id: number;
+        completed_cases: number;
+        total_cases: number | null;
+        average_score: number | null;
+        latest_result: EvalCaseReport | null;
+    };
+} | {
+    type: 'RunFinished';
+    payload: {
+        project_id: number;
+        script_name: string;
+        run: EvalRun;
+    };
+};
+export type HubEvent = { // top-level envelope: wraps an execution, registry or eval event under a `type` tag
+    type: 'Execution';
+    payload: {
+        project_id: number;
+        script_name: string;
+        execution_id: string;
+        event: EngineEvent;
+        /** Monotonic per-execution sequence number. Starts at 1; matches the
+         *  `execution_events.id` ordering for the same execution. Optional
+         *  for back-compat with older servers that don't yet stamp it. */
+        seq?: number;
+        /** Server-side RFC3339 timestamp with ms precision (e.g.
+         *  `"2026-05-07T10:23:42.187Z"`). Optional for back-compat with
+         *  older servers that don't yet stamp it. */
+        at?: string;
+    };
+} | {
+    type: 'Registry';
+    payload: RegistryEvent;
+} | {
+    type: 'Eval';
+    payload: EvalEvent;
+};
+export type Project = { // project record; embedded in the ProjectCreated / ProjectUpdated registry events
+    id: number;
+    name: string;
+    sort_order: number; // presumably the display position among sibling projects (TODO confirm)
+    created_at: string; // timestamp serialized as a string; format is not specified in this file
+};
+export type Script = { // script record; embedded in the ScriptCreated registry event
+    id: number;
+    project_id: number; // presumably the owning `Project.id` (TODO confirm)
+    name: string;
+    sort_order: number; // presumably the display position among sibling scripts (TODO confirm)
+    created_at: string; // timestamp serialized as a string; format is not specified in this file
+};
+export type ClientInterest = { // request-side shape; compare `RegisteredInterest`, where `channel` is always set
+    script_name: string;
+    inputs: Record<string, string>; // string-to-string map; presumably input name -> declared type name (TODO confirm)
+    channel?: string;
+    lifetime?: 'session' | 'perma';
+    strict?: boolean;
+};
+export type RegisteredInterest = { // one entry of `RegisterClientResponse.interests`
+    script_name: string;
+    channel: string;
+    bound_version_id: number | null;
+    input_schema: [string, string][]; // list of string 2-tuples; presumably [input name, type name] (TODO confirm)
+};
+export type RegisterClientResponse = { // presumably the response body of the client-registration call (TODO confirm)
+    interests: RegisteredInterest[];
+};
+export type SchemaMismatch = { // carried as `mismatch` by `ContractWarning` and `BreakingInterest`
+    missing: [string, string][]; // string 2-tuples; presumably [name, expected type] (TODO confirm)
+    wrong_type: [string, string, string][]; // string 3-tuples; presumably [name, expected, actual] (TODO confirm order)
+    extra: string[]; // presumably names of inputs that are not declared (TODO confirm)
+};
+export type ContractLockInfo = {
+    id: number;
+    client_id: string;
+    client_name: string;
+    script_name: string;
+    channel: string;
+    bound_version_id: number | null;
+    lifetime: string; // plain string here, whereas `ClientInterest.lifetime` is restricted to 'session' | 'perma'
+    drifted: boolean;
+    created_by: string | null;
+    created_at: string;
+    input_schema: string; // a single string here, unlike the tuple list on `RegisteredInterest`; presumably serialized (TODO confirm)
+};
+export type ContractWarning = { // same fields as `BreakingInterest` except that it has no `lifetime`
+    client_id: string;
+    client_name: string;
+    channel: string;
+    mismatch: SchemaMismatch;
+};
+/** Structural representation of a declared Akribes type. Nested `inner` handles
+ *  parameterized types like `list[str]` or `list[Profile]`. `choices` is
+ *  populated for `choice[...]` types. Both optional fields may be either
+ *  absent or `null`, so check for both before reading them. */
+export type TypeRef = {
+    name: string;
+    inner?: TypeRef | null; // recursive, so nesting depth is unbounded at the type level
+    choices?: string[] | null;
+};
+/** Token consumption breakdown for a single task execution, mirroring the
+ *  Rust `TokenUsage` struct in `akribes-core`. */
+export type TokenUsage = {
+    input_tokens: number;
+    output_tokens: number;
+    model: string;
+    provider: string;
+    cached_input_tokens: number;
+    /** Cache-creation (write) tokens. Anthropic-only today: `cache_creation_input_tokens`
+     *  from the Messages API usage block, billed by the server at 1.25x base
+     *  input (5-minute TTL). OpenAI and Gemini always emit 0. Older servers
+     *  that predate this field emit it as 0 via serde default. */
+    cache_write_input_tokens: number;
+};
+/** Known discriminants of `TaskEndPayload.variant` (issue #206). A future
+ *  engine may add more (e.g. `"partial"` for #205); consumers MUST tolerate
+ *  other string values via a catch-all.
+ */
+export type KnownTaskEndVariant = 'success' | 'unable' | 'failed';
+/** Wire shape of `TaskEndPayload.variant` — one of the {@link KnownTaskEndVariant}s
+ *  today, or any other `snake_case` string from a newer server. Consumers
+ *  should narrow on the known set and fall through for unknowns so the stream
+ *  keeps flowing across SDK upgrades. Mirrors `akribes_core::event::TaskEndVariant`
+ *  which uses `#[serde(other)]` for the same forward-compat contract.
+ *  The `string & {}` member accepts every string while stopping the compiler
+ *  from collapsing the union to plain `string`, so editors still suggest the
+ *  known literals. */
+export type TaskEndVariant = KnownTaskEndVariant | (string & {});
+/** Payload of a `TaskEnd` engine event. */
+export type TaskEndPayload = {
+    task: string;
+    on_error_label: string | null;
+    value: unknown; // untyped at the SDK level; see `value_type` for its declared shape
+    value_type: TypeRef | null;
+    /** serde-serialized `std::time::Duration` */
+    duration: { // total elapsed time = secs + nanos / 1e9 seconds
+        secs: number;
+        nanos: number;
+    };
+    /** 1-indexed attempt number (u8) */
+    attempt: number;
+    usage: TokenUsage | null;
+    /** How the task finished (issue #206). `"success"` is the wire default
+     *  emitted by the server; absence on the wire indicates a pre-#206
+     *  server and should be treated as `"success"`. */
+    variant?: TaskEndVariant;
+};
+/**
+ * Studio-facing projection of an `input ... by <ref>(args)` clause (Track C).
+ *
+ * `display` is the source-form workflow ref (e.g. `epopy/fetch@production`).
+ * `explicit_args` lists the names of arguments the resolver clause binds
+ * literally — Studio uses this to know which slots are deterministic on
+ * the parent side vs. propagated implicitly from same-named parent inputs.
+ */
+export type InputResolver = {
+    display: string;
+    explicit_args: string[];
+};
+/** A single `input <name>: <ty>` declaration as emitted by the draft endpoint. */
+export type InputDecl = {
+    name: string;
+    ty: TypeRef;
+    /** Optional `##` doc-comment attached to the input. */
+    docs?: string | null;
+    /** Track C: optional fallback resolver attached via `input X: T by ref(args)`. */
+    resolver?: InputResolver | null;
+};
+/** A single field on a `type` declaration. */
+export type TypeField = {
+    name: string;
+    ty: TypeRef;
+    /** Optional `##` doc-comment attached to the field. */
+    docs?: string | null;
+};
+export type PutDraftResponse = { // same `inputs` / `type_defs` as `DraftResponse`, with warnings in place of `source`
+    schema_warnings: ContractWarning[];
+    inputs: InputDecl[];
+    type_defs: Record<string, TypeField[]>; // type name -> the fields of that `type` declaration
+};
+export type BreakingInterest = { // one entry of `DryRunResult.breaking_interests`
+    client_id: string;
+    client_name: string;
+    channel: string;
+    lifetime: string; // plain string here, whereas `ClientInterest.lifetime` is restricted to 'session' | 'perma'
+    mismatch: SchemaMismatch;
+};
+export type DryRunResult = {
+    dry_run: true; // literal `true`, usable as a discriminant against non-dry-run responses
+    would_break: number; // presumably equals `breaking_interests.length` (TODO confirm)
+    breaking_interests: BreakingInterest[];
+};
+export type ScriptVersion = { // base shape that `ScriptVersionResponse` extends with `inputs`
+    id: number;
+    script_id: number; // presumably the owning `Script.id` (TODO confirm)
+    source: string;
+    label: string | null;
+    published_by: string | null;
+    created_at: string;
+};
+export type ScriptVersionResponse = ScriptVersion & { // every `ScriptVersion` field plus `inputs`
+    inputs: [string, string][]; // string 2-tuples; presumably [input name, type name] (TODO confirm)
+};
+export type ScriptChannel = {
+    id: number;
+    script_id: number;
+    name: string;
+    version_id: number | null; // presumably null while no version is bound to the channel (TODO confirm)
+    updated_at: string | null;
+};
+export type RunResult = {
+    execution_id: string;
+    /** Event-log watermark a subscriber should pass as `last_event_id` on
+     *  the FIRST `events.subscribe(...)` call after this run. Always `0`
+     *  on a fresh spawn — the server's catchup path then replays every
+     *  buffered event with `id > 0` so no event is dropped between the
+     *  spawn response and the SSE/WS attach (#807). Optional for
+     *  back-compat with pre-0.21.13 servers; treat `undefined` as `0`. */
+    since_id?: number;
+};
+export type ErrorKind = 'RateLimit' | 'AuthError' | 'TokenLimit' | 'ServerError' | 'ServerError500' | 'BadGateway502' | 'ServiceUnavailable503' | 'GatewayTimeout504' | 'NetworkError' | 'ParseError' | 'Cancelled' | 'ScriptError'; // closed union, unlike `TaskEndVariant`: a value outside this list is not representable
+/** A document reference returned when S3 persistence is active. */
+export type DocumentRef = {
+    document_id: string;
+    filename: string;
+};
+export type ExecutionStatus = { // full execution record; `ExecutionOutput` is the four-field subset of it
+    id: string;
+    project_id: number;
+    script_name: string;
+    status: 'running' | 'completed' | 'failed' | 'cancelled'; // spelled 'cancelled' here; `EvalRun.status` and `BenchStatus` use 'canceled'
+    started_at: string | null;
+    finished_at: string | null;
+    version_id: number | null;
+    channel: string | null;
+    error: string | null;
+    error_kind: ErrorKind | null;
+    result: unknown; // untyped; see `result_type` / `type_defs` below for its declared shape when available
+    /** When S3 is enabled: `{ inputName: DocumentRef }`. Without S3: `{ inputName: markdownString }`. */
+    documents: Record<string, string | DocumentRef> | null;
+    triggered_by: string | null;
+    input_tokens: number;
+    output_tokens: number;
+    /** Tokens consumed by tool-response payloads (task 39b). */
+    tool_tokens?: number;
+    cost_usd: number | null;
+    /** Declared record types from the source the execution ran against,
+     *  keyed by `type Name:` identifier. Lets clients render results back to
+     *  their declared shape (named records, typed columns) instead of
+     *  falling through to JSON shape inference. Empty object when the
+     *  source couldn't be parsed; `undefined` from older servers. */
+    type_defs?: Record<string, TypeField[]>;
+    /** Workflow's declared return `TypeRef`, when statically resolvable from
+     *  the source. Populated when the workflow ends in
+     *  `return <task>(...)` or `return <flow>(...)` and the callee's
+     *  signature is local. Lets the renderer dispatch straight into the
+     *  typed path (e.g. `list[Patent]` → typed `RecordTable`) instead of
+     *  inferring from `value`. `null` / `undefined` for older servers,
+     *  unparseable source, or workflows whose final expression isn't a
+     *  resolvable call. */
+    result_type?: TypeRef | null;
+    /** ID of the parent execution that spawned this one via `spawn_child_execution`.
+     *  Null for top-level executions. Forward-looking for v1 (typically null until
+     *  a host wires the spawn callback). */
+    parent_execution_id?: string | null;
+    /** The node ID within the parent execution at which this child was spawned.
+     *  Null when `parent_execution_id` is null or when the node id is unavailable. */
+    parent_node_id?: string | null;
+};
+export type ExecutionOutput = { // the `status`, `error`, `error_kind` and `result` fields of `ExecutionStatus`
+    status: 'running' | 'completed' | 'failed' | 'cancelled'; // same literal set as `ExecutionStatus.status`
+    error: string | null;
+    error_kind: ErrorKind | null;
+    result: unknown;
+};
+export type CostByVersion = { // one entry of `ScriptCost.by_version`
+    version_id: number | null;
+    executions: number;
+    total_cost_usd: number;
+    avg_cost_usd: number;
+    unknown_cost_executions: number; // defined on `ScriptCost.unknown_cost_executions`
+};
+export type CostByChannel = { // one entry of `ScriptCost.by_channel` / `ProjectCost.by_channel`
+    /** `"unknown"` when the execution row's channel column was NULL. */
+    channel: string;
+    executions: number;
+    total_cost_usd: number;
+    avg_cost_usd: number;
+    unknown_cost_executions: number; // defined on `ScriptCost.unknown_cost_executions`
+};
+export type CostByScript = { // one entry of `ProjectCost.by_script`
+    script_name: string;
+    executions: number;
+    total_cost_usd: number;
+    avg_cost_usd: number;
+    unknown_cost_executions: number; // defined on `ScriptCost.unknown_cost_executions`
+};
+export type ScriptCost = { // cost totals with per-version and per-channel breakdowns
+    total_executions: number;
+    total_cost_usd: number;
+    avg_cost_usd: number;
+    total_input_tokens: number;
+    total_output_tokens: number;
+    /** Executions whose model wasn't in the server's pricing table — their tokens
+     *  are still counted but they contribute `0` to cost totals. */
+    unknown_cost_executions: number;
+    by_version: CostByVersion[];
+    by_channel: CostByChannel[];
+};
+export type ProjectCost = { // same totals as `ScriptCost`, plus `project_id`, broken down by script rather than by version
+    project_id: number;
+    total_executions: number;
+    total_cost_usd: number;
+    avg_cost_usd: number;
+    total_input_tokens: number;
+    total_output_tokens: number;
+    unknown_cost_executions: number; // defined on `ScriptCost.unknown_cost_executions`
+    by_script: CostByScript[];
+    by_channel: CostByChannel[];
+};
+export type ExecutionEvents = { // one page of an execution's event log
+    execution_id: string;
+    status: string; // plain string here, not the literal union used by `ExecutionStatus.status`
+    complete: boolean;
+    events: EngineEvent[];
+    next_after_id: number | null; // presumably the cursor for the next page, null when there is none (TODO confirm)
+    has_more: boolean;
+};
+export type TokenInfo = { // token metadata; has no field for the token value itself (see `MintTokenResponse.token`)
+    id: string;
+    label: string;
+    user_email: string | null;
+    scopes: {
+        projects: '*' | number[]; // either the literal '*' (presumably "all projects" — TODO confirm) or a list of numbers
+        role: 'admin' | 'editor' | 'viewer';
+        scripts?: string[];
+        executions?: string[];
+        can_mint: boolean;
+    };
+    minted_by: string;
+    expires_at: string;
+    revoked: boolean;
+    created_at: string;
+    last_used_at: string | null;
+};
+export type MintTokenResponse = {
+    token: string; // the token value; `TokenInfo` exposes no equivalent field
+    token_id: string; // presumably matches `TokenInfo.id` (TODO confirm)
+    expires_at: string;
+};
+export type ClientInfo = {
+    id: string;
+    name: string;
+    last_seen: string; // timestamp serialized as a string; format is not specified in this file
+    scripts: string[]; // presumably script names the client registered interest in (TODO confirm)
+};
+export type DraftResponse = { // same `inputs` / `type_defs` as `PutDraftResponse`, with `source` in place of warnings
+    source: string;
+    inputs: InputDecl[];
+    type_defs: Record<string, TypeField[]>; // type name -> the fields of that `type` declaration
+};
+/** Execution DAG graph returned by the /graph endpoint: a flat node list
+ *  plus a flat list of directed `from` → `to` edges. */
+export type ScriptGraph = {
+    nodes: ScriptGraphNode[];
+    edges: ScriptGraphEdge[];
+};
+export type ScriptGraphNode = { // one entry of `ScriptGraph.nodes`
+    id: number;
+    op_type: string;
+    op_name: string | null;
+    target_var: string | null;
+    reads: string[]; // presumably names of the variables this node reads (TODO confirm)
+    line: number; // presumably the source position of the node; 0- vs 1-based is not stated here (TODO confirm)
+    col: number;
+};
+export type ScriptGraphEdge = { // one entry of `ScriptGraph.edges`
+    from: number; // presumably a `ScriptGraphNode.id` (TODO confirm)
+    to: number; // presumably a `ScriptGraphNode.id` (TODO confirm)
+};
+/** S3 document reference via pre-signed URL. */
+export type S3PresignedRef = {
+    presigned_url: string;
+};
+/** S3 document reference via temporary credentials. */
+export type S3CredentialsRef = {
+    bucket: string;
+    key: string;
+    region?: string;
+    access_key_id: string;
+    secret_access_key: string;
+    session_token?: string;
+};
+/** S3 document reference — either a pre-signed URL or bucket/key with temp credentials. */
+export type S3DocumentRef = S3PresignedRef | S3CredentialsRef;
+/** Response from the /convert endpoint. */
+export type ConvertResult = {
+    markdown: string;
+    /** Present when the server has S3 persistence enabled. Pass this back as a
+     * document input on subsequent runs to skip re-upload + reconversion. */
+    document_id?: string;
+    filename?: string;
+};
+export type EvalSuite = {
+    id: number;
+    script_id: number;
+    name: string;
+    runner_url: string;
+    config: Record<string, unknown>; // free-form object; values must be narrowed before use
+    auto_run_channels: string[]; // presumably channels on which the suite runs automatically (TODO confirm)
+    created_at: string;
+};
+export type EvalRun = { // carried in full by the RunStarted / RunFinished members of `EvalEvent`
+    id: number;
+    suite_id: number;
+    script_id: number;
+    version_id: number | null;
+    channel: string | null;
+    source_hash: string;
+    status: 'pending' | 'running' | 'completed' | 'failed' | 'canceled'; // same literals as `BenchStatus`; 'canceled' has one "l", unlike `ExecutionStatus.status`
+    total_cases: number | null;
+    completed_cases: number;
+    average_score: number | null;
+    runner_run_id: string | null;
+    detail_url: string | null;
+    triggered_by: string | null;
+    started_at: string;
+    finished_at: string | null;
+    error: string | null;
+};
+export type EvalResult = { // `EvalCaseReport` plus the `id`, `run_id` and `created_at` fields
+    id: number;
+    run_id: number;
+    case_id: string;
+    score: number | null;
+    status: string;
+    metadata: Record<string, unknown> | null;
+    execution_id: string | null;
+    created_at: string;
+};
+export type EvalCaseReport = { // delivered as `latest_result` on the RunProgress member of `EvalEvent`
+    case_id: string;
+    score: number | null;
+    status: string;
+    metadata: Record<string, unknown> | null;
+    execution_id: string | null;
+};
+export type EvalSuiteSummary = {
+    suite_id: number;
+    script_id: number;
+    script_name: string;
+    suite_name: string;
+    latest_run_id: number | null; // the four nullable fields are presumably null until the suite has runs (TODO confirm)
+    latest_run_at: string | null;
+    latest_avg_score: number | null;
+    prior_avg_score: number | null;
+};
+/** Wire status of a bench run. */
+export type BenchStatus = 'pending' | 'running' | 'completed' | 'failed' | 'canceled';
+/** Wire status of a single per-case result row. */
+export type BenchResultStatus = 'ok' | 'workflow_failed' | 'judge_failed' | 'skipped' | 'cached';
+/** Per-case compare flag emitted by `GET /bench-runs/{a}/compare/{b}`. */
+export type CompareFlag = 'improved' | 'regressed' | 'unchanged' | 'missing_a' | 'missing_b';
+/** A single typed value flowing through a bench case (input value, expected
+ *  output, ground truth, judge score, workflow output). Shape is determined
+ *  dynamically by the corresponding `TypeRef` from the script signature;
+ *  consumers narrow via the schema. Matches Studio's `AkribesValue`. */
+export type AkribesValue = unknown;
+/** Per-input typed value bag — keys match the script's declared inputs (or
+ *  outputs), values follow each field's `TypeRef`. Opaque at the SDK level
+ *  because keys are dynamic per script; the server validates the payload
+ *  against the script's signature on every write path. */
+export type AkribesValueBag = Record<string, unknown>;
+/** Free-form bench-runtime knobs. Not script IO — this is for the
+ *  coordinator's own configuration (e.g. `concurrency`, `retry_policy`).
+ *  The server tolerates extra keys for forward compat. */
+export type BenchConfig = {
+    /** Max parallel case executions; defaults to 10 server-side. */
+    concurrency?: number;
+    [extra: string]: unknown;
+};
+/** Per-script bench configuration. One row per `scripts.id`.
+ *  `judge_script_id` is nullable while the bench is still being authored. */
+export type Bench = {
+    id: number;
+    script_id: number;
+    judge_script_id: number | null;
+    judge_channel: string;
+    config: BenchConfig;
+    created_at: string;
+    updated_at: string;
+};
+/** Aggregated per-bench summary backing the project-level evals landing
+ *  page. Returned by `GET /projects/{id}/benches`. */
+export type ProjectBenchSummary = {
+    bench_id: number;
+    script_id: number;
+    script_name: string;
+    judge_script_id: number | null;
+    judge_script_name: string | null;
+    judge_channel: string;
+    case_count: number;
+    latest_run_id: number | null;
+    latest_run_status: BenchStatus | null;
+    latest_run_channel: string | null;
+    latest_run_workflow_version_id: number | null;
+    latest_run_at: string | null;
+    latest_run_mean_score: number | null;
+    latest_run_cost_usd: number | null;
+    updated_at: string;
+};
+/** A single bench-run row. `workflow_version_id` / `judge_version_id` are
+ *  resolved at trigger time so a later channel publish doesn't change what
+ *  this run represents. */
+export type BenchRun = {
+    id: number;
+    bench_id: number;
+    channel: string;
+    workflow_version_id: number;
+    judge_version_id: number;
+    status: BenchStatus;
+    triggered_by: string | null;
+    triggered_at: string;
+    completed_at: string | null;
+    total_cost_usd: number;
+    total_cases: number;
+    cache_hit_cases: number;
+    notes: string | null;
+    mcp_session_id?: string | null;
+    /** Subset of case IDs this run targets. `null` / absent = every case in
+     *  the bench. */
+    case_filter?: string[] | null;
+    /** Mean headline_score across cases with `status='ok'|'cached'`. Populated
+     *  by the list-runs aggregate query; bare GET-run leaves it absent. */
+    mean_headline_score?: number | null;
+    /** Count of results with `status='ok'|'cached'`. Paired with
+     *  `mean_headline_score`. */
+    ok_cases?: number | null;
+    /** Per-`BenchResultStatus` row count for this run. Populated alongside
+     *  `mean_headline_score` / `ok_cases` by the list-runs and get-run
+     *  aggregate queries (#753). Statuses with zero rows may be absent
+     *  rather than serialised as `0`. Use the headline `ok_cases` for the
+     *  ok+cached total — the breakdown lets the rail split the rest into
+     *  `workflow_failed` / `judge_failed` / `skipped`. */
+    status_breakdown?: Partial<Record<BenchResultStatus, number>>;
+    /** Pre-flight warnings populated by the trigger endpoint only — e.g.
+     *  "OPENAI_API_KEY missing; N cases will likely fail". Empty / absent on
+     *  every other read path. */
+    warnings?: string[];
+    /** Name of the judge script whose version produced this run, joined in by
+     *  `get_run` and `list_runs` so consumers can deep-link to the judge's
+     *  source at `judge_version_id` without an N+1 lookup. Absent on
+     *  coordinator-inserted rows and on benches with no judge wired up. */
+    judge_script_name?: string | null;
+};
+/** One per-case score row for a bench run. Carries the workflow execution's
+ *  typed output alongside the judge's score blob so the studio's typed
+ *  renderers don't need a second fetch. */
+export type BenchResult = {
+    id: number;
+    bench_run_id: number;
+    case_id: string;
+    workflow_execution_id: string | null;
+    judge_execution_id: string | null;
+    /** Full judge output — shape is dictated by the judge's declared output
+     *  `TypeRef`. */
+    score: AkribesValue | null;
+    /** Workflow execution's actual output value, joined in on the read path
+     *  from `executions.result`. `null` when the workflow failed, was
+     *  canceled, or this row is a pure cache-hit. */
+    workflow_output: AkribesValue | null;
+    headline_score: number | null;
+    status: BenchResultStatus;
+    cost_usd: number;
+    duration_ms: number | null;
+    cache_hit: boolean;
+    input_hash?: string | null;
+    /** Human-readable error captured on `workflow_failed` / `judge_failed`
+     *  rows. `null` on `ok` / `cached`. */
+    error?: string | null;
+    created_at: string;
+};
+/** Server-side projection of an `executions` row with `kind='case'`. */
+export type BenchCase = {
+    /** `executions.id` for the underlying frozen execution row. */
+    id: string;
+    project_id: number;
+    script_name: string;
+    bench_id: number | null;
+    kind: string;
+    frozen: boolean;
+    case_name: string | null;
+    inputs: AkribesValueBag | null;
+    expected_output: AkribesValue | null;
+    ground_truth: AkribesValue | null;
+    /** SHA-256 hex of `canonical_json(inputs)`. Nullable on legacy rows. */
+    input_hash?: string | null;
+    created_at: string;
+};
+/** One per-case row of a bench-run comparison (`CompareReport.per_case`):
+ *  the headline score of the same case in run A and in run B, their delta,
+ *  and a classification flag. Scores and delta are `null`-able. */
+export type CompareCase = {
+    case_id: string;
+    case_label: string;
+    score_a: number | null;
+    score_b: number | null;
+    delta: number | null;
+    /** One of {@link CompareFlag} or any future server-emitted string.
+     *  Written as `(string & {})` — the same idiom `TaskEndVariant` uses —
+     *  so the union is not collapsed to plain `string` and editors keep
+     *  suggesting the known flags. Any string value remains assignable. */
+    flag: CompareFlag | (string & {});
+};
+export type CompareAggregate = { // run-level totals carried as `CompareReport.aggregate`
+    mean_score_delta: number;
+    cost_delta_usd: number;
+    n_regressed: number; // the three counters presumably tally `CompareCase.flag` values (TODO confirm)
+    n_improved: number;
+    n_unchanged: number;
+};
+export type CompareReport = { // comparison of two bench runs: run-level aggregate plus one row per case
+    run_a_id: number;
+    run_b_id: number;
+    aggregate: CompareAggregate;
+    per_case: CompareCase[];
+};
+export type DriftedCase = { // one entry of `DriftReport.drifted`
+    case_id: string;
+    label: string;
+    what_broke: string; // presumably a human-readable description of the drift (TODO confirm)
+};
+export type DriftReport = {
+    drifted: DriftedCase[];
+    /** `null` when the script has never been published. */
+    script_version_id: number | null;
+    published_at: string | null;
+    published_by: string | null;
+    /** Single-line summary suitable for inline display. Empty when no drift. */
+    summary: string;
+};
+/** Receipt returned by `PATCH /bench-runs/{id}/tag-session`. */
+export type BenchRunTagSessionResponse = {
+    tagged: boolean;
+    run_id: number;
+    mcp_session_id: string;
+};
+/** Page of bench-run events emitted by the JSON form of
+ *  `GET /bench-runs/{id}/events`. The MCP layer polls this for incremental
+ *  updates; live UIs use the SSE form (same path). */
+export type BenchRunEventsPage = {
+    events: unknown[];
+    complete?: boolean;
+};
+export type CreateOrUpdateBenchRequest = { // every field optional; names and types match the same-named fields of `Bench`
+    judge_script_id?: number | null;
+    judge_channel?: string;
+    config?: BenchConfig;
+};
+export type CreateBenchCaseRequest = {
+    /** Per-input typed value bag. Keys match the script's declared inputs;
+     *  each value's shape follows the input's `TypeRef`. */
+    inputs: AkribesValueBag;
+    /** Optional expected output. Shape follows the script's declared output
+     *  `TypeRef`. When omitted, the judge must work off `ground_truth`. */
+    expected_output?: AkribesValue;
+    /** Free-form judge ground-truth payload (no contract). */
+    ground_truth?: AkribesValue;
+    name?: string;
+};
+export type PatchBenchCaseRequest = { // same fields as `CreateBenchCaseRequest`, except `inputs` is optional here too
+    inputs?: AkribesValueBag;
+    expected_output?: AkribesValue;
+    ground_truth?: AkribesValue;
+    name?: string;
+};
+export type PromoteCaseEdits = { // the `edits` member of `PromoteExecutionRequest`; all fields optional
+    inputs?: AkribesValueBag;
+    expected_output?: AkribesValue;
+    ground_truth?: AkribesValue;
+};
+export type PromoteExecutionRequest = {
+    /** Override any of the source execution's inputs / outputs before
+     *  freezing into a case. All shape-typed against the script's signature
+     *  on the server side. */
+    edits?: PromoteCaseEdits;
+    name?: string;
+};
+export type TriggerBenchRunRequest = {
+    channel: string;
+    notes?: string;
+    /** Subset of case IDs. Empty / omitted = run every case. */
+    case_ids?: string[];
+};
+/** A single field on a script's declared signature. `ty` is the SDK's
+ *  structural `TypeRef`, matching what live `EngineEvent::TaskEnd` values
+ *  carry on `value_type`. */
+export type BenchSignatureField = {
+    path: string;
+    ty: TypeRef;
+    required: boolean;
+    annotations: string[];
+};
+/** Parsed script signature — used to render type-aware form fields.
+ *  Returned by `GET /projects/{id}/scripts/{name}/signature`. */
+export type ScriptSignature = {
+    inputs: BenchSignatureField[];
+    outputs: BenchSignatureField[];
+    /** Named record types declared in the script source, keyed by
+     *  `type Name:` identifier. */
+    type_defs: Record<string, TypeField[]>;
+};
+/** Workflow + judge signature pair plus the structured `breaks` list.
+ *  Returned by `GET /projects/{id}/scripts/{name}/bench/contract-preview`. */
+export type ContractPreview = {
+    workflow: {
+        fields: BenchSignatureField[];
+    };
+    judge: {
+        fields: BenchSignatureField[];
+    };
+    breaks: string[];
+};
+export type ToolCallStartEvent = { // payload type that `isToolCallStart` narrows to
+    task_name: string;
+    tool_name: string;
+    server_name: string;
+    input: unknown;
+    tool_use_id?: string; // presumably pairs this event with its `ToolCallEndEvent` (TODO confirm)
+};
+export type ToolCallEndEvent = { // payload type that `isToolCallEnd` narrows to; has no `server_name`, unlike the start event
+    task_name: string;
+    tool_name: string;
+    output: unknown;
+    duration_ms: number;
+    error?: string; // presumably set only when the tool call failed (TODO confirm)
+    tool_use_id?: string; // presumably pairs this event with its `ToolCallStartEvent` (TODO confirm)
+};
+export type McpServerDegradedEvent = { // payload type that `isMcpServerDegraded` narrows to
+    alias: string;
+    reason: string;
+};
+export type McpServerRecoveredEvent = { // payload type that `isMcpServerRecovered` narrows to
+    alias: string;
+};
+export type McpServerSummary = {
+    alias: string;
+    url: string;
+    origin: 'env' | 'script' | 'db'; // presumably where the server definition was loaded from (TODO confirm)
+    is_registry: boolean;
+    status: 'connected' | 'degraded' | 'offline' | 'pinned_offline';
+    tool_count: number;
+};
+export type McpToolSummary = {
+    qualified_name: string;
+    server_alias: string; // presumably matches `McpServerSummary.alias` (TODO confirm)
+    description?: string;
+    input_schema: unknown; // untyped; presumably a JSON-Schema-like object (TODO confirm)
+};
+/** Typed narrower: returns true if an `EngineEvent` is a `ToolCallStart` event. */
+export declare function isToolCallStart(event: EngineEvent): event is {
+    type: 'ToolCallStart';
+    payload: ToolCallStartEvent;
+};
+/** Typed narrower: returns true if an `EngineEvent` is a `ToolCallEnd` event. */
+export declare function isToolCallEnd(event: EngineEvent): event is {
+    type: 'ToolCallEnd';
+    payload: ToolCallEndEvent;
+};
+/** Typed narrower: returns true if an `EngineEvent` is an `McpServerDegraded` event.
+ *  A true result narrows `event` to `type: 'McpServerDegraded'` with a
+ *  {@link McpServerDegradedEvent} payload. Declaration only: the runtime
+ *  check itself is not part of this file. */
+export declare function isMcpServerDegraded(event: EngineEvent): event is {
+    type: 'McpServerDegraded';
+    payload: McpServerDegradedEvent;
+};
+/** Typed narrower: returns true if an `EngineEvent` is an `McpServerRecovered` event.
+ *  A true result narrows `event` to `type: 'McpServerRecovered'` with a
+ *  {@link McpServerRecoveredEvent} payload. Declaration only: the runtime
+ *  check itself is not part of this file. */
+export declare function isMcpServerRecovered(event: EngineEvent): event is {
+    type: 'McpServerRecovered';
+    payload: McpServerRecoveredEvent;
+};
+/** Emitted exactly once when a `loop NAME(...) -> Ret` call begins.
+ *  `max_turns` is the resolved upper-bound turn budget (declared
+ *  `max_turns:` if present, else the engine's default).
+ *
+ *  This is the payload that {@link isLoopStart} narrows to. `name` is
+ *  also present on {@link LoopTurnEvent} and {@link LoopEndEvent}. */
+export type LoopStartEvent = {
+    name: string;
+    max_turns: number;
+};
+/** Emitted after every turn of a `loop` settles. `turn` is 1-indexed.
+ *  `tool_calls` lists the names of the tools the model invoked this turn,
+ *  in dispatch order — including the synthetic `state_get`,
+ *  `state_update`, `return`, and any user `tools:` entries.
+ *
+ *  This is the payload that {@link isLoopTurn} narrows to. Only tool
+ *  names are carried here (`string[]`); no inputs or outputs. */
+export type LoopTurnEvent = {
+    name: string;
+    turn: number;
+    tool_calls: string[];
+};
+/** Emitted exactly once when a `loop` exits. `value` is the agent's
+ *  submitted return value (from `return(...)`), the final state on a
+ *  natural `stop_when:` exit without a return, or a `FatalError`
+ *  envelope when the loop exhausted its `max_turns` budget.
+ *
+ *  Because `value` can take any of those three shapes it is typed
+ *  `unknown`; callers must narrow it themselves. This is the payload
+ *  that {@link isLoopEnd} narrows to. */
+export type LoopEndEvent = {
+    name: string;
+    turn_count: number;
+    value: unknown;
+};
+/** Typed narrower: returns true if an `EngineEvent` is a `LoopStart`.
+ *  A true result narrows `event` to `type: 'LoopStart'` with a
+ *  {@link LoopStartEvent} payload. Declaration only: the runtime check
+ *  itself is not part of this file. */
+export declare function isLoopStart(event: EngineEvent): event is {
+    type: 'LoopStart';
+    payload: LoopStartEvent;
+};
+/** Typed narrower: returns true if an `EngineEvent` is a `LoopTurn`.
+ *  A true result narrows `event` to `type: 'LoopTurn'` with a
+ *  {@link LoopTurnEvent} payload. Declaration only: the runtime check
+ *  itself is not part of this file. */
+export declare function isLoopTurn(event: EngineEvent): event is {
+    type: 'LoopTurn';
+    payload: LoopTurnEvent;
+};
+/** Typed narrower: returns true if an `EngineEvent` is a `LoopEnd`.
+ *  A true result narrows `event` to `type: 'LoopEnd'` with a
+ *  {@link LoopEndEvent} payload. Declaration only: the runtime check
+ *  itself is not part of this file. */
+export declare function isLoopEnd(event: EngineEvent): event is {
+    type: 'LoopEnd';
+    payload: LoopEndEvent;
+};
+/** Emitted once per primitive activation of the compaction chain. Mirrors
+ *  `akribes_core::event::EngineEvent::ContextCompacted` — fired by the
+ *  engine after a compaction step succeeds in shrinking the
+ *  conversation under the configured cap (the payload reports both
+ *  `before_tokens` and `after_tokens`). `provider_native: true` means
+ *  Anthropic / OpenAI performed the compaction server-side; the engine
+ *  surfaces the before/after counts from the response. `strategy` is the
+ *  primitive name (`drop_thinking_blocks`, `drop_oldest_tool_results`,
+ *  `summarize_to_state`, `provider_native`) or the user task name for a
+ *  custom compactor task.
+ *
+ *  See `docs/superpowers/specs/2026-05-12-compaction-design.md`
+ *  ("Observability + cost") for the contract.
+ *
+ *  This is the payload that {@link isContextCompacted} narrows to.
+ */
+export type ContextCompactedEvent = {
+    agent: string;
+    /** UUID of the surrounding `loop` block when compaction fires mid-loop;
+     *  `null` for compaction outside a loop. */
+    loop_id: string | null;
+    /** 1-indexed loop turn the compaction fired before, when applicable. */
+    turn: number | null;
+    /** Configured percent-of-window threshold (0-100), when the
+     *  triggering rule was `at_pct`. */
+    threshold_pct: number | null;
+    /** Configured absolute-token threshold, when the triggering rule was
+     *  `at_tokens`. */
+    threshold_abs: number | null;
+    /** Primitive name or user task name. */
+    strategy: string;
+    before_tokens: number;
+    after_tokens: number;
+    provider_native: boolean;
+    /** Cache TTL applied on the request that produced this compaction.
+     *  `"1h"` on the Anthropic `provider_native` path (akribes-core pins
+     *  `ttl: "1h"` via the `extended-cache-ttl-2025-04-11` beta header),
+     *  `null` for OpenAI native compaction and every non-native primitive.
+     *  Cost dashboards multiply cache-write tokens by the correct provider
+     *  rate via this field — the 5m and 1h tiers price 60% apart
+     *  (issue #1130).
+     *
+     *  Unlike the other nullable fields on this type, this one is also
+     *  optional: consumers must handle it being absent as well as `null`. */
+    cache_ttl?: string | null;
+};
+/** Emitted when the compaction chain runs to exhaustion (or when
+ *  `compaction: none` and the request would still exceed the model's
+ *  context window). Mirrors
+ *  `akribes_core::event::EngineEvent::ContextOverflow`. Carries the chain
+ *  log so users can diagnose which primitives ran before the engine gave
+ *  up. A `ContextCompactionExhausted` `Error` event follows.
+ *
+ *  `attempted_strategies` is presumably that chain log, as a list of
+ *  strategy names — confirm against the engine. This is the payload
+ *  that {@link isContextOverflow} narrows to.
+ */
+export type ContextOverflowEvent = {
+    agent: string;
+    attempted_strategies: string[];
+    configured_cap_tokens: number;
+    model_context_window: number;
+};
+/** Typed narrower: returns true if an `EngineEvent` is a `ContextCompacted`.
+ *  A true result narrows `event` to `type: 'ContextCompacted'` with a
+ *  {@link ContextCompactedEvent} payload. Declaration only: the runtime
+ *  check itself is not part of this file. */
+export declare function isContextCompacted(event: EngineEvent): event is {
+    type: 'ContextCompacted';
+    payload: ContextCompactedEvent;
+};
+/** Typed narrower: returns true if an `EngineEvent` is a `ContextOverflow`.
+ *  A true result narrows `event` to `type: 'ContextOverflow'` with a
+ *  {@link ContextOverflowEvent} payload. Declaration only: the runtime
+ *  check itself is not part of this file. */
+export declare function isContextOverflow(event: EngineEvent): event is {
+    type: 'ContextOverflow';
+    payload: ContextOverflowEvent;
+};
+/** LLM provider response captured for durable replay. Carries the full
+ *  response (text + tool-use blocks + usage) keyed by `(node_id, call_index)`.
+ *  Consumed by the engine's replay cache; UX clients typically ignore it.
+ *  Mirrors `akribes_core::event::EngineEvent::LLMResponse`.
+ *
+ *  Each `tool_calls` entry has a `tool_use_id`, a `name` and untyped
+ *  `args`. `usage` is optional and untyped (`unknown`). This is the
+ *  payload that {@link isLLMResponse} narrows to. */
+export type LLMResponseEvent = {
+    node_id: string;
+    call_index: number;
+    text: string;
+    tool_calls: {
+        tool_use_id: string;
+        name: string;
+        args: unknown;
+    }[];
+    usage?: unknown;
+};
+/** A child execution row was just inserted at the parent's `call(...)` node.
+ *  Mirrors `akribes_core::event::EngineEvent::SubScriptSpawned`.
+ *
+ *  `child_execution_id` and `parent_node_id` also appear on
+ *  {@link SubScriptResultEvent}. `args` is untyped (`unknown`). This is
+ *  the payload that {@link isSubScriptSpawned} narrows to. */
+export type SubScriptSpawnedEvent = {
+    child_execution_id: string;
+    parent_node_id: string;
+    args: unknown;
+};
+/** Child execution finished; the parent observed its terminal state.
+ *  `outcome.kind` is `"Ok"` or `"Err"`. Mirrors
+ *  `akribes_core::event::EngineEvent::SubScriptResult`.
+ *
+ *  `outcome` is a two-member union discriminated on `kind`: for `"Ok"`,
+ *  `detail.value` holds the untyped result; for `"Err"`, `detail` has its
+ *  own `kind` and `message` strings plus an optional `code`. Note that
+ *  `outcome.kind` and the error's `detail.kind` are different fields.
+ *  This is the payload that {@link isSubScriptResult} narrows to. */
+export type SubScriptResultEvent = {
+    parent_node_id: string;
+    child_execution_id: string;
+    outcome: {
+        kind: 'Ok';
+        detail: {
+            value: unknown;
+        };
+    } | {
+        kind: 'Err';
+        detail: {
+            kind: string;
+            message: string;
+            code?: string;
+        };
+    };
+};
+/** A `Suspended` checkpoint resolved — the durable record of a /resume payload.
+ *  Mirrors `akribes_core::event::EngineEvent::CheckpointResolution`.
+ *
+ *  `payload` is untyped (`unknown`). This type is itself the `payload`
+ *  of the event shape that {@link isCheckpointResolution} narrows to, so
+ *  the resume data sits at `event.payload.payload`. */
+export type CheckpointResolutionEvent = {
+    checkpoint_id: string;
+    payload: unknown;
+};
+/** Typed narrower: returns true if an `EngineEvent` is an `LLMResponse`.
+ *  A true result narrows `event` to `type: 'LLMResponse'` with an
+ *  {@link LLMResponseEvent} payload. Declaration only: the runtime check
+ *  itself is not part of this file. */
+export declare function isLLMResponse(event: EngineEvent): event is {
+    type: 'LLMResponse';
+    payload: LLMResponseEvent;
+};
+/** Typed narrower: returns true if an `EngineEvent` is a `SubScriptSpawned`.
+ *  A true result narrows `event` to `type: 'SubScriptSpawned'` with a
+ *  {@link SubScriptSpawnedEvent} payload. Declaration only: the runtime
+ *  check itself is not part of this file. */
+export declare function isSubScriptSpawned(event: EngineEvent): event is {
+    type: 'SubScriptSpawned';
+    payload: SubScriptSpawnedEvent;
+};
+/** Typed narrower: returns true if an `EngineEvent` is a `SubScriptResult`.
+ *  A true result narrows `event` to `type: 'SubScriptResult'` with a
+ *  {@link SubScriptResultEvent} payload. Declaration only: the runtime
+ *  check itself is not part of this file. */
+export declare function isSubScriptResult(event: EngineEvent): event is {
+    type: 'SubScriptResult';
+    payload: SubScriptResultEvent;
+};
+/** Typed narrower: returns true if an `EngineEvent` is a `CheckpointResolution`.
+ *  A true result narrows `event` to `type: 'CheckpointResolution'` with a
+ *  {@link CheckpointResolutionEvent} payload. Declaration only: the
+ *  runtime check itself is not part of this file. */
+export declare function isCheckpointResolution(event: EngineEvent): event is {
+    type: 'CheckpointResolution';
+    payload: CheckpointResolutionEvent;
+};
+/**
+ * Structured "I can't" response from an agent. Mirrors the Rust `Unable`
+ * record (`crates/akribes-core/src/unable.rs`) — see `UNABLE_TYPE_NAME`.
+ *
+ * The canonical wire envelope is `{ "unable": UnableRecord }`. Detect with
+ * {@link isUnableEnvelope}.
+ *
+ * All three fields are required. `category` is typed as an open `string`;
+ * no closed set of categories is declared here. The same record is
+ * carried by the `AgentUnable` variant of {@link SuspendTrigger}.
+ */
+export type UnableRecord = {
+    reason: string;
+    missing: string[];
+    category: string;
+};
+/** Wire-format twin of the Rust `ValidationErrorWire`. The `stage` string is
+ *  `"parse"` | `"schema"` | `"custom:<rule>"` — kept opaque here so SDK
+ *  consumers don't need to round-trip through the internal enum.
+ *
+ *  Used as the element type of `validationErrors` on the
+ *  `ValidationExhausted` variant of {@link SuspendTrigger}. */
+export type ValidationErrorWire = {
+    stage: string;
+    message: string;
+    /** JSON-pointer-like path for schema errors. `null`/absent for parse. */
+    path?: string | null;
+};
+/** Names of the `SuspendTrigger` variants the SDK knows how to normalize.
+ *  These three literals are exactly the `kind` tags of the non-catch-all
+ *  members of {@link SuspendTrigger}. */
+export type KnownSuspendTriggerKind = 'DagPosition' | 'ValidationExhausted' | 'AgentUnable';
+/** Forward-compat catch-all for unknown `SuspendTrigger` kinds. Emitted
+ *  verbatim from the wire (snake_case fields preserved) so a newer server
+ *  never crashes an older SDK. Callers that want to opt in inspect `raw`
+ *  directly — the SDK makes no typed guarantees about its contents.
+ *
+ *  `kind` is typed as plain `string`, so at the type level it is not
+ *  restricted to values outside {@link KnownSuspendTriggerKind}. */
+export type UnknownSuspendTrigger = {
+    kind: string;
+    /** Opaque wire payload with all fields from the server preserved. */
+    raw: Record<string, unknown>;
+};
+/**
+ * Why the engine suspended at a checkpoint. Mirrors the Rust `SuspendTrigger`
+ * (serde-tagged on `"kind"`, `crates/akribes-core/src/event.rs`).
+ *
+ * Callers should narrow on `kind` for the known variants and fall through to
+ * {@link UnknownSuspendTrigger} for future server versions.
+ *
+ * The known variants expose camelCase fields (`taskName`, `retryCount`,
+ * `lastAttempt`, `validationErrors`, `unable`); `DagPosition` carries no
+ * data beyond its tag.
+ *
+ * NOTE(review): `UnknownSuspendTrigger.kind` is typed `string`, which
+ * overlaps the three literal tags, so a bare `kind === '...'` comparison
+ * does not remove the catch-all member from the union. Callers may need an
+ * additional check (e.g. `'raw' in trigger`) before reading variant
+ * fields — confirm against real usage.
+ */
+export type SuspendTrigger = {
+    kind: 'DagPosition';
+} | {
+    kind: 'ValidationExhausted';
+    taskName: string;
+    retryCount: number;
+    lastAttempt: string;
+    validationErrors: ValidationErrorWire[];
+} | {
+    kind: 'AgentUnable';
+    taskName: string;
+    unable: UnableRecord;
+} | UnknownSuspendTrigger;
+/** Return true iff `v` is a `{ "unable": <object> }` envelope and nothing
+ *  else. Mirrors `is_unable_envelope` in `akribes-core/src/unable.rs`.
+ *
+ *  Accepts any value (`unknown`), so it can be applied to unvalidated
+ *  input; a true result narrows `v` to `{ unable: UnableRecord }`.
+ *  Declaration only: the runtime check itself is not part of this file. */
+export declare function isUnableEnvelope(v: unknown): v is {
+    unable: UnableRecord;
+};
+/** Typed narrower: returns true if an `EngineEvent` is a `TaskEnd` event.
+ *  A true result narrows `event` to `type: 'TaskEnd'` with a
+ *  `TaskEndPayload` payload. Declaration only: the runtime check itself
+ *  is not part of this file. */
+export declare function isTaskEnd(event: EngineEvent): event is {
+    type: 'TaskEnd';
+    payload: TaskEndPayload;
+};
+/** Payload of a `ValidationFailure` engine event (issue #320). Emitted in
+ *  addition to the existing `Log` line on every structured-output validation
+ *  retry — the typed shape lets consumers render the model's actual
+ *  response, the schema-validator's structured error breakdown, and the
+ *  provider's `stop_reason` (so a `max_tokens` truncation isn't
+ *  misdiagnosed as a schema overflow). Mirrors
+ *  `akribes_core::event::EngineEvent::ValidationFailure`.
+ *
+ *  The three breakdown lists (`missing_fields`, `extra_fields`,
+ *  `type_errors`) are always present as arrays; only `stop_reason` is
+ *  nullable. This is the payload that {@link isValidationFailure}
+ *  narrows to. */
+export type ValidationFailurePayload = {
+    task_name: string;
+    /** 1-indexed attempt number. */
+    attempt: number;
+    /** Raw text / JSON-serialized tool input the model emitted. */
+    model_response: string;
+    /** JSON-pointer paths to required fields the validator flagged as absent. */
+    missing_fields: string[];
+    /** Paths to fields rejected by `additionalProperties: false`. */
+    extra_fields: string[];
+    /** Human-readable type/value mismatches. */
+    type_errors: string[];
+    /** Provider stop_reason (`"max_tokens"` / `"end_turn"` / etc.) when known. */
+    stop_reason: string | null;
+};
+/** Typed narrower: returns true if an `EngineEvent` is a `ValidationFailure`.
+ *  A true result narrows `event` to `type: 'ValidationFailure'` with a
+ *  {@link ValidationFailurePayload} payload. Declaration only: the
+ *  runtime check itself is not part of this file. */
+export declare function isValidationFailure(event: EngineEvent): event is {
+    type: 'ValidationFailure';
+    payload: ValidationFailurePayload;
+};
+/** Emitted exactly once when a `runtime` block begins executing. `language`
+ *  is the lowercase language token from the `language:` field
+ *  (`python` / `bash` / `node` / `rust` / `java`); `runtime_name` is the
+ *  declared block name (e.g. `run_python`).
+ *
+ *  `language` is typed as an open `string` rather than a literal union.
+ *  This is the payload that {@link isRuntimeStart} narrows to. */
+export type RuntimeStartEvent = {
+    task_name: string;
+    runtime_name: string;
+    language: string;
+};
+/** Streaming stdout chunk from a `runtime` block. `chunk` is the partial
+ *  text exactly as the sandbox saw it on the child process's stdout; the
+ *  engine doesn't buffer line-by-line, so consumers may need to coalesce
+ *  partial lines themselves.
+ *
+ *  This is the payload that {@link isRuntimeStdout} narrows to. */
+export type RuntimeStdoutEvent = {
+    task_name: string;
+    chunk: string;
+};
+/** Streaming stderr chunk from a `runtime` block. Same shape as
+ *  {@link RuntimeStdoutEvent} — separated so consumers can colour-code
+ *  output without re-classifying.
+ *
+ *  Because the two payload types are structurally identical, only the
+ *  event's `type` tag tells them apart; see {@link isRuntimeStderr}. */
+export type RuntimeStderrEvent = {
+    task_name: string;
+    chunk: string;
+};
+/** Emitted exactly once when a `runtime` block exits cleanly (process
+ *  finished — exit_code 0 means success, non-zero is still a "clean" exit
+ *  from the sandbox's perspective). `duration_ms` is wall-clock time from
+ *  RuntimeStart to process exit.
+ *
+ *  Consumers therefore need to inspect `exit_code` themselves to tell
+ *  success from failure. This is the payload that {@link isRuntimeEnd}
+ *  narrows to. */
+export type RuntimeEndEvent = {
+    task_name: string;
+    exit_code: number;
+    duration_ms: number;
+};
+/** Emitted exactly once when a `runtime` block fails to run to completion —
+ *  e.g. sandbox unavailable, timeout, OOM kill, or any other internal
+ *  error. `kind` mirrors the `RuntimeError` enum on the Rust side
+ *  (`Timeout`, `OomKilled`, `SandboxUnavailable`, `Internal`, `NotConfigured`);
+ *  callers should branch on the known set and fall through for unknowns.
+ *
+ *  `kind` is typed as an open `string`, so the compiler does not enforce
+ *  that known set. This is the payload that {@link isRuntimeError}
+ *  narrows to. */
+export type RuntimeErrorEvent = {
+    task_name: string;
+    kind: string;
+    message: string;
+};
+/** Typed narrower: returns true if an `EngineEvent` is a `RuntimeStart`.
+ *  A true result narrows `event` to `type: 'RuntimeStart'` with a
+ *  {@link RuntimeStartEvent} payload. Declaration only: the runtime
+ *  check itself is not part of this file. */
+export declare function isRuntimeStart(event: EngineEvent): event is {
+    type: 'RuntimeStart';
+    payload: RuntimeStartEvent;
+};
+/** Typed narrower: returns true if an `EngineEvent` is a `RuntimeStdout`.
+ *  A true result narrows `event` to `type: 'RuntimeStdout'` with a
+ *  {@link RuntimeStdoutEvent} payload. Declaration only: the runtime
+ *  check itself is not part of this file. */
+export declare function isRuntimeStdout(event: EngineEvent): event is {
+    type: 'RuntimeStdout';
+    payload: RuntimeStdoutEvent;
+};
+/** Typed narrower: returns true if an `EngineEvent` is a `RuntimeStderr`.
+ *  A true result narrows `event` to `type: 'RuntimeStderr'` with a
+ *  {@link RuntimeStderrEvent} payload. Declaration only: the runtime
+ *  check itself is not part of this file. */
+export declare function isRuntimeStderr(event: EngineEvent): event is {
+    type: 'RuntimeStderr';
+    payload: RuntimeStderrEvent;
+};
+/** Typed narrower: returns true if an `EngineEvent` is a `RuntimeEnd`.
+ *  A true result narrows `event` to `type: 'RuntimeEnd'` with a
+ *  {@link RuntimeEndEvent} payload. Declaration only: the runtime check
+ *  itself is not part of this file. */
+export declare function isRuntimeEnd(event: EngineEvent): event is {
+    type: 'RuntimeEnd';
+    payload: RuntimeEndEvent;
+};
+/** Typed narrower: returns true if an `EngineEvent` is a `RuntimeError`.
+ *  A true result narrows `event` to `type: 'RuntimeError'` with a
+ *  {@link RuntimeErrorEvent} payload. Declaration only: the runtime
+ *  check itself is not part of this file. */
+export declare function isRuntimeError(event: EngineEvent): event is {
+    type: 'RuntimeError';
+    payload: RuntimeErrorEvent;
+};
+//# sourceMappingURL=types.d.ts.map