npm - @eidentic/server - Versions diffs - 0.1.0 - Mend

@eidentic/server 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

package/dist/index.d.cts ADDED Viewed

@@ -0,0 +1,1068 @@
+import * as hono from 'hono';
+import { Hono } from 'hono';
+import { cors } from 'hono/cors';
+import { Agent } from '@eidentic/core';
+import { RateLimiterPort, RateLimitResult, QuotaPort, QuotaLimits, QuotaCheck, StreamEvent, LoggerPort, Usage, CostBreakdown, AuthPrincipal, AuthPort } from '@eidentic/types';
+export { AuthPort, AuthPrincipal, AuthRequest, QuotaCheck, QuotaLimits, QuotaPort, QuotaUsage, RateLimiterPort } from '@eidentic/types';
+import { WorkflowResult, WorkflowRunOwner, RecordOptions, WorkflowRunError, WorkflowRunRegistry, StepTrace } from '@eidentic/workflow';
+export { RecordOptions, WorkflowRunError, WorkflowRunOwner, WorkflowRunRegistry } from '@eidentic/workflow';
+interface TokenBucketOptions {
+    /**
+     * Maximum number of tokens the bucket can hold. Refills never push a
+     * bucket above this value.
+     */
+    capacity: number;
+    /**
+     * Tokens added per second. Set to 0 for a one-shot allow-N-then-block bucket;
+     * in that configuration the limiter bounds map growth with a fixed 24-hour
+     * eviction window instead of one derived from the refill rate.
+     */
+    refillPerSec: number;
+    /**
+     * Injectable clock for testing. Defaults to Date.now, so a replacement is
+     * expected to return a millisecond timestamp as well.
+     */
+    now?: () => number;
+}
+/**
+ * In-process token-bucket rate limiter implementing `RateLimiterPort` (§20.3).
+ *
+ * Each unique `key` gets an independent bucket, created lazily on first `acquire`.
+ * On each call the bucket is refilled based on elapsed time before the token check,
+ * so callers never see more than `capacity` tokens regardless of how long they wait.
+ *
+ * ## `acquire(key, cost?)`
+ *
+ * Refills the bucket for `key`, performs the token check, and reports the outcome
+ * as a `RateLimitResult`. `cost` is presumably the number of tokens the call
+ * consumes; its default is not visible in this declaration — TODO confirm against
+ * the implementation.
+ *
+ * ## Memory management
+ *
+ * Bucket entries are evicted opportunistically on `acquire`. A sweep runs at most
+ * once per full-refill window (`capacity / refillPerSec * 1000` ms) to keep cost
+ * amortised. Entries older than twice the full-refill window are dropped — at that
+ * age the bucket would be fully refilled anyway, so eviction is semantically
+ * lossless. No background timer is used, which also means nothing is evicted
+ * while `acquire` is not being called.
+ *
+ * @note For multi-process deployments use a store-backed or Redis-backed limiter.
+ */
+declare class InMemoryTokenBucketLimiter implements RateLimiterPort {
+    private readonly capacity;
+    private readonly refillPerSec;
+    private readonly now;
+    private readonly buckets;
+    /**
+     * The eviction threshold in ms: entries older than this are guaranteed to be
+     * at full capacity, so removing them is lossless.
+     * = (capacity / refillPerSec) * 1000 * 2, i.e. twice the full-refill window.
+     * For zero-refill configs a fixed 24-hour window bounds growth.
+     */
+    private readonly evictThresholdMs;
+    /**
+     * Sweep runs at most once per this interval. Set to half the eviction
+     * threshold (one full-refill window) so that stale entries are caught
+     * within at most one extra window.
+     */
+    private readonly sweepIntervalMs;
+    private lastSweepMs;
+    constructor(opts: TokenBucketOptions);
+    /**
+     * Test-only accessor: number of entries currently held in the buckets map.
+     * Not part of the `RateLimiterPort` contract.
+     */
+    get bucketCount(): number;
+    acquire(key: string, cost?: number): RateLimitResult;
+    /**
+     * Evict bucket entries older than `evictThresholdMs`. Called opportunistically
+     * from `acquire` — no timer involved.
+     */
+    private _sweep;
+}
+/**
+ * Token produced by `check` and consumed by `record` or `release` to pair a reservation with its settlement.
+ *
+ * Callers should treat the token as opaque: store the value returned by a successful
+ * `check` and hand it back unchanged. `key` is presumably the quota key the
+ * reservation was taken against — TODO confirm against the implementation.
+ */
+interface QuotaReservation {
+    readonly key: string;
+    /** Opaque generation counter — ensures stale tokens from a reset() don't settle against fresh state. */
+    readonly generation: number;
+    /** Wall-clock timestamp (ms) when the reservation was created — used by the max-age sweep. */
+    readonly createdAt: number;
+}
+/**
+ * In-process cumulative quota ledger implementing `QuotaPort` (§20.4) with reserve-on-check /
+ * settle-on-record semantics to prevent concurrent requests from bursting past hard caps.
+ *
+ * Each unique `key` gets an independent usage counter, created lazily on first access.
+ *
+ * **Reserve-settle protocol (A8)**
+ * `check(key)` both evaluates the ceilings and reserves 1 run in an in-flight counter.
+ * Ceilings are checked against `committed + reserved`, so concurrent callers that have not
+ * yet settled see each other's pending runs. `record(key, spend, reservation)` settles the
+ * reservation (replacing the 1-run hold with the actual spend) and increments the committed
+ * counter.
+ *
+ * Callers SHOULD call `record` or `release` after every successful `check` to avoid leaking
+ * reservations. The `reservation` token returned on `ok:true` should be stored and passed back.
+ * Legacy callers that omit the `reservation` argument to `record` are still supported
+ * (backward-compatible): the committed counter increments normally, but the in-flight count
+ * is not decremented (they were never reserving anyway).
+ * NOTE(review): since `check` reserves on every success, a caller that uses `check` and then
+ * `record` without the token appears to leave its 1-run hold in place until the max-age
+ * sweep releases it — confirm whether that is intended.
+ *
+ * **Max-age sweep (Fix #4 defense-in-depth)**
+ * Reservations that are never settled (e.g. the server process crashes mid-run without calling
+ * `record`/`release`) would permanently inflate the in-flight counter and eventually block all
+ * new runs. A background sweep runs every `reservationMaxAgeMs` (default 5 minutes) and
+ * discards any reservation older than that threshold, freeing the leaked slot. Because the
+ * sweep period equals the age threshold, a leaked reservation can presumably survive for up
+ * to roughly twice `reservationMaxAgeMs` before it is discarded.
+ * Call `destroy()` when shutting down to stop the sweep interval.
+ *
+ * @note In-memory + unbounded map = v1. Real deployments should back this with a store
+ * or Redis. A `reset(key?)` helper is provided for tests and dev tooling.
+ *
+ * Deferred (v1): storage quotas, monthly-window reset/approval flow,
+ * model-downgrade-on-soft, persistent/Redis ledger, USD/token-level reservations
+ * (current implementation reserves 1 run; usd/token over-burst under extreme concurrency
+ * is bounded to at most `concurrency * limit_per_call`, significantly tighter than without).
+ */
+declare class InMemoryQuota implements QuotaPort {
+    private readonly resolve;
+    private readonly ledger;
+    /** In-flight run count per key: incremented on check, decremented on record/release. */
+    private readonly reserved;
+    /** Per-key monotonic generation counter, bumped on reset() to invalidate outstanding tokens. */
+    private readonly generations;
+    /** All live reservation tokens, used by the max-age sweep. */
+    private readonly activeReservations;
+    /** Max-age sweep interval handle — cleared on destroy(). */
+    private readonly sweepInterval;
+    /** How old a reservation must be (ms) before the sweep discards it. Default 5 minutes. */
+    private readonly reservationMaxAgeMs;
+    constructor(limits: QuotaLimits | ((key: string) => QuotaLimits), options?: {
+        /**
+         * Maximum age (milliseconds) of an unsettled reservation before the sweep
+         * automatically releases it. Defaults to 300_000 (5 minutes).
+         * Set to 0 or Infinity to disable the sweep.
+         */
+        reservationMaxAgeMs?: number;
+    });
+    /**
+     * Release all stale reservations (older than `reservationMaxAgeMs`).
+     * Called automatically by the background sweep, but also available for testing.
+     */
+    sweepStaleReservations(): void;
+    /**
+     * Stop the background sweep interval. Call this when shutting down to prevent
+     * open handle warnings in tests and to release resources cleanly.
+     */
+    destroy(): void;
+    private getUsage;
+    private getGeneration;
+    private getReserved;
+    /**
+     * Check whether `key` may start another run. On success, reserves 1 run in the in-flight
+     * counter and returns a `reservation` token. Hard ceilings are checked against
+     * `committed + reserved` so concurrent callers see each other's pending runs.
+     *
+     * Always call `record(key, spend, reservation)` or `release(reservation)` after `check`
+     * to free the reservation and prevent in-flight count leakage.
+     *
+     * @param key - Quota key whose usage counter is consulted.
+     * @returns The `QuotaCheck` verdict; `reservation` is populated on `ok:true`.
+     */
+    check(key: string): QuotaCheck & {
+        reservation?: QuotaReservation;
+    };
+    /**
+     * Settle a reservation by recording actual spend. The 1-run reservation is consumed and the
+     * committed counter is updated with the real spend. If `reservation` is provided and stale
+     * (reset() was called between check and record), the settle is a no-op — no double-counting.
+     *
+     * Legacy callers that omit `reservation` still have their spend committed (backward-compatible),
+     * but the in-flight counter is not decremented (they weren't reserving).
+     *
+     * @param key - Quota key the spend is charged to.
+     * @param spend - Actual cost of the run, in USD and tokens.
+     * @param reservation - Token returned by the matching `check`, when available.
+     */
+    record(key: string, spend: {
+        usd: number;
+        tokens: number;
+    }, reservation?: QuotaReservation): void;
+    /**
+     * Release a reservation WITHOUT recording spend (error / abort path).
+     * If the token is stale (reset() was called), this is a no-op.
+     *
+     * @param reservation - Token returned by the matching `check`.
+     */
+    release(reservation: QuotaReservation): void;
+    /**
+     * Reset usage counters for a specific key, or ALL keys when called with no argument.
+     * Increments the generation counter for affected keys so outstanding reservation tokens
+     * become stale and cannot settle against the fresh state.
+     * Useful for tests and dev tooling.
+     *
+     * @param key - Key to reset; omit to reset every key.
+     */
+    reset(key?: string): void;
+}
+/**
+ * AI SDK UI message-stream bridge (§ feat/ai-sdk-ui-stream).
+ *
+ * Converts Eidentic's `StreamEvent` async iterable into the Vercel AI SDK v6
+ * UI message-stream protocol so that a Next.js `app/api/chat/route.ts` can
+ * call `return toUIMessageStreamResponse(agent.query(input, { sessionId }))`
+ * and a `useChat` (or CopilotKit) frontend just works.
+ *
+ * ## Mapping
+ *
+ * | Eidentic `StreamEvent`               | AI SDK `UIMessageChunk`                  |
+ * |-------------------------------------|------------------------------------------|
+ * | `stream.delta`                      | `text-delta` (streaming token)           |
+ * | `assistant` text blocks             | `text-start` + `text-delta` + `text-end` |
+ * | `assistant` tool_use blocks         | `tool-input-available`                   |
+ * | `tool.result` (success)             | `tool-output-available`                  |
+ * | `tool.result` (error)               | `tool-output-error`                      |
+ * | `result` (terminal, any subtype)    | `finish`                                 |
+ * | `error` (thrown inside generator)   | `error` chunk (via `onError`)            |
+ *
+ * `session.init` and `compaction` events are silently ignored — they carry
+ * metadata/audit information that is not meaningful to a chat UI.
+ *
+ * ## Usage (Next.js App Router)
+ *
+ * ```ts
+ * // app/api/chat/route.ts
+ * import { toUIMessageStreamResponse } from "@eidentic/server";
+ * import { myAgent } from "@/lib/agent";
+ *
+ * export async function POST(req: Request) {
+ *   const { messages, sessionId } = await req.json();
+ *   const input = messages.at(-1)?.content ?? "";
+ *   return toUIMessageStreamResponse(
+ *     myAgent.query(input, { sessionId }),
+ *   );
+ * }
+ * ```
+ *
+ * On the client, configure `useChat` to point at this route — no further
+ * changes are needed since the response uses the standard AI SDK SSE wire
+ * format.
+ */
+interface ToUIMessageStreamOptions {
+    /**
+     * Optional HTTP headers to include in the `Response` returned by
+     * `toUIMessageStreamResponse`. Useful for CORS, cache-control, etc.
+     * Accepts either a `Headers` instance or a plain string-to-string record.
+     */
+    headers?: Headers | Record<string, string>;
+    /**
+     * HTTP status code for the `Response` returned by
+     * `toUIMessageStreamResponse`. Defaults to 200.
+     */
+    status?: number;
+}
+/**
+ * Converts an Eidentic `StreamEvent` async iterable into a
+ * `ReadableStream<UIMessageChunk>` compatible with the Vercel AI SDK v6
+ * UI message-stream protocol.
+ *
+ * Use `toUIMessageStreamResponse` for the common case of returning a `Response`
+ * from a Next.js route handler. Use this function directly when you need the
+ * raw stream (e.g. for piping to a Node.js `ServerResponse`).
+ *
+ * @param events - The agent's event stream, e.g. the value returned by `agent.query(...)`.
+ * @returns A stream of UI message chunks. Note that the declared return type is the
+ * unparameterised `ReadableStream`, so the chunk type is not enforced by the compiler.
+ */
+declare function toUIMessageStream(events: AsyncIterable<StreamEvent>): ReadableStream;
+/**
+ * Converts an Eidentic `StreamEvent` async iterable into a `Response` that
+ * streams AI SDK UI message chunks to the client (SSE format).
+ *
+ * This is the primary integration point for Next.js App Router route handlers.
+ * The returned `Response` is directly compatible with `useChat` from the
+ * `@ai-sdk/react` package and frameworks that speak the AI SDK UI wire format
+ * (CopilotKit, etc.).
+ *
+ * @param events - The agent's event stream, e.g. the value returned by `agent.query(...)`.
+ * @param opts - Optional response settings: extra `headers` and the HTTP `status`
+ * (200 when omitted).
+ * @returns A streaming `Response` carrying the converted chunks.
+ *
+ * @example
+ * ```ts
+ * // app/api/chat/route.ts
+ * import { toUIMessageStreamResponse } from "@eidentic/server";
+ * import { myAgent } from "@/lib/agent";
+ *
+ * export async function POST(req: Request) {
+ *   const { messages, sessionId } = await req.json();
+ *   const input = messages.at(-1)?.content ?? "";
+ *   return toUIMessageStreamResponse(
+ *     myAgent.query(input, { sessionId }),
+ *   );
+ * }
+ * ```
+ */
+declare function toUIMessageStreamResponse(events: AsyncIterable<StreamEvent>, opts?: ToUIMessageStreamOptions): Response;
+/**
+ * @eidentic/server — in-process agent scheduler.
+ *
+ * Registers tasks that fire an agent-run callback on a fixed interval or cron
+ * expression. Designed for in-process background work; persistence and
+ * multi-instance leader-election are **out of scope** (no DB, no distributed
+ * lock). For durable/multi-instance scheduling, pair this with a purpose-built
+ * durable job queue or distributed scheduler as a follow-up.
+ *
+ * ## Clock / timer injection
+ *
+ * The scheduler accepts an optional `ClockPort` (a thin `{ now(): number }`
+ * interface) and an optional `TimerPort` (wraps `setInterval` / `clearInterval`).
+ * In production, the defaults delegate to the real `Date.now()` and global
+ * `setInterval`. In tests, inject fakes to drive time without real sleeping.
+ *
+ * ## Overlap / concurrency
+ *
+ * If a task's previous invocation is still in flight when the next tick fires,
+ * the tick is **skipped silently** — runs never pile up. This is intentional: the
+ * scheduler is optimistic ("at-most-once per interval") rather than catch-up
+ * ("missed runs queue up"). Callers should expect a task that overruns its
+ * interval to miss fires rather than run late.
+ *
+ * ## Error isolation
+ *
+ * Each task's run callback is invoked in its own microtask chain. An unhandled
+ * rejection or thrown error is caught, logged via the injected `LoggerPort`, and
+ * swallowed so that one failing task never kills the scheduler or other tasks.
+ */
+/**
+ * Fire the task every `everyMs` milliseconds (wall-clock elapsed since the
+ * last trigger, not since epoch). First fire happens after one full interval.
+ *
+ * `kind: "interval"` is the discriminant that distinguishes this shape from
+ * `CronSchedule` within the `Schedule` union.
+ */
+interface IntervalSchedule {
+    kind: "interval";
+    everyMs: number;
+}
+/**
+ * Fire the task according to a standard 5-field cron expression
+ * (`"* * * * *"` — minute, hour, day-of-month, month, day-of-week). The next
+ * run time is computed by `cron-parser` from the current clock value on each tick.
+ *
+ * Timezone is UTC by default; pass `tz` to override.
+ *
+ * `kind: "cron"` is the discriminant that distinguishes this shape from
+ * `IntervalSchedule` within the `Schedule` union.
+ */
+interface CronSchedule {
+    kind: "cron";
+    expression: string;
+    /** IANA timezone identifier, e.g. `"America/New_York"`. Defaults to UTC. */
+    tz?: string;
+}
+/** Union of all supported schedule shapes; narrow on the `kind` field (`"interval"` or `"cron"`). */
+type Schedule = IntervalSchedule | CronSchedule;
+/** Context passed to a task's run callback on each trigger; identifies which task fired and when. */
+interface RunContext {
+    /** The task's registered id (the `id` of the `ScheduledTask`). */
+    taskId: string;
+    /** Wall-clock timestamp (ms since epoch) when this trigger fired. */
+    triggeredAt: number;
+}
+/**
+ * A function that performs the agent work for a scheduled task.
+ * May be synchronous or return a promise; no result value is expected either way.
+ */
+type RunCallback = (ctx: RunContext) => void | Promise<void>;
+interface ScheduledTask {
+    /**
+     * Unique identifier for this task. Used with `remove(id)`; adding another
+     * task with the same id replaces the existing one.
+     */
+    id: string;
+    /** When to fire: a fixed interval or a cron expression. */
+    schedule: Schedule;
+    /**
+     * Called on each trigger. Typically invokes `agent.query(...)` with the
+     * relevant input and owner scope.
+     *
+     * The callback runs asynchronously; errors are caught and logged.
+     * If the previous invocation is still in-flight when the next tick fires,
+     * this fire is **skipped** (no overlap — at-most-once-per-interval semantics).
+     */
+    run: RunCallback;
+}
+/**
+ * Minimal clock interface — injectable for deterministic tests.
+ *
+ * The production default delegates to `Date.now()`, so `now()` is expected to
+ * return a millisecond timestamp.
+ */
+interface ClockPort {
+    now(): number;
+}
+/**
+ * Injectable timer port — wraps setInterval / clearInterval.
+ *
+ * The handle returned by `setInterval` is typed as `unknown`: it is opaque to
+ * callers and only meant to be passed back to `clearInterval`.
+ */
+interface TimerPort {
+    setInterval(fn: () => void, ms: number): unknown;
+    clearInterval(handle: unknown): void;
+}
+interface SchedulerOptions {
+    /**
+     * How often the scheduler checks for due tasks, in milliseconds.
+     * Defaults to `1000` ms (1 second). Tune lower for sub-second granularity
+     * on interval tasks; cron resolution is 1 minute regardless (cron-parser is
+     * minute-granular).
+     */
+    tickIntervalMs?: number;
+    /** Injectable clock — defaults to `Date.now()`. Inject a fake in tests. */
+    clock?: ClockPort;
+    /** Injectable timer — defaults to `globalThis.setInterval`. Inject a fake in tests. */
+    timer?: TimerPort;
+    /**
+     * Structured logger for task error reporting.
+     * Pass `NoopLogger` to silence all output.
+     * Defaults to a minimal `console.error` shim.
+     */
+    logger?: LoggerPort;
+}
+/**
+ * In-process agent scheduler.
+ *
+ * ```ts
+ * const scheduler = new Scheduler({ logger: myLogger });
+ *
+ * scheduler.add({
+ *   id: "digest",
+ *   schedule: { kind: "cron", expression: "0 9 * * *", tz: "America/New_York" },
+ *   run: async () => {
+ *     for await (const _ of agent.query("Generate daily digest", { sessionId: crypto.randomUUID() })) {}
+ *   },
+ * });
+ *
+ * scheduler.start();
+ * // …
+ * scheduler.stop();
+ * ```
+ *
+ * ### Overlap skip
+ * If a task's previous run is still in flight when its next fire time arrives,
+ * the tick is silently skipped. This prevents unbounded pile-up on slow tasks,
+ * at the price of missed fires for tasks that overrun their interval.
+ *
+ * ### Error isolation
+ * Each task's `run` callback error is caught and logged; it does NOT propagate
+ * to the scheduler or affect other tasks.
+ *
+ * ### Persistence / distribution
+ * This is a **purely in-process** scheduler — state lives in memory, is lost
+ * on restart, and is NOT coordinated across multiple instances. For durable or
+ * multi-instance scheduling, wire an external queue/workflow engine.
+ */
+declare class Scheduler {
+    private readonly tickIntervalMs;
+    private readonly clock;
+    private readonly timer;
+    private readonly logger;
+    private readonly tasks;
+    private timerHandle;
+    private running;
+    constructor(opts?: SchedulerOptions);
+    /**
+     * Start the scheduler's internal tick loop.
+     * Calling `start()` while already running is a no-op.
+     */
+    start(): void;
+    /**
+     * Stop the scheduler. All in-flight callbacks are allowed to complete
+     * (they are not aborted), but no new runs will be triggered.
+     * Calling `stop()` while not running is a no-op.
+     */
+    stop(): void;
+    /**
+     * Register a scheduled task. If a task with the same `id` already exists,
+     * it is replaced (the old state is discarded).
+     *
+     * For cron tasks, the first next-fire time is computed immediately from the
+     * current clock value.
+     *
+     * @param task - The task definition: id, schedule and run callback.
+     */
+    add(task: ScheduledTask): void;
+    /**
+     * Remove a registered task by id.
+     * Any in-flight callback for this task will be allowed to complete, but no
+     * further fires will occur.
+     *
+     * @param id - The id the task was registered under.
+     * @returns `true` if the task was found and removed, `false` otherwise.
+     */
+    remove(id: string): boolean;
+    /** Returns the ids of all currently registered tasks. */
+    taskIds(): string[];
+    /**
+     * Evaluate all registered tasks against the given timestamp and fire any
+     * that are due.
+     *
+     * This is the core scheduling method. The `start()` loop calls it
+     * automatically at `tickIntervalMs` granularity, but tests can call it
+     * directly to drive time without real timers.
+     *
+     * @param now - The timestamp to evaluate against; presumably ms since epoch,
+     * matching `ClockPort.now()` — TODO confirm.
+     */
+    tick(now: number): void;
+    private _fire;
+}
+/**
+ * @eidentic/server — BatchRunner: bounded-concurrency offline/batch agent processing.
+ *
+ * ## Overview
+ *
+ * `BatchRunner` takes an array of inputs and runs each through an agent with a
+ * configurable concurrency cap (default 4). Items that throw are captured as
+ * per-item errors — a single bad item never aborts the batch. Aggregate
+ * usage/cost totals are accumulated using the existing `Usage` / `CostBreakdown`
+ * types from `@eidentic/types`.
+ *
+ * ## Provider-native batch (e.g. Anthropic Message Batches API)
+ *
+ * The Anthropic Message Batches API offers ~50% cost savings for async jobs but
+ * is exposed via a separate REST API, NOT through the AI SDK v6 `generateText` /
+ * `streamText` surface. Wiring it would require either a dedicated HTTP client
+ * per provider or a non-trivial AI-SDK fork — too large for v1.
+ *
+ * Instead, `BatchRunner` accepts an optional `backend` option
+ * (`BatchRunnerOptions.backend`, typed as the `BatchBackend` interface). The
+ * default `"concurrent"` backend (`ConcurrentAgentBackend`) runs items via
+ * `agent.query()` with bounded parallelism. A future `"anthropic-batch"` or
+ * `"openai-batch"` backend could implement the provider REST APIs behind this
+ * seam without changing any public `BatchRunner` API.
+ *
+ * ## Concurrency guarantee
+ *
+ * At most `concurrency` items are ever in-flight simultaneously. The runner uses
+ * a slot-based semaphore — no library dep. Items are dispatched in input order;
+ * results are collected as they complete (output order may differ when
+ * `concurrency > 1`).
+ *
+ * ## Cancellation
+ *
+ * Pass an `AbortSignal` via `BatchRunOptions.signal`. Once aborted, no further
+ * items are dispatched. Items already in-flight receive the signal and may abort
+ * early (dependent on the agent's internal abort handling). A cancelled batch
+ * returns partial results up to that point with `aggregate.cancelled: true`.
+ *
+ * ## Progress callback
+ *
+ * `onProgress(item)` is called once per completed item (success OR error),
+ * passing the completed `BatchItemResult`. Useful for streaming output to a UI
+ * or writing partial results to disk.
+ */
+/** A single item to process in a batch. Only `input` is required; every other field is optional. */
+interface BatchItem {
+    /**
+     * Optional stable identifier for this item.
+     * Defaults to the item's zero-based index string ("0", "1", …) when absent.
+     * The id is echoed back on the item's `BatchItemResult`.
+     */
+    id?: string;
+    /** The user message to pass to `agent.query()`. */
+    input: string;
+    /** Optional per-item `userId` forwarded to `agent.query()`. */
+    userId?: string;
+    /** Optional per-item `orgId` forwarded to `agent.query()`. */
+    orgId?: string;
+    /** Optional per-item `sessionId`. Generated (UUID) when absent. */
+    sessionId?: string;
+}
+/** Outcome of a successfully completed item. Discriminated from `BatchItemError` by `status: "success"`. */
+interface BatchItemSuccess {
+    status: "success";
+    id: string;
+    /** The agent's final text output (from the terminal `result` event). */
+    output: string;
+    /** Per-item token usage (foreground totals, same units as `Usage`). */
+    usage: Usage;
+    /**
+     * Per-item cost breakdown. Present only when the agent has a `PriceTable`
+     * configured (same condition as `result.cost` being non-undefined).
+     */
+    cost?: CostBreakdown;
+    /** The sessionId used for this item's run (supplied on the item, or generated). */
+    sessionId: string;
+}
+/**
+ * Outcome of a failed item (the error is captured; the batch continues).
+ * Discriminated from `BatchItemSuccess` by `status: "error"`.
+ */
+interface BatchItemError {
+    status: "error";
+    id: string;
+    /** Error message. Only the message text is kept, not the original error object. */
+    error: string;
+    /** The sessionId used for this item's run (may be undefined if dispatch never started). */
+    sessionId?: string;
+}
+type BatchItemResult = BatchItemSuccess | BatchItemError; // Outcome of one batch item; narrow on `status` ("success" | "error").
+/** Aggregate totals across all items in the batch. Failed items contribute only to `errorCount`. */
+interface BatchAggregate {
+    /** Summed token usage (input + output tokens) across all SUCCESSFUL items. */
+    totalUsage: Usage;
+    /**
+     * Summed USD cost across all successful items that had a `CostBreakdown`.
+     * `undefined` when no items had pricing.
+     */
+    totalUsd?: number;
+    /** Number of items that completed with `status: "success"`. */
+    successCount: number;
+    /** Number of items that completed with `status: "error"`. */
+    errorCount: number;
+    /**
+     * `true` when the batch was stopped early by an AbortSignal.
+     * Items that were not yet dispatched when the signal fired are absent from
+     * `results` — they were never started.
+     */
+    cancelled: boolean;
+}
+/** The return value of `BatchRunner.run()`. */
+interface BatchResult {
+    /**
+     * Per-item outcome, in completion order. May be shorter than `items` if cancelled,
+     * and is an empty array when the run was started with `collectResults: false`.
+     */
+    results: BatchItemResult[];
+    /** Aggregate totals. These remain accurate even when `results` is not collected. */
+    aggregate: BatchAggregate;
+}
+/**
+ * Called once per completed item (success or error), in completion order.
+ * Receives the item's `BatchItemResult`; the signature has no return value.
+ */
+type OnProgress = (item: BatchItemResult) => void;
+/**
+ * Interface for the batch execution backend.
+ *
+ * The default `"concurrent"` backend calls `agent.query()` directly with
+ * bounded parallelism. Future implementations may use provider-native APIs:
+ *
+ * ```ts
+ * // Future usage (not implemented in v1):
+ * const runner = new BatchRunner(agent, {
+ *   backend: new AnthropicBatchBackend({ apiKey: process.env.ANTHROPIC_API_KEY }),
+ * });
+ * ```
+ *
+ * A backend receives one item at a time and is responsible for:
+ * 1. Dispatching the item to the underlying inference service.
+ * 2. Returning a `BatchItemResult` (NEVER throwing — capture errors as `status:"error"`).
+ * 3. Respecting `signal` for cancellation.
+ *
+ * The `BatchRunner` handles concurrency, cancellation bookkeeping, progress
+ * callbacks, and aggregate accumulation — the backend only needs to run ONE item.
+ */
+interface BatchBackend {
+    /**
+     * Execute a single batch item and return its result.
+     *
+     * Called by `BatchRunner` up to `concurrency` times in parallel. The backend
+     * MUST honour `signal.aborted` and terminate early when the signal fires.
+     *
+     * @param item - The resolved item (id, input, sessionId, userId, orgId all populated).
+     * How an absent `userId` / `orgId` on the original `BatchItem` is filled in is not
+     * visible in this declaration.
+     * @param signal - The batch's cancellation signal (may already be aborted).
+     * @returns The per-item outcome. Must not throw or reject; capture errors as
+     * `{ status: "error" }`, consistent with the interface contract.
+     */
+    run(item: Required<Pick<BatchItem, "id" | "input">> & {
+        userId: string;
+        orgId: string;
+        sessionId: string;
+    }, signal: AbortSignal): Promise<BatchItemResult>;
+}
+interface BatchRunnerOptions {
+    /**
+     * Maximum number of items to process concurrently, i.e. the upper bound on
+     * simultaneous `backend.run()` calls.
+     * @default 4
+     */
+    concurrency?: number;
+    /**
+     * Batch execution backend.
+     * Defaults to `ConcurrentAgentBackend` (calls `agent.query()` directly).
+     *
+     * Swap this to integrate provider-native batch APIs (e.g. Anthropic Message
+     * Batches, OpenAI Batch API) in a future release without changing any other
+     * `BatchRunner` API surface.
+     */
+    backend?: BatchBackend;
+}
+interface BatchRunOptions {
+    /**
+     * AbortSignal for cancelling the batch. Once aborted, no further items are
+     * dispatched. In-flight items receive the signal and may terminate early.
+     * A cancelled run reports `aggregate.cancelled: true`.
+     */
+    signal?: AbortSignal;
+    /**
+     * Optional callback invoked once per completed item (success or error).
+     * Called in completion order (not necessarily input order when `concurrency > 1`).
+     */
+    onProgress?: OnProgress;
+    /**
+     * Whether to accumulate per-item results in the returned `BatchResult.results` array.
+     *
+     * @default true
+     *
+     * For large batches (thousands of items or more) holding all results in memory may
+     * exhaust the heap. Set `collectResults: false` to skip in-memory accumulation;
+     * `BatchResult.results` will be an empty array while `aggregate` totals remain accurate.
+     * Use the `onProgress` callback to drain results incrementally instead:
+     *
+     * ```ts
+     * await runner.run(items, {
+     *   collectResults: false,
+     *   onProgress: (item) => db.insert(item), // stream results to persistent storage
+     * });
+     * ```
+     */
+    collectResults?: boolean;
+}
+/**
+ * Bounded-concurrency batch processor for agent inputs.
+ *
+ * ```ts
+ * const runner = new BatchRunner(agent, { concurrency: 8 });
+ *
+ * const { results, aggregate } = await runner.run(
+ *   inputs.map((text) => ({ input: text })),
+ *   {
+ *     signal: controller.signal,
+ *     onProgress: (item) => console.log(item.status, item.id),
+ *   },
+ * );
+ *
+ * console.log(
+ *   `${aggregate.successCount} ok, ${aggregate.errorCount} err, ` +
+ *   `${aggregate.totalUsage.inputTokens + aggregate.totalUsage.outputTokens} tokens total`,
+ * );
+ * ```
+ *
+ * ### Error isolation
+ * A failed item (agent error, network error, aborted sub-run) is captured as
+ * `{ status: "error", ... }` — it does NOT abort the batch.
+ *
+ * ### Provider-native batch (deferred, v1)
+ * v1 uses `agent.query()` directly (the `"concurrent"` backend). To integrate
+ * Anthropic Message Batches or OpenAI Batch API, implement `BatchBackend` and
+ * pass it via `options.backend`.
+ */
+declare class BatchRunner {
+    private readonly concurrency;
+    private readonly backend;
+    constructor(agent: Agent, options?: BatchRunnerOptions);
+    /**
+     * Process a list of inputs with bounded concurrency.
+     *
+     * @param items - Items to process. Each must have at least `input` set.
+     * @param opts - Run-level options (signal, progress callback, collectResults).
+     * @returns `BatchResult` containing per-item outcomes (in completion order) and
+     * aggregate totals.
+     *
+     * ### Large-batch tip
+     * For very large batches (thousands of items), holding all results in memory may
+     * be impractical. Pass `collectResults: false` to skip in-memory accumulation:
+     * `BatchResult.results` will be empty while `aggregate` totals remain accurate.
+     * Drain results incrementally via `onProgress` instead.
+     */
+    run(items: BatchItem[], opts?: BatchRunOptions): Promise<BatchResult>;
+}
+/**
+ * Status of an async run. `"running"` means the run is still in flight; `"completed"`,
+ * `"failed"` and `"aborted"` are the settled states.
+ */
+type AsyncRunStatus = "running" | "completed" | "failed" | "aborted";
+/**
+ * Registry entry for one async run.
+ * `owner` mirrors the principal that started the run — used to enforce that
+ * only the owning tenant may poll the run's status.
+ *
+ * `createdAt` / `settledAt` are presumably millisecond timestamps, with
+ * `settledAt` set once the run leaves the `"running"` state — TODO confirm
+ * against the implementation.
+ */
+interface AsyncRunEntry {
+    runId: string;
+    sessionId: string;
+    agentId: string;
+    status: AsyncRunStatus;
+    /** Text output when status is "completed". */
+    output?: string;
+    /** Error message when status is "failed". */
+    error?: string;
+    /** Principal identifiers used for ownership checks on the status endpoint. */
+    owner: {
+        userId?: string;
+        orgId?: string;
+        apiKey?: string;
+    };
+    createdAt: number;
+    settledAt?: number;
+}
+/**
+ * In-process registry of async runs.
+ * Exported so tests and tooling can inspect entries directly when needed.
+ * In production, this is a module-private detail behind the server factory.
+ *
+ * [M10] Bounded retention: once the registry reaches `maxRuns` entries, the
+ * oldest *settled* runs (completed/failed/aborted) are evicted first.
+ * In-flight runs (status="running") are never evicted under normal cap pressure.
+ * If eviction of settled runs is not enough to make room (all runs are in-flight),
+ * the new entry is still accepted — the cap is a best-effort bound, not a hard gate.
+ *
+ * Methods: `set` stores an entry, `get` looks one up by `runId` (`undefined` when
+ * absent), and `settle` presumably merges `patch` into the existing entry for
+ * `runId` — TODO confirm against the implementation. The default for `maxRuns`
+ * is not visible in this declaration.
+ */
+declare class AsyncRunRegistry {
+    private readonly runs;
+    private readonly maxRuns;
+    constructor(options?: {
+        maxRuns?: number;
+    });
+    set(entry: AsyncRunEntry): void;
+    get(runId: string): AsyncRunEntry | undefined;
+    settle(runId: string, patch: Partial<AsyncRunEntry>): void;
+    /** Evict the single oldest settled (non-in-flight) entry to make room. */
+    private _evictOldestSettled;
+    /** Return all entries (copy of values). Used by graceful drain to check in-flight count. */
+    values(): AsyncRunEntry[];
+}
+/**
+ * No-op auth: always returns an empty principal (single-tenant mode).
+ *
+ * **Warning:** Every request resolves to the same anonymous principal. With
+ * `exposeEvents: true`, or in any multi-tenant deployment, this means every
+ * client can read every session's events. Only use `NoAuth` for trusted
+ * single-tenant environments (local dev, internal services) — do NOT expose a
+ * server using `NoAuth` to the public internet.
+ */
+declare const NoAuth: AuthPort;
+/**
+ * API-key auth: reads `Authorization: Bearer <key>` or `x-api-key` header,
+ * looks it up in the provided key→principal map, returns null on mismatch.
+ *
+ * `runAuth` lowercases all header keys before the adapter sees them, so only
+ * the lowercase header names are matched here.
+ *
+ * **Security note — plain-object key lookup is not constant-time.**
+ * Operators with high-security needs (timing-attack resistance) should use a
+ * hashed-key comparison (e.g. HMAC) rather than a plain object lookup. This
+ * implementation is sufficient for most deployments but not for environments
+ * where side-channel timing attacks are a credible threat model.
+ *
+ * **Prototype-pollution guard:** `Object.hasOwn` is used before the lookup so
+ * that keys such as `"__proto__"`, `"constructor"`, or `"toString"` — which
+ * exist on every plain object's prototype — never resolve to a principal.
+ *
+ * @param keys — plain object mapping each accepted API key to its principal
+ * @returns an `AuthPort` backed by that mapping
+ */
+declare function ApiKeyAuth(keys: Record<string, AuthPrincipal>): AuthPort;
+type AgentResolver = (agentId: string) => Agent | undefined; // Resolves an agent by id; `undefined` presumably signals an unknown id (confirm against the server implementation).
+interface ServerOptions {
+    /** Resolve an agent by id. Accepts a plain record or a resolver function. */
+    agents: Record<string, Agent> | AgentResolver;
+    /**
+     * Authentication adapter. Defaults to `NoAuth` (single-tenant).
+     *
+     * **Warning:** `NoAuth` must not be used in publicly exposed multi-tenant
+     * deployments — all clients share a single anonymous principal and can read
+     * each other's sessions when `exposeEvents: true` is set. See `NoAuth` for details.
+     */
+    auth?: AuthPort;
+    /** Optional base path prefix, e.g. "/api". Default "". */
+    basePath?: string;
+    /**
+     * Expose the `GET /v1/agents/:agentId/sessions/:sessionId/events` audit
+     * endpoint. Defaults to **false** (secure-by-default).
+     *
+     * The endpoint enforces per-principal session ownership: a principal may only
+     * read events for sessions it owns (matching `userId`/`orgId`/`apiKey`). Sessions
+     * with no recorded owner (legacy / `NoAuth`) remain readable for back-compat, so in
+     * multi-tenant deployments ensure sessions are created through an authenticated
+     * principal so they carry an owner.
+     */
+    exposeEvents?: boolean;
+    /**
+     * Token-bucket rate limiter (§20.3). When set, every POST /query and /resume
+     * request is checked AFTER auth resolves and BEFORE agent work begins.
+     * Throttled requests receive 429 + Retry-After. When absent, the check is
+     * skipped entirely — the hot path is byte-identical to the pre-rate-limit behaviour.
+     */
+    rateLimiter?: RateLimiterPort;
+    /**
+     * Derive the rate-limit bucket key from the authenticated principal and agentId.
+     * Defaults to: `principal.apiKey ?? principal.userId ?? principal.orgId ?? "anonymous"`.
+     */
+    rateLimitKey?: (principal: AuthPrincipal, agentId: string) => string;
+    /**
+     * Pre-authentication rate limiter applied to all /v1 routes BEFORE auth runs.
+     * Defends against unauthenticated hammering, credential brute-force, and
+     * enumeration attacks that would otherwise be unthrottled.
+     *
+     * Keyed by client IP (see `getClientKey` for customisation). Default limit is
+     * 60 requests per minute per client key using an internal `InMemoryTokenBucketLimiter`.
+     *
+     * Set to `null` to **explicitly disable** pre-auth rate limiting (not recommended
+     * for public-facing deployments).
+     *
+     * When absent, an internal limiter with safe defaults (60 req/min) is used.
+     */
+    preAuthRateLimiter?: RateLimiterPort | null;
+    /**
+     * Derive the pre-auth rate-limit bucket key from the raw Hono context.
+     * Defaults to the remote address from the Node.js socket (`c.env?.incoming?.socket?.remoteAddress`),
+     * falling back to the constant `"unknown"` on non-Node runtimes.
+     *
+     * When `trustProxy: true`, the default derivation uses the FIRST entry of the
+     * `x-forwarded-for` header instead of the socket address.
+     */
+    getClientKey?: (c: hono.Context) => string;
+    /**
+     * When `true`, the first entry of the `x-forwarded-for` header is trusted as
+     * the real client IP for pre-auth rate-limiting. Defaults to `false`.
+     *
+     * Only set this to `true` when the server is behind a trusted reverse proxy
+     * that overwrites `x-forwarded-for` — otherwise clients can spoof their IP
+     * to bypass the pre-auth rate limiter.
+     */
+    trustProxy?: boolean;
+    /**
+     * Per-tenant cumulative quota ledger (§20.4). When set, every POST /query and /resume
+     * request is checked AFTER auth + rate-limit + body validation + agent resolution and
+     * BEFORE agent work begins. Hard-cap exceeded → HTTP 402 Payment Required + JSON error body.
+     * Soft-cap crossed → `X-Eidentic-Quota-Warning: soft-limit` header (still streams).
+     * After a run completes the terminal usage/cost is recorded into the ledger.
+     * When absent, the check is skipped — the hot path is byte-identical to the no-quota behaviour.
+     *
+     * Quota is checked AFTER body validation and agent resolution so that malformed requests
+     * and requests for unknown agents never consume a reservation slot.
+     */
+    quota?: QuotaPort;
+    /**
+     * Derive the quota ledger key from the authenticated principal and agentId.
+     * Defaults to the same derivation as `rateLimitKey`:
+     * `principal.apiKey ?? principal.userId ?? principal.orgId ?? "anonymous"`.
+     */
+    quotaKey?: (principal: AuthPrincipal, agentId: string) => string;
+    /**
+     * Maximum number of characters allowed in the `input` field of /query and /runs
+     * requests, and in a string `decision` on /resume requests.
+     * Defaults to 32,000. Requests exceeding this limit receive a 400 error.
+     */
+    maxInputChars?: number;
+    /**
+     * [M10] Maximum number of async-run entries retained in the in-process registry.
+     * Once the limit is reached, the oldest *settled* run (completed/failed/aborted)
+     * is evicted to make room. In-flight runs are never evicted.
+     * Defaults to 1000. Mirror of the workflow registry's bounded pattern.
+     */
+    maxAsyncRuns?: number;
+    /**
+     * Webhook delivery configuration for async runs started via `POST /v1/agents/:id/runs`.
+     *
+     * When provided, a `callbackUrl` field may be included in the runs request body.
+     * On run completion (success or error) the server POSTs a JSON payload to that URL:
+     *
+     * ```json
+     * { "runId": "…", "agentId": "…", "status": "completed"|"failed",
+     *   "output": "…", "error": "…", "usage": { "inputTokens": 0, "outputTokens": 0 } }
+     * ```
+     *
+     * ### Signature verification recipe
+     *
+     * The request carries two headers:
+     * - `X-Eidentic-Timestamp` — Unix timestamp in milliseconds (string).
+     * - `X-Eidentic-Signature` — `sha256=<hex HMAC-SHA256>` where the HMAC key is
+     *   `signingSecret` and the message is `<timestamp>.<rawBody>`.
+     *
+     * To verify on your server (Node.js example, using a constant-time comparison):
+     * ```ts
+     * import { createHmac, timingSafeEqual } from "node:crypto";
+     *
+     * function verify(secret: string, timestamp: string, rawBody: string, signature: string) {
+     *   const expected = Buffer.from("sha256=" + createHmac("sha256", secret)
+     *     .update(timestamp + "." + rawBody).digest("hex"));
+     *   const received = Buffer.from(signature);
+     *   // timingSafeEqual throws when the lengths differ, so check the length first.
+     *   return expected.length === received.length && timingSafeEqual(expected, received);
+     * }
+     * ```
+     *
+     * Receivers should additionally reject deliveries whose `X-Eidentic-Timestamp`
+     * is too old, to limit replay of captured requests.
+     *
+     * **Delivery guarantees:** one attempt + up to 2 retries (1s, 2s backoff), 10 s timeout
+     * per attempt, redirects never followed. Failures are logged but never surface to the caller.
+     *
+     * **Security:** `callbackUrl` must be an http/https URL with a public (non-private) host.
+     * Set `allowPrivateHosts: true` ONLY in development / test environments.
+     *
+     * Callbacks are **disabled** unless this option is set. Sending `callbackUrl` in the
+     * request body while `webhooks` is not configured returns `400 Bad Request`.
+     */
+    webhooks?: {
+        /** HMAC-SHA256 signing secret. Used to sign every webhook delivery. */
+        signingSecret: string;
+        /**
+         * When `true`, private/loopback/link-local addresses are allowed as callback
+         * hosts. Defaults to `false`. Only enable in controlled test environments.
+         */
+        allowPrivateHosts?: boolean;
+    };
+    /**
+     * CORS options passed through to the `hono/cors` middleware.
+     *
+     * **Default:** no CORS headers are added (safest default).
+     * When provided, the middleware is applied to all routes.
+     *
+     * **Warning:** `{ origin: "*", credentials: true }` is rejected by browsers.
+     * Do not combine a wildcard `origin` with `credentials: true`.
+     *
+     * @example
+     * // Allow a specific origin
+     * cors: { origin: "https://app.example.com", credentials: true }
+     *
+     * @example
+     * // Allow any origin (unauthenticated public APIs only)
+     * cors: { origin: "*" }
+     */
+    cors?: Parameters<typeof cors>[0];
+    /**
+     * External workflow run registry to use instead of the server's own internal one.
+     *
+     * When provided, the server uses this registry for the `GET /v1/workflows` and
+     * `GET /v1/workflows/:id` endpoints and for `handle.recordWorkflow()` / `handle.recordWorkflowError()`.
+     * This enables durable or cross-instance registries (e.g. backed by a database).
+     *
+     * When absent, an in-memory bounded registry is created automatically.
+     */
+    workflowRuns?: WorkflowRunRegistry;
+}
+/**
+ * Workflow run summary — the shape returned in the `GET /v1/workflows` list.
+ * Full detail (including trace/output/error) is available via `GET /v1/workflows/:id`.
+ *
+ * NOTE(review): `startedAt` is presumably an epoch timestamp and `durationMs` a
+ * duration in milliseconds (going by the name) — confirm against the registry
+ * implementation.
+ */
+interface WorkflowRunSummary {
+    id: string;
+    name: string;
+    status: "ok" | "error";
+    startedAt: number;
+    durationMs: number;
+    stepCount: number;
+}
+/**
+ * Workflow run detail — the shape returned by `GET /v1/workflows/:id`.
+ * Extends `WorkflowRunSummary` with the step trace, optional output, and
+ * optional error message. Runs ingested via `ServerHandle.recordWorkflowError`
+ * carry the partial trace and the error message with `status: "error"`.
+ */
+interface WorkflowRunDetail extends WorkflowRunSummary {
+    trace: StepTrace[];
+    output?: unknown;
+    error?: string;
+}
+/**
+ * Programmatic handle returned by `createServer`.
+ *
+ * `handle.recordWorkflow(name, result)` ingests a completed workflow run into
+ * the server's registry, making it queryable via the workflow endpoints.
+ * `handle.recordWorkflowError(err)` does the same for a failed run, which is
+ * recorded with `status: "error"`.
+ * Both return the generated record `id` so callers can reference it immediately.
+ */
+interface ServerHandle {
+    /**
+     * Ingest a completed workflow run.
+     *
+     * @param name   — human-readable workflow name
+     * @param result — `WorkflowResult<O>` returned by `workflow.run()`
+     * @param owner  — optional principal to attach for per-tenant filtering
+     * @param opts   — optional record options (e.g. the workflow `version`)
+     * @returns the generated record id
+     */
+    recordWorkflow<O>(name: string, result: WorkflowResult<O>, owner?: WorkflowRunOwner, opts?: RecordOptions): string;
+    /**
+     * Ingest a failed workflow run from a `WorkflowRunError`.
+     * Records the partial step trace and error message so crashed runs appear
+     * in the workflow run registry with `status: "error"`.
+     *
+     * @param err   — `WorkflowRunError` caught from `workflow.run()`
+     * @param owner — optional principal to attach for per-tenant filtering
+     * @param opts  — optional record options (e.g. the workflow `version`)
+     * @returns the generated record id
+     */
+    recordWorkflowError(err: WorkflowRunError, owner?: WorkflowRunOwner, opts?: RecordOptions): string;
+}
+/**
+ * Return value of `createServer`.
+ *
+ * Extends Hono so existing `const app = createServer(...)` usage remains valid:
+ * `app.request(...)`, `app.fetch`, etc. all work as before.
+ * `app.handle` is the programmatic ingestion surface (see `ServerHandle`),
+ * exposed as an extra property alongside the Hono API.
+ */
+type EidenticServer = Hono & {
+    handle: ServerHandle;
+};
+/**
+ * Payload sent to a callbackUrl on run completion.
+ *
+ * `status` is limited to `"completed"` or `"failed"`. `output` and `error` are
+ * optional — presumably `output` accompanies a completed run and `error` a
+ * failed one (confirm against the delivery code). `usage`, when present,
+ * carries the run's input/output token counts.
+ */
+interface WebhookPayload {
+    runId: string;
+    agentId: string;
+    status: "completed" | "failed";
+    output?: string;
+    error?: string;
+    usage?: {
+        inputTokens: number;
+        outputTokens: number;
+    };
+}
+declare function createServer(opts: ServerOptions): EidenticServer; // Builds the server app from `opts`; the returned Hono app also exposes `handle` for programmatic workflow ingestion.
+interface ServeNodeHandle {
+    close(): void; // Immediately closes the HTTP server, without draining in-flight runs.
+    /**
+     * Gracefully drain the server:
+     * 1. Stops accepting new connections.
+     * 2. Returns `503 Service Unavailable` (with `Retry-After: 5`) to any new
+     *    `/v1/*` requests that arrive while draining.
+     * 3. Waits until all in-flight async runs settle (polls every 100 ms), or
+     *    until `timeoutMs` elapses.
+     * 4. Calls `close()` to shut down the underlying HTTP server.
+     *
+     * @param timeoutMs — maximum time to wait for in-flight runs to settle.
+     *   Defaults to 30 000 ms (30 s).
+     */
+    drain(timeoutMs?: number): Promise<void>;
+}
+/**
+ * Serve a Hono app on Node.js using `@hono/node-server`.
+ * This is an optional convenience; install `@hono/node-server` separately.
+ * The core `createServer` return value is runtime-agnostic and works on any
+ * Hono-compatible runtime (Cloudflare Workers, Bun, Deno, etc.).
+ *
+ * Returns a `ServeNodeHandle` with:
+ * - `close()` — immediately close the HTTP server.
+ * - `drain(timeoutMs?)` — gracefully drain: stop accepting new connections,
+ *   return 503 to new `/v1/*` requests, wait for in-flight async runs to settle,
+ *   then close. Defaults to 30 s timeout.
+ *
+ * @param app  — the Hono app to serve
+ * @param opts — optional settings; `opts.port` selects the listening port
+ *   (the port used when it is omitted is not visible in this declaration)
+ * @returns a promise resolving to the `ServeNodeHandle` described above
+ */
+declare function serveNode(app: Hono, opts?: {
+    port?: number;
+}): Promise<ServeNodeHandle>;
+export { type AgentResolver, ApiKeyAuth, type AsyncRunEntry, AsyncRunRegistry, type AsyncRunStatus, type BatchAggregate, type BatchBackend, type BatchItem, type BatchItemError, type BatchItemResult, type BatchItemSuccess, type BatchResult, type BatchRunOptions, BatchRunner, type BatchRunnerOptions, type ClockPort, type CronSchedule, type EidenticServer, InMemoryQuota, InMemoryTokenBucketLimiter, type IntervalSchedule, NoAuth, type OnProgress, type RunCallback, type RunContext, type Schedule, type ScheduledTask, Scheduler, type SchedulerOptions, type ServeNodeHandle, type ServerHandle, type ServerOptions, type TimerPort, type ToUIMessageStreamOptions, type TokenBucketOptions, type WebhookPayload, type WorkflowRunDetail, type WorkflowRunSummary, createServer, serveNode, toUIMessageStream, toUIMessageStreamResponse };