npm - @nightowlsdev/core - Versions diffs - 0.4.0 → 0.5.0 - Mend

@nightowlsdev/core 0.4.0 → 0.5.0

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/dist/index.d.cts CHANGED Viewed

@@ -321,6 +321,16 @@ type SwarmEvent = (EvBase & {
         result?: unknown;
         error?: string;
     };
+}) | (EvBase & {
+    type: "swarm.client_action";
+    data: {
+        followupId: string;
+        toolCallId: string;
+        tool: string;
+        input: unknown;
+        needsApproval: boolean;
+        from: string;
+    };
 }) | (EvBase & {
     type: "swarm.question";
     data: {
@@ -346,6 +356,7 @@ type SwarmEvent = (EvBase & {
         modelId: string;
         breakdown: UsageBreakdown;
         cost: UsageCost;
+        generationId: string;
     };
 }) | (EvBase & {
     type: "swarm.turn_usage";
@@ -694,6 +705,24 @@ interface MessageStore {
      *  `engine.history` (Mastra recall, scoped by resourceId=tenant:user). Kept for contract consistency. */
     history(tenantId: string, threadId: string, limit?: number): Promise<SwarmMessage[]>;
 }
+/** The thread (conversation container) a run references. `runs.thread_id` is a NOT NULL FK to this row, so the
+ *  row MUST exist before the first run/message of a conversation. `ThreadStore.ensure` is the supported,
+ *  schema-private way to create it idempotently — without it a host has to reach into the engine's raw pool and
+ *  hardcode `nightowls.threads`'s columns (FR-009). */
+interface ThreadStore {
+    /**
+     * Idempotently create the thread row a run will reference (insert-or-ignore on `id`). Safe to call before
+     * every run. `orgId` is the tenant; `userId` the conversation owner; `projectId` an optional host-owned
+     * sub-scope. Never throws on a pre-existing row. The engine calls this at run start when the adapter provides
+     * it, so `messages.append` cannot throw `unknown thread` through the supported path.
+     */
+    ensure(spec: {
+        id: string;
+        orgId: string;
+        userId: string;
+        projectId?: string;
+    }): Promise<void>;
+}
 /** Scratchpad caps (intentionally lossy — working memory, not a system of record). */
 declare const SCRATCHPAD_MAX_ENTRY_CHARS = 4000;
 declare const SCRATCHPAD_MAX_KEYS = 64;
@@ -795,6 +824,10 @@ interface StorageAdapter {
     runs: RunStore;
     events: EventStore;
     messages: MessageStore;
+    /** Opt-in thread (conversation container) creation (FR-009). When present, the engine idempotently ensures the
+     *  run's thread row exists before the first message/event write, so a host need not pre-create it with raw SQL.
+     *  Optional so existing adapters keep compiling; a host whose threads are externally managed may omit it. */
+    threads?: ThreadStore;
     /** Opt-in container scratchpad (Part D). Optional so existing adapters keep compiling. */
     scratchpad?: ScratchpadStore;
     /**
@@ -823,6 +856,12 @@ interface StorageAdapter {
      * is the only staleness bound there). The supabase adapter uses Postgres LISTEN/NOTIFY.
      */
     subscribeInvalidations?(onInvalidate: (key: string) => void): () => void;
+    /**
+     * FR-016 — enumerate the engine's tenants (org ids) so a host can idempotently backfill every tenant's crew
+     * (`engine.run` resolves agents per-tenant from the DB, so each tenant must be seeded before its first run).
+     * Optional: a read-only / in-memory adapter may omit it. The supabase adapter reads `nightowls.orgs`.
+     */
+    listTenants?(): Promise<string[]>;
 }
 interface ModelProvider {
     resolve(modelId: string, ctx: {
@@ -939,6 +978,23 @@ declare function ev<T extends SwarmEvent["type"]>(type: T, base: {
 }, data: ByType<T>["data"]): ByType<T>;
 declare function isEvent<T extends SwarmEvent["type"]>(e: SwarmEvent, type: T): e is ByType<T>;
+/**
+ * The per-run mutable store a first-party tool sees on `SwarmToolContext.state`. A simple keyed bag plus an
+ * `entries()` snapshot the `onRunEnd` drain reads. Reads of an unset key yield `undefined`; it never throws.
+ */
+interface RunStateHandle {
+    get<T = unknown>(key: string): T | undefined;
+    set(key: string, value: unknown): void;
+    has(key: string): boolean;
+    delete(key: string): boolean;
+    /** A point-in-time DEEP snapshot of all current entries — what `onRunEnd` flushes and the engine persists into
+     *  the run snapshot. Deep-copied so a later in-place mutation of a stored object can't corrupt an already-taken
+     *  snapshot. Values must be JSON-serializable (they ride the run snapshot). */
+    entries(): Record<string, unknown>;
+}
+/** Build a fresh, Map-backed run-state handle. `seed` (e.g. from `onRunStart`) pre-populates it. */
+declare function createRunState(seed?: Record<string, unknown>): RunStateHandle;
 /** Identifies who holds (or is queued for) a container's floor — surfaced in the "waiting for X" indicator. */
 interface FloorHolder {
     /** Display name of the holding run's lane agent, e.g. "Coordinator". */
@@ -1168,6 +1224,27 @@ interface EngineOpts {
      * fallback nudge is used instead.
      */
     verifyCompletion?: CompletionVerifier;
+    /**
+     * FR-003 — per-run lifecycle hooks (fired on `run()`). `onRunStart` is awaited once when a run begins (after the
+     * run row + thread are ensured, before the first model launch), receiving the run's `input` and a fresh
+     * `RunStateHandle` to seed (e.g. from `input.context`). `onRunEnd` is awaited once at the run's terminal boundary
+     * (done / failed / suspended / thrown / abandoned — it fires from the run's `finally`), receiving the final
+     * `state` and the `outcome`, so a host can flush/persist deterministically. Both are FAIL-SAFE (a throw is
+     * swallowed + logged, never breaking the run). `state` is the SAME handle the run's tools saw via `ctx.state`.
+     * `onRunEnd` fires at EACH segment's terminal: a suspend ends the run() segment with outcome `"suspended"`, and
+     * the resume segment fires its own `onRunEnd` (`"done"`/`"failed"`/`"suspended"`) — a host keys billing/persist
+     * on the terminal outcome. A `resume` RESTORES the per-run `state` persisted at suspend (so a client-tool /
+     * evolving-document flow keeps state across the answer); `onRunStart` fires on `run()` only. State values must be
+     * JSON-serializable (they ride the run snapshot). Omit ⇒ no-op.
+     */
+    onRunStart?: (ctx: SwarmContext, info: {
+        input: RunInput;
+        state: RunStateHandle;
+    }) => void | Promise<void>;
+    onRunEnd?: (ctx: SwarmContext, info: {
+        state: RunStateHandle;
+        outcome: "done" | "failed" | "suspended";
+    }) => void | Promise<void>;
 }
 declare class SwarmEngine {
     private opts;
@@ -1356,6 +1433,14 @@ interface SwarmToolContext {
      * back-compat with code that constructs a bare ctx; the engine always populates it.
      */
     secrets?: BoundSecrets;
+    /**
+     * FR-003 — the per-run mutable state store. The SAME handle across every tool call in this run (orchestrator
+     * AND delegated sub-agents), so a chain of tools (`addPrimitive → addStageZone → …`) can read the previous
+     * tool's result and write the next. Seeded by `SwarmConfig.onRunStart` (e.g. from the client payload), drained
+     * by `onRunEnd`. GC'd with the run — no module-level registry. Optional in the type only for back-compat with a
+     * hand-constructed bare ctx; the engine always populates it.
+     */
+    state?: RunStateHandle;
 }
 interface SwarmTool {
     name: string;
@@ -1365,6 +1450,20 @@ interface SwarmTool {
 type SwarmSkill = SwarmTool;
 declare function defineTool<I, O>(spec: ToolSpec<I, O>): SwarmTool;
 declare function defineSkill(tool: SwarmTool): SwarmSkill;
+interface ClientToolSpec<I, O> {
+    name: string;
+    description?: string;
+    inputSchema: z.ZodType<I>;
+    outputSchema?: z.ZodType<O>;
+    /** Ask the UI to confirm before the client runs the action (the handler renders an approval step). Default false. */
+    needsApproval?: boolean;
+}
+/** Thrown when a client tool's handler reports an error (the browser posted `{ error }`) — surfaces as a failed
+ *  tool_result so the model sees the action did not succeed. */
+declare class ClientToolError extends Error {
+    constructor(toolName: string, reason?: string);
+}
+declare function defineClientTool<I, O>(spec: ClientToolSpec<I, O>): SwarmTool;
 interface AgentSpec {
     slug: string;
     role?: "orchestrator" | "specialist";
@@ -1543,6 +1642,15 @@ interface SwarmConfig {
      * Omit ⇒ the cheap structural "did the root speak last?" fallback nudge.
      */
     verifyCompletion?: CompletionVerifier;
+    /**
+     * FR-003 — per-run lifecycle hooks. `onRunStart(ctx, { input, state })` seeds the run's mutable `ctx.state`
+     * store (e.g. `state.set("sceneCode", input.context?.currentCode ?? "")`) once before the first generation;
+     * `onRunEnd(ctx, { state, outcome })` drains it deterministically at the run's terminal boundary (done /
+     * failed / suspended / thrown). The SAME `state` handle flows into every tool call of the run (orchestrator +
+     * delegated sub-agents) via `ctx.state`. Both are FAIL-SAFE. Omit ⇒ no per-run state seeding/draining.
+     */
+    onRunStart?: EngineOpts["onRunStart"];
+    onRunEnd?: EngineOpts["onRunEnd"];
     /**
      * Declarative conditional policy (Phase A). `advise` rules are injected into the system prompt; `enforce`
      * rules compile into the decision hooks (deny/ask), folding in the non-removable SP5 policy floor. Per-agent
@@ -1629,6 +1737,64 @@ declare class RowCache<V> {
     invalidate(key: string): void;
 }
+/** Drain an `engine.run(...)` (an `AsyncIterable<SwarmEvent>`) into the eval-shaped trajectory + final output. The
+ *  output is the concatenation of the assistant `swarm.message` texts (the model's visible reply), in stream order. */
+declare function drainTrajectory(stream: AsyncIterable<SwarmEvent>): Promise<{
+    events: SwarmEvent[];
+    output: string;
+}>;
+/** FR-018 — run one input through a built `Swarm`/`SwarmEngine` and return its full trajectory + output. This is
+ *  the seam a host hands to `@nightowlsdev/eval` (`RunAgent = (case) => Promise<{ events, output }>`): no SSE,
+ *  no engine internals. `ctx` defaults to an ephemeral local context when omitted. */
+declare function runToTrajectory(target: Swarm | SwarmEngine, input: RunInput | string, ctx?: Partial<SwarmContext> & {
+    agentSlug: string;
+}): Promise<{
+    events: SwarmEvent[];
+    output: string;
+}>;
+/** Options for {@link runAgent}. The model wiring is the only required part (the same `modelFactory` a swarm
+ *  uses); everything else mirrors the `SwarmConfig` fields and defaults to the single-agent/ephemeral case. */
+interface RunAgentOpts {
+    /** Same `(modelId, agentSlug?) => model` factory `defineSwarm` takes (e.g. `openaiModels()`). Required. */
+    modelFactory: SwarmConfig["modelFactory"];
+    /** Model allow-list + optional tier router. Defaults to allowing the agent's own `modelId` (+ tier models). */
+    models?: {
+        allow?: string[];
+        tier?: TierConfig;
+    };
+    /** Per-run caps. Defaults to a generous single-run budget (`{ maxSteps: 50, maxCostUsd: 10 }`). */
+    cost?: Partial<SwarmConfig["cost"]>;
+    /** Bring your own adapter (e.g. to inspect events afterwards). Defaults to a fresh `InMemoryStorage`. */
+    storage?: StorageAdapter;
+    /** Pass-throughs onto the one-agent SwarmConfig — identical semantics to `defineSwarm`. */
+    telemetry?: SwarmConfig["telemetry"];
+    memory?: SwarmConfig["memory"];
+    hooks?: SwarmConfig["hooks"];
+    toolApproval?: SwarmConfig["toolApproval"];
+    secrets?: SwarmConfig["secrets"];
+    onEvent?: SwarmConfig["onEvent"];
+    onRunStart?: SwarmConfig["onRunStart"];
+    onRunEnd?: SwarmConfig["onRunEnd"];
+    pageContext?: SwarmConfig["pageContext"];
+    mastraStore?: SwarmConfig["mastraStore"];
+    /** Override the ephemeral run context (tenantId/userId/run/thread ids). */
+    ctx?: Partial<SwarmContext>;
+}
+/** Build a one-agent `Swarm` from an `AgentDef` + options — the standalone equivalent of `defineSwarm` for a
+ *  single agent. Useful when you want the built engine (e.g. to call `resume`) rather than a one-shot run. */
+declare function buildSingleAgentSwarm(def: AgentDef, opts: RunAgentOpts): Swarm;
+/**
+ * FR-019 — run a single `AgentDef` to completion and return its trajectory + final output, with NO Supabase
+ * adapter and NO publish required. Honors tier resolution + cost cap + tool-approval (it builds the real engine).
+ *
+ * @example
+ *   const { output } = await runAgent(titleAgent, "Summarize: …", { modelFactory: openaiModels() });
+ */
+declare function runAgent(def: AgentDef, input: RunInput | string, opts: RunAgentOpts): Promise<{
+    events: SwarmEvent[];
+    output: string;
+}>;
 declare class InMemoryStorage implements StorageAdapter {
     private evts;
     private seq;
@@ -1640,6 +1806,7 @@ declare class InMemoryStorage implements StorageAdapter {
     private agentRows;
     private heads;
     private pads;
+    private threadRows;
     seedAgent(v: AgentVersion, tenantId?: string): void;
     recordSuspend(runId: string, tenantId: string, followupId: string, toolCallId: string): void;
     markFollowupAnswered(followupId: string, tenantId: string): boolean;
@@ -1647,6 +1814,14 @@ declare class InMemoryStorage implements StorageAdapter {
     getRun(runId: string): RunRow | undefined;
     events: EventStore;
     runs: RunStore;
+    threads: ThreadStore;
+    /** Test/host helper: read a recorded thread row. */
+    getThread(id: string): {
+        id: string;
+        orgId: string;
+        userId: string;
+        projectId?: string;
+    } | undefined;
     messages: MessageStore;
     scratchpad: ScratchpadStore;
     agents: AgentRepo;
@@ -1710,4 +1885,4 @@ declare function rateConfig(max: number | undefined, windowSec: number, fallback
 declare const VERSION = "0.0.0";
-export { ASK_TOOL_NAME, type ActiveRun, type AgentDef, type AgentMemoryOverride, AgentMutationForbidden, type AgentRepo, type AgentSpec, type AgentSummary, type AgentVersion, type AgentVersionContent, type AgentVersionInfo, type AskField, type AskFieldOption, type AuthContext, type AuthProvider, type BoundSecrets, type BundleDef, type BundleDep, type BundleRepo, type BundleSpec, type BundleVersion, type BundleVersionContent, type BundleVersionInfo, type BundleWritableRepo, CapturingExporter, type CompletionVerdict, type CompletionVerifier, type ConnectorGrant, type ContainerFloor, CostGovernor, DelegateBudgets, type EngineOpts, type EventStore, type FloorHolder, GUARDRAILS, InMemoryContainerFloor, InMemoryStorage, type MemoryConfig, type MessageStore, type ModelProvider, type ModelRef, type ModelTier, type NewRun, PRICE_TABLE, type PerDelegateBudget, type Price, type PriceFeed, type PricingOpts, type RateLimitConfig, type RateLimitDecision, type RateLimitState, type RateLimitStore, type Release, ReserveDenied, RowCache, type RuleAction, type RuleCondition, type RuleDef, type RuleLevel, type RuleSpec, type RunInput, type RunRow, type RunStatus, type RunStore, type Runner, SCRATCHPAD_MAX_ENTRY_CHARS, SCRATCHPAD_MAX_KEYS, type ScratchpadEntry, type ScratchpadStore, type SecretResolver, type SlugUsage, SpanCollector, type StorageAdapter, type Swarm, type SwarmActor, type SwarmConfig, type SwarmContext, SwarmEngine, type SwarmEvent, type SwarmMessage, type SwarmSkill, type SwarmSpan, type SwarmTool, type SwarmToolContext, type TelemetryExporter, type ThreadSummary, type TierConfig, type TierEscalationContext, type TierResolution, type ToolSpec, type TurnUsage, type UsageBreakdown, type UsageCost, VERSION, type VersionedRepo, type WorkflowCompliance, type WorkflowDef, type WorkflowRef, type WorkflowRunState, type WorkflowSpec, type WorkflowStep, type WorkflowTransition, allowListModelProvider, assertActorMayMutateDefinition, buildSkillResolver, composePolicyPrompt, 
composeSystemPrompt, compositeTelemetry, containerFloor, createInMemoryRateLimitStore, customAuth, customTelemetry, decideFixedWindow, defineAgent, defineBundle, defineRule, defineSkill, defineSwarm, defineTool, defineWorkflow, ev, isEvent, isTierSentinel, mergeBundle, priceUsage, rateConfig, resolveTelemetry, resolveTier, sumBreakdowns, sumTurnUsage, tierModelId, toBundleContent };
+export { ASK_TOOL_NAME, type ActiveRun, type AgentDef, type AgentMemoryOverride, AgentMutationForbidden, type AgentRepo, type AgentSpec, type AgentSummary, type AgentVersion, type AgentVersionContent, type AgentVersionInfo, type AskField, type AskFieldOption, type AuthContext, type AuthProvider, type BoundSecrets, type BundleDef, type BundleDep, type BundleRepo, type BundleSpec, type BundleVersion, type BundleVersionContent, type BundleVersionInfo, type BundleWritableRepo, CapturingExporter, ClientToolError, type ClientToolSpec, type CompletionVerdict, type CompletionVerifier, type ConnectorGrant, type ContainerFloor, CostGovernor, DelegateBudgets, type EngineOpts, type EventStore, type FloorHolder, GUARDRAILS, InMemoryContainerFloor, InMemoryStorage, type MemoryConfig, type MessageStore, type ModelProvider, type ModelRef, type ModelTier, type NewRun, PRICE_TABLE, type PerDelegateBudget, type Price, type PriceFeed, type PricingOpts, type RateLimitConfig, type RateLimitDecision, type RateLimitState, type RateLimitStore, type Release, ReserveDenied, RowCache, type RuleAction, type RuleCondition, type RuleDef, type RuleLevel, type RuleSpec, type RunAgentOpts, type RunInput, type RunRow, type RunStateHandle, type RunStatus, type RunStore, type Runner, SCRATCHPAD_MAX_ENTRY_CHARS, SCRATCHPAD_MAX_KEYS, type ScratchpadEntry, type ScratchpadStore, type SecretResolver, type SlugUsage, SpanCollector, type StorageAdapter, type Swarm, type SwarmActor, type SwarmConfig, type SwarmContext, SwarmEngine, type SwarmEvent, type SwarmMessage, type SwarmSkill, type SwarmSpan, type SwarmTool, type SwarmToolContext, type TelemetryExporter, type ThreadStore, type ThreadSummary, type TierConfig, type TierEscalationContext, type TierResolution, type ToolSpec, type TurnUsage, type UsageBreakdown, type UsageCost, VERSION, type VersionedRepo, type WorkflowCompliance, type WorkflowDef, type WorkflowRef, type WorkflowRunState, type WorkflowSpec, type WorkflowStep, type WorkflowTransition, 
allowListModelProvider, assertActorMayMutateDefinition, buildSingleAgentSwarm, buildSkillResolver, composePolicyPrompt, composeSystemPrompt, compositeTelemetry, containerFloor, createInMemoryRateLimitStore, createRunState, customAuth, customTelemetry, decideFixedWindow, defineAgent, defineBundle, defineClientTool, defineRule, defineSkill, defineSwarm, defineTool, defineWorkflow, drainTrajectory, ev, isEvent, isTierSentinel, mergeBundle, priceUsage, rateConfig, resolveTelemetry, resolveTier, runAgent, runToTrajectory, sumBreakdowns, sumTurnUsage, tierModelId, toBundleContent };

package/dist/index.d.ts CHANGED Viewed

@@ -321,6 +321,16 @@ type SwarmEvent = (EvBase & {
         result?: unknown;
         error?: string;
     };
+}) | (EvBase & {
+    type: "swarm.client_action";
+    data: {
+        followupId: string;
+        toolCallId: string;
+        tool: string;
+        input: unknown;
+        needsApproval: boolean;
+        from: string;
+    };
 }) | (EvBase & {
     type: "swarm.question";
     data: {
@@ -346,6 +356,7 @@ type SwarmEvent = (EvBase & {
         modelId: string;
         breakdown: UsageBreakdown;
         cost: UsageCost;
+        generationId: string;
     };
 }) | (EvBase & {
     type: "swarm.turn_usage";
@@ -694,6 +705,24 @@ interface MessageStore {
      *  `engine.history` (Mastra recall, scoped by resourceId=tenant:user). Kept for contract consistency. */
     history(tenantId: string, threadId: string, limit?: number): Promise<SwarmMessage[]>;
 }
+/** The thread (conversation container) a run references. `runs.thread_id` is a NOT NULL FK to this row, so the
+ *  row MUST exist before the first run/message of a conversation. `ThreadStore.ensure` is the supported,
+ *  schema-private way to create it idempotently — without it a host has to reach into the engine's raw pool and
+ *  hardcode `nightowls.threads`'s columns (FR-009). */
+interface ThreadStore {
+    /**
+     * Idempotently create the thread row a run will reference (insert-or-ignore on `id`). Safe to call before
+     * every run. `orgId` is the tenant; `userId` the conversation owner; `projectId` an optional host-owned
+     * sub-scope. Never throws on a pre-existing row. The engine calls this at run start when the adapter provides
+     * it, so `messages.append` cannot throw `unknown thread` through the supported path.
+     */
+    ensure(spec: {
+        id: string;
+        orgId: string;
+        userId: string;
+        projectId?: string;
+    }): Promise<void>;
+}
 /** Scratchpad caps (intentionally lossy — working memory, not a system of record). */
 declare const SCRATCHPAD_MAX_ENTRY_CHARS = 4000;
 declare const SCRATCHPAD_MAX_KEYS = 64;
@@ -795,6 +824,10 @@ interface StorageAdapter {
     runs: RunStore;
     events: EventStore;
     messages: MessageStore;
+    /** Opt-in thread (conversation container) creation (FR-009). When present, the engine idempotently ensures the
+     *  run's thread row exists before the first message/event write, so a host need not pre-create it with raw SQL.
+     *  Optional so existing adapters keep compiling; a host whose threads are externally managed may omit it. */
+    threads?: ThreadStore;
     /** Opt-in container scratchpad (Part D). Optional so existing adapters keep compiling. */
     scratchpad?: ScratchpadStore;
     /**
@@ -823,6 +856,12 @@ interface StorageAdapter {
      * is the only staleness bound there). The supabase adapter uses Postgres LISTEN/NOTIFY.
      */
     subscribeInvalidations?(onInvalidate: (key: string) => void): () => void;
+    /**
+     * FR-016 — enumerate the engine's tenants (org ids) so a host can idempotently backfill every tenant's crew
+     * (`engine.run` resolves agents per-tenant from the DB, so each tenant must be seeded before its first run).
+     * Optional: a read-only / in-memory adapter may omit it. The supabase adapter reads `nightowls.orgs`.
+     */
+    listTenants?(): Promise<string[]>;
 }
 interface ModelProvider {
     resolve(modelId: string, ctx: {
@@ -939,6 +978,23 @@ declare function ev<T extends SwarmEvent["type"]>(type: T, base: {
 }, data: ByType<T>["data"]): ByType<T>;
 declare function isEvent<T extends SwarmEvent["type"]>(e: SwarmEvent, type: T): e is ByType<T>;
+/**
+ * The per-run mutable store a first-party tool sees on `SwarmToolContext.state`. A simple keyed bag plus an
+ * `entries()` snapshot the `onRunEnd` drain reads. Reads of an unset key yield `undefined`; it never throws.
+ */
+interface RunStateHandle {
+    get<T = unknown>(key: string): T | undefined;
+    set(key: string, value: unknown): void;
+    has(key: string): boolean;
+    delete(key: string): boolean;
+    /** A point-in-time DEEP snapshot of all current entries — what `onRunEnd` flushes and the engine persists into
+     *  the run snapshot. Deep-copied so a later in-place mutation of a stored object can't corrupt an already-taken
+     *  snapshot. Values must be JSON-serializable (they ride the run snapshot). */
+    entries(): Record<string, unknown>;
+}
+/** Build a fresh, Map-backed run-state handle. `seed` (e.g. from `onRunStart`) pre-populates it. */
+declare function createRunState(seed?: Record<string, unknown>): RunStateHandle;
 /** Identifies who holds (or is queued for) a container's floor — surfaced in the "waiting for X" indicator. */
 interface FloorHolder {
     /** Display name of the holding run's lane agent, e.g. "Coordinator". */
@@ -1168,6 +1224,27 @@ interface EngineOpts {
      * fallback nudge is used instead.
      */
     verifyCompletion?: CompletionVerifier;
+    /**
+     * FR-003 — per-run lifecycle hooks (fired on `run()`). `onRunStart` is awaited once when a run begins (after the
+     * run row + thread are ensured, before the first model launch), receiving the run's `input` and a fresh
+     * `RunStateHandle` to seed (e.g. from `input.context`). `onRunEnd` is awaited once at the run's terminal boundary
+     * (done / failed / suspended / thrown / abandoned — it fires from the run's `finally`), receiving the final
+     * `state` and the `outcome`, so a host can flush/persist deterministically. Both are FAIL-SAFE (a throw is
+     * swallowed + logged, never breaking the run). `state` is the SAME handle the run's tools saw via `ctx.state`.
+     * `onRunEnd` fires at EACH segment's terminal: a suspend ends the run() segment with outcome `"suspended"`, and
+     * the resume segment fires its own `onRunEnd` (`"done"`/`"failed"`/`"suspended"`) — a host keys billing/persist
+     * on the terminal outcome. A `resume` RESTORES the per-run `state` persisted at suspend (so a client-tool /
+     * evolving-document flow keeps state across the answer); `onRunStart` fires on `run()` only. State values must be
+     * JSON-serializable (they ride the run snapshot). Omit ⇒ no-op.
+     */
+    onRunStart?: (ctx: SwarmContext, info: {
+        input: RunInput;
+        state: RunStateHandle;
+    }) => void | Promise<void>;
+    onRunEnd?: (ctx: SwarmContext, info: {
+        state: RunStateHandle;
+        outcome: "done" | "failed" | "suspended";
+    }) => void | Promise<void>;
 }
 declare class SwarmEngine {
     private opts;
@@ -1356,6 +1433,14 @@ interface SwarmToolContext {
      * back-compat with code that constructs a bare ctx; the engine always populates it.
      */
     secrets?: BoundSecrets;
+    /**
+     * FR-003 — the per-run mutable state store. The SAME handle across every tool call in this run (orchestrator
+     * AND delegated sub-agents), so a chain of tools (`addPrimitive → addStageZone → …`) can read the previous
+     * tool's result and write the next. Seeded by `SwarmConfig.onRunStart` (e.g. from the client payload), drained
+     * by `onRunEnd`. GC'd with the run — no module-level registry. Optional in the type only for back-compat with a
+     * hand-constructed bare ctx; the engine always populates it.
+     */
+    state?: RunStateHandle;
 }
 interface SwarmTool {
     name: string;
@@ -1365,6 +1450,20 @@ interface SwarmTool {
 type SwarmSkill = SwarmTool;
 declare function defineTool<I, O>(spec: ToolSpec<I, O>): SwarmTool;
 declare function defineSkill(tool: SwarmTool): SwarmSkill;
+interface ClientToolSpec<I, O> {
+    name: string;
+    description?: string;
+    inputSchema: z.ZodType<I>;
+    outputSchema?: z.ZodType<O>;
+    /** Ask the UI to confirm before the client runs the action (the handler renders an approval step). Default false. */
+    needsApproval?: boolean;
+}
+/** Thrown when a client tool's handler reports an error (the browser posted `{ error }`) — surfaces as a failed
+ *  tool_result so the model sees the action did not succeed. */
+declare class ClientToolError extends Error {
+    constructor(toolName: string, reason?: string);
+}
+declare function defineClientTool<I, O>(spec: ClientToolSpec<I, O>): SwarmTool;
 interface AgentSpec {
     slug: string;
     role?: "orchestrator" | "specialist";
@@ -1543,6 +1642,15 @@ interface SwarmConfig {
      * Omit ⇒ the cheap structural "did the root speak last?" fallback nudge.
      */
     verifyCompletion?: CompletionVerifier;
+    /**
+     * FR-003 — per-run lifecycle hooks. `onRunStart(ctx, { input, state })` seeds the run's mutable `ctx.state`
+     * store (e.g. `state.set("sceneCode", input.context?.currentCode ?? "")`) once before the first generation;
+     * `onRunEnd(ctx, { state, outcome })` drains it deterministically at the run's terminal boundary (done /
+     * failed / suspended / thrown). The SAME `state` handle flows into every tool call of the run (orchestrator +
+     * delegated sub-agents) via `ctx.state`. Both are FAIL-SAFE. Omit ⇒ no per-run state seeding/draining.
+     */
+    onRunStart?: EngineOpts["onRunStart"];
+    onRunEnd?: EngineOpts["onRunEnd"];
     /**
      * Declarative conditional policy (Phase A). `advise` rules are injected into the system prompt; `enforce`
      * rules compile into the decision hooks (deny/ask), folding in the non-removable SP5 policy floor. Per-agent
@@ -1629,6 +1737,64 @@ declare class RowCache<V> {
     invalidate(key: string): void;
 }
+/** Drain an `engine.run(...)` (an `AsyncIterable<SwarmEvent>`) into the eval-shaped trajectory + final output. The
+ *  output is the concatenation of the assistant `swarm.message` texts (the model's visible reply), in stream order. */
+declare function drainTrajectory(stream: AsyncIterable<SwarmEvent>): Promise<{
+    events: SwarmEvent[];
+    output: string;
+}>;
+/** FR-018 — run one input through a built `Swarm`/`SwarmEngine` and return its full trajectory + output. This is
+ *  the seam a host hands to `@nightowlsdev/eval` (`RunAgent = (case) => Promise<{ events, output }>`): no SSE,
+ *  no engine internals. `ctx` defaults to an ephemeral local context when omitted. */
+declare function runToTrajectory(target: Swarm | SwarmEngine, input: RunInput | string, ctx?: Partial<SwarmContext> & {
+    agentSlug: string;
+}): Promise<{
+    events: SwarmEvent[];
+    output: string;
+}>;
+/** Options for {@link runAgent}. The model wiring is the only required part (the same `modelFactory` a swarm
+ *  uses); everything else mirrors the `SwarmConfig` fields and defaults to the single-agent/ephemeral case. */
+interface RunAgentOpts {
+    /** Same `(modelId, agentSlug?) => model` factory `defineSwarm` takes (e.g. `openaiModels()`). Required. */
+    modelFactory: SwarmConfig["modelFactory"];
+    /** Model allow-list + optional tier router. Defaults to allowing the agent's own `modelId` (+ tier models). */
+    models?: {
+        allow?: string[];
+        tier?: TierConfig;
+    };
+    /** Per-run caps. Defaults to a generous single-run budget (`{ maxSteps: 50, maxCostUsd: 10 }`). */
+    cost?: Partial<SwarmConfig["cost"]>;
+    /** Bring your own adapter (e.g. to inspect events afterwards). Defaults to a fresh `InMemoryStorage`. */
+    storage?: StorageAdapter;
+    /** Pass-throughs onto the one-agent SwarmConfig — identical semantics to `defineSwarm`. */
+    telemetry?: SwarmConfig["telemetry"];
+    memory?: SwarmConfig["memory"];
+    hooks?: SwarmConfig["hooks"];
+    toolApproval?: SwarmConfig["toolApproval"];
+    secrets?: SwarmConfig["secrets"];
+    onEvent?: SwarmConfig["onEvent"];
+    onRunStart?: SwarmConfig["onRunStart"];
+    onRunEnd?: SwarmConfig["onRunEnd"];
+    pageContext?: SwarmConfig["pageContext"];
+    mastraStore?: SwarmConfig["mastraStore"];
+    /** Override the ephemeral run context (tenantId/userId/run/thread ids). */
+    ctx?: Partial<SwarmContext>;
+}
+/** Build a one-agent `Swarm` from an `AgentDef` + options — the standalone equivalent of `defineSwarm` for a
+ *  single agent. Useful when you want the built engine (e.g. to call `resume`) rather than a one-shot run. */
+declare function buildSingleAgentSwarm(def: AgentDef, opts: RunAgentOpts): Swarm;
+/**
+ * FR-019 — run a single `AgentDef` to completion and return its trajectory + final output, with NO Supabase
+ * adapter and NO publish required. Honors tier resolution + cost cap + tool-approval (it builds the real engine).
+ *
+ * @example
+ *   const { output } = await runAgent(titleAgent, "Summarize: …", { modelFactory: openaiModels() });
+ */
+declare function runAgent(def: AgentDef, input: RunInput | string, opts: RunAgentOpts): Promise<{
+    events: SwarmEvent[];
+    output: string;
+}>;
 declare class InMemoryStorage implements StorageAdapter {
     private evts;
     private seq;
@@ -1640,6 +1806,7 @@ declare class InMemoryStorage implements StorageAdapter {
     private agentRows;
     private heads;
     private pads;
+    private threadRows;
     seedAgent(v: AgentVersion, tenantId?: string): void;
     recordSuspend(runId: string, tenantId: string, followupId: string, toolCallId: string): void;
     markFollowupAnswered(followupId: string, tenantId: string): boolean;
@@ -1647,6 +1814,14 @@ declare class InMemoryStorage implements StorageAdapter {
     getRun(runId: string): RunRow | undefined;
     events: EventStore;
     runs: RunStore;
+    threads: ThreadStore;
+    /** Test/host helper: read the recorded thread row for `id` (`orgId` is the tenant, `userId` the conversation owner). */
+    getThread(id: string): {
+        id: string;
+        orgId: string;
+        userId: string;
+        projectId?: string;
+    } | undefined; // NOTE(review): presumably `undefined` when no row was recorded for `id` — confirm
     messages: MessageStore;
     scratchpad: ScratchpadStore;
     agents: AgentRepo;
@@ -1710,4 +1885,4 @@ declare function rateConfig(max: number | undefined, windowSec: number, fallback
 declare const VERSION = "0.0.0";
-export { ASK_TOOL_NAME, type ActiveRun, type AgentDef, type AgentMemoryOverride, AgentMutationForbidden, type AgentRepo, type AgentSpec, type AgentSummary, type AgentVersion, type AgentVersionContent, type AgentVersionInfo, type AskField, type AskFieldOption, type AuthContext, type AuthProvider, type BoundSecrets, type BundleDef, type BundleDep, type BundleRepo, type BundleSpec, type BundleVersion, type BundleVersionContent, type BundleVersionInfo, type BundleWritableRepo, CapturingExporter, type CompletionVerdict, type CompletionVerifier, type ConnectorGrant, type ContainerFloor, CostGovernor, DelegateBudgets, type EngineOpts, type EventStore, type FloorHolder, GUARDRAILS, InMemoryContainerFloor, InMemoryStorage, type MemoryConfig, type MessageStore, type ModelProvider, type ModelRef, type ModelTier, type NewRun, PRICE_TABLE, type PerDelegateBudget, type Price, type PriceFeed, type PricingOpts, type RateLimitConfig, type RateLimitDecision, type RateLimitState, type RateLimitStore, type Release, ReserveDenied, RowCache, type RuleAction, type RuleCondition, type RuleDef, type RuleLevel, type RuleSpec, type RunInput, type RunRow, type RunStatus, type RunStore, type Runner, SCRATCHPAD_MAX_ENTRY_CHARS, SCRATCHPAD_MAX_KEYS, type ScratchpadEntry, type ScratchpadStore, type SecretResolver, type SlugUsage, SpanCollector, type StorageAdapter, type Swarm, type SwarmActor, type SwarmConfig, type SwarmContext, SwarmEngine, type SwarmEvent, type SwarmMessage, type SwarmSkill, type SwarmSpan, type SwarmTool, type SwarmToolContext, type TelemetryExporter, type ThreadSummary, type TierConfig, type TierEscalationContext, type TierResolution, type ToolSpec, type TurnUsage, type UsageBreakdown, type UsageCost, VERSION, type VersionedRepo, type WorkflowCompliance, type WorkflowDef, type WorkflowRef, type WorkflowRunState, type WorkflowSpec, type WorkflowStep, type WorkflowTransition, allowListModelProvider, assertActorMayMutateDefinition, buildSkillResolver, composePolicyPrompt, 
composeSystemPrompt, compositeTelemetry, containerFloor, createInMemoryRateLimitStore, customAuth, customTelemetry, decideFixedWindow, defineAgent, defineBundle, defineRule, defineSkill, defineSwarm, defineTool, defineWorkflow, ev, isEvent, isTierSentinel, mergeBundle, priceUsage, rateConfig, resolveTelemetry, resolveTier, sumBreakdowns, sumTurnUsage, tierModelId, toBundleContent };
+export { ASK_TOOL_NAME, type ActiveRun, type AgentDef, type AgentMemoryOverride, AgentMutationForbidden, type AgentRepo, type AgentSpec, type AgentSummary, type AgentVersion, type AgentVersionContent, type AgentVersionInfo, type AskField, type AskFieldOption, type AuthContext, type AuthProvider, type BoundSecrets, type BundleDef, type BundleDep, type BundleRepo, type BundleSpec, type BundleVersion, type BundleVersionContent, type BundleVersionInfo, type BundleWritableRepo, CapturingExporter, ClientToolError, type ClientToolSpec, type CompletionVerdict, type CompletionVerifier, type ConnectorGrant, type ContainerFloor, CostGovernor, DelegateBudgets, type EngineOpts, type EventStore, type FloorHolder, GUARDRAILS, InMemoryContainerFloor, InMemoryStorage, type MemoryConfig, type MessageStore, type ModelProvider, type ModelRef, type ModelTier, type NewRun, PRICE_TABLE, type PerDelegateBudget, type Price, type PriceFeed, type PricingOpts, type RateLimitConfig, type RateLimitDecision, type RateLimitState, type RateLimitStore, type Release, ReserveDenied, RowCache, type RuleAction, type RuleCondition, type RuleDef, type RuleLevel, type RuleSpec, type RunAgentOpts, type RunInput, type RunRow, type RunStateHandle, type RunStatus, type RunStore, type Runner, SCRATCHPAD_MAX_ENTRY_CHARS, SCRATCHPAD_MAX_KEYS, type ScratchpadEntry, type ScratchpadStore, type SecretResolver, type SlugUsage, SpanCollector, type StorageAdapter, type Swarm, type SwarmActor, type SwarmConfig, type SwarmContext, SwarmEngine, type SwarmEvent, type SwarmMessage, type SwarmSkill, type SwarmSpan, type SwarmTool, type SwarmToolContext, type TelemetryExporter, type ThreadStore, type ThreadSummary, type TierConfig, type TierEscalationContext, type TierResolution, type ToolSpec, type TurnUsage, type UsageBreakdown, type UsageCost, VERSION, type VersionedRepo, type WorkflowCompliance, type WorkflowDef, type WorkflowRef, type WorkflowRunState, type WorkflowSpec, type WorkflowStep, type WorkflowTransition, 
allowListModelProvider, assertActorMayMutateDefinition, buildSingleAgentSwarm, buildSkillResolver, composePolicyPrompt, composeSystemPrompt, compositeTelemetry, containerFloor, createInMemoryRateLimitStore, createRunState, customAuth, customTelemetry, decideFixedWindow, defineAgent, defineBundle, defineClientTool, defineRule, defineSkill, defineSwarm, defineTool, defineWorkflow, drainTrajectory, ev, isEvent, isTierSentinel, mergeBundle, priceUsage, rateConfig, resolveTelemetry, resolveTier, runAgent, runToTrajectory, sumBreakdowns, sumTurnUsage, tierModelId, toBundleContent };