@nightowlsdev/core 0.4.0 → 0.5.0

This diff compares the published contents of two versions of this package, as released to one of the supported public registries. It is provided for informational purposes only and reflects the changes between those versions exactly as they appear in that registry.
package/dist/index.d.cts CHANGED
@@ -321,6 +321,16 @@ type SwarmEvent = (EvBase & {
321
321
  result?: unknown;
322
322
  error?: string;
323
323
  };
324
+ }) | (EvBase & {
325
+ type: "swarm.client_action";
326
+ data: {
327
+ followupId: string;
328
+ toolCallId: string;
329
+ tool: string;
330
+ input: unknown;
331
+ needsApproval: boolean;
332
+ from: string;
333
+ };
324
334
  }) | (EvBase & {
325
335
  type: "swarm.question";
326
336
  data: {
@@ -346,6 +356,7 @@ type SwarmEvent = (EvBase & {
346
356
  modelId: string;
347
357
  breakdown: UsageBreakdown;
348
358
  cost: UsageCost;
359
+ generationId: string;
349
360
  };
350
361
  }) | (EvBase & {
351
362
  type: "swarm.turn_usage";
@@ -694,6 +705,24 @@ interface MessageStore {
694
705
  * `engine.history` (Mastra recall, scoped by resourceId=tenant:user). Kept for contract consistency. */
695
706
  history(tenantId: string, threadId: string, limit?: number): Promise<SwarmMessage[]>;
696
707
  }
708
+ /** The thread (conversation container) a run references. `runs.thread_id` is a NOT NULL FK to this row, so the
709
+ * row MUST exist before the first run/message of a conversation. `ThreadStore.ensure` is the supported,
710
+ * schema-private way to create it idempotently — without it a host has to reach into the engine's raw pool and
711
+ * hardcode `nightowls.threads`'s columns (FR-009). */
712
+ interface ThreadStore {
713
+ /**
714
+ * Idempotently create the thread row a run will reference (insert-or-ignore on `id`). Safe to call before
715
+ * every run. `orgId` is the tenant; `userId` the conversation owner; `projectId` an optional host-owned
716
+ * sub-scope. Never throws on a pre-existing row. The engine calls this at run start when the adapter provides
717
+ * it, so `messages.append` cannot throw `unknown thread` through the supported path.
718
+ */
719
+ ensure(spec: {
720
+ id: string;
721
+ orgId: string;
722
+ userId: string;
723
+ projectId?: string;
724
+ }): Promise<void>;
725
+ }
697
726
  /** Scratchpad caps (intentionally lossy — working memory, not a system of record). */
698
727
  declare const SCRATCHPAD_MAX_ENTRY_CHARS = 4000;
699
728
  declare const SCRATCHPAD_MAX_KEYS = 64;
@@ -795,6 +824,10 @@ interface StorageAdapter {
795
824
  runs: RunStore;
796
825
  events: EventStore;
797
826
  messages: MessageStore;
827
+ /** Opt-in thread (conversation container) creation (FR-009). When present, the engine idempotently ensures the
828
+ * run's thread row exists before the first message/event write, so a host need not pre-create it with raw SQL.
829
+ * Optional so existing adapters keep compiling; a host whose threads are externally managed may omit it. */
830
+ threads?: ThreadStore;
798
831
  /** Opt-in container scratchpad (Part D). Optional so existing adapters keep compiling. */
799
832
  scratchpad?: ScratchpadStore;
800
833
  /**
@@ -823,6 +856,12 @@ interface StorageAdapter {
823
856
  * is the only staleness bound there). The supabase adapter uses Postgres LISTEN/NOTIFY.
824
857
  */
825
858
  subscribeInvalidations?(onInvalidate: (key: string) => void): () => void;
859
+ /**
860
+ * FR-016 — enumerate the engine's tenants (org ids) so a host can idempotently backfill every tenant's crew
861
+ * (`engine.run` resolves agents per-tenant from the DB, so each tenant must be seeded before its first run).
862
+ * Optional: a read-only / in-memory adapter may omit it. The supabase adapter reads `nightowls.orgs`.
863
+ */
864
+ listTenants?(): Promise<string[]>;
826
865
  }
827
866
  interface ModelProvider {
828
867
  resolve(modelId: string, ctx: {
@@ -939,6 +978,23 @@ declare function ev<T extends SwarmEvent["type"]>(type: T, base: {
939
978
  }, data: ByType<T>["data"]): ByType<T>;
940
979
  declare function isEvent<T extends SwarmEvent["type"]>(e: SwarmEvent, type: T): e is ByType<T>;
941
980
 
981
+ /**
982
+ * The per-run mutable store a first-party tool sees on `SwarmToolContext.state`. A simple keyed bag plus an
983
+ * `entries()` snapshot the `onRunEnd` drain reads. Reads of an unset key yield `undefined`; it never throws.
984
+ */
985
+ interface RunStateHandle {
986
+ get<T = unknown>(key: string): T | undefined;
987
+ set(key: string, value: unknown): void;
988
+ has(key: string): boolean;
989
+ delete(key: string): boolean;
990
+ /** A point-in-time DEEP snapshot of all current entries — what `onRunEnd` flushes and the engine persists into
991
+ * the run snapshot. Deep-copied so a later in-place mutation of a stored object can't corrupt an already-taken
992
+ * snapshot. Values must be JSON-serializable (they ride the run snapshot). */
993
+ entries(): Record<string, unknown>;
994
+ }
995
+ /** Build a fresh, Map-backed run-state handle. `seed` (e.g. from `onRunStart`) pre-populates it. */
996
+ declare function createRunState(seed?: Record<string, unknown>): RunStateHandle;
997
+
942
998
  /** Identifies who holds (or is queued for) a container's floor — surfaced in the "waiting for X" indicator. */
943
999
  interface FloorHolder {
944
1000
  /** Display name of the holding run's lane agent, e.g. "Coordinator". */
@@ -1168,6 +1224,27 @@ interface EngineOpts {
1168
1224
  * fallback nudge is used instead.
1169
1225
  */
1170
1226
  verifyCompletion?: CompletionVerifier;
1227
+ /**
1228
+ * FR-003 — per-run lifecycle hooks (fired on `run()`). `onRunStart` is awaited once when a run begins (after the
1229
+ * run row + thread are ensured, before the first model launch), receiving the run's `input` and a fresh
1230
+ * `RunStateHandle` to seed (e.g. from `input.context`). `onRunEnd` is awaited once at the run's terminal boundary
1231
+ * (done / failed / suspended / thrown / abandoned — it fires from the run's `finally`), receiving the final
1232
+ * `state` and the `outcome`, so a host can flush/persist deterministically. Both are FAIL-SAFE (a throw is
1233
+ * swallowed + logged, never breaking the run). `state` is the SAME handle the run's tools saw via `ctx.state`.
1234
+ * `onRunEnd` fires at EACH segment's terminal: a suspend ends the run() segment with outcome `"suspended"`, and
1235
+ * the resume segment fires its own `onRunEnd` (`"done"`/`"failed"`/`"suspended"`) — a host keys billing/persist
1236
+ * on the terminal outcome. A `resume` RESTORES the per-run `state` persisted at suspend (so a client-tool /
1237
+ * evolving-document flow keeps state across the answer); `onRunStart` fires on `run()` only. State values must be
1238
+ * JSON-serializable (they ride the run snapshot). Omit ⇒ no-op.
1239
+ */
1240
+ onRunStart?: (ctx: SwarmContext, info: {
1241
+ input: RunInput;
1242
+ state: RunStateHandle;
1243
+ }) => void | Promise<void>;
1244
+ onRunEnd?: (ctx: SwarmContext, info: {
1245
+ state: RunStateHandle;
1246
+ outcome: "done" | "failed" | "suspended";
1247
+ }) => void | Promise<void>;
1171
1248
  }
1172
1249
  declare class SwarmEngine {
1173
1250
  private opts;
@@ -1356,6 +1433,14 @@ interface SwarmToolContext {
1356
1433
  * back-compat with code that constructs a bare ctx; the engine always populates it.
1357
1434
  */
1358
1435
  secrets?: BoundSecrets;
1436
+ /**
1437
+ * FR-003 — the per-run mutable state store. The SAME handle across every tool call in this run (orchestrator
1438
+ * AND delegated sub-agents), so a chain of tools (`addPrimitive → addStageZone → …`) can read the previous
1439
+ * tool's result and write the next. Seeded by `SwarmConfig.onRunStart` (e.g. from the client payload), drained
1440
+ * by `onRunEnd`. GC'd with the run — no module-level registry. Optional in the type only for back-compat with a
1441
+ * hand-constructed bare ctx; the engine always populates it.
1442
+ */
1443
+ state?: RunStateHandle;
1359
1444
  }
1360
1445
  interface SwarmTool {
1361
1446
  name: string;
@@ -1365,6 +1450,20 @@ interface SwarmTool {
1365
1450
  type SwarmSkill = SwarmTool;
1366
1451
  declare function defineTool<I, O>(spec: ToolSpec<I, O>): SwarmTool;
1367
1452
  declare function defineSkill(tool: SwarmTool): SwarmSkill;
1453
+ interface ClientToolSpec<I, O> {
1454
+ name: string;
1455
+ description?: string;
1456
+ inputSchema: z.ZodType<I>;
1457
+ outputSchema?: z.ZodType<O>;
1458
+ /** Ask the UI to confirm before the client runs the action (the handler renders an approval step). Default false. */
1459
+ needsApproval?: boolean;
1460
+ }
1461
+ /** Thrown when a client tool's handler reports an error (the browser posted `{ error }`) — surfaces as a failed
1462
+ * tool_result so the model sees the action did not succeed. */
1463
+ declare class ClientToolError extends Error {
1464
+ constructor(toolName: string, reason?: string);
1465
+ }
1466
+ declare function defineClientTool<I, O>(spec: ClientToolSpec<I, O>): SwarmTool;
1368
1467
  interface AgentSpec {
1369
1468
  slug: string;
1370
1469
  role?: "orchestrator" | "specialist";
@@ -1543,6 +1642,15 @@ interface SwarmConfig {
1543
1642
  * Omit ⇒ the cheap structural "did the root speak last?" fallback nudge.
1544
1643
  */
1545
1644
  verifyCompletion?: CompletionVerifier;
1645
+ /**
1646
+ * FR-003 — per-run lifecycle hooks. `onRunStart(ctx, { input, state })` seeds the run's mutable `ctx.state`
1647
+ * store (e.g. `state.set("sceneCode", input.context?.currentCode ?? "")`) once before the first generation;
1648
+ * `onRunEnd(ctx, { state, outcome })` drains it deterministically at the run's terminal boundary (done /
1649
+ * failed / suspended / thrown). The SAME `state` handle flows into every tool call of the run (orchestrator +
1650
+ * delegated sub-agents) via `ctx.state`. Both are FAIL-SAFE. Omit ⇒ no per-run state seeding/draining.
1651
+ */
1652
+ onRunStart?: EngineOpts["onRunStart"];
1653
+ onRunEnd?: EngineOpts["onRunEnd"];
1546
1654
  /**
1547
1655
  * Declarative conditional policy (Phase A). `advise` rules are injected into the system prompt; `enforce`
1548
1656
  * rules compile into the decision hooks (deny/ask), folding in the non-removable SP5 policy floor. Per-agent
@@ -1629,6 +1737,64 @@ declare class RowCache<V> {
1629
1737
  invalidate(key: string): void;
1630
1738
  }
1631
1739
 
1740
+ /** Drain an `engine.run(...)` (an `AsyncIterable<SwarmEvent>`) into the eval-shaped trajectory + final output. The
1741
+ * output is the concatenation of the assistant `swarm.message` texts (the model's visible reply), in stream order. */
1742
+ declare function drainTrajectory(stream: AsyncIterable<SwarmEvent>): Promise<{
1743
+ events: SwarmEvent[];
1744
+ output: string;
1745
+ }>;
1746
+ /** FR-018 — run one input through a built `Swarm`/`SwarmEngine` and return its full trajectory + output. This is
1747
+ * the seam a host hands to `@nightowlsdev/eval` (`RunAgent = (case) => Promise<{ events, output }>`): no SSE,
1748
+ * no engine internals. `ctx` defaults to an ephemeral local context when omitted. */
1749
+ declare function runToTrajectory(target: Swarm | SwarmEngine, input: RunInput | string, ctx?: Partial<SwarmContext> & {
1750
+ agentSlug: string;
1751
+ }): Promise<{
1752
+ events: SwarmEvent[];
1753
+ output: string;
1754
+ }>;
1755
+ /** Options for {@link runAgent}. The model wiring is the only required part (the same `modelFactory` a swarm
1756
+ * uses); everything else mirrors the `SwarmConfig` fields and defaults to the single-agent/ephemeral case. */
1757
+ interface RunAgentOpts {
1758
+ /** Same `(modelId, agentSlug?) => model` factory `defineSwarm` takes (e.g. `openaiModels()`). Required. */
1759
+ modelFactory: SwarmConfig["modelFactory"];
1760
+ /** Model allow-list + optional tier router. Defaults to allowing the agent's own `modelId` (+ tier models). */
1761
+ models?: {
1762
+ allow?: string[];
1763
+ tier?: TierConfig;
1764
+ };
1765
+ /** Per-run caps. Defaults to a generous single-run budget (`{ maxSteps: 50, maxCostUsd: 10 }`). */
1766
+ cost?: Partial<SwarmConfig["cost"]>;
1767
+ /** Bring your own adapter (e.g. to inspect events afterwards). Defaults to a fresh `InMemoryStorage`. */
1768
+ storage?: StorageAdapter;
1769
+ /** Pass-throughs onto the one-agent SwarmConfig — identical semantics to `defineSwarm`. */
1770
+ telemetry?: SwarmConfig["telemetry"];
1771
+ memory?: SwarmConfig["memory"];
1772
+ hooks?: SwarmConfig["hooks"];
1773
+ toolApproval?: SwarmConfig["toolApproval"];
1774
+ secrets?: SwarmConfig["secrets"];
1775
+ onEvent?: SwarmConfig["onEvent"];
1776
+ onRunStart?: SwarmConfig["onRunStart"];
1777
+ onRunEnd?: SwarmConfig["onRunEnd"];
1778
+ pageContext?: SwarmConfig["pageContext"];
1779
+ mastraStore?: SwarmConfig["mastraStore"];
1780
+ /** Override the ephemeral run context (tenantId/userId/run/thread ids). */
1781
+ ctx?: Partial<SwarmContext>;
1782
+ }
1783
+ /** Build a one-agent `Swarm` from an `AgentDef` + options — the standalone equivalent of `defineSwarm` for a
1784
+ * single agent. Useful when you want the built engine (e.g. to call `resume`) rather than a one-shot run. */
1785
+ declare function buildSingleAgentSwarm(def: AgentDef, opts: RunAgentOpts): Swarm;
1786
+ /**
1787
+ * FR-019 — run a single `AgentDef` to completion and return its trajectory + final output, with NO Supabase
1788
+ * adapter and NO publish required. Honors tier resolution + cost cap + tool-approval (it builds the real engine).
1789
+ *
1790
+ * @example
1791
+ * const { output } = await runAgent(titleAgent, "Summarize: …", { modelFactory: openaiModels() });
1792
+ */
1793
+ declare function runAgent(def: AgentDef, input: RunInput | string, opts: RunAgentOpts): Promise<{
1794
+ events: SwarmEvent[];
1795
+ output: string;
1796
+ }>;
1797
+
1632
1798
  declare class InMemoryStorage implements StorageAdapter {
1633
1799
  private evts;
1634
1800
  private seq;
@@ -1640,6 +1806,7 @@ declare class InMemoryStorage implements StorageAdapter {
1640
1806
  private agentRows;
1641
1807
  private heads;
1642
1808
  private pads;
1809
+ private threadRows;
1643
1810
  seedAgent(v: AgentVersion, tenantId?: string): void;
1644
1811
  recordSuspend(runId: string, tenantId: string, followupId: string, toolCallId: string): void;
1645
1812
  markFollowupAnswered(followupId: string, tenantId: string): boolean;
@@ -1647,6 +1814,14 @@ declare class InMemoryStorage implements StorageAdapter {
1647
1814
  getRun(runId: string): RunRow | undefined;
1648
1815
  events: EventStore;
1649
1816
  runs: RunStore;
1817
+ threads: ThreadStore;
1818
+ /** Test/host helper: read a recorded thread row. */
1819
+ getThread(id: string): {
1820
+ id: string;
1821
+ orgId: string;
1822
+ userId: string;
1823
+ projectId?: string;
1824
+ } | undefined;
1650
1825
  messages: MessageStore;
1651
1826
  scratchpad: ScratchpadStore;
1652
1827
  agents: AgentRepo;
@@ -1710,4 +1885,4 @@ declare function rateConfig(max: number | undefined, windowSec: number, fallback
1710
1885
 
1711
1886
  declare const VERSION = "0.0.0";
1712
1887
 
1713
- export { ASK_TOOL_NAME, type ActiveRun, type AgentDef, type AgentMemoryOverride, AgentMutationForbidden, type AgentRepo, type AgentSpec, type AgentSummary, type AgentVersion, type AgentVersionContent, type AgentVersionInfo, type AskField, type AskFieldOption, type AuthContext, type AuthProvider, type BoundSecrets, type BundleDef, type BundleDep, type BundleRepo, type BundleSpec, type BundleVersion, type BundleVersionContent, type BundleVersionInfo, type BundleWritableRepo, CapturingExporter, type CompletionVerdict, type CompletionVerifier, type ConnectorGrant, type ContainerFloor, CostGovernor, DelegateBudgets, type EngineOpts, type EventStore, type FloorHolder, GUARDRAILS, InMemoryContainerFloor, InMemoryStorage, type MemoryConfig, type MessageStore, type ModelProvider, type ModelRef, type ModelTier, type NewRun, PRICE_TABLE, type PerDelegateBudget, type Price, type PriceFeed, type PricingOpts, type RateLimitConfig, type RateLimitDecision, type RateLimitState, type RateLimitStore, type Release, ReserveDenied, RowCache, type RuleAction, type RuleCondition, type RuleDef, type RuleLevel, type RuleSpec, type RunInput, type RunRow, type RunStatus, type RunStore, type Runner, SCRATCHPAD_MAX_ENTRY_CHARS, SCRATCHPAD_MAX_KEYS, type ScratchpadEntry, type ScratchpadStore, type SecretResolver, type SlugUsage, SpanCollector, type StorageAdapter, type Swarm, type SwarmActor, type SwarmConfig, type SwarmContext, SwarmEngine, type SwarmEvent, type SwarmMessage, type SwarmSkill, type SwarmSpan, type SwarmTool, type SwarmToolContext, type TelemetryExporter, type ThreadSummary, type TierConfig, type TierEscalationContext, type TierResolution, type ToolSpec, type TurnUsage, type UsageBreakdown, type UsageCost, VERSION, type VersionedRepo, type WorkflowCompliance, type WorkflowDef, type WorkflowRef, type WorkflowRunState, type WorkflowSpec, type WorkflowStep, type WorkflowTransition, allowListModelProvider, assertActorMayMutateDefinition, buildSkillResolver, composePolicyPrompt, 
composeSystemPrompt, compositeTelemetry, containerFloor, createInMemoryRateLimitStore, customAuth, customTelemetry, decideFixedWindow, defineAgent, defineBundle, defineRule, defineSkill, defineSwarm, defineTool, defineWorkflow, ev, isEvent, isTierSentinel, mergeBundle, priceUsage, rateConfig, resolveTelemetry, resolveTier, sumBreakdowns, sumTurnUsage, tierModelId, toBundleContent };
1888
+ export { ASK_TOOL_NAME, type ActiveRun, type AgentDef, type AgentMemoryOverride, AgentMutationForbidden, type AgentRepo, type AgentSpec, type AgentSummary, type AgentVersion, type AgentVersionContent, type AgentVersionInfo, type AskField, type AskFieldOption, type AuthContext, type AuthProvider, type BoundSecrets, type BundleDef, type BundleDep, type BundleRepo, type BundleSpec, type BundleVersion, type BundleVersionContent, type BundleVersionInfo, type BundleWritableRepo, CapturingExporter, ClientToolError, type ClientToolSpec, type CompletionVerdict, type CompletionVerifier, type ConnectorGrant, type ContainerFloor, CostGovernor, DelegateBudgets, type EngineOpts, type EventStore, type FloorHolder, GUARDRAILS, InMemoryContainerFloor, InMemoryStorage, type MemoryConfig, type MessageStore, type ModelProvider, type ModelRef, type ModelTier, type NewRun, PRICE_TABLE, type PerDelegateBudget, type Price, type PriceFeed, type PricingOpts, type RateLimitConfig, type RateLimitDecision, type RateLimitState, type RateLimitStore, type Release, ReserveDenied, RowCache, type RuleAction, type RuleCondition, type RuleDef, type RuleLevel, type RuleSpec, type RunAgentOpts, type RunInput, type RunRow, type RunStateHandle, type RunStatus, type RunStore, type Runner, SCRATCHPAD_MAX_ENTRY_CHARS, SCRATCHPAD_MAX_KEYS, type ScratchpadEntry, type ScratchpadStore, type SecretResolver, type SlugUsage, SpanCollector, type StorageAdapter, type Swarm, type SwarmActor, type SwarmConfig, type SwarmContext, SwarmEngine, type SwarmEvent, type SwarmMessage, type SwarmSkill, type SwarmSpan, type SwarmTool, type SwarmToolContext, type TelemetryExporter, type ThreadStore, type ThreadSummary, type TierConfig, type TierEscalationContext, type TierResolution, type ToolSpec, type TurnUsage, type UsageBreakdown, type UsageCost, VERSION, type VersionedRepo, type WorkflowCompliance, type WorkflowDef, type WorkflowRef, type WorkflowRunState, type WorkflowSpec, type WorkflowStep, type WorkflowTransition, 
allowListModelProvider, assertActorMayMutateDefinition, buildSingleAgentSwarm, buildSkillResolver, composePolicyPrompt, composeSystemPrompt, compositeTelemetry, containerFloor, createInMemoryRateLimitStore, createRunState, customAuth, customTelemetry, decideFixedWindow, defineAgent, defineBundle, defineClientTool, defineRule, defineSkill, defineSwarm, defineTool, defineWorkflow, drainTrajectory, ev, isEvent, isTierSentinel, mergeBundle, priceUsage, rateConfig, resolveTelemetry, resolveTier, runAgent, runToTrajectory, sumBreakdowns, sumTurnUsage, tierModelId, toBundleContent };
package/dist/index.d.ts CHANGED
@@ -321,6 +321,16 @@ type SwarmEvent = (EvBase & {
321
321
  result?: unknown;
322
322
  error?: string;
323
323
  };
324
+ }) | (EvBase & {
325
+ type: "swarm.client_action";
326
+ data: {
327
+ followupId: string;
328
+ toolCallId: string;
329
+ tool: string;
330
+ input: unknown;
331
+ needsApproval: boolean;
332
+ from: string;
333
+ };
324
334
  }) | (EvBase & {
325
335
  type: "swarm.question";
326
336
  data: {
@@ -346,6 +356,7 @@ type SwarmEvent = (EvBase & {
346
356
  modelId: string;
347
357
  breakdown: UsageBreakdown;
348
358
  cost: UsageCost;
359
+ generationId: string;
349
360
  };
350
361
  }) | (EvBase & {
351
362
  type: "swarm.turn_usage";
@@ -694,6 +705,24 @@ interface MessageStore {
694
705
  * `engine.history` (Mastra recall, scoped by resourceId=tenant:user). Kept for contract consistency. */
695
706
  history(tenantId: string, threadId: string, limit?: number): Promise<SwarmMessage[]>;
696
707
  }
708
+ /** The thread (conversation container) a run references. `runs.thread_id` is a NOT NULL FK to this row, so the
709
+ * row MUST exist before the first run/message of a conversation. `ThreadStore.ensure` is the supported,
710
+ * schema-private way to create it idempotently — without it a host has to reach into the engine's raw pool and
711
+ * hardcode `nightowls.threads`'s columns (FR-009). */
712
+ interface ThreadStore {
713
+ /**
714
+ * Idempotently create the thread row a run will reference (insert-or-ignore on `id`). Safe to call before
715
+ * every run. `orgId` is the tenant; `userId` the conversation owner; `projectId` an optional host-owned
716
+ * sub-scope. Never throws on a pre-existing row. The engine calls this at run start when the adapter provides
717
+ * it, so `messages.append` cannot throw `unknown thread` through the supported path.
718
+ */
719
+ ensure(spec: {
720
+ id: string;
721
+ orgId: string;
722
+ userId: string;
723
+ projectId?: string;
724
+ }): Promise<void>;
725
+ }
697
726
  /** Scratchpad caps (intentionally lossy — working memory, not a system of record). */
698
727
  declare const SCRATCHPAD_MAX_ENTRY_CHARS = 4000;
699
728
  declare const SCRATCHPAD_MAX_KEYS = 64;
@@ -795,6 +824,10 @@ interface StorageAdapter {
795
824
  runs: RunStore;
796
825
  events: EventStore;
797
826
  messages: MessageStore;
827
+ /** Opt-in thread (conversation container) creation (FR-009). When present, the engine idempotently ensures the
828
+ * run's thread row exists before the first message/event write, so a host need not pre-create it with raw SQL.
829
+ * Optional so existing adapters keep compiling; a host whose threads are externally managed may omit it. */
830
+ threads?: ThreadStore;
798
831
  /** Opt-in container scratchpad (Part D). Optional so existing adapters keep compiling. */
799
832
  scratchpad?: ScratchpadStore;
800
833
  /**
@@ -823,6 +856,12 @@ interface StorageAdapter {
823
856
  * is the only staleness bound there). The supabase adapter uses Postgres LISTEN/NOTIFY.
824
857
  */
825
858
  subscribeInvalidations?(onInvalidate: (key: string) => void): () => void;
859
+ /**
860
+ * FR-016 — enumerate the engine's tenants (org ids) so a host can idempotently backfill every tenant's crew
861
+ * (`engine.run` resolves agents per-tenant from the DB, so each tenant must be seeded before its first run).
862
+ * Optional: a read-only / in-memory adapter may omit it. The supabase adapter reads `nightowls.orgs`.
863
+ */
864
+ listTenants?(): Promise<string[]>;
826
865
  }
827
866
  interface ModelProvider {
828
867
  resolve(modelId: string, ctx: {
@@ -939,6 +978,23 @@ declare function ev<T extends SwarmEvent["type"]>(type: T, base: {
939
978
  }, data: ByType<T>["data"]): ByType<T>;
940
979
  declare function isEvent<T extends SwarmEvent["type"]>(e: SwarmEvent, type: T): e is ByType<T>;
941
980
 
981
+ /**
982
+ * The per-run mutable store a first-party tool sees on `SwarmToolContext.state`. A simple keyed bag plus an
983
+ * `entries()` snapshot the `onRunEnd` drain reads. Reads of an unset key yield `undefined`; it never throws.
984
+ */
985
+ interface RunStateHandle {
986
+ get<T = unknown>(key: string): T | undefined;
987
+ set(key: string, value: unknown): void;
988
+ has(key: string): boolean;
989
+ delete(key: string): boolean;
990
+ /** A point-in-time DEEP snapshot of all current entries — what `onRunEnd` flushes and the engine persists into
991
+ * the run snapshot. Deep-copied so a later in-place mutation of a stored object can't corrupt an already-taken
992
+ * snapshot. Values must be JSON-serializable (they ride the run snapshot). */
993
+ entries(): Record<string, unknown>;
994
+ }
995
+ /** Build a fresh, Map-backed run-state handle. `seed` (e.g. from `onRunStart`) pre-populates it. */
996
+ declare function createRunState(seed?: Record<string, unknown>): RunStateHandle;
997
+
942
998
  /** Identifies who holds (or is queued for) a container's floor — surfaced in the "waiting for X" indicator. */
943
999
  interface FloorHolder {
944
1000
  /** Display name of the holding run's lane agent, e.g. "Coordinator". */
@@ -1168,6 +1224,27 @@ interface EngineOpts {
1168
1224
  * fallback nudge is used instead.
1169
1225
  */
1170
1226
  verifyCompletion?: CompletionVerifier;
1227
+ /**
1228
+ * FR-003 — per-run lifecycle hooks (fired on `run()`). `onRunStart` is awaited once when a run begins (after the
1229
+ * run row + thread are ensured, before the first model launch), receiving the run's `input` and a fresh
1230
+ * `RunStateHandle` to seed (e.g. from `input.context`). `onRunEnd` is awaited once at the run's terminal boundary
1231
+ * (done / failed / suspended / thrown / abandoned — it fires from the run's `finally`), receiving the final
1232
+ * `state` and the `outcome`, so a host can flush/persist deterministically. Both are FAIL-SAFE (a throw is
1233
+ * swallowed + logged, never breaking the run). `state` is the SAME handle the run's tools saw via `ctx.state`.
1234
+ * `onRunEnd` fires at EACH segment's terminal: a suspend ends the run() segment with outcome `"suspended"`, and
1235
+ * the resume segment fires its own `onRunEnd` (`"done"`/`"failed"`/`"suspended"`) — a host keys billing/persist
1236
+ * on the terminal outcome. A `resume` RESTORES the per-run `state` persisted at suspend (so a client-tool /
1237
+ * evolving-document flow keeps state across the answer); `onRunStart` fires on `run()` only. State values must be
1238
+ * JSON-serializable (they ride the run snapshot). Omit ⇒ no-op.
1239
+ */
1240
+ onRunStart?: (ctx: SwarmContext, info: {
1241
+ input: RunInput;
1242
+ state: RunStateHandle;
1243
+ }) => void | Promise<void>;
1244
+ onRunEnd?: (ctx: SwarmContext, info: {
1245
+ state: RunStateHandle;
1246
+ outcome: "done" | "failed" | "suspended";
1247
+ }) => void | Promise<void>;
1171
1248
  }
1172
1249
  declare class SwarmEngine {
1173
1250
  private opts;
@@ -1356,6 +1433,14 @@ interface SwarmToolContext {
1356
1433
  * back-compat with code that constructs a bare ctx; the engine always populates it.
1357
1434
  */
1358
1435
  secrets?: BoundSecrets;
1436
+ /**
1437
+ * FR-003 — the per-run mutable state store. The SAME handle across every tool call in this run (orchestrator
1438
+ * AND delegated sub-agents), so a chain of tools (`addPrimitive → addStageZone → …`) can read the previous
1439
+ * tool's result and write the next. Seeded by `SwarmConfig.onRunStart` (e.g. from the client payload), drained
1440
+ * by `onRunEnd`. GC'd with the run — no module-level registry. Optional in the type only for back-compat with a
1441
+ * hand-constructed bare ctx; the engine always populates it.
1442
+ */
1443
+ state?: RunStateHandle;
1359
1444
  }
1360
1445
  interface SwarmTool {
1361
1446
  name: string;
@@ -1365,6 +1450,20 @@ interface SwarmTool {
1365
1450
  type SwarmSkill = SwarmTool;
1366
1451
  declare function defineTool<I, O>(spec: ToolSpec<I, O>): SwarmTool;
1367
1452
  declare function defineSkill(tool: SwarmTool): SwarmSkill;
1453
+ interface ClientToolSpec<I, O> {
1454
+ name: string;
1455
+ description?: string;
1456
+ inputSchema: z.ZodType<I>;
1457
+ outputSchema?: z.ZodType<O>;
1458
+ /** Ask the UI to confirm before the client runs the action (the handler renders an approval step). Default false. */
1459
+ needsApproval?: boolean;
1460
+ }
1461
+ /** Thrown when a client tool's handler reports an error (the browser posted `{ error }`) — surfaces as a failed
1462
+ * tool_result so the model sees the action did not succeed. */
1463
+ declare class ClientToolError extends Error {
1464
+ constructor(toolName: string, reason?: string);
1465
+ }
1466
+ declare function defineClientTool<I, O>(spec: ClientToolSpec<I, O>): SwarmTool;
1368
1467
  interface AgentSpec {
1369
1468
  slug: string;
1370
1469
  role?: "orchestrator" | "specialist";
@@ -1543,6 +1642,15 @@ interface SwarmConfig {
1543
1642
  * Omit ⇒ the cheap structural "did the root speak last?" fallback nudge.
1544
1643
  */
1545
1644
  verifyCompletion?: CompletionVerifier;
1645
+ /**
1646
+ * FR-003 — per-run lifecycle hooks. `onRunStart(ctx, { input, state })` seeds the run's mutable `ctx.state`
1647
+ * store (e.g. `state.set("sceneCode", input.context?.currentCode ?? "")`) once before the first generation;
1648
+ * `onRunEnd(ctx, { state, outcome })` drains it deterministically at the run's terminal boundary (done /
1649
+ * failed / suspended / thrown). The SAME `state` handle flows into every tool call of the run (orchestrator +
1650
+ * delegated sub-agents) via `ctx.state`. Both are FAIL-SAFE. Omit ⇒ no per-run state seeding/draining.
1651
+ */
1652
+ onRunStart?: EngineOpts["onRunStart"];
1653
+ onRunEnd?: EngineOpts["onRunEnd"];
1546
1654
  /**
1547
1655
  * Declarative conditional policy (Phase A). `advise` rules are injected into the system prompt; `enforce`
1548
1656
  * rules compile into the decision hooks (deny/ask), folding in the non-removable SP5 policy floor. Per-agent
@@ -1629,6 +1737,64 @@ declare class RowCache<V> {
1629
1737
  invalidate(key: string): void;
1630
1738
  }
1631
1739
 
1740
+ /** Drain an `engine.run(...)` (an `AsyncIterable<SwarmEvent>`) into the eval-shaped trajectory + final output. The
1741
+ * output is the concatenation of the assistant `swarm.message` texts (the model's visible reply), in stream order. */
1742
+ declare function drainTrajectory(stream: AsyncIterable<SwarmEvent>): Promise<{
1743
+ events: SwarmEvent[];
1744
+ output: string;
1745
+ }>;
1746
+ /** FR-018 — run one input through a built `Swarm`/`SwarmEngine` and return its full trajectory + output. This is
1747
+ * the seam a host hands to `@nightowlsdev/eval` (`RunAgent = (case) => Promise<{ events, output }>`): no SSE,
1748
+ * no engine internals. `ctx` defaults to an ephemeral local context when omitted. */
1749
+ declare function runToTrajectory(target: Swarm | SwarmEngine, input: RunInput | string, ctx?: Partial<SwarmContext> & {
1750
+ agentSlug: string;
1751
+ }): Promise<{
1752
+ events: SwarmEvent[];
1753
+ output: string;
1754
+ }>;
1755
+ /** Options for {@link runAgent}. The model wiring is the only required part (the same `modelFactory` a swarm
1756
+ * uses); everything else mirrors the `SwarmConfig` fields and defaults to the single-agent/ephemeral case. */
1757
+ interface RunAgentOpts {
1758
+ /** Same `(modelId, agentSlug?) => model` factory `defineSwarm` takes (e.g. `openaiModels()`). Required. */
1759
+ modelFactory: SwarmConfig["modelFactory"];
1760
+ /** Model allow-list + optional tier router. Defaults to allowing the agent's own `modelId` (+ tier models). */
1761
+ models?: {
1762
+ allow?: string[];
1763
+ tier?: TierConfig;
1764
+ };
1765
+ /** Per-run caps. Defaults to a generous single-run budget (`{ maxSteps: 50, maxCostUsd: 10 }`). */
1766
+ cost?: Partial<SwarmConfig["cost"]>;
1767
+ /** Bring your own adapter (e.g. to inspect events afterwards). Defaults to a fresh `InMemoryStorage`. */
1768
+ storage?: StorageAdapter;
1769
+ /** Pass-throughs onto the one-agent SwarmConfig — identical semantics to `defineSwarm`. */
1770
+ telemetry?: SwarmConfig["telemetry"];
1771
+ memory?: SwarmConfig["memory"];
1772
+ hooks?: SwarmConfig["hooks"];
1773
+ toolApproval?: SwarmConfig["toolApproval"];
1774
+ secrets?: SwarmConfig["secrets"];
1775
+ onEvent?: SwarmConfig["onEvent"];
1776
+ onRunStart?: SwarmConfig["onRunStart"];
1777
+ onRunEnd?: SwarmConfig["onRunEnd"];
1778
+ pageContext?: SwarmConfig["pageContext"];
1779
+ mastraStore?: SwarmConfig["mastraStore"];
1780
+ /** Override the ephemeral run context (tenantId/userId/run/thread ids). */
1781
+ ctx?: Partial<SwarmContext>;
1782
+ }
1783
+ /** Build a one-agent `Swarm` from an `AgentDef` + options — the standalone equivalent of `defineSwarm` for a
1784
+ * single agent. Useful when you want the built engine (e.g. to call `resume`) rather than a one-shot run. */
1785
+ declare function buildSingleAgentSwarm(def: AgentDef, opts: RunAgentOpts): Swarm;
1786
+ /**
1787
+ * FR-019 — run a single `AgentDef` to completion and return its trajectory + final output, with NO Supabase
1788
+ * adapter and NO publish required. Honors tier resolution + cost cap + tool-approval (it builds the real engine).
1789
+ *
1790
+ * @example
1791
+ * const { output } = await runAgent(titleAgent, "Summarize: …", { modelFactory: openaiModels() });
1792
+ */
1793
+ declare function runAgent(def: AgentDef, input: RunInput | string, opts: RunAgentOpts): Promise<{
1794
+ events: SwarmEvent[];
1795
+ output: string;
1796
+ }>;
1797
+
1632
1798
  declare class InMemoryStorage implements StorageAdapter {
1633
1799
  private evts;
1634
1800
  private seq;
@@ -1640,6 +1806,7 @@ declare class InMemoryStorage implements StorageAdapter {
1640
1806
  private agentRows;
1641
1807
  private heads;
1642
1808
  private pads;
1809
+ private threadRows;
1643
1810
  seedAgent(v: AgentVersion, tenantId?: string): void;
1644
1811
  recordSuspend(runId: string, tenantId: string, followupId: string, toolCallId: string): void;
1645
1812
  markFollowupAnswered(followupId: string, tenantId: string): boolean;
@@ -1647,6 +1814,14 @@ declare class InMemoryStorage implements StorageAdapter {
1647
1814
  getRun(runId: string): RunRow | undefined;
1648
1815
  events: EventStore;
1649
1816
  runs: RunStore;
1817
+ threads: ThreadStore;
1818
+ /** Test/host helper: read a recorded thread row. */
1819
+ getThread(id: string): {
1820
+ id: string;
1821
+ orgId: string;
1822
+ userId: string;
1823
+ projectId?: string;
1824
+ } | undefined;
1650
1825
  messages: MessageStore;
1651
1826
  scratchpad: ScratchpadStore;
1652
1827
  agents: AgentRepo;
@@ -1710,4 +1885,4 @@ declare function rateConfig(max: number | undefined, windowSec: number, fallback
1710
1885
 
1711
1886
  declare const VERSION = "0.0.0";
1712
1887
 
1713
- export { ASK_TOOL_NAME, type ActiveRun, type AgentDef, type AgentMemoryOverride, AgentMutationForbidden, type AgentRepo, type AgentSpec, type AgentSummary, type AgentVersion, type AgentVersionContent, type AgentVersionInfo, type AskField, type AskFieldOption, type AuthContext, type AuthProvider, type BoundSecrets, type BundleDef, type BundleDep, type BundleRepo, type BundleSpec, type BundleVersion, type BundleVersionContent, type BundleVersionInfo, type BundleWritableRepo, CapturingExporter, type CompletionVerdict, type CompletionVerifier, type ConnectorGrant, type ContainerFloor, CostGovernor, DelegateBudgets, type EngineOpts, type EventStore, type FloorHolder, GUARDRAILS, InMemoryContainerFloor, InMemoryStorage, type MemoryConfig, type MessageStore, type ModelProvider, type ModelRef, type ModelTier, type NewRun, PRICE_TABLE, type PerDelegateBudget, type Price, type PriceFeed, type PricingOpts, type RateLimitConfig, type RateLimitDecision, type RateLimitState, type RateLimitStore, type Release, ReserveDenied, RowCache, type RuleAction, type RuleCondition, type RuleDef, type RuleLevel, type RuleSpec, type RunInput, type RunRow, type RunStatus, type RunStore, type Runner, SCRATCHPAD_MAX_ENTRY_CHARS, SCRATCHPAD_MAX_KEYS, type ScratchpadEntry, type ScratchpadStore, type SecretResolver, type SlugUsage, SpanCollector, type StorageAdapter, type Swarm, type SwarmActor, type SwarmConfig, type SwarmContext, SwarmEngine, type SwarmEvent, type SwarmMessage, type SwarmSkill, type SwarmSpan, type SwarmTool, type SwarmToolContext, type TelemetryExporter, type ThreadSummary, type TierConfig, type TierEscalationContext, type TierResolution, type ToolSpec, type TurnUsage, type UsageBreakdown, type UsageCost, VERSION, type VersionedRepo, type WorkflowCompliance, type WorkflowDef, type WorkflowRef, type WorkflowRunState, type WorkflowSpec, type WorkflowStep, type WorkflowTransition, allowListModelProvider, assertActorMayMutateDefinition, buildSkillResolver, composePolicyPrompt, 
composeSystemPrompt, compositeTelemetry, containerFloor, createInMemoryRateLimitStore, customAuth, customTelemetry, decideFixedWindow, defineAgent, defineBundle, defineRule, defineSkill, defineSwarm, defineTool, defineWorkflow, ev, isEvent, isTierSentinel, mergeBundle, priceUsage, rateConfig, resolveTelemetry, resolveTier, sumBreakdowns, sumTurnUsage, tierModelId, toBundleContent };
1888
+ export { ASK_TOOL_NAME, type ActiveRun, type AgentDef, type AgentMemoryOverride, AgentMutationForbidden, type AgentRepo, type AgentSpec, type AgentSummary, type AgentVersion, type AgentVersionContent, type AgentVersionInfo, type AskField, type AskFieldOption, type AuthContext, type AuthProvider, type BoundSecrets, type BundleDef, type BundleDep, type BundleRepo, type BundleSpec, type BundleVersion, type BundleVersionContent, type BundleVersionInfo, type BundleWritableRepo, CapturingExporter, ClientToolError, type ClientToolSpec, type CompletionVerdict, type CompletionVerifier, type ConnectorGrant, type ContainerFloor, CostGovernor, DelegateBudgets, type EngineOpts, type EventStore, type FloorHolder, GUARDRAILS, InMemoryContainerFloor, InMemoryStorage, type MemoryConfig, type MessageStore, type ModelProvider, type ModelRef, type ModelTier, type NewRun, PRICE_TABLE, type PerDelegateBudget, type Price, type PriceFeed, type PricingOpts, type RateLimitConfig, type RateLimitDecision, type RateLimitState, type RateLimitStore, type Release, ReserveDenied, RowCache, type RuleAction, type RuleCondition, type RuleDef, type RuleLevel, type RuleSpec, type RunAgentOpts, type RunInput, type RunRow, type RunStateHandle, type RunStatus, type RunStore, type Runner, SCRATCHPAD_MAX_ENTRY_CHARS, SCRATCHPAD_MAX_KEYS, type ScratchpadEntry, type ScratchpadStore, type SecretResolver, type SlugUsage, SpanCollector, type StorageAdapter, type Swarm, type SwarmActor, type SwarmConfig, type SwarmContext, SwarmEngine, type SwarmEvent, type SwarmMessage, type SwarmSkill, type SwarmSpan, type SwarmTool, type SwarmToolContext, type TelemetryExporter, type ThreadStore, type ThreadSummary, type TierConfig, type TierEscalationContext, type TierResolution, type ToolSpec, type TurnUsage, type UsageBreakdown, type UsageCost, VERSION, type VersionedRepo, type WorkflowCompliance, type WorkflowDef, type WorkflowRef, type WorkflowRunState, type WorkflowSpec, type WorkflowStep, type WorkflowTransition, 
allowListModelProvider, assertActorMayMutateDefinition, buildSingleAgentSwarm, buildSkillResolver, composePolicyPrompt, composeSystemPrompt, compositeTelemetry, containerFloor, createInMemoryRateLimitStore, createRunState, customAuth, customTelemetry, decideFixedWindow, defineAgent, defineBundle, defineClientTool, defineRule, defineSkill, defineSwarm, defineTool, defineWorkflow, drainTrajectory, ev, isEvent, isTierSentinel, mergeBundle, priceUsage, rateConfig, resolveTelemetry, resolveTier, runAgent, runToTrajectory, sumBreakdowns, sumTurnUsage, tierModelId, toBundleContent };