@nightowlsdev/core 0.3.0 → 0.5.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/dist/index.d.cts CHANGED
@@ -1,4 +1,175 @@
1
1
  import { z } from 'zod';
2
+ import { HookDispatcher, ToolApprovalPolicy, SwarmHooks } from '@nightowlsdev/hooks';
3
+ export { ALLOW, ALLOW_TOOL, DEFAULT_READ_ONLY_TOOLS, GuardMutationEvent, GuardMutationHook, HookDecision, HookDispatcher, PreGenerationEvent, PreGenerationHook, PreToolCallHook, SwarmHooks, ToolApprovalPolicy, ToolDecision, ToolPreCallEvent, ask, createHookDispatcher, defineHook, deny } from '@nightowlsdev/hooks';
4
+
5
+ interface Price {
6
+ inUsdPerMtok: number;
7
+ outUsdPerMtok: number;
8
+ /**
9
+ * Per-class USD rates per million tokens (SP1). All OPTIONAL + additive — a price entry without them
10
+ * prices exactly like before. When a class's rate is absent the engine falls back to the closest base
11
+ * rate, picked to match how providers bill these classes:
12
+ * - `cacheReadUsdPerMtok` → falls back to `inUsdPerMtok` (cache reads are discounted input; absent a
13
+ * discount rate we conservatively bill them at the full input rate rather than free).
14
+ * - `cacheWriteUsdPerMtok` → falls back to `inUsdPerMtok` (cache writes are an input-side surcharge).
15
+ * - `reasoningUsdPerMtok` → falls back to `outUsdPerMtok` (reasoning tokens are generated output).
16
+ */
17
+ cacheReadUsdPerMtok?: number;
18
+ cacheWriteUsdPerMtok?: number;
19
+ reasoningUsdPerMtok?: number;
20
+ }
21
+ /**
22
+ * Per-generation usage, by token class (SP1). `inputTokens`/`outputTokens` are always present (today's
23
+ * shape); the rest are OPTIONAL — a provider that does not report a class leaves it undefined and it is
24
+ * priced as zero (NEVER fabricated). `toolCalls`/`agentActivations` are activity counts for telemetry,
25
+ * not priced. Plain domain type — @mastra-free (engine wall, CONTRACTS §1).
26
+ */
27
+ interface UsageBreakdown {
28
+ inputTokens: number;
29
+ outputTokens: number;
30
+ cacheReadTokens?: number;
31
+ cacheWriteTokens?: number;
32
+ reasoningTokens?: number;
33
+ /** Number of (non-delegation) tool calls in this generation. Telemetry only — not priced. */
34
+ toolCalls?: number;
35
+ /** Number of sub-agent delegations (`agent-<slug>` calls) in this generation. Telemetry only — not priced. */
36
+ agentActivations?: number;
37
+ }
38
+ /** A priced usage: the computed USD plus the breakdown it was priced from. */
39
+ interface UsageCost {
40
+ usd: number;
41
+ breakdown: UsageBreakdown;
42
+ }
43
+ /**
44
+ * Optional seam that supplies/overrides per-model prices (SP1). Supplies NUMBERS only (no @mastra) so the
45
+ * engine wall holds. `prices()` returns a full `modelId → Price` map merged over the built-in `PRICE_TABLE`
46
+ * + any static `prices` config; a host can back it with a live price feed. Sync to keep governor construction
47
+ * synchronous on the hot path — resolve/refresh out of band and return the current snapshot here.
48
+ */
49
+ interface PriceFeed {
50
+ prices(): Record<string, Price>;
51
+ }
52
+ declare const PRICE_TABLE: Record<string, Price>;
53
+ /** Options shared by the pricing helpers + the governors that thread pricing config through. */
54
+ interface PricingOpts {
55
+ /**
56
+ * When TRUE an unpriced `modelId` THROWS instead of pricing at $0. Default FALSE so OSS users (the
57
+ * built-in `PRICE_TABLE` has only 2 entries) are not broken — an unknown model keeps the historical
58
+ * $0 fallback (the cost cap simply can't fire on it). A host that wants billing safety flips this on.
59
+ */
60
+ failOnUnknownModel?: boolean;
61
+ }
62
+ /** Price one usage at a model's rate, across EVERY token class (SP1). Shared by the global CostGovernor and
63
+ * per-delegate DelegateBudgets so the two caps always agree on what a token costs. Each class is priced at
64
+ * its explicit per-class rate, with the documented fallbacks (see `Price`) when a rate is absent; a token
65
+ * class the provider omitted is treated as zero (never fabricated). An unknown model prices at $0 by default
66
+ * (the cap can't fire on a model with no price entry — historical behavior) unless `failOnUnknownModel`. */
67
+ declare function priceUsage(prices: Record<string, Price>, modelId: string, u: UsageBreakdown, opts?: PricingOpts): number;
68
+ /**
69
+ * Sum a list of `UsageBreakdown` into a single turn-total breakdown (SP4 — the room-turn billing unit).
70
+ * The always-present base classes (`inputTokens`/`outputTokens`) sum as plain numbers. Each OPTIONAL class
71
+ * (cacheRead/cacheWrite/reasoning/toolCalls/agentActivations) sums with UNDEFINED as the additive identity:
72
+ * - undefined + number → the number (a class present in only some generations still totals correctly),
73
+ * - all-undefined → stays UNDEFINED (we never fabricate a class no provider reported as a zero),
74
+ * - present-and-zero → preserved as 0 (an explicit 0 means "reported zero", distinct from "not reported").
75
+ * An empty list yields the zero base breakdown `{ inputTokens: 0, outputTokens: 0 }`. @mastra-free.
76
+ */
77
+ declare function sumBreakdowns(items: UsageBreakdown[]): UsageBreakdown;
78
+ /** One generation's priced usage attributed to an agent slug — the unit the turn aggregates over. */
79
+ interface SlugUsage {
80
+ slug: string;
81
+ breakdown: UsageBreakdown;
82
+ cost: UsageCost;
83
+ }
84
+ /** A room-turn's aggregate usage (SP4): the summed breakdown + summed USD for the WHOLE turn, plus a
85
+ * per-agent-slug split so the platform can attribute credits per agent. `bySlug` is in first-seen order. */
86
+ interface TurnUsage {
87
+ breakdown: UsageBreakdown;
88
+ cost: UsageCost;
89
+ bySlug: SlugUsage[];
90
+ }
91
+ /**
92
+ * Aggregate every generation's `(slug, breakdown, cost)` over a room-turn into ONE turn total + a per-slug
93
+ * split (SP4). The turn total breakdown/USD are the sum across ALL generations; `bySlug` folds every
94
+ * generation of the same slug into one entry (so a slug that generated twice — e.g. an orchestrator before
95
+ * and after a delegation — appears once with its combined breakdown/USD), in FIRST-SEEN order for a stable
96
+ * split. By construction the per-slug entries sum back to the turn total (the invariant the platform's
97
+ * per-agent credit attribution relies on). An empty list yields a zero total + empty split. @mastra-free.
98
+ */
99
+ declare function sumTurnUsage(items: SlugUsage[]): TurnUsage;
100
+ declare class CostGovernor {
101
+ private opts;
102
+ private steps;
103
+ private usd;
104
+ private prices;
105
+ private failOnUnknownModel;
106
+ constructor(opts: {
107
+ maxSteps: number;
108
+ maxCostUsd: number;
109
+ /** Static per-model price overrides, merged over PRICE_TABLE. */
110
+ prices?: Record<string, Price>;
111
+ /** Optional live price seam (SP1). Merged OVER `prices` (a feed entry wins) at construction. */
112
+ priceFeed?: PriceFeed;
113
+ } & PricingOpts);
114
+ step(): void;
115
+ /** Price a single usage WITHOUT accumulating it (for per-generation telemetry cost). */
116
+ priceOf(modelId: string, u: UsageBreakdown): number;
117
+ /** Price a single usage WITHOUT accumulating it, returning the usd + the breakdown it was priced from. */
118
+ costOf(modelId: string, u: UsageBreakdown): UsageCost;
119
+ addUsage(modelId: string, u: UsageBreakdown): void;
120
+ costUsd(): number;
121
+ /** The current USD cap (SP9-core: the cap-that-asks reads this to surface "spend / cap" + to compute the raise). */
122
+ get maxCostUsd(): number;
123
+ /**
124
+ * SP9-core — RAISE the USD cap by `incrementUsd` (the budget an approved "Budget cap reached — continue?"
125
+ * grants). Mutates the governor's ceiling so a freshly-resumed generation isn't immediately re-blocked at the
126
+ * SAME cap; the run gets real additional headroom. Only the cap-that-asks resume path calls this; the default
127
+ * terminal-stop path never does, so today's behaviour is unchanged.
128
+ */
129
+ raiseCostCap(incrementUsd: number): void;
130
+ shouldStop(): {
131
+ stop: boolean;
132
+ reason?: string;
133
+ };
134
+ }
135
+ /**
136
+ * Per-delegate USD sub-budgets (R5). The global `cost.maxCostUsd` bounds the WHOLE turn; this adds an
137
+ * optional ceiling applied to EACH delegate (sub-agent) so one runaway sub-agent can't burn the entire
138
+ * turn before the global cap notices. `maxCostUsd` is the default ceiling for every delegate; `bySlug`
139
+ * overrides it per delegate slug (a slug listed there is capped even if there is no default). The run's
140
+ * root orchestrator is NOT a delegate and is never capped here — the global cap already covers it.
141
+ */
142
+ interface PerDelegateBudget {
143
+ /** Default USD ceiling per delegate. Omit to cap only the slugs named in `bySlug`. */
144
+ maxCostUsd?: number;
145
+ /** Per-slug overrides of `maxCostUsd`. */
146
+ bySlug?: Record<string, {
147
+ maxCostUsd?: number;
148
+ }>;
149
+ }
150
+ /** Tracks per-delegate USD spend and reports the first delegate to meet or exceed its budget. Priced from the same
151
+ * table as CostGovernor (via the shared `priceUsage`) so the global and per-delegate caps agree. Usage
152
+ * attributed to the root orchestrator slug is ignored. */
153
+ declare class DelegateBudgets {
154
+ private cfg;
155
+ private rootSlug;
156
+ private usd;
157
+ private prices;
158
+ private failOnUnknownModel;
159
+ constructor(cfg: PerDelegateBudget, rootSlug: string, pricing?: {
160
+ prices?: Record<string, Price>;
161
+ priceFeed?: PriceFeed;
162
+ } & PricingOpts);
163
+ /** The USD cap for a delegate: its `bySlug` override if present, else the default. `undefined` → uncapped. */
164
+ private capFor;
165
+ /** Accumulate one generation's usage against a delegate. No-op for the root orchestrator (not a delegate). */
166
+ addUsage(slug: string, modelId: string, u: UsageBreakdown): void;
167
+ /** The first delegate that has met or exceeded its USD cap, or null. */
168
+ exceeded(): {
169
+ slug: string;
170
+ reason: string;
171
+ } | null;
172
+ }
2
173
 
3
174
  interface SwarmContext {
4
175
  tenantId: string;
@@ -17,8 +188,63 @@ interface AuthProvider {
17
188
  authenticate(req: Request): Promise<AuthContext | null>;
18
189
  can?(ctx: AuthContext, capability: string): boolean | Promise<boolean>;
19
190
  }
191
+ /**
192
+ * The PRINCIPAL performing a definition mutation (SP6) — a plain (@mastra-free) discriminated union covering
193
+ * every kind of actor that can publish/rollback an agent definition. Distinct from `AuthContext` (the run's
194
+ * end-user identity): an actor models WHO is mutating the swarm's own code, which may be a human in the
195
+ * no-code builder, a service (CI/seeding pipeline), or a system task.
196
+ *
197
+ * - `human` → a person acting in a tenant (the no-code builder's authenticated user).
198
+ * - `service` → a non-human automation acting in a tenant (CI, a seeding job, a migration runner).
199
+ * - `system` → an internal/un-tenanted operation (bootstrap seed, ops rollback) carrying a `reason`.
200
+ * - `agent` → an AGENT acting on its own behalf. This variant exists for ONE reason: so an agent
201
+ * principal is REPRESENTABLE and can therefore be UNCONDITIONALLY BARRED from mutating a
202
+ * definition (an agent must never be able to rewrite the swarm's own code). The bar is
203
+ * enforced in the repo contract layer (`assertActorMayMutateDefinition`) and CANNOT be
204
+ * overridden by any policy/hook. No code should ever construct an `agent` actor expecting a
205
+ * mutation to succeed — it is the deny sentinel.
206
+ */
207
+ type SwarmActor = {
208
+ type: "human";
209
+ userId: string;
210
+ tenantId: string;
211
+ } | {
212
+ type: "service";
213
+ serviceId: string;
214
+ tenantId: string;
215
+ } | {
216
+ type: "system";
217
+ reason: string;
218
+ } | {
219
+ type: "agent";
220
+ agentSlug: string;
221
+ tenantId: string;
222
+ };
223
+ /**
224
+ * The NON-BYPASSABLE security invariant for definition mutations (SP6): an `agent` principal can NEVER
225
+ * publish or roll back an agent definition, regardless of any configured policy/hook. Every writable-repo
226
+ * mutation path MUST call this FIRST (before the `guardDefinitionMutation` policy hook) so the bar cannot be
227
+ * weakened by an allow-all hook. Throws `AgentMutationForbidden` for an `agent` actor; a no-op otherwise.
228
+ *
229
+ * This lives in the contract layer (not behind a hook) on purpose: the hook is removable/host-configurable
230
+ * policy, whereas this bar is a framework invariant — fail-closed and non-negotiable.
231
+ */
232
+ declare function assertActorMayMutateDefinition(actor: SwarmActor): void;
233
+ /** Thrown by `assertActorMayMutateDefinition` when an `agent` principal attempts a definition mutation. A
234
+ * named class so callers can distinguish the security bar from an ordinary failure (e.g. a missing version). */
235
+ declare class AgentMutationForbidden extends Error {
236
+ readonly code: "AGENT_MUTATION_FORBIDDEN";
237
+ constructor(agentSlug: string);
238
+ }
239
+ /**
240
+ * The seam a platform vault (SP15-platform) implements: given a secret `ref` + the RUN's identity ctx, it
241
+ * supplies the secret VALUE. @mastra-free. Resolution is execution-time + ctx-scoped (the connector calls it at
242
+ * tool-call time with the live run ctx — see @nightowlsdev/mcp connector.ts), so the vault enforces per-tenant
243
+ * scoping off `ctx.tenantId`: a run must NOT be able to resolve another tenant's secret. Returns `undefined` for
244
+ * an unknown ref (a tool then sees "no secret") — the resolver should never leak across tenants.
245
+ */
20
246
  interface SecretResolver {
21
- resolve(ref: string, ctx: SwarmContext): Promise<string>;
247
+ resolve(ref: string, ctx: SwarmContext): Promise<string | undefined>;
22
248
  }
23
249
  /** An optional rich input the asking agent CONSTRUCTS for a HITL `ask`, so the UI can render a fitting
24
250
  * widget instead of a bare text box. Omitted ⇒ a plain text input. Answer shape by kind: confirm→boolean,
@@ -54,6 +280,9 @@ interface EvBase {
54
280
  ts: number;
55
281
  seq?: number;
56
282
  schemaVersion: 1;
283
+ /** The run's thread (the lane this event belongs to). NOT set on live events — the store fills it in on a
284
+ * container restore (listForContainer) so a client can tell a lane side-chat apart from the main thread. */
285
+ threadId?: string;
57
286
  }
58
287
  type SwarmEvent = (EvBase & {
59
288
  type: "swarm.status";
@@ -65,7 +294,7 @@ type SwarmEvent = (EvBase & {
65
294
  }) | (EvBase & {
66
295
  type: "swarm.message";
67
296
  data: {
68
- role: "assistant";
297
+ role: "assistant" | "user";
69
298
  delta?: string;
70
299
  text?: string;
71
300
  };
@@ -92,6 +321,16 @@ type SwarmEvent = (EvBase & {
92
321
  result?: unknown;
93
322
  error?: string;
94
323
  };
324
+ }) | (EvBase & {
325
+ type: "swarm.client_action";
326
+ data: {
327
+ followupId: string;
328
+ toolCallId: string;
329
+ tool: string;
330
+ input: unknown;
331
+ needsApproval: boolean;
332
+ from: string;
333
+ };
95
334
  }) | (EvBase & {
96
335
  type: "swarm.question";
97
336
  data: {
@@ -110,6 +349,32 @@ type SwarmEvent = (EvBase & {
110
349
  from: "user" | string;
111
350
  answer: unknown;
112
351
  };
352
+ }) | (EvBase & {
353
+ type: "swarm.usage";
354
+ data: {
355
+ slug: string;
356
+ modelId: string;
357
+ breakdown: UsageBreakdown;
358
+ cost: UsageCost;
359
+ generationId: string;
360
+ };
361
+ }) | (EvBase & {
362
+ type: "swarm.turn_usage";
363
+ data: {
364
+ breakdown: UsageBreakdown;
365
+ cost: UsageCost;
366
+ bySlug: Array<{
367
+ slug: string;
368
+ breakdown: UsageBreakdown;
369
+ cost: UsageCost;
370
+ }>;
371
+ generations: number;
372
+ /** The segment's STARTING generation index — distinct per run/resume segment AND retry-stable (it's the
373
+ * monotonic, snapshot-persisted generation counter, not a fresh per-append seq). A host keys a PER-TURN
374
+ * billing debit on it so each segment of a suspend/resume run is charged exactly once (run segment = 0;
375
+ * each resume = the snapshot's next genIndex). The engine never prices — this is just a stable turn id. */
376
+ segmentIndex: number;
377
+ };
113
378
  }) | (EvBase & {
114
379
  type: "swarm.run_failed";
115
380
  data: {
@@ -142,6 +407,118 @@ interface AgentMemoryOverride {
142
407
  };
143
408
  observationalMemory?: boolean | Record<string, unknown>;
144
409
  }
410
+ /** Enforcement level a rule declares. `advise` = prompt-injected guidance; `enforce` = decision-hook veto. */
411
+ type RuleLevel = "advise" | "enforce";
412
+ /** Declarative match over a hook event. Empty = match-all within the resolved seam. */
413
+ interface RuleCondition {
414
+ /** agentSlug glob/exact. For a delegation the gate sees the PARENT (orchestrator) slug. */
415
+ agent?: string | string[];
416
+ /** toolName glob/exact (tool seam). */
417
+ tool?: string | string[];
418
+ /** tool provenance. */
419
+ origin?: "first-party" | "mcp";
420
+ /** modelId glob/exact (generation seam). */
421
+ model?: string | string[];
422
+ }
423
+ interface RuleAction {
424
+ do: "deny" | "ask";
425
+ reason?: string;
426
+ }
427
+ interface RuleSpec {
428
+ id: string;
429
+ statement: string;
430
+ when: RuleCondition;
431
+ level: RuleLevel;
432
+ /** REQUIRED when level==="enforce". `ask` is TOOL-SEAM ONLY (preGeneration cannot suspend). */
433
+ action?: RuleAction;
434
+ /** Seam; inferred from `when` when omitted (model ⇒ generation, else tool). */
435
+ on?: "tool" | "generation";
436
+ }
437
+ /** A normalized, engine-held rule (the output of `defineRule`). */
438
+ interface RuleDef {
439
+ id: string;
440
+ statement: string;
441
+ when: RuleCondition;
442
+ level: RuleLevel;
443
+ action?: RuleAction;
444
+ /** Resolved seam. */
445
+ seam: "tool" | "generation";
446
+ /** Set when authored via `defineAgent` (per-agent scope); undefined ⇒ swarm-wide. */
447
+ scopeAgent?: string;
448
+ }
449
+ /** Workflow compliance. v1: `advisory` (prompt) | `strict` (driver, Phase B — rejected by defineSwarm in Phase A). */
450
+ type WorkflowCompliance = "advisory" | "strict";
451
+ /** A flat data reference resolved at runtime: `{ $ref: "input" }` | `{ $ref: "steps.<id>" }`. */
452
+ type WorkflowRef = {
453
+ $ref: string;
454
+ };
455
+ interface WorkflowTransition {
456
+ to: string;
457
+ when?: {
458
+ $ref: string;
459
+ eq?: unknown;
460
+ exists?: boolean;
461
+ };
462
+ }
463
+ interface WorkflowStep {
464
+ id: string;
465
+ /** exactly ONE kind: */
466
+ agent?: string;
467
+ tool?: string;
468
+ human?: {
469
+ prompt: string;
470
+ field?: AskField;
471
+ };
472
+ /** agent steps: */
473
+ instruction?: string;
474
+ /** tool steps (values may be a WorkflowRef): */
475
+ args?: Record<string, unknown>;
476
+ /** agent steps (values may be a WorkflowRef): */
477
+ input?: Record<string, unknown>;
478
+ next?: string | WorkflowTransition[];
479
+ onError?: "fail" | {
480
+ to: string;
481
+ } | {
482
+ retry: number;
483
+ };
484
+ }
485
+ interface WorkflowSpec {
486
+ name: string;
487
+ compliance: WorkflowCompliance;
488
+ description?: string;
489
+ steps: WorkflowStep[];
490
+ start?: string;
491
+ }
492
+ /** A normalized, engine-held workflow (the output of `defineWorkflow`). */
493
+ interface WorkflowDef {
494
+ name: string;
495
+ compliance: WorkflowCompliance;
496
+ description?: string;
497
+ steps: WorkflowStep[];
498
+ /** Resolved start step id (default: steps[0].id). */
499
+ start: string;
500
+ /** Set when authored via `defineAgent.workflow` (per-agent scope). */
501
+ scopeAgent?: string;
502
+ }
503
+ /**
504
+ * The durable state of an in-flight STRICT workflow run (Phase B). Rides the existing run snapshot payload
505
+ * (not a new table). `cursor` = the step to run next; `outputs` = accumulated step results (for `$ref`);
506
+ * `generationIndex` = the monotonic per-run reserve counter continued across steps; `pending` marks a
507
+ * human/approval suspend so `resume()` re-enters the driver (synthetic `followupId`+`toolCallId` satisfy the
508
+ * resume-auth cross-check). Progress snapshotting only — NOT crash-mid-step replay.
509
+ */
510
+ interface WorkflowRunState {
511
+ workflow: string;
512
+ cursor: string;
513
+ outputs: Record<string, unknown>;
514
+ generationIndex: number;
515
+ pending?: {
516
+ kind: "human" | "approval";
517
+ stepId: string;
518
+ followupId: string;
519
+ toolCallId: string;
520
+ };
521
+ }
145
522
  interface AgentDef {
146
523
  slug: string;
147
524
  head: AgentVersion;
@@ -154,6 +531,82 @@ interface AgentDef {
154
531
  skills?: {
155
532
  name: string;
156
533
  }[];
534
+ /** Per-agent rules (engine-local, v1) — `defineAgent` stamps `scopeAgent` so `defineSwarm` can collect them. */
535
+ rules?: RuleDef[];
536
+ /** Per-agent workflow/procedure (engine-local, v1) — `scopeAgent`-stamped by `defineAgent`. */
537
+ workflow?: WorkflowDef;
538
+ }
539
+ /** A closure dependency satisfied by ANOTHER bundle: a delegate slug that is not a member of this bundle. Carried
540
+ * as a flat min-version floor (no transitive resolution in v1). */
541
+ interface BundleDep {
542
+ slug: string;
543
+ minVersion: number;
544
+ }
545
+ /**
546
+ * BN1 — a declarative connector grant: a member may invoke a connector's actions. The action names are folded into
547
+ * that member's `skillNames`, so the host's connector-tools resolver (`SwarmConfig.connectorTools`) materializes
548
+ * them per-tenant at CALL time, gated by the SP5 approval floor. The bundle carries NAMES ONLY — never a token,
549
+ * connection id, or backend; the host wires the connector + per-tenant credentials. A granted action need not have
550
+ * a first-party skill handle (it is connector-backed), so it is the explicit, validated exception to BN0's
551
+ * "every skill must resolve to a handle" closure rule.
552
+ */
553
+ interface ConnectorGrant {
554
+ /** The bundle member granted these actions (must be one of the bundle's `agents`). */
555
+ agentSlug: string;
556
+ /** The connector provider, e.g. `"slack"` — informational, and the prefix used to expand a short action name. */
557
+ provider: string;
558
+ /** Connector action names — full (`"slack.post_message"`) or short (`"post_message"`, expanded to `provider.action`). */
559
+ actions: string[];
560
+ }
561
+ /** Authoring input for `defineBundle`. It composes `defineAgent` OUTPUTS — it does not replace them. */
562
+ interface BundleSpec {
563
+ slug: string;
564
+ title?: string;
565
+ /** The composed members — exactly the output of `defineAgent` (skill handles ride along on each `AgentDef`). */
566
+ agents: AgentDef[];
567
+ /** SWARM-scoped rules (per-agent rules ride on the `AgentDef`s via `defineAgent`'s `scopeAgent` stamp). */
568
+ rules?: RuleDef[];
569
+ /** SWARM-scoped workflows (per-agent workflows ride on the `AgentDef`s). */
570
+ workflows?: WorkflowDef[];
571
+ /** BN1 — connector grants: a member may invoke a provider's actions (names only; the host materializes them). */
572
+ connectorGrants?: ConnectorGrant[];
573
+ /** Delegates that live in ANOTHER bundle (a delegate slug not among `agents`) — declared so closure validation
574
+ * can distinguish a legitimate external dependency from a typo. */
575
+ requires?: BundleDep[];
576
+ }
577
+ /** The validated, closure-checked composition `mergeBundle` folds into a `SwarmConfig`. The members carry any
578
+ * BN1 connector-grant action names folded into their `skillNames`. */
579
+ interface BundleDef {
580
+ slug: string;
581
+ title?: string;
582
+ agents: AgentDef[];
583
+ rules: RuleDef[];
584
+ workflows: WorkflowDef[];
585
+ connectorGrants: ConnectorGrant[];
586
+ requires: BundleDep[];
587
+ }
588
+ /** The publishable bundle payload (version is derived, not authored — mirrors `AgentVersionContent`). */
589
+ interface BundleVersionContent {
590
+ slug: string;
591
+ title?: string;
592
+ /** Composed members as serializable heads (no skill handles). */
593
+ agents: AgentVersionContent[];
594
+ rules: RuleDef[];
595
+ workflows: WorkflowDef[];
596
+ connectorGrants: ConnectorGrant[];
597
+ requires: BundleDep[];
598
+ }
599
+ /** One immutable, append-only bundle version (mirrors `AgentVersion` one level up). */
600
+ interface BundleVersion extends BundleVersionContent {
601
+ version: number;
602
+ }
603
+ /** A bundle version's summary row (for `listVersions` — mirrors `AgentVersionInfo`). */
604
+ interface BundleVersionInfo {
605
+ version: number;
606
+ title: string;
607
+ status: string;
608
+ isCurrent: boolean;
609
+ memberCount: number;
157
610
  }
158
611
  interface RunRow {
159
612
  runId: string;
@@ -210,6 +663,11 @@ interface EventStore {
210
663
  append(e: SwarmEvent): Promise<number>;
211
664
  /** Tenant-scoped (R11): a forged cross-org runId returns []. The store enforces tenancy, not just the caller. */
212
665
  list(tenantId: string, runId: string, sinceSeq: number): Promise<SwarmEvent[]>;
666
+ /** The full event log for a CONTAINER — every run in the conversation (the root thread + lane sub-threads
667
+ * `<container>:<slug>`), globally ordered by the generated `seq`. Lets a host rebuild a thread's RICH timeline
668
+ * (tool calls + delegation cards) on reload, where message-history is text-only. Tenant-scoped. Optional: a
669
+ * store without an events table may omit it (the host then falls back to message history). */
670
+ listForContainer?(tenantId: string, container: string): Promise<SwarmEvent[]>;
213
671
  subscribe(runId: string): AsyncIterable<SwarmEvent>;
214
672
  }
215
673
  interface RunStore {
@@ -247,6 +705,24 @@ interface MessageStore {
247
705
  * `engine.history` (Mastra recall, scoped by resourceId=tenant:user). Kept for contract consistency. */
248
706
  history(tenantId: string, threadId: string, limit?: number): Promise<SwarmMessage[]>;
249
707
  }
708
+ /** The thread (conversation container) a run references. `runs.thread_id` is a NOT NULL FK to this row, so the
709
+ * row MUST exist before the first run/message of a conversation. `ThreadStore.ensure` is the supported,
710
+ * schema-private way to create it idempotently — without it a host has to reach into the engine's raw pool and
711
+ * hardcode `nightowls.threads`'s columns (FR-009). */
712
+ interface ThreadStore {
713
+ /**
714
+ * Idempotently create the thread row a run will reference (insert-or-ignore on `id`). Safe to call before
715
+ * every run. `orgId` is the tenant; `userId` the conversation owner; `projectId` an optional host-owned
716
+ * sub-scope. Never throws on a pre-existing row. The engine calls this at run start when the adapter provides
717
+ * it, so `messages.append` cannot throw `unknown thread` through the supported path.
718
+ */
719
+ ensure(spec: {
720
+ id: string;
721
+ orgId: string;
722
+ userId: string;
723
+ projectId?: string;
724
+ }): Promise<void>;
725
+ }
250
726
  /** Scratchpad caps (intentionally lossy — working memory, not a system of record). */
251
727
  declare const SCRATCHPAD_MAX_ENTRY_CHARS = 4000;
252
728
  declare const SCRATCHPAD_MAX_KEYS = 64;
@@ -276,11 +752,82 @@ interface AgentRepo {
276
752
  getVersion(tenantId: string, slug: string, version: number): Promise<AgentVersion | null>;
277
753
  listSlugs(tenantId: string): Promise<string[]>;
278
754
  }
755
+ /** The publishable content of one agent version — everything an `AgentVersion` carries EXCEPT the `version`
756
+ * number, which is derived (append-only `max+1`) by the repo, never supplied by the caller. */
757
+ type AgentVersionContent = Omit<AgentVersion, "version">;
758
+ /** One row of an agent's version history (for the no-code builder's rollback UX). Plain/@mastra-free. */
759
+ interface AgentVersionInfo {
760
+ version: number;
761
+ role: string;
762
+ modelId: string;
763
+ status: string;
764
+ isCurrent: boolean;
765
+ }
766
+ /**
767
+ * The WRITABLE agent definition contract (SP6) — "the one true framework extension" the no-code builder hangs
768
+ * off. Extends the read-only `AgentRepo` with the append-only mutation surface:
769
+ * - `publish` → commit a new head version of `content`; returns the derived version number.
770
+ * - `rollback` → republish a prior version's content as a NEW head (append-only, `git revert` not
771
+ * `reset`); returns the new version + the source it was restored from.
772
+ * - `listVersions` → the agent's version history (oldest→newest), flagging the current head.
773
+ *
774
+ * Every mutation takes a `SwarmActor` (the principal) — NOT a bare string — and the implementation MUST:
775
+ * 1. call `assertActorMayMutateDefinition(actor)` FIRST (the non-bypassable agent-bar), then
776
+ * 2. consult the configured `guardDefinitionMutation` policy hook (if any),
777
+ * before committing. `listVersions` takes the actor too so an impl can authorize reads consistently (and so
778
+ * the agent-bar applies uniformly); reads do not mutate, so an impl MAY allow an `agent` actor to list, but
779
+ * the shipped impls apply the same bar for symmetry.
780
+ */
781
+ interface VersionedRepo extends AgentRepo {
782
+ publish(tenantId: string, slug: string, content: AgentVersionContent, actor: SwarmActor): Promise<{
783
+ version: number;
784
+ }>;
785
+ rollback(tenantId: string, slug: string, toVersion: number, actor: SwarmActor): Promise<{
786
+ version: number;
787
+ restoredFrom: number;
788
+ }>;
789
+ listVersions(tenantId: string, slug: string, actor: SwarmActor): Promise<AgentVersionInfo[]>;
790
+ }
791
+ /** BN2 — the read-only bundle version repo (head/getVersion/listSlugs). Structurally identical to `AgentRepo`,
792
+ * one level up: a `BundleVersion` is the unit, not an agent. */
793
+ interface BundleRepo {
794
+ head(tenantId: string, slug: string): Promise<BundleVersion | null>;
795
+ getVersion(tenantId: string, slug: string, version: number): Promise<BundleVersion | null>;
796
+ listSlugs(tenantId: string): Promise<string[]>;
797
+ }
798
+ /** BN2 — the WRITABLE bundle version repo (append-only publish/rollback/listVersions). Mirrors `VersionedRepo`;
799
+ * every mutation enforces the same non-bypassable actor-bar (an `agent` principal can never publish a bundle). */
800
+ interface BundleWritableRepo extends BundleRepo {
801
+ publish(tenantId: string, slug: string, content: BundleVersionContent, actor: SwarmActor): Promise<{
802
+ version: number;
803
+ }>;
804
+ rollback(tenantId: string, slug: string, toVersion: number, actor: SwarmActor): Promise<{
805
+ version: number;
806
+ restoredFrom: number;
807
+ }>;
808
+ listVersions(tenantId: string, slug: string, actor: SwarmActor): Promise<BundleVersionInfo[]>;
809
+ }
279
810
  interface StorageAdapter {
811
+ /** Read-only agent definitions (always present). The supabase adapter's `agents` is also a `VersionedRepo`,
812
+ * but the base contract stays `AgentRepo` so a read-only adapter compiles without growing a write path. */
280
813
  agents: AgentRepo;
814
+ /** Opt-in WRITABLE agent definitions (SP6). Present on adapters that back a definition store (the supabase
815
+ * adapter); omitted by read-only adapters (storage-local, the in-memory dev store) — a no-code builder
816
+ * requires an adapter that provides it. When present it is the SAME object as `agents` (a `VersionedRepo`
817
+ * IS an `AgentRepo`), surfaced under a distinct field so the writable surface is explicit + tree-checkable. */
818
+ agentsWritable?: VersionedRepo;
819
+ /** BN2 — opt-in bundle version repos; writes are append-only with the same actor-bar. Present on adapters that back a bundle
820
+ * store (the supabase adapter); omitted by read-only/in-memory adapters. `bundles` is the read surface,
821
+ * `bundlesWritable` the same object's write surface — mirrors the `agents`/`agentsWritable` pattern. */
822
+ bundles?: BundleRepo;
823
+ bundlesWritable?: BundleWritableRepo;
281
824
  runs: RunStore;
282
825
  events: EventStore;
283
826
  messages: MessageStore;
827
+ /** Opt-in thread (conversation container) creation (FR-009). When present, the engine idempotently ensures the
828
+ * run's thread row exists before the first message/event write, so a host need not pre-create it with raw SQL.
829
+ * Optional so existing adapters keep compiling; a host whose threads are externally managed may omit it. */
830
+ threads?: ThreadStore;
284
831
  /** Opt-in container scratchpad (Part D). Optional so existing adapters keep compiling. */
285
832
  scratchpad?: ScratchpadStore;
286
833
  /**
@@ -292,11 +839,16 @@ interface StorageAdapter {
292
839
  */
293
840
  recordSuspend?(runId: string, tenantId: string, followupId: string, toolCallId: string): void | Promise<void>;
294
841
  /**
295
- * Mark a followup as answered so it can no longer be resumed (the engine calls this when a `resume`
296
- * begins). Closes a replay hole: without it, `findSuspended` keeps returning the followup and the same
297
- * answer can be replayed indefinitely. Idempotent; tenant-scoped. Awaited by the engine.
842
+ * Mark a followup answered so it can no longer be resumed (the engine calls this when a `resume` begins).
843
+ * Closes a replay hole: without it, `findSuspended` keeps returning the followup and the same answer can be
844
+ * replayed indefinitely. Tenant-scoped.
845
+ *
846
+ * **Compare-and-set (K4):** returns `true` if THIS call transitioned the followup unanswered→answered, `false`
847
+ * if it was already answered. An out-of-band reply path uses this as the single answer-once guard — two
848
+ * distinct inbound replies for one followup race here, and only the `true` winner resumes (the loser ACKs).
849
+ * The engine's own resume ignores the return (it already passed `findSuspended`).
298
850
  */
299
- markFollowupAnswered?(followupId: string, tenantId: string): void | Promise<void>;
851
+ markFollowupAnswered?(followupId: string, tenantId: string): boolean | Promise<boolean>;
300
852
  /**
301
853
  * Cross-process cache invalidation (R12). Subscribe to agent-republish notifications so an engine on ANY
302
854
  * instance can evict its agent-row cache immediately instead of waiting out the TTL. `onInvalidate` receives
@@ -304,6 +856,12 @@ interface StorageAdapter {
304
856
  * is the only staleness bound there). The supabase adapter uses Postgres LISTEN/NOTIFY.
305
857
  */
306
858
  subscribeInvalidations?(onInvalidate: (key: string) => void): () => void;
859
+ /**
860
+ * FR-016 — enumerate the engine's tenants (org ids) so a host can idempotently backfill every tenant's crew
861
+ * (`engine.run` resolves agents per-tenant from the DB, so each tenant must be seeded before its first run).
862
+ * Optional: a read-only / in-memory adapter may omit it. The supabase adapter reads `nightowls.orgs`.
863
+ */
864
+ listTenants?(): Promise<string[]>;
307
865
  }
308
866
  interface ModelProvider {
309
867
  resolve(modelId: string, ctx: {
@@ -357,7 +915,29 @@ interface RunInput {
357
915
  * (those come from `SwarmContext`); treat its contents as opaque, attacker-controllable input.
358
916
  */
359
917
  context?: Record<string, unknown>;
918
+ /**
919
+ * Phase B: run a named STRICT workflow via the step-driver instead of the free-form agent turn. Engine/host-
920
+ * side seam — set by a host calling the runner/engine directly; NOT exposed on the public chat route or MCP
921
+ * (the wall promise). An unknown name throws before the run row is created.
922
+ */
923
+ workflow?: string;
360
924
  }
925
+ /** The verdict from a {@link CompletionVerifier}: was the user's request actually satisfied, and if not, a
926
+ * short description of what's still missing (used to build a targeted continue-nudge). */
927
+ interface CompletionVerdict {
928
+ complete: boolean;
929
+ missing?: string;
930
+ }
931
+ /**
932
+ * Completion supervisor hook (reliability) — see EngineOpts.verifyCompletion. Given the original request + a
933
+ * transcript of what the run produced, decide whether the task is genuinely done. Host-supplied (typically a
934
+ * cheap LLM judge). FAIL-SAFE at the call site: a throw/timeout is treated as "complete" (never trap a run).
935
+ */
936
+ type CompletionVerifier = (args: {
937
+ request: string;
938
+ transcript: string;
939
+ ctx: SwarmContext;
940
+ }) => Promise<CompletionVerdict> | CompletionVerdict;
361
941
  interface Runner {
362
942
  run(input: RunInput, ctx: SwarmContext): AsyncIterable<SwarmEvent>;
363
943
  enqueue(input: RunInput, ctx: SwarmContext): Promise<{
@@ -369,6 +949,22 @@ interface Runner {
369
949
  followupId: string;
370
950
  answer: unknown;
371
951
  }, ctx: SwarmContext): AsyncIterable<SwarmEvent>;
952
+ /**
953
+ * Durable resume (symmetric with `enqueue`): wake a suspended run and return its `runId` WITHOUT streaming the
954
+ * continuation — the resumed events reach the client over its EXISTING Realtime subscription. A streaming-only
955
+ * runner omits this; the durable runner provides it. Streaming the resume instead would never close (the
956
+ * Realtime subscribe has no terminal), so the client's `answer()` would hang. Also re-wakes a continuation lost
957
+ * to a process restart, from the durable snapshot (see the background runner).
958
+ */
959
+ resumeEnqueue?(args: {
960
+ runId: string;
961
+ toolCallId: string;
962
+ followupId: string;
963
+ answer: unknown;
964
+ context?: Record<string, unknown>;
965
+ }, ctx: SwarmContext): Promise<{
966
+ runId: string;
967
+ }>;
372
968
  }
373
969
 
374
970
  type ByType<T extends SwarmEvent["type"]> = Extract<SwarmEvent, {
@@ -382,6 +978,23 @@ declare function ev<T extends SwarmEvent["type"]>(type: T, base: {
382
978
  }, data: ByType<T>["data"]): ByType<T>;
383
979
  declare function isEvent<T extends SwarmEvent["type"]>(e: SwarmEvent, type: T): e is ByType<T>;
384
980
 
981
+ /**
982
+ * The per-run mutable store a first-party tool sees on `SwarmToolContext.state`. A simple keyed bag plus an
983
+ * `entries()` snapshot the `onRunEnd` drain reads. Reads of an unset key yield `undefined`; it never throws.
984
+ */
985
+ interface RunStateHandle {
986
+ get<T = unknown>(key: string): T | undefined;
987
+ set(key: string, value: unknown): void;
988
+ has(key: string): boolean;
989
+ delete(key: string): boolean;
990
+ /** A point-in-time DEEP snapshot of all current entries — what `onRunEnd` flushes and the engine persists into
991
+ * the run snapshot. Deep-copied so a later in-place mutation of a stored object can't corrupt an already-taken
992
+ * snapshot. Values must be JSON-serializable (they ride the run snapshot). */
993
+ entries(): Record<string, unknown>;
994
+ }
995
+ /** Build a fresh, Map-backed run-state handle. `seed` (e.g. from `onRunStart`) pre-populates it. */
996
+ declare function createRunState(seed?: Record<string, unknown>): RunStateHandle;
997
+
385
998
  /** Identifies who holds (or is queued for) a container's floor — surfaced in the "waiting for X" indicator. */
386
999
  interface FloorHolder {
387
1000
  /** Display name of the holding run's lane agent, e.g. "Coordinator". */
@@ -422,88 +1035,116 @@ declare class InMemoryContainerFloor implements ContainerFloor {
422
1035
  /** Process-wide singleton. In-memory → single-process only (serverless instances don't share it). */
423
1036
  declare const containerFloor: ContainerFloor;
424
1037
 
425
- interface Price {
426
- inUsdPerMtok: number;
427
- outUsdPerMtok: number;
428
- }
429
- declare const PRICE_TABLE: Record<string, Price>;
430
- declare class CostGovernor {
431
- private opts;
432
- private steps;
433
- private usd;
434
- private prices;
435
- constructor(opts: {
436
- maxSteps: number;
437
- maxCostUsd: number;
438
- prices?: Record<string, Price>;
439
- });
440
- step(): void;
441
- /** Price a single usage WITHOUT accumulating it (for per-generation telemetry cost). */
442
- priceOf(modelId: string, u: {
443
- inputTokens: number;
444
- outputTokens: number;
445
- }): number;
446
- addUsage(modelId: string, u: {
447
- inputTokens: number;
448
- outputTokens: number;
449
- }): void;
450
- costUsd(): number;
451
- shouldStop(): {
452
- stop: boolean;
453
- reason?: string;
454
- };
1038
+ type ModelTier = "swift" | "genius";
1039
+ type ModelRef = string;
1040
+ /** Context handed to the per-task escalation hook so it can decide whether a specific generation needs Genius. */
1041
+ interface TierEscalationContext {
1042
+ tenantId: string;
1043
+ /** The agent the generation is for. */
1044
+ agentSlug: string;
1045
+ /** The agent's declared modelId (a tier sentinel when it opted into routing; the concrete pin otherwise). */
1046
+ pinnedModelId?: string;
455
1047
  }
456
1048
  /**
457
- * Per-delegate USD sub-budgets (R5). The global `cost.maxCostUsd` bounds the WHOLE turn; this adds an
458
- * optional ceiling applied to EACH delegate (sub-agent) so one runaway sub-agent can't burn the entire
459
- * turn before the global cap notices. `maxCostUsd` is the default ceiling for every delegate; `bySlug`
460
- * overrides it per delegate slug (a slug listed there is capped even if there is no default). The run's
461
- * root orchestrator is NOT a delegate and is never capped here — the global cap already covers it.
1049
+ * The tier configuration (lives on `SwarmConfig.models.tier` → `EngineOpts.tier`). @mastra-free.
1050
+ * - `tiers.swift` — the cheap DEFAULT model. REQUIRED (the floor every non-pinned agent lands on).
1051
+ * - `tiers.genius` — the frontier model. OPTIONAL; reachable ONLY through the premium gate below.
1052
+ * - `default` — the tier the bare `"tier:"` sentinel resolves to. Default `"swift"` (cheap-default).
1053
+ * - `allowGenius` — the SERVER-ENFORCED OPT-IN GATE. Default `false`. This is NOT a user-facing "smart"
1054
+ * slider: it lives in the platform-set EngineOpts so a pack/agent config CANNOT grant itself
1055
+ * Genius. With it false, any Genius request (a `tier:genius` agent OR an escalation) is
1056
+ * DOWNGRADED to Swift so the run still proceeds cheaply (deny-vs-downgrade: we DOWNGRADE).
1057
+ * - `escalate` — optional per-task hook (configurable per pack): given the generation's context it may bump
1058
+ * the chosen tier to `"genius"` (e.g. an ambiguous case). STILL subject to `allowGenius` —
1059
+ * escalation cannot bypass the premium gate.
462
1060
  */
463
- interface PerDelegateBudget {
464
- /** Default USD ceiling per delegate. Omit to cap only the slugs named in `bySlug`. */
465
- maxCostUsd?: number;
466
- /** Per-slug overrides of `maxCostUsd`. */
467
- bySlug?: Record<string, {
468
- maxCostUsd?: number;
469
- }>;
1061
+ interface TierConfig {
1062
+ tiers: {
1063
+ swift: ModelRef;
1064
+ genius?: ModelRef;
1065
+ };
1066
+ default?: ModelTier;
1067
+ allowGenius?: boolean;
1068
+ escalate?: (ctx: TierEscalationContext) => ModelTier | undefined;
470
1069
  }
471
- /** Tracks per-delegate USD spend and reports the first delegate to exceed its budget. Priced from the same
472
- * table as CostGovernor (via the shared `priceUsage`) so the global and per-delegate caps agree. Usage
473
- * attributed to the root orchestrator slug is ignored. */
474
- declare class DelegateBudgets {
475
- private cfg;
476
- private rootSlug;
477
- private usd;
478
- private prices;
479
- constructor(cfg: PerDelegateBudget, rootSlug: string, prices?: Record<string, Price>);
480
- /** The USD cap for a delegate: its `bySlug` override if present, else the default. `undefined` → uncapped. */
481
- private capFor;
482
- /** Accumulate one generation's usage against a delegate. No-op for the root orchestrator (not a delegate). */
483
- addUsage(slug: string, modelId: string, u: {
484
- inputTokens: number;
485
- outputTokens: number;
486
- }): void;
487
- /** The first delegate that has met or exceeded its USD cap, or null. */
488
- exceeded(): {
489
- slug: string;
490
- reason: string;
491
- } | null;
1070
+ /** The outcome of routing a single generation's model. `modelId` is always the EFFECTIVE id to use next. */
1071
+ interface TierResolution {
1072
+ /** The effective modelId to hand to the allow-list + factory. */
1073
+ modelId: string;
1074
+ /** The tier actually landed on. Undefined when the agent pinned a concrete model (a pin is not a tier). */
1075
+ tier?: ModelTier;
1076
+ /** True when a Genius request was downgraded to Swift (gate closed, or no genius model configured). */
1077
+ downgraded: boolean;
1078
+ /** The tier that was REQUESTED before gating (set when it differs from `tier`, i.e. on a downgrade). */
1079
+ requestedTier?: ModelTier;
1080
+ /** True when the escalation hook bumped the tier (to genius) and that bump survived the gate. */
1081
+ escalated?: boolean;
492
1082
  }
1083
+ /** Is this declared modelId a tier sentinel (opting into routing) rather than a concrete pin? */
1084
+ declare function isTierSentinel(modelId: string | undefined): boolean;
1085
+ /**
1086
+ * Resolve the effective model for ONE generation, applying tier routing + the premium Genius gate + escalation.
1087
+ *
1088
+ * Order of precedence:
1089
+ * 1. A CONCRETE PIN (non-sentinel modelId) is returned verbatim — routing/escalation never touch a pin.
1090
+ * 2. Otherwise the requested tier = the sentinel's tier (or the config default for a bare `"tier:"`).
1091
+ * 3. The optional `escalate` hook may bump the request to `"genius"`.
1092
+ * 4. THE GATE: a `"genius"` request survives ONLY when `allowGenius` is true AND a `genius` model is
1093
+ * configured; otherwise it DOWNGRADES to `"swift"` (the run proceeds cheaply; `downgraded` is flagged).
1094
+ */
1095
+ declare function resolveTier(modelId: string, cfg: TierConfig, ctx: TierEscalationContext): TierResolution;
1096
+ /**
1097
+ * The engine-facing convenience: given an agent's declared modelId, return the EFFECTIVE modelId after tier
1098
+ * routing. When no tier config is configured this is the identity function (today's behaviour). The resulting
1099
+ * modelId then flows through the SAME allow-list validation + factory mapping as before — a tier model is never
1100
+ * exempt from the allow-list.
1101
+ */
1102
+ declare function tierModelId(modelId: string, cfg: TierConfig | undefined, ctx: TierEscalationContext): string;
493
1103
 
494
1104
  interface EngineOpts {
495
1105
  storage: StorageAdapter;
496
1106
  model: ModelProvider;
497
1107
  modelFactory: (modelId: string, agentSlug?: string) => unknown;
498
- /** Global per-run caps + optional per-delegate sub-budgets (R5: `perDelegate` stops one runaway sub-agent
499
- * from burning the whole turn before the global `maxCostUsd` notices). */
1108
+ /**
1109
+ * SP10 — the cheap-model router (Swift/Genius tiers). @mastra-free. When set, the per-agent model resolver
1110
+ * routes a tier-sentinel `modelId` (`"tier:swift"` / `"tier:genius"` / bare `"tier:"`) to the configured tier
1111
+ * model; a concrete pinned `modelId` is kept verbatim (routing layers OVER pinning). `tiers.swift` is the
1112
+ * cheap DEFAULT every non-pinned agent lands on; `tiers.genius` is reachable ONLY through the server-enforced
1113
+ * `allowGenius` premium gate (default false) — set HERE in EngineOpts so a pack/agent config cannot grant itself
1114
+ * Genius. An optional per-task `escalate` hook may bump a generation to Genius, still subject to the gate. When
1115
+ * a Genius request is denied by the gate it DOWNGRADES to Swift so the run proceeds cheaply. Omit ⇒ no routing
1116
+ * (identical to today: the agent's own `modelId` flows through, still allow-list-validated). The routed model
1117
+ * is ALWAYS re-validated by the allow-list `model` provider — a tier model is never exempt.
1118
+ */
1119
+ tier?: TierConfig;
1120
+ /** Global per-run caps + metering config (SP1) + optional per-delegate sub-budgets (R5: `perDelegate` stops
1121
+ * one runaway sub-agent from burning the whole turn before the global `maxCostUsd` notices).
1122
+ * - `prices`: static per-model overrides, merged over PRICE_TABLE.
1123
+ * - `priceFeed`: optional live price seam (numbers only — engine wall). Merged over `prices`.
1124
+ * - `failOnUnknownModel` (default false): when true, an unpriced model THROWS instead of pricing at $0.
1125
+ * - `onCapHit` (SP9-core, default "stop"): the GLOBAL run-level cost/step cap's behaviour when reached.
1126
+ * "stop" (DEFAULT) = today's terminal `run_failed` stage "cost" — byte-identical to before.
1127
+ * "ask" = PAUSE the run and ASK the user "Budget cap reached — continue?" (a CONSUMER-context opt-in),
1128
+ * reusing SP5's suspend→swarm.question→resume machinery. Resume-approve RAISES the budget by
1129
+ * `capIncrementUsd` and continues; resume-reject terminally stops (stage "cost"). The
1130
+ * per-delegate cap (`perDelegate`) is NOT folded into the ask flow — it stays terminal.
1131
+ * - `capIncrementUsd` (SP9-core): the additional USD headroom an "ask" APPROVE grants (`maxCostUsd +=`).
1132
+ * Defaults to the original `maxCostUsd` ("another budget's worth"). Only meaningful with `onCapHit:"ask"`. */
500
1133
  cost: {
501
1134
  maxSteps: number;
502
1135
  maxCostUsd: number;
503
1136
  perDelegate?: PerDelegateBudget;
1137
+ prices?: Record<string, Price>;
1138
+ priceFeed?: PriceFeed;
1139
+ failOnUnknownModel?: boolean;
1140
+ onCapHit?: "stop" | "ask";
1141
+ capIncrementUsd?: number;
504
1142
  };
505
1143
  /** Per-swarm skill resolver. When omitted, agents expose only built-in tools. */
506
1144
  resolveSkill?: (name: string) => SwarmSkill | undefined;
1145
+ /** PR2 — opt-in per-request connector-tools resolver (tenant-scoped). Forwarded verbatim to `buildMastraAgent`
1146
+ * for the orchestrator + every sub-agent. Connector-agnostic: core never imports `@nightowlsdev/connectors`. */
1147
+ connectorTools?: (ctx: SwarmContext) => Promise<SwarmTool[]>;
507
1148
  /**
508
1149
  * Mastra storage backend for suspend/resume snapshots. Resume is storage-gated
509
1150
  * (SPIKE-FINDINGS item 5): the in-memory default cannot survive process death.
@@ -531,9 +1172,79 @@ interface EngineOpts {
531
1172
  };
532
1173
  /** Opt-in `recall_lane` tool (Part E). Read-only peer-lane transcript read. */
533
1174
  recallLane?: boolean;
1175
+ /** Phase A soft tier: per-agent soft-policy lines (advise rules + advisory-workflow summaries) appended to
1176
+ * each agent's system prompt. Built by `defineSwarm` from the swarm's rules/workflows. Omit ⇒ no policy. */
1177
+ softPolicy?: (slug: string) => string[];
1178
+ /** Phase B: swarm-level STRICT workflows, runnable by name via `RunInput.workflow`. Built by `defineSwarm`. */
1179
+ workflows?: WorkflowDef[];
1180
+ /** Phase B: per-agent STRICT workflows (keyed by agent slug) — replace that agent's turn when it owns the run. */
1181
+ agentWorkflows?: Record<string, WorkflowDef>;
534
1182
  /** Injectable per-lane floor (Part C / E3). Default: the in-memory process singleton. Pass a Postgres-backed
535
1183
  * floor (createPostgresFloor) for serverless / multi-instance deploys. */
536
1184
  floor?: ContainerFloor;
1185
+ /** Decision/observer hook dispatcher (SP2). `defineSwarm` always supplies one (allow-all when no hooks are
1186
+ * configured). When omitted (e.g. an engine built directly in a unit test), the engine defaults to an
1187
+ * allow-all dispatcher — behaviour identical to today. The engine AWAITS `preGeneration` before every model
1188
+ * launch; a `deny` vetoes the generation (terminal `run_failed` stage `"reserve"`). The same dispatcher's
1189
+ * `preToolCall` powers SP5's action-approval gate. */
1190
+ hooks?: HookDispatcher;
1191
+ /**
1192
+ * SP5 — the NON-REMOVABLE tool-approval policy (a P0 SAFETY control: spend caps limit cost, not harm). Forces
1193
+ * human approval on side-effecting tools regardless of the per-tool `needsApproval` flag, so a consumer pack
1194
+ * can't ship a `needsApproval:false` $0.50 action that causes $50k of damage. `defineSwarm` bakes this into the
1195
+ * `hooks` dispatcher (which combines policy + flag + the `preToolCall` hook), so when `hooks` is supplied THAT
1196
+ * dispatcher's policy is authoritative. This standalone field lets a DIRECT engine builder (e.g. a unit test
1197
+ * that passes no dispatcher) set the policy; the engine then builds an allow-all-hooks dispatcher WITH it.
1198
+ * Default `{ mode: "flag" }` — today's behaviour (only `needsApproval:true` tools gate).
1199
+ */
1200
+ toolApproval?: ToolApprovalPolicy;
1201
+ /**
1202
+ * SP15 — the optional SecretResolver the platform vault (SP15-platform) implements. When set, the engine
1203
+ * injects it on every run's RequestContext (SAME seam as SP5's ToolGate) so a first-party tool body can
1204
+ * `await ctx.secrets.resolve(ref)` to fetch a tenant-scoped secret at execution time. @mastra-free. Omit ⇒
1205
+ * `ctx.secrets.resolve(...)` yields `undefined` (no vault) and the no-secrets path is unchanged from today.
1206
+ */
1207
+ secrets?: SecretResolver;
1208
+ /**
1209
+ * SP3 — best-effort per-event OBSERVER, fired by the engine AFTER each event is persisted (in `emit`), for
1210
+ * BOTH `run` and `resume`. Transport-agnostic: it sees every event regardless of how it reaches the client
1211
+ * (interactive SSE vs durable + realtime), so platform metering (debit on `swarm.turn_usage`, settle on a
1212
+ * terminal) can live HERE rather than teeing the route's stream. Awaited but FAIL-SAFE — a throwing observer
1213
+ * is swallowed (the host logs its own errors), NEVER breaking the run, exactly like `telemetry`. Omit ⇒ no-op.
1214
+ */
1215
+ onEvent?: (ev: SwarmEvent, ctx: SwarmContext) => void | Promise<void>;
1216
+ /**
1217
+ * Completion supervisor (reliability) — an optional host check fired when a turn would END, to decide whether
1218
+ * the user's request was actually SATISFIED. The engine passes the original request + a transcript of what the
1219
+ * run produced; it returns `{ complete, missing? }`. When not complete the engine re-invokes the orchestrator
1220
+ * with a TARGETED nudge built from `missing` (same thread, full context), up to MAX_CONTINUE_NUDGES. If still
1221
+ * incomplete, the run ends `run_failed` stage "incomplete" (retryable) — a clear non-delivery the host can
1222
+ * refund — instead of a silent `done`. FAIL-SAFE: a throwing/rejecting verifier is treated as "complete"
1223
+ * (fail-open — never trap a run in a verify loop). Omit ⇒ the cheap structural "did the root speak last?"
1224
+ * fallback nudge is used instead.
1225
+ */
1226
+ verifyCompletion?: CompletionVerifier;
1227
+ /**
1228
+ * FR-003 — per-run lifecycle hooks (fired on `run()`). `onRunStart` is awaited once when a run begins (after the
1229
+ * run row + thread are ensured, before the first model launch), receiving the run's `input` and a fresh
1230
+ * `RunStateHandle` to seed (e.g. from `input.context`). `onRunEnd` is awaited once at the run's terminal boundary
1231
+ * (done / failed / suspended / thrown / abandoned — it fires from the run's `finally`), receiving the final
1232
+ * `state` and the `outcome`, so a host can flush/persist deterministically. Both are FAIL-SAFE (a throw is
1233
+ * swallowed + logged, never breaking the run). `state` is the SAME handle the run's tools saw via `ctx.state`.
1234
+ * `onRunEnd` fires at EACH segment's terminal: a suspend ends the run() segment with outcome `"suspended"`, and
1235
+ * the resume segment fires its own `onRunEnd` (`"done"`/`"failed"`/`"suspended"`) — a host keys billing/persist
1236
+ * on the terminal outcome. A `resume` RESTORES the per-run `state` persisted at suspend (so a client-tool /
1237
+ * evolving-document flow keeps state across the answer); `onRunStart` fires on `run()` only. State values must be
1238
+ * JSON-serializable (they ride the run snapshot). Omit ⇒ no-op.
1239
+ */
1240
+ onRunStart?: (ctx: SwarmContext, info: {
1241
+ input: RunInput;
1242
+ state: RunStateHandle;
1243
+ }) => void | Promise<void>;
1244
+ onRunEnd?: (ctx: SwarmContext, info: {
1245
+ state: RunStateHandle;
1246
+ outcome: "done" | "failed" | "suspended";
1247
+ }) => void | Promise<void>;
537
1248
  }
538
1249
  declare class SwarmEngine {
539
1250
  private opts;
@@ -541,11 +1252,56 @@ declare class SwarmEngine {
541
1252
  private rowCache;
542
1253
  private memory;
543
1254
  private floor;
1255
+ private hooks;
544
1256
  constructor(opts: EngineOpts);
1257
+ /** SP1: the swarm's metering config, in the shape DelegateBudgets/priceUsage expect. CostGovernor reads the
1258
+ * same fields directly off `opts.cost`; this packs them for the per-delegate tracker so both caps price
1259
+ * tokens identically (built-in PRICE_TABLE ← static `prices` ← live `priceFeed`, with `failOnUnknownModel`). */
1260
+ private pricingOpts;
1261
+ /** Fire the best-effort per-event observer (`EngineOpts.onEvent`). Awaited so an async observer (e.g. a
1262
+ * metering debit) completes in order, but FAIL-SAFE: a throw is swallowed (the host logs its own), never
1263
+ * breaking the run — same contract as the telemetry exporter. No-op when no observer is configured. */
1264
+ private notifyEvent;
1265
+ /** Run the completion supervisor (`EngineOpts.verifyCompletion`), FAIL-OPEN: no verifier, or a throwing one,
1266
+ * yields `{ complete: true }` so a missing/broken judge never traps a run in a verify loop. */
1267
+ private safeVerify;
1268
+ /** Best-effort recall of the run's ORIGINAL request (first user message on the thread) for the completion
1269
+ * verifier on RESUME, where the engine doesn't hold the opening message. Empty on any failure / no verifier. */
1270
+ private recallRequest;
545
1271
  /** Cached agent-row load shared by the three dynamic agent fns AND run/resume. */
546
1272
  private loadRow;
1273
+ /** Resolve an agent's STORED modelId — which may be a tier sentinel (`"tier:"` / `"tier:swift"`) — to the
1274
+ * CONCRETE model id the generation actually runs on, so metering/pricing + the preGeneration event see the
1275
+ * real model, not the sentinel (which has no price → every tier-routed turn would meter at $0). Mirrors
1276
+ * mastra-map's modelFor routing; with no tier config it returns the id unchanged. (SP10 pricing follow-up.) */
1277
+ private priceModelId;
547
1278
  private agent;
548
1279
  private requestContext;
1280
+ /**
1281
+ * SP5 — the action-approval gate handed to every gated tool via the RequestContext. Bound once (stable
1282
+ * reference). Delegates to the dispatcher's `preToolCall`, which is fail-closed (a throwing configured hook ⇒
1283
+ * deny) and applies the non-removable policy. The defineTool wrapper turns the returned `ToolDecision` into:
1284
+ * allow → run; deny → blocked result; ask → suspend-and-ask (the existing `swarm.question`/resume machinery).
1285
+ */
1286
+ private readonly toolGate;
1287
+ /**
1288
+ * SP5 truth-fix — resolve whether a tool WILL require approval, for the `swarm.tool_call` event's
1289
+ * `needsApproval` (the react reducer reads it to render an approval card). The mapChunk emit currently
1290
+ * hardcodes `false` (the truth-bug). This computes the truthful value from the SAME policy + per-tool flag the
1291
+ * gate uses: the tool's resolved `needsApproval` (its own flag, defaulting by origin) run through the
1292
+ * dispatcher's SYNC `policyDecision` — `ask` ⇒ true (it will gate), else false. The async `preToolCall` hook
1293
+ * can still escalate a specific call at execute time, but the policy-derived baseline is the truthful default
1294
+ * the UI needs without speculatively running the hook for every tool_call event.
1295
+ */
1296
+ private gatesApproval;
1297
+ /**
1298
+ * SP2: the preGeneration DECISION seam. Awaited immediately before each model launch (run + resume). The
1299
+ * dispatcher is fail-closed (a throwing hook ⇒ deny), so this only ever sees a clean `allow`/`deny`; a `deny`
1300
+ * THROWS `ReserveDenied` so the model call below never happens and the run/resume catch-all maps it to a
1301
+ * terminal `run_failed` stage "reserve" (NOT the generic "exception"). Allow-all + zero-overhead when no
1302
+ * hooks are configured (the default dispatcher returns allow synchronously-ish without invoking anything).
1303
+ */
1304
+ private guardGeneration;
549
1305
  /** Per-call Mastra memory ids + delegation, only when memory is configured (else stream is unchanged). */
550
1306
  private memoryOpts;
551
1307
  /**
@@ -606,11 +1362,28 @@ declare class SwarmEngine {
606
1362
  scratchpadPublic(container: string, ctx: SwarmContext): Promise<ScratchpadEntry[]>;
607
1363
  /** In-flight runs (running|suspended) for a container + its lanes — powers cross-lane background presence (E5). */
608
1364
  activeRuns(container: string, ctx: SwarmContext): Promise<ActiveRun[]>;
1365
+ /** The full, globally-ordered event log for a thread's CONTAINER (all its runs + lane sub-threads) — lets a host
1366
+ * rebuild the RICH timeline (tool calls + delegation cards) on reload, since message history is text-only.
1367
+ * Returns [] when the store has no events table (`listForContainer` unset). */
1368
+ threadEvents(threadId: string, ctx: SwarmContext): Promise<SwarmEvent[]>;
609
1369
  /** The tenant's agent roster (slug, title-cased display name, role, delegate graph) as wall-safe
610
1370
  * AgentSummary[]. Sourced from the agent rows; no vendor type in the signature or result. Powers
611
1371
  * the multi-agent pile / @mention UI. */
612
1372
  listAgents(ctx: SwarmContext): Promise<AgentSummary[]>;
613
1373
  run(input: RunInput, ctx: SwarmContext): AsyncIterable<SwarmEvent>;
1374
+ /**
1375
+ * Phase B — drive a STRICT workflow IN PLACE OF the free-form continue-nudge loop. Shared by `run()` (fresh)
1376
+ * and `resume()` (re-entry after a human/approval suspend). An `agent` step reuses `this.agent().stream()`
1377
+ * with a per-step requestContext (agentSlug = the step's agent) so it inherits persona/tools/gate/model/cost;
1378
+ * a `tool` step runs `executeToolWithGate`; a `human`/approval pause suspends SP9-style. Reserve, usage, and
1379
+ * the terminal turn_usage flow through the caller's machinery (`m`). Handles the terminal status/setStatus.
1380
+ */
1381
+ private driveWorkflow;
1382
+ /** A workflow `agent` step: stream `slug` with `message` (a per-step requestContext so it inherits the agent's
1383
+ * persona/tools/gate/model), reserving + metering through the caller's machinery, returning the final text. */
1384
+ private streamWorkflowAgentStep;
1385
+ /** A workflow `tool` step: run the gate-free tool body through `executeToolWithGate` (the engine-owned gate). */
1386
+ private runWorkflowToolStep;
614
1387
  resume(args: {
615
1388
  runId: string;
616
1389
  toolCallId: string;
@@ -619,6 +1392,24 @@ declare class SwarmEngine {
619
1392
  context?: Record<string, unknown>;
620
1393
  }, ctx: SwarmContext): AsyncIterable<SwarmEvent>;
621
1394
  }
1395
+ /**
1396
+ * SP2: a typed veto thrown when the `preGeneration` decision hook DENIES a model launch. Caught explicitly in
1397
+ * the run/resume catch-all so it maps to a TERMINAL `run_failed` stage `"reserve"` (mirroring the cost cap's
1398
+ * `"cost"` stage) instead of falling through to the generic `"exception"` stage. The model call has NOT
1399
+ * happened when this throws — the seam is BEFORE `stream`/`resumeStream`.
1400
+ */
1401
+ declare class ReserveDenied extends Error {
1402
+ readonly stage: "reserve";
1403
+ constructor(reason: string);
1404
+ }
1405
+
1406
+ /** The bound, run-scoped resolver handed to a tool body via `ctx.secrets`. `resolve(ref)` carries no ctx arg —
1407
+ * the run's tenant/auth scope is captured by the binding, so a tool can never resolve another tenant's secret
1408
+ * (the scope comes from the trusted RequestContext, NOT from tool args). Returns `undefined` when no resolver is
1409
+ * configured or the ref is unknown. */
1410
+ interface BoundSecrets {
1411
+ resolve(ref: string): Promise<string | undefined>;
1412
+ }
622
1413
 
623
1414
  interface ToolSpec<I, O> {
624
1415
  name: string;
@@ -633,9 +1424,23 @@ interface SwarmToolContext {
633
1424
  tenantId: string;
634
1425
  userId: string;
635
1426
  runId: string;
636
- secrets?: {
637
- resolve(ref: string): Promise<string>;
638
- };
1427
+ /**
1428
+ * SP15 — a run-scoped secret resolver (always present; bound to THIS run's tenant/auth ctx). A first-party
1429
+ * tool body calls `await ctx.secrets.resolve(ref)` to fetch a scoped secret value at execution time, mirroring
1430
+ * how the MCP connector resolves a credentialRef. The run's tenant scope is captured by the binding (NOT passed
1431
+ * by the tool), so a tool can never resolve another tenant's secret. Resolves to `undefined` when the swarm has
1432
+ * no SecretResolver configured (no vault) or the ref is unknown — never throws. Optional in the type only for
1433
+ * back-compat with code that constructs a bare ctx; the engine always populates it.
1434
+ */
1435
+ secrets?: BoundSecrets;
1436
+ /**
1437
+ * FR-003 — the per-run mutable state store. The SAME handle across every tool call in this run (orchestrator
1438
+ * AND delegated sub-agents), so a chain of tools (`addPrimitive → addStageZone → …`) can read the previous
1439
+ * tool's result and write the next. Seeded by `SwarmConfig.onRunStart` (e.g. from the client payload), drained
1440
+ * by `onRunEnd`. GC'd with the run — no module-level registry. Optional in the type only for back-compat with a
1441
+ * hand-constructed bare ctx; the engine always populates it.
1442
+ */
1443
+ state?: RunStateHandle;
639
1444
  }
640
1445
  interface SwarmTool {
641
1446
  name: string;
@@ -645,6 +1450,20 @@ interface SwarmTool {
645
1450
  type SwarmSkill = SwarmTool;
646
1451
  declare function defineTool<I, O>(spec: ToolSpec<I, O>): SwarmTool;
647
1452
  declare function defineSkill(tool: SwarmTool): SwarmSkill;
1453
+ interface ClientToolSpec<I, O> {
1454
+ name: string;
1455
+ description?: string;
1456
+ inputSchema: z.ZodType<I>;
1457
+ outputSchema?: z.ZodType<O>;
1458
+ /** Ask the UI to confirm before the client runs the action (the handler renders an approval step). Default false. */
1459
+ needsApproval?: boolean;
1460
+ }
1461
+ /** Thrown when a client tool's handler reports an error (the browser posted `{ error }`) — surfaces as a failed
1462
+ * tool_result so the model sees the action did not succeed. */
1463
+ declare class ClientToolError extends Error {
1464
+ constructor(toolName: string, reason?: string);
1465
+ }
1466
+ declare function defineClientTool<I, O>(spec: ClientToolSpec<I, O>): SwarmTool;
648
1467
  interface AgentSpec {
649
1468
  slug: string;
650
1469
  role?: "orchestrator" | "specialist";
@@ -655,22 +1474,100 @@ interface AgentSpec {
655
1474
  modelId?: string;
656
1475
  /** Per-agent memory OPTIONS override (R9), merged over the swarm `memory` config. Infra stays swarm-wide. */
657
1476
  memory?: AgentMemoryOverride;
1477
+ /** Per-agent rules (additive over swarm rules for THIS agent). Engine-local in v1 (not persisted/versioned). */
1478
+ rules?: RuleDef[];
1479
+ /** Per-agent workflow/procedure. Engine-local in v1. A strict one is rejected by `defineSwarm` until Phase B. */
1480
+ workflow?: WorkflowDef;
658
1481
  }
659
1482
  declare function defineAgent(spec: AgentSpec): AgentDef;
1483
+ /**
1484
+ * Normalize + validate a `RuleSpec` into a `RuleDef`. Plain data (no engine types) — the compiled rule the
1485
+ * engine holds. Validation is compile-time (throws): `enforce` requires an `action`; `ask` is TOOL-SEAM ONLY
1486
+ * (preGeneration is binary allow/deny — it cannot suspend); an `ask` cannot explicitly target a delegation
1487
+ * (`agent-*`) because `gateDelegation` defers `ask` to the sub-agent's inner gates (use `deny`).
1488
+ */
1489
+ declare function defineRule(spec: RuleSpec): RuleDef;
1490
+ /**
1491
+ * Normalize + validate a `WorkflowSpec` into a `WorkflowDef`. Validation is GRAPH-ONLY (pure code): unique step
1492
+ * ids, exactly one kind per step, `start`/`next`/`to`/`$ref` reference known steps, no cycles. Agent/tool
1493
+ * EXISTENCE is NOT validated here (it's runtime — `defineSwarm` doesn't validate delegate slugs and tenant DB
1494
+ * rows make it impossible; an unknown agent/tool surfaces as a runtime `run_failed` stage "workflow").
1495
+ */
1496
+ declare function defineWorkflow(spec: WorkflowSpec): WorkflowDef;
660
1497
  /** Build a per-swarm skill resolver from the agents' attached skill handles. */
661
1498
  declare function buildSkillResolver(agents: AgentDef[]): (name: string) => SwarmSkill | undefined;
1499
+ /**
1500
+ * Compose + CLOSURE-VALIDATE a capability bundle from `defineAgent` outputs (BN0 static composition + BN1 connector
1501
+ * grants). Pure normalizer/validator (no storage, no Mastra) — same posture as `defineRule`/`defineWorkflow`.
1502
+ * Validates, at author time, that the bundle is self-contained:
1503
+ * - every member `skillName` resolves to a first-party **handle** present on the bundle, OR is a declared
1504
+ * **connector grant** for that member (BN1 — connector-backed, materialized per-tenant by the host at runtime);
1505
+ * - every member delegate is a bundle member or a declared `requires` dependency;
1506
+ * - every tool-seam rule ref and every workflow `step.tool` resolves to a handle or any declared grant;
1507
+ * - no workflow step embeds a credential/connection ref.
1508
+ * So a missing handle/grant fails LOUD here, not as a runtime `run_failed`. Connector grants fold their action names
1509
+ * into the granted member's `skillNames` so the host's `connectorTools` resolver grants them by membership at call time.
1510
+ */
1511
+ declare function defineBundle(spec: BundleSpec): BundleDef;
1512
+ /**
1513
+ * Fold a validated bundle into a `SwarmConfig` (its agents + swarm-scoped rules/workflows) so the result is a
1514
+ * drop-in `defineSwarm` input. Per-agent rules/workflows ride on the merged `AgentDef`s and are collected by
1515
+ * `defineSwarm` exactly as for hand-authored agents — the bundle is a FRONT-END to `defineSwarm`, not a parallel
1516
+ * engine, so it adds no new runtime path. A bundle that re-declares an existing agent slug is a conflict (fail loud).
1517
+ */
1518
+ declare function mergeBundle(cfg: SwarmConfig, bundle: BundleDef): SwarmConfig;
1519
+ /**
1520
+ * Project a `BundleDef` (in-process, carrying skill HANDLES) into its SERIALIZABLE `BundleVersionContent` for
1521
+ * persistence (BN2): each member becomes its head's `AgentVersionContent` (the `version` + the handles dropped),
1522
+ * and the rules/workflows/connector-grants/deps carry through as the plain data they already are. This is the
1523
+ * bridge from BN0/BN1 (compose in-process) to BN2 (persist + version). The result has `skillNames` but no
1524
+ * handles — re-hydrating it into a live swarm (BN3 apply) needs a host-supplied handle manifest.
1525
+ */
1526
+ declare function toBundleContent(def: BundleDef): BundleVersionContent;
662
1527
  declare const ASK_TOOL_NAME = "ask";
663
1528
  interface SwarmConfig {
664
1529
  storage: StorageAdapter;
665
1530
  agents: AgentDef[];
1531
+ /**
1532
+ * The model allow-list + optional SP10 cheap-model router. `allow` is the per-tenant allow-set every
1533
+ * resolved model (incl. a tier model) must pass. `tier` (optional) enables Swift/Genius routing: a non-pinning
1534
+ * agent (tier-sentinel `modelId`) lands on the cheap `swift` model by default; `genius` is reachable ONLY via
1535
+ * the server-enforced `allowGenius` premium gate (a pack/agent config cannot grant itself Genius). See TierConfig.
1536
+ */
666
1537
  models: {
667
1538
  allow: string[];
1539
+ tier?: TierConfig;
668
1540
  };
669
1541
  modelFactory: (modelId: string, agentSlug?: string) => unknown;
1542
+ /**
1543
+ * PR2 — opt-in connector tools. Build with `materializeConnectors(connectors, backend)` from
1544
+ * `@nightowlsdev/connectors`; an agent gets a connector action only if the action `name` is in its
1545
+ * `skillNames` (i.e. it listed the action among its `skills`). Tenant-scoped + materialized per request.
1546
+ * Omit ⇒ no connector tools.
1547
+ */
1548
+ connectorTools?: (ctx: SwarmContext) => Promise<SwarmTool[]>;
1549
+ /**
1550
+ * Global per-run caps + metering config (SP1). `prices` statically overrides the built-in PRICE_TABLE;
1551
+ * `priceFeed` is an optional live seam supplying NUMBERS only (engine wall); `failOnUnknownModel` (default
1552
+ * false) makes an unpriced model THROW instead of pricing at $0. `perDelegate` adds optional per-delegate
1553
+ * USD sub-budgets (R5). All metering fields are threaded into the engine's CostGovernor + DelegateBudgets.
1554
+ *
1555
+ * SP9-core — the cap-that-asks (`onCapHit` + `capIncrementUsd`). `onCapHit:"ask"` (DEFAULT "stop") turns a
1556
+ * GLOBAL cost/step-cap hit into a PAUSE-and-ASK ("Budget cap reached — continue?") instead of a terminal
1557
+ * `run_failed`, so a consumer run can be granted more budget mid-task rather than dying. A pack sets this
1558
+ * SERVER-SIDE (it is not a per-agent flag). Resume-approve raises `maxCostUsd` by `capIncrementUsd` (default
1559
+ * = the original `maxCostUsd`) and continues; resume-reject terminally stops (stage "cost"). The per-delegate
1560
+ * cap is unaffected — it stays terminal. Leave `onCapHit` unset for today's behaviour (terminal stop).
1561
+ */
670
1562
  cost: {
671
1563
  maxSteps: number;
672
1564
  maxCostUsd: number;
673
1565
  perDelegate?: PerDelegateBudget;
1566
+ prices?: Record<string, Price>;
1567
+ priceFeed?: PriceFeed;
1568
+ failOnUnknownModel?: boolean;
1569
+ onCapHit?: "stop" | "ask";
1570
+ capIncrementUsd?: number;
674
1571
  };
675
1572
  /**
676
1573
  * Telemetry exporter(s). One or many — many are composed best-effort
@@ -701,6 +1598,71 @@ interface SwarmConfig {
701
1598
  /** Opt-in `recall_lane` tool (Part E): lets an agent read a peer agent's lane transcript in the same
702
1599
  * conversation. Read-only; reuses the engine's history (best-effort — empty without memory). */
703
1600
  recallLane?: boolean;
1601
+ /**
1602
+ * Opt-in decision/observer hooks (SP2). Types-only / engine-free (from `@nightowlsdev/hooks`). Today exposes a
1603
+ * `preGeneration` DECISION hook that the engine AWAITS immediately before EACH model launch (run + resume):
1604
+ * an `allow` proceeds, a `deny` VETOES the generation (terminal `run_failed` stage `"reserve"`, no model call).
1605
+ * Decision hooks are FAIL-CLOSED — a throwing hook is treated as a deny (a billing/safety veto must never
1606
+ * silently allow on error). Omit ⇒ allow-all, identical to prior behaviour with zero overhead. This is the
1607
+ * seam the platform's per-generation billing RESERVE (SP3) plugs into.
1608
+ */
1609
+ hooks?: SwarmHooks;
1610
+ /**
1611
+ * SP5 — the NON-REMOVABLE action-approval policy (a P0 SAFETY control). Forces human approval on
1612
+ * side-effecting tools regardless of the per-tool `needsApproval` flag, so a consumer pack cannot ship a
1613
+ * `needsApproval:false` $0.50 action that causes $50k of damage (spend caps limit cost, not harm). Two modes:
1614
+ * - `{ mode: "flag" }` (DEFAULT): today's behaviour — only `needsApproval:true` tools gate.
1615
+ * - `{ mode: "all-side-effecting" }`: force-ask EVERY non-read-only tool (every MCP tool + every first-party
1616
+ * tool not on the read-only allowlist), regardless of the per-tool flag. The recommended setting for an untrusted
1617
+ * consumer pack. Optionally override `readOnly` to customise the exempt set.
1618
+ * Baked into the `hooks` dispatcher, which combines policy + flag + the optional `preToolCall` hook into the
1619
+ * effective decision (allow / deny / ask-the-human). A `preToolCall` hook (in `hooks`) can add a richer gate.
1620
+ */
1621
+ toolApproval?: ToolApprovalPolicy;
1622
+ /**
1623
+ * SP15 — opt-in secret resolution for first-party tools. Pass a `SecretResolver` (the platform vault,
1624
+ * SP15-platform) and the engine scopes it per-run so a tool body can `await ctx.secrets.resolve(ref)` to fetch
1625
+ * a tenant-scoped secret at execution time — the same security posture as the MCP connector's credentialRef
1626
+ * resolution (resolve at execution, scoped by the live ctx, never from tool args). @mastra-free. Omit for
1627
+ * today's behaviour: `ctx.secrets.resolve(...)` yields `undefined` (no vault).
1628
+ */
1629
+ secrets?: SecretResolver;
1630
+ /**
1631
+ * SP3 — best-effort per-event OBSERVER, fired by the engine after each event is persisted (run + resume).
1632
+ * Transport-agnostic (sees every event regardless of interactive-SSE vs durable+realtime delivery), so the
1633
+ * platform's metering — debit on `swarm.turn_usage`, settle on a terminal — lives HERE instead of teeing the
1634
+ * route's stream. Awaited but FAIL-SAFE: a throwing observer is swallowed (log your own), never breaking the
1635
+ * run. The `preGeneration` reserve hook (above) + this observer are the two halves of the credit ledger.
1636
+ */
1637
+ onEvent?: (ev: SwarmEvent, ctx: SwarmContext) => void | Promise<void>;
1638
+ /**
1639
+ * Completion supervisor (reliability) — see EngineOpts.verifyCompletion. When set, the engine asks this at a
1640
+ * turn's end whether the user's request was actually satisfied, nudges the orchestrator with the specific gap
1641
+ * if not, and ends `run_failed:incomplete` (refundable) instead of a silent `done` if it still can't finish.
1642
+ * Omit ⇒ the cheap structural "did the root speak last?" fallback nudge.
1643
+ */
1644
+ verifyCompletion?: CompletionVerifier;
1645
+ /**
1646
+ * FR-003 — per-run lifecycle hooks. `onRunStart(ctx, { input, state })` seeds the run's mutable `ctx.state`
1647
+ * store (e.g. `state.set("sceneCode", input.context?.currentCode ?? "")`) once before the first generation;
1648
+ * `onRunEnd(ctx, { state, outcome })` drains it deterministically at the run's terminal boundary (done /
1649
+ * failed / suspended / thrown). The SAME `state` handle flows into every tool call of the run (orchestrator +
1650
+ * delegated sub-agents) via `ctx.state`. Both are FAIL-SAFE. Omit ⇒ no per-run state seeding/draining.
1651
+ */
1652
+ onRunStart?: EngineOpts["onRunStart"];
1653
+ onRunEnd?: EngineOpts["onRunEnd"];
1654
+ /**
1655
+ * Declarative conditional policy (Phase A). `advise` rules are injected into the system prompt; `enforce`
1656
+ * rules compile into the decision hooks (deny/ask), folding in the non-removable SP5 policy floor. Per-agent
1657
+ * rules are authored via `defineAgent({ rules })` and applied additively.
1658
+ */
1659
+ rules?: RuleDef[];
1660
+ /**
1661
+ * Authorable procedures (Phase A: `advisory` only — an `advisory` workflow's `description` is injected as a
1662
+ * suggested procedure; `compliance: "strict"` is REJECTED until the Phase-B step-driver). Per-agent
1663
+ * procedures are authored via `defineAgent({ workflow })`.
1664
+ */
1665
+ workflows?: WorkflowDef[];
704
1666
  }
705
1667
  interface Swarm {
706
1668
  engine: SwarmEngine;
@@ -748,10 +1710,7 @@ declare class SpanCollector {
748
1710
  * Close the open generation with this step's usage + its own per-call cost (already priced from
749
1711
  * the step usage by the engine). `costUsd` is per-generation — never a cumulative running total.
750
1712
  */
751
- closeGeneration(usage: {
752
- inputTokens: number;
753
- outputTokens: number;
754
- }, costUsd: number): void;
1713
+ closeGeneration(usage: UsageBreakdown, costUsd: number): void;
755
1714
  openTool(toolCallId: string, name: string): void;
756
1715
  closeTool(toolCallId: string, ok: boolean): void;
757
1716
  /**
@@ -778,6 +1737,64 @@ declare class RowCache<V> {
778
1737
  invalidate(key: string): void;
779
1738
  }
780
1739
 
1740
+ /** Drain an `engine.run(...)` (an `AsyncIterable<SwarmEvent>`) into the eval-shaped trajectory + final output. The
1741
+ * output is the concatenation of the assistant `swarm.message` texts (the model's visible reply), in stream order. */
1742
+ declare function drainTrajectory(stream: AsyncIterable<SwarmEvent>): Promise<{
1743
+ events: SwarmEvent[];
1744
+ output: string;
1745
+ }>;
1746
+ /** FR-018 — run one input through a built `Swarm`/`SwarmEngine` and return its full trajectory + output. This is
1747
+ * the seam a host hands to `@nightowlsdev/eval` (`RunAgent = (case) => Promise<{ events, output }>`): no SSE,
1748
+ * no engine internals. `ctx` defaults to an ephemeral local context when omitted. */
1749
+ declare function runToTrajectory(target: Swarm | SwarmEngine, input: RunInput | string, ctx?: Partial<SwarmContext> & {
1750
+ agentSlug: string;
1751
+ }): Promise<{
1752
+ events: SwarmEvent[];
1753
+ output: string;
1754
+ }>;
1755
+ /** Options for {@link runAgent}. The model wiring is the only required part (the same `modelFactory` a swarm
1756
+ * uses); everything else mirrors the `SwarmConfig` fields and defaults to the single-agent/ephemeral case. */
1757
+ interface RunAgentOpts {
1758
+ /** Same `(modelId, agentSlug?) => model` factory `defineSwarm` takes (e.g. `openaiModels()`). Required. */
1759
+ modelFactory: SwarmConfig["modelFactory"];
1760
+ /** Model allow-list + optional tier router. Defaults to allowing the agent's own `modelId` (+ tier models). */
1761
+ models?: {
1762
+ allow?: string[];
1763
+ tier?: TierConfig;
1764
+ };
1765
+ /** Per-run caps. Defaults to a generous single-run budget (`{ maxSteps: 50, maxCostUsd: 10 }`). */
1766
+ cost?: Partial<SwarmConfig["cost"]>;
1767
+ /** Bring your own adapter (e.g. to inspect events afterwards). Defaults to a fresh `InMemoryStorage`. */
1768
+ storage?: StorageAdapter;
1769
+ /** Pass-throughs onto the one-agent SwarmConfig — identical semantics to `defineSwarm`. */
1770
+ telemetry?: SwarmConfig["telemetry"];
1771
+ memory?: SwarmConfig["memory"];
1772
+ hooks?: SwarmConfig["hooks"];
1773
+ toolApproval?: SwarmConfig["toolApproval"];
1774
+ secrets?: SwarmConfig["secrets"];
1775
+ onEvent?: SwarmConfig["onEvent"];
1776
+ onRunStart?: SwarmConfig["onRunStart"];
1777
+ onRunEnd?: SwarmConfig["onRunEnd"];
1778
+ pageContext?: SwarmConfig["pageContext"];
1779
+ mastraStore?: SwarmConfig["mastraStore"];
1780
+ /** Override the ephemeral run context (tenantId/userId/run/thread ids). */
1781
+ ctx?: Partial<SwarmContext>;
1782
+ }
1783
+ /** Build a one-agent `Swarm` from an `AgentDef` + options — the standalone equivalent of `defineSwarm` for a
1784
+ * single agent. Useful when you want the built engine (e.g. to call `resume`) rather than a one-shot run. */
1785
+ declare function buildSingleAgentSwarm(def: AgentDef, opts: RunAgentOpts): Swarm;
1786
+ /**
1787
+ * FR-019 — run a single `AgentDef` to completion and return its trajectory + final output, with NO Supabase
1788
+ * adapter and NO publish required. Honors tier resolution + cost cap + tool-approval (it builds the real engine).
1789
+ *
1790
+ * @example
1791
+ * const { output } = await runAgent(titleAgent, "Summarize: …", { modelFactory: openaiModels() });
1792
+ */
1793
+ declare function runAgent(def: AgentDef, input: RunInput | string, opts: RunAgentOpts): Promise<{
1794
+ events: SwarmEvent[];
1795
+ output: string;
1796
+ }>;
1797
+
781
1798
  declare class InMemoryStorage implements StorageAdapter {
782
1799
  private evts;
783
1800
  private seq;
@@ -789,13 +1806,22 @@ declare class InMemoryStorage implements StorageAdapter {
789
1806
  private agentRows;
790
1807
  private heads;
791
1808
  private pads;
1809
+ private threadRows;
792
1810
  seedAgent(v: AgentVersion, tenantId?: string): void;
793
1811
  recordSuspend(runId: string, tenantId: string, followupId: string, toolCallId: string): void;
794
- markFollowupAnswered(followupId: string, tenantId: string): void;
1812
+ markFollowupAnswered(followupId: string, tenantId: string): boolean;
795
1813
  /** Test/host helper: read a run row (the RunStore interface is write-mostly). */
796
1814
  getRun(runId: string): RunRow | undefined;
797
1815
  events: EventStore;
798
1816
  runs: RunStore;
1817
+ threads: ThreadStore;
1818
+ /** Test/host helper: read a recorded thread row. */
1819
+ getThread(id: string): {
1820
+ id: string;
1821
+ orgId: string;
1822
+ userId: string;
1823
+ projectId?: string;
1824
+ } | undefined;
799
1825
  messages: MessageStore;
800
1826
  scratchpad: ScratchpadStore;
801
1827
  agents: AgentRepo;
@@ -806,9 +1832,57 @@ declare function composeSystemPrompt(row: AgentVersion): {
806
1832
  role: "system";
807
1833
  content: string;
808
1834
  }[];
1835
+ /**
1836
+ * Render the soft-policy lines for an agent (advise-rule statements + advisory-workflow summaries, from
1837
+ * `softPolicyFor`) as a single system message. Returns `[]` when there are none (zero overhead / no message).
1838
+ */
1839
+ declare function composePolicyPrompt(lines: string[]): {
1840
+ role: "system";
1841
+ content: string;
1842
+ }[];
809
1843
 
810
1844
  declare const customAuth: (fn: AuthProvider["authenticate"]) => AuthProvider;
811
1845
 
1846
+ interface RateLimitConfig {
1847
+ /** Window length in seconds. */
1848
+ windowSec: number;
1849
+ /** Max allowed events per window. */
1850
+ max: number;
1851
+ }
1852
+ interface RateLimitState {
1853
+ count: number;
1854
+ windowStartSec: number;
1855
+ }
1856
+ interface RateLimitDecision {
1857
+ allow: boolean;
1858
+ /** Remaining allowance in the current window (0 when denied). */
1859
+ remaining: number;
1860
+ /** Seconds until the window resets (when the count clears). */
1861
+ resetSec: number;
1862
+ }
1863
+ /**
1864
+ * Pure fixed-window rate-limit decision. If there's no prior state or the window has elapsed, a fresh window
1865
+ * starts at count 1 (this event). Otherwise the count increments. Allow while count ≤ max. Returns the decision
1866
+ * AND the next state to persist. Fixed-window is chosen for simplicity + O(1) state (one counter); its known
1867
+ * burst-at-boundary tradeoff is acceptable for an abuse backstop (not a billing meter).
1868
+ */
1869
+ declare function decideFixedWindow(prev: RateLimitState | null, cfg: RateLimitConfig, nowSec: number): {
1870
+ decision: RateLimitDecision;
1871
+ state: RateLimitState;
1872
+ };
1873
+ interface RateLimitStore {
1874
+ /** Record one event for `key` under `cfg` and return the decision. */
1875
+ hit(key: string, cfg: RateLimitConfig, nowSec: number): Promise<RateLimitDecision>;
1876
+ }
1877
+ /**
1878
+ * In-memory fixed-window store — a REAL limiter for a SINGLE instance. Keeps one window per key in a Map and
1879
+ * prunes expired keys opportunistically so memory stays bounded. NOT shared across instances: a horizontally
1880
+ * scaled deploy must back this with Redis/Postgres (same interface) or limits are per-instance.
1881
+ */
1882
+ declare function createInMemoryRateLimitStore(): RateLimitStore;
1883
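+ /** Build a rate-limit config from an env-derived `max` (e.g. 60; presumably `fallbackMax` applies when `max` is undefined — confirm against the implementation) and the given `windowSec`, clamped to sane positive values. */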
1884
+ declare function rateConfig(max: number | undefined, windowSec: number, fallbackMax: number): RateLimitConfig;
1885
+
812
1886
  declare const VERSION = "0.0.0";
813
1887
 
814
- export { ASK_TOOL_NAME, type ActiveRun, type AgentDef, type AgentMemoryOverride, type AgentRepo, type AgentSpec, type AgentSummary, type AgentVersion, type AskField, type AskFieldOption, type AuthContext, type AuthProvider, CapturingExporter, type ContainerFloor, CostGovernor, DelegateBudgets, type EngineOpts, type EventStore, type FloorHolder, GUARDRAILS, InMemoryContainerFloor, InMemoryStorage, type MemoryConfig, type MessageStore, type ModelProvider, type NewRun, PRICE_TABLE, type PerDelegateBudget, type Price, type Release, RowCache, type RunInput, type RunRow, type RunStatus, type RunStore, type Runner, SCRATCHPAD_MAX_ENTRY_CHARS, SCRATCHPAD_MAX_KEYS, type ScratchpadEntry, type ScratchpadStore, type SecretResolver, SpanCollector, type StorageAdapter, type Swarm, type SwarmConfig, type SwarmContext, SwarmEngine, type SwarmEvent, type SwarmMessage, type SwarmSkill, type SwarmSpan, type SwarmTool, type SwarmToolContext, type TelemetryExporter, type ThreadSummary, type ToolSpec, VERSION, allowListModelProvider, buildSkillResolver, composeSystemPrompt, compositeTelemetry, containerFloor, customAuth, customTelemetry, defineAgent, defineSkill, defineSwarm, defineTool, ev, isEvent, resolveTelemetry };
1888
+ export { ASK_TOOL_NAME, type ActiveRun, type AgentDef, type AgentMemoryOverride, AgentMutationForbidden, type AgentRepo, type AgentSpec, type AgentSummary, type AgentVersion, type AgentVersionContent, type AgentVersionInfo, type AskField, type AskFieldOption, type AuthContext, type AuthProvider, type BoundSecrets, type BundleDef, type BundleDep, type BundleRepo, type BundleSpec, type BundleVersion, type BundleVersionContent, type BundleVersionInfo, type BundleWritableRepo, CapturingExporter, ClientToolError, type ClientToolSpec, type CompletionVerdict, type CompletionVerifier, type ConnectorGrant, type ContainerFloor, CostGovernor, DelegateBudgets, type EngineOpts, type EventStore, type FloorHolder, GUARDRAILS, InMemoryContainerFloor, InMemoryStorage, type MemoryConfig, type MessageStore, type ModelProvider, type ModelRef, type ModelTier, type NewRun, PRICE_TABLE, type PerDelegateBudget, type Price, type PriceFeed, type PricingOpts, type RateLimitConfig, type RateLimitDecision, type RateLimitState, type RateLimitStore, type Release, ReserveDenied, RowCache, type RuleAction, type RuleCondition, type RuleDef, type RuleLevel, type RuleSpec, type RunAgentOpts, type RunInput, type RunRow, type RunStateHandle, type RunStatus, type RunStore, type Runner, SCRATCHPAD_MAX_ENTRY_CHARS, SCRATCHPAD_MAX_KEYS, type ScratchpadEntry, type ScratchpadStore, type SecretResolver, type SlugUsage, SpanCollector, type StorageAdapter, type Swarm, type SwarmActor, type SwarmConfig, type SwarmContext, SwarmEngine, type SwarmEvent, type SwarmMessage, type SwarmSkill, type SwarmSpan, type SwarmTool, type SwarmToolContext, type TelemetryExporter, type ThreadStore, type ThreadSummary, type TierConfig, type TierEscalationContext, type TierResolution, type ToolSpec, type TurnUsage, type UsageBreakdown, type UsageCost, VERSION, type VersionedRepo, type WorkflowCompliance, type WorkflowDef, type WorkflowRef, type WorkflowRunState, type WorkflowSpec, type WorkflowStep, type WorkflowTransition, 
allowListModelProvider, assertActorMayMutateDefinition, buildSingleAgentSwarm, buildSkillResolver, composePolicyPrompt, composeSystemPrompt, compositeTelemetry, containerFloor, createInMemoryRateLimitStore, createRunState, customAuth, customTelemetry, decideFixedWindow, defineAgent, defineBundle, defineClientTool, defineRule, defineSkill, defineSwarm, defineTool, defineWorkflow, drainTrajectory, ev, isEvent, isTierSentinel, mergeBundle, priceUsage, rateConfig, resolveTelemetry, resolveTier, runAgent, runToTrajectory, sumBreakdowns, sumTurnUsage, tierModelId, toBundleContent };