@nightowlsdev/core 0.3.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +270 -0
- package/dist/index.cjs +1815 -148
- package/dist/index.d.cts +1154 -80
- package/dist/index.d.ts +1154 -80
- package/dist/index.js +1785 -147
- package/package.json +5 -4
package/dist/index.d.ts
CHANGED
|
@@ -1,4 +1,175 @@
|
|
|
1
1
|
import { z } from 'zod';
|
|
2
|
+
import { HookDispatcher, ToolApprovalPolicy, SwarmHooks } from '@nightowlsdev/hooks';
|
|
3
|
+
export { ALLOW, ALLOW_TOOL, DEFAULT_READ_ONLY_TOOLS, GuardMutationEvent, GuardMutationHook, HookDecision, HookDispatcher, PreGenerationEvent, PreGenerationHook, PreToolCallHook, SwarmHooks, ToolApprovalPolicy, ToolDecision, ToolPreCallEvent, ask, createHookDispatcher, defineHook, deny } from '@nightowlsdev/hooks';
|
|
4
|
+
|
|
5
|
+
interface Price {
|
|
6
|
+
inUsdPerMtok: number;
|
|
7
|
+
outUsdPerMtok: number;
|
|
8
|
+
/**
|
|
9
|
+
* Per-class USD rates per million tokens (SP1). All OPTIONAL + additive — a price entry without them
|
|
10
|
+
* prices exactly like before. When a class's rate is absent the engine falls back to the closest base
|
|
11
|
+
* rate, picked to match how providers bill these classes:
|
|
12
|
+
* - `cacheReadUsdPerMtok` → falls back to `inUsdPerMtok` (cache reads are discounted input; absent a
|
|
13
|
+
* discount rate we conservatively bill them at the full input rate rather than free).
|
|
14
|
+
* - `cacheWriteUsdPerMtok` → falls back to `inUsdPerMtok` (cache writes are an input-side surcharge).
|
|
15
|
+
* - `reasoningUsdPerMtok` → falls back to `outUsdPerMtok` (reasoning tokens are generated output).
|
|
16
|
+
*/
|
|
17
|
+
cacheReadUsdPerMtok?: number;
|
|
18
|
+
cacheWriteUsdPerMtok?: number;
|
|
19
|
+
reasoningUsdPerMtok?: number;
|
|
20
|
+
}
|
|
21
|
+
/**
|
|
22
|
+
* Per-generation usage, by token class (SP1). `inputTokens`/`outputTokens` are always present (today's
|
|
23
|
+
* shape); the rest are OPTIONAL — a provider that does not report a class leaves it undefined and it is
|
|
24
|
+
* priced as zero (NEVER fabricated). `toolCalls`/`agentActivations` are activity counts for telemetry,
|
|
25
|
+
* not priced. Plain domain type — @mastra-free (engine wall, CONTRACTS §1).
|
|
26
|
+
*/
|
|
27
|
+
interface UsageBreakdown {
|
|
28
|
+
inputTokens: number;
|
|
29
|
+
outputTokens: number;
|
|
30
|
+
cacheReadTokens?: number;
|
|
31
|
+
cacheWriteTokens?: number;
|
|
32
|
+
reasoningTokens?: number;
|
|
33
|
+
/** Number of (non-delegation) tool calls in this generation. Telemetry only — not priced. */
|
|
34
|
+
toolCalls?: number;
|
|
35
|
+
/** Number of sub-agent delegations (`agent-<slug>` calls) in this generation. Telemetry only — not priced. */
|
|
36
|
+
agentActivations?: number;
|
|
37
|
+
}
|
|
38
|
+
/** A priced usage: the computed USD plus the breakdown it was priced from. */
|
|
39
|
+
interface UsageCost {
|
|
40
|
+
usd: number;
|
|
41
|
+
breakdown: UsageBreakdown;
|
|
42
|
+
}
|
|
43
|
+
/**
|
|
44
|
+
* Optional seam that supplies/overrides per-model prices (SP1). Supplies NUMBERS only (no @mastra) so the
|
|
45
|
+
* engine wall holds. `prices()` returns a full `modelId → Price` map merged over the built-in `PRICE_TABLE`
|
|
46
|
+
* + any static `prices` config; a host can back it with a live price feed. Sync to keep governor construction
|
|
47
|
+
* synchronous on the hot path — resolve/refresh out of band and return the current snapshot here.
|
|
48
|
+
*/
|
|
49
|
+
interface PriceFeed {
|
|
50
|
+
prices(): Record<string, Price>;
|
|
51
|
+
}
|
|
52
|
+
declare const PRICE_TABLE: Record<string, Price>;
|
|
53
|
+
/** Options shared by the pricing helpers + the governors that thread pricing config through. */
|
|
54
|
+
interface PricingOpts {
|
|
55
|
+
/**
|
|
56
|
+
* When TRUE an unpriced `modelId` THROWS instead of pricing at $0. Default FALSE so OSS users (the
|
|
57
|
+
* built-in `PRICE_TABLE` has only 2 entries) are not broken — an unknown model keeps the historical
|
|
58
|
+
* $0 fallback (the cost cap simply can't fire on it). A host that wants billing safety flips this on.
|
|
59
|
+
*/
|
|
60
|
+
failOnUnknownModel?: boolean;
|
|
61
|
+
}
|
|
62
|
+
/** Price one usage at a model's rate, across EVERY token class (SP1). Shared by the global CostGovernor and
|
|
63
|
+
* per-delegate DelegateBudgets so the two caps always agree on what a token costs. Each class is priced at
|
|
64
|
+
* its explicit per-class rate, with the documented fallbacks (see `Price`) when a rate is absent; a token
|
|
65
|
+
* class the provider omitted is treated as zero (never fabricated). An unknown model prices at $0 by default
|
|
66
|
+
* (the cap can't fire on a model with no price entry — historical behavior) unless `failOnUnknownModel` is TRUE, in which case pricing throws instead. */
|
|
67
|
+
declare function priceUsage(prices: Record<string, Price>, modelId: string, u: UsageBreakdown, opts?: PricingOpts): number;
|
|
68
|
+
/**
|
|
69
|
+
* Sum a list of `UsageBreakdown` into a single turn-total breakdown (SP4 — the room-turn billing unit).
|
|
70
|
+
* The always-present base classes (`inputTokens`/`outputTokens`) sum as plain numbers. Each OPTIONAL class
|
|
71
|
+
* (cacheRead/cacheWrite/reasoning/toolCalls/agentActivations) sums with UNDEFINED as the additive identity:
|
|
72
|
+
* - undefined + number → the number (a class present in only some generations still totals correctly),
|
|
73
|
+
* - all-undefined → stays UNDEFINED (we never fabricate a zero for a class that no provider reported),
|
|
74
|
+
* - present-and-zero → preserved as 0 (an explicit 0 means "reported zero", distinct from "not reported").
|
|
75
|
+
* An empty list yields the zero base breakdown `{ inputTokens: 0, outputTokens: 0 }`. @mastra-free.
|
|
76
|
+
*/
|
|
77
|
+
declare function sumBreakdowns(items: UsageBreakdown[]): UsageBreakdown;
|
|
78
|
+
/** One generation's priced usage attributed to an agent slug — the unit the turn aggregates over. */
|
|
79
|
+
interface SlugUsage {
|
|
80
|
+
slug: string;
|
|
81
|
+
breakdown: UsageBreakdown;
|
|
82
|
+
cost: UsageCost;
|
|
83
|
+
}
|
|
84
|
+
/** A room-turn's aggregate usage (SP4): the summed breakdown + summed USD for the WHOLE turn, plus a
|
|
85
|
+
* per-agent-slug split so the platform can attribute credits per agent. `bySlug` is in first-seen order. */
|
|
86
|
+
interface TurnUsage {
|
|
87
|
+
breakdown: UsageBreakdown;
|
|
88
|
+
cost: UsageCost;
|
|
89
|
+
bySlug: SlugUsage[];
|
|
90
|
+
}
|
|
91
|
+
/**
|
|
92
|
+
* Aggregate every generation's `(slug, breakdown, cost)` over a room-turn into ONE turn total + a per-slug
|
|
93
|
+
* split (SP4). The turn total breakdown/USD are the sum across ALL generations; `bySlug` folds every
|
|
94
|
+
* generation of the same slug into one entry (so a slug that generated twice — e.g. an orchestrator before
|
|
95
|
+
* and after a delegation — appears once with its combined breakdown/USD), in FIRST-SEEN order for a stable
|
|
96
|
+
* split. By construction the per-slug entries sum back to the turn total (the invariant the platform's
|
|
97
|
+
* per-agent credit attribution relies on). An empty list yields a zero total + empty split. @mastra-free.
|
|
98
|
+
*/
|
|
99
|
+
declare function sumTurnUsage(items: SlugUsage[]): TurnUsage;
|
|
100
|
+
declare class CostGovernor {
|
|
101
|
+
private opts;
|
|
102
|
+
private steps;
|
|
103
|
+
private usd;
|
|
104
|
+
private prices;
|
|
105
|
+
private failOnUnknownModel;
|
|
106
|
+
constructor(opts: {
|
|
107
|
+
maxSteps: number;
|
|
108
|
+
maxCostUsd: number;
|
|
109
|
+
/** Static per-model price overrides, merged over PRICE_TABLE. */
|
|
110
|
+
prices?: Record<string, Price>;
|
|
111
|
+
/** Optional live price seam (SP1). Merged OVER `prices` (a feed entry wins) at construction. */
|
|
112
|
+
priceFeed?: PriceFeed;
|
|
113
|
+
} & PricingOpts);
|
|
114
|
+
step(): void;
|
|
115
|
+
/** Price a single usage WITHOUT accumulating it (for per-generation telemetry cost). */
|
|
116
|
+
priceOf(modelId: string, u: UsageBreakdown): number;
|
|
117
|
+
/** Price a single usage WITHOUT accumulating it, returning the usd + the breakdown it was priced from. */
|
|
118
|
+
costOf(modelId: string, u: UsageBreakdown): UsageCost;
|
|
119
|
+
addUsage(modelId: string, u: UsageBreakdown): void;
|
|
120
|
+
costUsd(): number;
|
|
121
|
+
/** The current USD cap (SP9-core: the cap-that-asks reads this to surface "spend / cap" + to compute the raise). */
|
|
122
|
+
get maxCostUsd(): number;
|
|
123
|
+
/**
|
|
124
|
+
* SP9-core — RAISE the USD cap by `incrementUsd` (the budget an approved "Budget cap reached — continue?"
|
|
125
|
+
* grants). Mutates the governor's ceiling so a freshly-resumed generation isn't immediately re-blocked at the
|
|
126
|
+
* SAME cap; the run gets real additional headroom. Only the cap-that-asks resume path calls this; the default
|
|
127
|
+
* terminal-stop path never does, so today's behaviour is unchanged.
|
|
128
|
+
*/
|
|
129
|
+
raiseCostCap(incrementUsd: number): void;
|
|
130
|
+
shouldStop(): {
|
|
131
|
+
stop: boolean;
|
|
132
|
+
reason?: string;
|
|
133
|
+
};
|
|
134
|
+
}
|
|
135
|
+
/**
|
|
136
|
+
* Per-delegate USD sub-budgets (R5). The global `cost.maxCostUsd` bounds the WHOLE turn; this adds an
|
|
137
|
+
* optional ceiling applied to EACH delegate (sub-agent) so one runaway sub-agent can't burn the entire
|
|
138
|
+
* turn before the global cap notices. `maxCostUsd` is the default ceiling for every delegate; `bySlug`
|
|
139
|
+
* overrides it per delegate slug (a slug listed there is capped even if there is no default). The run's
|
|
140
|
+
* root orchestrator is NOT a delegate and is never capped here — the global cap already covers it.
|
|
141
|
+
*/
|
|
142
|
+
interface PerDelegateBudget {
|
|
143
|
+
/** Default USD ceiling per delegate. Omit to cap only the slugs named in `bySlug`. */
|
|
144
|
+
maxCostUsd?: number;
|
|
145
|
+
/** Per-slug overrides of `maxCostUsd`. */
|
|
146
|
+
bySlug?: Record<string, {
|
|
147
|
+
maxCostUsd?: number;
|
|
148
|
+
}>;
|
|
149
|
+
}
|
|
150
|
+
/** Tracks per-delegate USD spend and reports the first delegate to exceed its budget. Priced from the same
|
|
151
|
+
* table as CostGovernor (via the shared `priceUsage`) so the global and per-delegate caps agree. Usage
|
|
152
|
+
* attributed to the root orchestrator slug is ignored. */
|
|
153
|
+
declare class DelegateBudgets {
|
|
154
|
+
private cfg;
|
|
155
|
+
private rootSlug;
|
|
156
|
+
private usd;
|
|
157
|
+
private prices;
|
|
158
|
+
private failOnUnknownModel;
|
|
159
|
+
constructor(cfg: PerDelegateBudget, rootSlug: string, pricing?: {
|
|
160
|
+
prices?: Record<string, Price>;
|
|
161
|
+
priceFeed?: PriceFeed;
|
|
162
|
+
} & PricingOpts);
|
|
163
|
+
/** The USD cap for a delegate: its `bySlug` override if present, else the default. `undefined` → uncapped. */
|
|
164
|
+
private capFor;
|
|
165
|
+
/** Accumulate one generation's usage against a delegate. No-op for the root orchestrator (not a delegate). */
|
|
166
|
+
addUsage(slug: string, modelId: string, u: UsageBreakdown): void;
|
|
167
|
+
/** The first delegate that has met or exceeded its USD cap, or null. */
|
|
168
|
+
exceeded(): {
|
|
169
|
+
slug: string;
|
|
170
|
+
reason: string;
|
|
171
|
+
} | null;
|
|
172
|
+
}
|
|
2
173
|
|
|
3
174
|
interface SwarmContext {
|
|
4
175
|
tenantId: string;
|
|
@@ -17,8 +188,63 @@ interface AuthProvider {
|
|
|
17
188
|
authenticate(req: Request): Promise<AuthContext | null>;
|
|
18
189
|
can?(ctx: AuthContext, capability: string): boolean | Promise<boolean>;
|
|
19
190
|
}
|
|
191
|
+
/**
|
|
192
|
+
* The PRINCIPAL performing a definition mutation (SP6) — a plain (@mastra-free) discriminated union covering
|
|
193
|
+
* every kind of actor that can publish/rollback an agent definition. Distinct from `AuthContext` (the run's
|
|
194
|
+
* end-user identity): an actor models WHO is mutating the swarm's own code, which may be a human in the
|
|
195
|
+
* no-code builder, a service (CI/seeding pipeline), or a system task.
|
|
196
|
+
*
|
|
197
|
+
* - `human` → a person acting in a tenant (the no-code builder's authenticated user).
|
|
198
|
+
* - `service` → a non-human automation acting in a tenant (CI, a seeding job, a migration runner).
|
|
199
|
+
* - `system` → an internal/un-tenanted operation (bootstrap seed, ops rollback) carrying a `reason`.
|
|
200
|
+
* - `agent` → an AGENT acting on its own behalf. This variant exists for ONE reason: so an agent
|
|
201
|
+
* principal is REPRESENTABLE and can therefore be UNCONDITIONALLY BARRED from mutating a
|
|
202
|
+
* definition (an agent must never be able to rewrite the swarm's own code). The bar is
|
|
203
|
+
* enforced in the repo contract layer (`assertActorMayMutateDefinition`) and CANNOT be
|
|
204
|
+
* overridden by any policy/hook. No code should ever construct an `agent` actor expecting a
|
|
205
|
+
* mutation to succeed — it is the deny sentinel.
|
|
206
|
+
*/
|
|
207
|
+
type SwarmActor = {
|
|
208
|
+
type: "human";
|
|
209
|
+
userId: string;
|
|
210
|
+
tenantId: string;
|
|
211
|
+
} | {
|
|
212
|
+
type: "service";
|
|
213
|
+
serviceId: string;
|
|
214
|
+
tenantId: string;
|
|
215
|
+
} | {
|
|
216
|
+
type: "system";
|
|
217
|
+
reason: string;
|
|
218
|
+
} | {
|
|
219
|
+
type: "agent";
|
|
220
|
+
agentSlug: string;
|
|
221
|
+
tenantId: string;
|
|
222
|
+
};
|
|
223
|
+
/**
|
|
224
|
+
* The NON-BYPASSABLE security invariant for definition mutations (SP6): an `agent` principal can NEVER
|
|
225
|
+
* publish or roll back an agent definition, regardless of any configured policy/hook. Every writable-repo
|
|
226
|
+
* mutation path MUST call this FIRST (before the `guardDefinitionMutation` policy hook) so the bar cannot be
|
|
227
|
+
* weakened by an allow-all hook. Throws `AgentMutationForbidden` for an `agent` actor; a no-op otherwise.
|
|
228
|
+
*
|
|
229
|
+
* This lives in the contract layer (not behind a hook) on purpose: the hook is removable/host-configurable
|
|
230
|
+
* policy, whereas this bar is a framework invariant — fail-closed and non-negotiable.
|
|
231
|
+
*/
|
|
232
|
+
declare function assertActorMayMutateDefinition(actor: SwarmActor): void;
|
|
233
|
+
/** Thrown by `assertActorMayMutateDefinition` when an `agent` principal attempts a definition mutation. A
|
|
234
|
+
* named class so callers can distinguish the security bar from an ordinary failure (e.g. a missing version). */
|
|
235
|
+
declare class AgentMutationForbidden extends Error {
|
|
236
|
+
readonly code: "AGENT_MUTATION_FORBIDDEN";
|
|
237
|
+
constructor(agentSlug: string);
|
|
238
|
+
}
|
|
239
|
+
/**
|
|
240
|
+
* The seam a platform vault (SP15-platform) implements: given a secret `ref` + the RUN's identity ctx, it
|
|
241
|
+
* supplies the secret VALUE. @mastra-free. Resolution is execution-time + ctx-scoped (the connector calls it at
|
|
242
|
+
* tool-call time with the live run ctx — see @nightowlsdev/mcp connector.ts), so the vault enforces per-tenant
|
|
243
|
+
* scoping off `ctx.tenantId`: a run must NOT be able to resolve another tenant's secret. Returns `undefined` for
|
|
244
|
+
* an unknown ref (a tool then sees "no secret") — the resolver should never leak across tenants.
|
|
245
|
+
*/
|
|
20
246
|
interface SecretResolver {
|
|
21
|
-
resolve(ref: string, ctx: SwarmContext): Promise<string>;
|
|
247
|
+
resolve(ref: string, ctx: SwarmContext): Promise<string | undefined>;
|
|
22
248
|
}
|
|
23
249
|
/** An optional rich input the asking agent CONSTRUCTS for a HITL `ask`, so the UI can render a fitting
|
|
24
250
|
* widget instead of a bare text box. Omitted ⇒ a plain text input. Answer shape by kind: confirm→boolean,
|
|
@@ -54,6 +280,9 @@ interface EvBase {
|
|
|
54
280
|
ts: number;
|
|
55
281
|
seq?: number;
|
|
56
282
|
schemaVersion: 1;
|
|
283
|
+
/** The run's thread (the lane this event belongs to). NOT set on live events — the store fills it in on a
|
|
284
|
+
* container restore (listForContainer) so a client can tell a lane side-chat apart from the main thread. */
|
|
285
|
+
threadId?: string;
|
|
57
286
|
}
|
|
58
287
|
type SwarmEvent = (EvBase & {
|
|
59
288
|
type: "swarm.status";
|
|
@@ -65,7 +294,7 @@ type SwarmEvent = (EvBase & {
|
|
|
65
294
|
}) | (EvBase & {
|
|
66
295
|
type: "swarm.message";
|
|
67
296
|
data: {
|
|
68
|
-
role: "assistant";
|
|
297
|
+
role: "assistant" | "user";
|
|
69
298
|
delta?: string;
|
|
70
299
|
text?: string;
|
|
71
300
|
};
|
|
@@ -92,6 +321,16 @@ type SwarmEvent = (EvBase & {
|
|
|
92
321
|
result?: unknown;
|
|
93
322
|
error?: string;
|
|
94
323
|
};
|
|
324
|
+
}) | (EvBase & {
|
|
325
|
+
type: "swarm.client_action";
|
|
326
|
+
data: {
|
|
327
|
+
followupId: string;
|
|
328
|
+
toolCallId: string;
|
|
329
|
+
tool: string;
|
|
330
|
+
input: unknown;
|
|
331
|
+
needsApproval: boolean;
|
|
332
|
+
from: string;
|
|
333
|
+
};
|
|
95
334
|
}) | (EvBase & {
|
|
96
335
|
type: "swarm.question";
|
|
97
336
|
data: {
|
|
@@ -110,6 +349,32 @@ type SwarmEvent = (EvBase & {
|
|
|
110
349
|
from: "user" | string;
|
|
111
350
|
answer: unknown;
|
|
112
351
|
};
|
|
352
|
+
}) | (EvBase & {
|
|
353
|
+
type: "swarm.usage";
|
|
354
|
+
data: {
|
|
355
|
+
slug: string;
|
|
356
|
+
modelId: string;
|
|
357
|
+
breakdown: UsageBreakdown;
|
|
358
|
+
cost: UsageCost;
|
|
359
|
+
generationId: string;
|
|
360
|
+
};
|
|
361
|
+
}) | (EvBase & {
|
|
362
|
+
type: "swarm.turn_usage";
|
|
363
|
+
data: {
|
|
364
|
+
breakdown: UsageBreakdown;
|
|
365
|
+
cost: UsageCost;
|
|
366
|
+
bySlug: Array<{
|
|
367
|
+
slug: string;
|
|
368
|
+
breakdown: UsageBreakdown;
|
|
369
|
+
cost: UsageCost;
|
|
370
|
+
}>;
|
|
371
|
+
generations: number;
|
|
372
|
+
/** The segment's STARTING generation index — distinct per run/resume segment AND retry-stable (it's the
|
|
373
|
+
* monotonic, snapshot-persisted generation counter, not a fresh per-append seq). A host keys a PER-TURN
|
|
374
|
+
* billing debit on it so each segment of a suspend/resume run is charged exactly once (run segment = 0;
|
|
375
|
+
* each resume = the snapshot's next genIndex). The engine never prices — this is just a stable turn id. */
|
|
376
|
+
segmentIndex: number;
|
|
377
|
+
};
|
|
113
378
|
}) | (EvBase & {
|
|
114
379
|
type: "swarm.run_failed";
|
|
115
380
|
data: {
|
|
@@ -142,6 +407,118 @@ interface AgentMemoryOverride {
|
|
|
142
407
|
};
|
|
143
408
|
observationalMemory?: boolean | Record<string, unknown>;
|
|
144
409
|
}
|
|
410
|
+
/** Enforcement level a rule declares. `advise` = prompt-injected guidance; `enforce` = decision-hook veto. */
|
|
411
|
+
type RuleLevel = "advise" | "enforce";
|
|
412
|
+
/** Declarative match over a hook event. Empty = match-all within the resolved seam. */
|
|
413
|
+
interface RuleCondition {
|
|
414
|
+
/** agentSlug glob/exact. For a delegation the gate sees the PARENT (orchestrator) slug. */
|
|
415
|
+
agent?: string | string[];
|
|
416
|
+
/** toolName glob/exact (tool seam). */
|
|
417
|
+
tool?: string | string[];
|
|
418
|
+
/** tool provenance. */
|
|
419
|
+
origin?: "first-party" | "mcp";
|
|
420
|
+
/** modelId glob/exact (generation seam). */
|
|
421
|
+
model?: string | string[];
|
|
422
|
+
}
|
|
423
|
+
interface RuleAction {
|
|
424
|
+
do: "deny" | "ask";
|
|
425
|
+
reason?: string;
|
|
426
|
+
}
|
|
427
|
+
interface RuleSpec {
|
|
428
|
+
id: string;
|
|
429
|
+
statement: string;
|
|
430
|
+
when: RuleCondition;
|
|
431
|
+
level: RuleLevel;
|
|
432
|
+
/** REQUIRED when level==="enforce". `ask` is TOOL-SEAM ONLY (preGeneration cannot suspend). */
|
|
433
|
+
action?: RuleAction;
|
|
434
|
+
/** Seam; inferred from `when` when omitted (model ⇒ generation, else tool). */
|
|
435
|
+
on?: "tool" | "generation";
|
|
436
|
+
}
|
|
437
|
+
/** A normalized, engine-held rule (the output of `defineRule`). */
|
|
438
|
+
interface RuleDef {
|
|
439
|
+
id: string;
|
|
440
|
+
statement: string;
|
|
441
|
+
when: RuleCondition;
|
|
442
|
+
level: RuleLevel;
|
|
443
|
+
action?: RuleAction;
|
|
444
|
+
/** Resolved seam. */
|
|
445
|
+
seam: "tool" | "generation";
|
|
446
|
+
/** Set when authored via `defineAgent` (per-agent scope); undefined ⇒ swarm-wide. */
|
|
447
|
+
scopeAgent?: string;
|
|
448
|
+
}
|
|
449
|
+
/** Workflow compliance. v1: `advisory` (prompt) | `strict` (driver, Phase B — rejected by defineSwarm in Phase A). */
|
|
450
|
+
type WorkflowCompliance = "advisory" | "strict";
|
|
451
|
+
/** A flat data reference resolved at runtime: `{ $ref: "input" }` | `{ $ref: "steps.<id>" }`. */
|
|
452
|
+
type WorkflowRef = {
|
|
453
|
+
$ref: string;
|
|
454
|
+
};
|
|
455
|
+
interface WorkflowTransition {
|
|
456
|
+
to: string;
|
|
457
|
+
when?: {
|
|
458
|
+
$ref: string;
|
|
459
|
+
eq?: unknown;
|
|
460
|
+
exists?: boolean;
|
|
461
|
+
};
|
|
462
|
+
}
|
|
463
|
+
interface WorkflowStep {
|
|
464
|
+
id: string;
|
|
465
|
+
/** exactly ONE kind: */
|
|
466
|
+
agent?: string;
|
|
467
|
+
tool?: string;
|
|
468
|
+
human?: {
|
|
469
|
+
prompt: string;
|
|
470
|
+
field?: AskField;
|
|
471
|
+
};
|
|
472
|
+
/** agent steps: */
|
|
473
|
+
instruction?: string;
|
|
474
|
+
/** tool steps (values may be a WorkflowRef): */
|
|
475
|
+
args?: Record<string, unknown>;
|
|
476
|
+
/** agent steps (values may be a WorkflowRef): */
|
|
477
|
+
input?: Record<string, unknown>;
|
|
478
|
+
next?: string | WorkflowTransition[];
|
|
479
|
+
onError?: "fail" | {
|
|
480
|
+
to: string;
|
|
481
|
+
} | {
|
|
482
|
+
retry: number;
|
|
483
|
+
};
|
|
484
|
+
}
|
|
485
|
+
interface WorkflowSpec {
|
|
486
|
+
name: string;
|
|
487
|
+
compliance: WorkflowCompliance;
|
|
488
|
+
description?: string;
|
|
489
|
+
steps: WorkflowStep[];
|
|
490
|
+
start?: string;
|
|
491
|
+
}
|
|
492
|
+
/** A normalized, engine-held workflow (the output of `defineWorkflow`). */
|
|
493
|
+
interface WorkflowDef {
|
|
494
|
+
name: string;
|
|
495
|
+
compliance: WorkflowCompliance;
|
|
496
|
+
description?: string;
|
|
497
|
+
steps: WorkflowStep[];
|
|
498
|
+
/** Resolved start step id (default: steps[0].id). */
|
|
499
|
+
start: string;
|
|
500
|
+
/** Set when authored via `defineAgent.workflow` (per-agent scope). */
|
|
501
|
+
scopeAgent?: string;
|
|
502
|
+
}
|
|
503
|
+
/**
|
|
504
|
+
* The durable state of an in-flight STRICT workflow run (Phase B). Rides the existing run snapshot payload
|
|
505
|
+
* (not a new table). `cursor` = the step to run next; `outputs` = accumulated step results (for `$ref`);
|
|
506
|
+
* `generationIndex` = the monotonic per-run reserve counter continued across steps; `pending` marks a
|
|
507
|
+
* human/approval suspend so `resume()` re-enters the driver (synthetic `followupId`+`toolCallId` satisfy the
|
|
508
|
+
* resume-auth cross-check). Progress snapshotting only — NOT crash-mid-step replay.
|
|
509
|
+
*/
|
|
510
|
+
interface WorkflowRunState {
|
|
511
|
+
workflow: string;
|
|
512
|
+
cursor: string;
|
|
513
|
+
outputs: Record<string, unknown>;
|
|
514
|
+
generationIndex: number;
|
|
515
|
+
pending?: {
|
|
516
|
+
kind: "human" | "approval";
|
|
517
|
+
stepId: string;
|
|
518
|
+
followupId: string;
|
|
519
|
+
toolCallId: string;
|
|
520
|
+
};
|
|
521
|
+
}
|
|
145
522
|
interface AgentDef {
|
|
146
523
|
slug: string;
|
|
147
524
|
head: AgentVersion;
|
|
@@ -154,6 +531,82 @@ interface AgentDef {
|
|
|
154
531
|
skills?: {
|
|
155
532
|
name: string;
|
|
156
533
|
}[];
|
|
534
|
+
/** Per-agent rules (engine-local, v1) — `defineAgent` stamps `scopeAgent` so `defineSwarm` can collect them. */
|
|
535
|
+
rules?: RuleDef[];
|
|
536
|
+
/** Per-agent workflow/procedure (engine-local, v1) — `scopeAgent`-stamped by `defineAgent`. */
|
|
537
|
+
workflow?: WorkflowDef;
|
|
538
|
+
}
|
|
539
|
+
/** A closure dependency satisfied by ANOTHER bundle: a delegate slug that is not a member of this bundle. Carried
|
|
540
|
+
* as a flat min-version floor (no transitive resolution in v1). */
|
|
541
|
+
interface BundleDep {
|
|
542
|
+
slug: string;
|
|
543
|
+
minVersion: number;
|
|
544
|
+
}
|
|
545
|
+
/**
|
|
546
|
+
* BN1 — a declarative connector grant: a member may invoke a connector's actions. The action names are folded into
|
|
547
|
+
* that member's `skillNames`, so the host's connector-tools resolver (`SwarmConfig.connectorTools`) materializes
|
|
548
|
+
* them per-tenant at CALL time, gated by the SP5 approval floor. The bundle carries NAMES ONLY — never a token,
|
|
549
|
+
* connection id, or backend; the host wires the connector + per-tenant credentials. A granted action need not have
|
|
550
|
+
* a first-party skill handle (it is connector-backed), so it is the explicit, validated exception to BN0's
|
|
551
|
+
* "every skill must resolve to a handle" closure rule.
|
|
552
|
+
*/
|
|
553
|
+
interface ConnectorGrant {
|
|
554
|
+
/** The bundle member granted these actions (must be one of the bundle's `agents`). */
|
|
555
|
+
agentSlug: string;
|
|
556
|
+
/** The connector provider, e.g. `"slack"` — informational, and the prefix used to expand a short action name. */
|
|
557
|
+
provider: string;
|
|
558
|
+
/** Connector action names — full (`"slack.post_message"`) or short (`"post_message"`, expanded to `provider.action`). */
|
|
559
|
+
actions: string[];
|
|
560
|
+
}
|
|
561
|
+
/** Authoring input for `defineBundle`. It composes `defineAgent` OUTPUTS — it does not replace them. */
|
|
562
|
+
interface BundleSpec {
|
|
563
|
+
slug: string;
|
|
564
|
+
title?: string;
|
|
565
|
+
/** The composed members — exactly the output of `defineAgent` (skill handles ride along on each `AgentDef`). */
|
|
566
|
+
agents: AgentDef[];
|
|
567
|
+
/** SWARM-scoped rules (per-agent rules ride on the `AgentDef`s via `defineAgent`'s `scopeAgent` stamp). */
|
|
568
|
+
rules?: RuleDef[];
|
|
569
|
+
/** SWARM-scoped workflows (per-agent workflows ride on the `AgentDef`s). */
|
|
570
|
+
workflows?: WorkflowDef[];
|
|
571
|
+
/** BN1 — connector grants: a member may invoke a provider's actions (names only; the host materializes them). */
|
|
572
|
+
connectorGrants?: ConnectorGrant[];
|
|
573
|
+
/** Delegates that live in ANOTHER bundle (a delegate slug not among `agents`) — declared so closure validation
|
|
574
|
+
* can distinguish a legitimate external dependency from a typo. */
|
|
575
|
+
requires?: BundleDep[];
|
|
576
|
+
}
|
|
577
|
+
/** The validated, closure-checked composition `mergeBundle` folds into a `SwarmConfig`. The members carry any
|
|
578
|
+
* BN1 connector-grant action names folded into their `skillNames`. */
|
|
579
|
+
interface BundleDef {
|
|
580
|
+
slug: string;
|
|
581
|
+
title?: string;
|
|
582
|
+
agents: AgentDef[];
|
|
583
|
+
rules: RuleDef[];
|
|
584
|
+
workflows: WorkflowDef[];
|
|
585
|
+
connectorGrants: ConnectorGrant[];
|
|
586
|
+
requires: BundleDep[];
|
|
587
|
+
}
|
|
588
|
+
/** The publishable bundle payload (version is derived, not authored — mirrors `AgentVersionContent`). */
|
|
589
|
+
interface BundleVersionContent {
|
|
590
|
+
slug: string;
|
|
591
|
+
title?: string;
|
|
592
|
+
/** Composed members as serializable heads (no skill handles). */
|
|
593
|
+
agents: AgentVersionContent[];
|
|
594
|
+
rules: RuleDef[];
|
|
595
|
+
workflows: WorkflowDef[];
|
|
596
|
+
connectorGrants: ConnectorGrant[];
|
|
597
|
+
requires: BundleDep[];
|
|
598
|
+
}
|
|
599
|
+
/** One immutable, append-only bundle version (mirrors `AgentVersion` one level up). */
|
|
600
|
+
interface BundleVersion extends BundleVersionContent {
|
|
601
|
+
version: number;
|
|
602
|
+
}
|
|
603
|
+
/** A bundle version's summary row (for `listVersions` — mirrors `AgentVersionInfo`). */
|
|
604
|
+
interface BundleVersionInfo {
|
|
605
|
+
version: number;
|
|
606
|
+
title: string;
|
|
607
|
+
status: string;
|
|
608
|
+
isCurrent: boolean;
|
|
609
|
+
memberCount: number;
|
|
157
610
|
}
|
|
158
611
|
interface RunRow {
|
|
159
612
|
runId: string;
|
|
@@ -210,6 +663,11 @@ interface EventStore {
|
|
|
210
663
|
append(e: SwarmEvent): Promise<number>;
|
|
211
664
|
/** Tenant-scoped (R11): a forged cross-org runId returns []. The store enforces tenancy, not just the caller. */
|
|
212
665
|
list(tenantId: string, runId: string, sinceSeq: number): Promise<SwarmEvent[]>;
|
|
666
|
+
/** The full event log for a CONTAINER — every run in the conversation (the root thread + lane sub-threads
|
|
667
|
+
* `<container>:<slug>`), globally ordered by the generated `seq`. Lets a host rebuild a thread's RICH timeline
|
|
668
|
+
* (tool calls + delegation cards) on reload, where message-history is text-only. Tenant-scoped. Optional: a
|
|
669
|
+
* store without an events table may omit it (the host then falls back to message history). */
|
|
670
|
+
listForContainer?(tenantId: string, container: string): Promise<SwarmEvent[]>;
|
|
213
671
|
subscribe(runId: string): AsyncIterable<SwarmEvent>;
|
|
214
672
|
}
|
|
215
673
|
interface RunStore {
|
|
@@ -247,6 +705,24 @@ interface MessageStore {
|
|
|
247
705
|
* `engine.history` (Mastra recall, scoped by resourceId=tenant:user). Kept for contract consistency. */
|
|
248
706
|
history(tenantId: string, threadId: string, limit?: number): Promise<SwarmMessage[]>;
|
|
249
707
|
}
|
|
708
|
+
/** The thread (conversation container) a run references. `runs.thread_id` is a NOT NULL FK to this row, so the
|
|
709
|
+
* row MUST exist before the first run/message of a conversation. `ThreadStore.ensure` is the supported,
|
|
710
|
+
* schema-private way to create it idempotently — without it a host has to reach into the engine's raw pool and
|
|
711
|
+
* hardcode `nightowls.threads`'s columns (FR-009). */
|
|
712
|
+
interface ThreadStore {
|
|
713
|
+
/**
|
|
714
|
+
* Idempotently create the thread row a run will reference (insert-or-ignore on `id`). Safe to call before
|
|
715
|
+
* every run. `orgId` is the tenant; `userId` the conversation owner; `projectId` an optional host-owned
|
|
716
|
+
* sub-scope. Never throws on a pre-existing row. The engine calls this at run start when the adapter provides
|
|
717
|
+
* it, so `messages.append` cannot throw `unknown thread` through the supported path.
|
|
718
|
+
*/
|
|
719
|
+
ensure(spec: {
|
|
720
|
+
id: string;
|
|
721
|
+
orgId: string;
|
|
722
|
+
userId: string;
|
|
723
|
+
projectId?: string;
|
|
724
|
+
}): Promise<void>;
|
|
725
|
+
}
|
|
250
726
|
/** Scratchpad caps (intentionally lossy — working memory, not a system of record). */
|
|
251
727
|
declare const SCRATCHPAD_MAX_ENTRY_CHARS = 4000;
|
|
252
728
|
declare const SCRATCHPAD_MAX_KEYS = 64;
|
|
@@ -276,11 +752,82 @@ interface AgentRepo {
|
|
|
276
752
|
getVersion(tenantId: string, slug: string, version: number): Promise<AgentVersion | null>;
|
|
277
753
|
listSlugs(tenantId: string): Promise<string[]>;
|
|
278
754
|
}
|
|
755
|
+
/** The publishable content of one agent version — everything an `AgentVersion` carries EXCEPT the `version`
|
|
756
|
+
* number, which is derived (append-only `max+1`) by the repo, never supplied by the caller. */
|
|
757
|
+
type AgentVersionContent = Omit<AgentVersion, "version">;
|
|
758
|
+
/** One row of an agent's version history (for the no-code builder's rollback UX). Plain/@mastra-free. */
|
|
759
|
+
interface AgentVersionInfo {
|
|
760
|
+
version: number;
|
|
761
|
+
role: string;
|
|
762
|
+
modelId: string;
|
|
763
|
+
status: string;
|
|
764
|
+
isCurrent: boolean;
|
|
765
|
+
}
|
|
766
|
+
/**
|
|
767
|
+
* The WRITABLE agent definition contract (SP6) — "the one true framework extension" the no-code builder hangs
|
|
768
|
+
* off. Extends the read-only `AgentRepo` with the append-only mutation surface:
|
|
769
|
+
* - `publish` → commit a new head version of `content`; returns the derived version number.
|
|
770
|
+
* - `rollback` → republish a prior version's content as a NEW head (append-only, `git revert` not
|
|
771
|
+
* `reset`); returns the new version + the source it was restored from.
|
|
772
|
+
* - `listVersions` → the agent's version history (oldest→newest), flagging the current head.
|
|
773
|
+
*
|
|
774
|
+
* Every mutation takes a `SwarmActor` (the principal) — NOT a bare string — and the implementation MUST:
|
|
775
|
+
* 1. call `assertActorMayMutateDefinition(actor)` FIRST (the non-bypassable agent-bar), then
|
|
776
|
+
* 2. consult the configured `guardDefinitionMutation` policy hook (if any),
|
|
777
|
+
* before committing. `listVersions` takes the actor too so an impl can authorize reads consistently (and so
|
|
778
|
+
* the agent-bar can be applied uniformly); reads do not mutate, so an impl MAY allow an `agent` actor to list, but
|
|
779
|
+
* the shipped impls apply the same bar for symmetry.
|
|
780
|
+
*/
|
|
781
|
+
interface VersionedRepo extends AgentRepo {
|
|
782
|
+
/**
 * Commit `content` as a new head version of the agent `slug`. The version number is derived by the repo
 * (append-only `max+1`), never supplied by the caller.
 * @param actor The principal performing the mutation. An implementation must call
 *   `assertActorMayMutateDefinition(actor)` first, then consult any configured `guardDefinitionMutation` hook.
 * @returns The derived version number.
 */
publish(tenantId: string, slug: string, content: AgentVersionContent, actor: SwarmActor): Promise<{
|
|
783
|
+
version: number;
|
|
784
|
+
}>;
|
|
785
|
+
/**
 * Republish the content of `toVersion` as a NEW head (append-only — a `git revert`, not a `reset`).
 * @param actor The principal performing the mutation (same checks as `publish`).
 * @returns `version` — the new head's number; `restoredFrom` — the source version it was restored from.
 */
rollback(tenantId: string, slug: string, toVersion: number, actor: SwarmActor): Promise<{
|
|
786
|
+
version: number;
|
|
787
|
+
restoredFrom: number;
|
|
788
|
+
}>;
|
|
789
|
+
/**
 * The agent's version history, oldest→newest, with the current head flagged.
 * @param actor Passed so an implementation can authorize reads consistently with mutations.
 */
listVersions(tenantId: string, slug: string, actor: SwarmActor): Promise<AgentVersionInfo[]>;
|
|
790
|
+
}
|
|
791
|
+
/** BN2 — the read-only bundle version repo (head/getVersion/listSlugs). Structurally identical to `AgentRepo`,
|
|
792
|
+
* one level up: a `BundleVersion` is the unit, not an agent. */
|
|
793
|
+
interface BundleRepo {
|
|
794
|
+
/** The head `BundleVersion` of `slug` for the tenant. NOTE(review): `null` presumably means no such bundle — confirm. */
head(tenantId: string, slug: string): Promise<BundleVersion | null>;
|
|
795
|
+
/** One specific `version` of the bundle `slug`. NOTE(review): `null` presumably means not found — confirm. */
getVersion(tenantId: string, slug: string, version: number): Promise<BundleVersion | null>;
|
|
796
|
+
/** The bundle slugs known for the tenant. */
listSlugs(tenantId: string): Promise<string[]>;
|
|
797
|
+
}
|
|
798
|
+
/** BN2 — the WRITABLE bundle version repo (append-only publish/rollback/listVersions). Mirrors `VersionedRepo`;
|
|
799
|
+
* every mutation enforces the same non-bypassable actor-bar (an `agent` principal can never publish a bundle). */
|
|
800
|
+
interface BundleWritableRepo extends BundleRepo {
|
|
801
|
+
/**
 * Append-only publish of `content` for the bundle `slug`.
 * @param actor The principal performing the mutation; the actor-bar applies (an `agent` principal can never
 *   publish a bundle).
 * @returns `version`. NOTE(review): presumably the newly derived version number, as in `VersionedRepo` — confirm.
 */
publish(tenantId: string, slug: string, content: BundleVersionContent, actor: SwarmActor): Promise<{
|
|
802
|
+
version: number;
|
|
803
|
+
}>;
|
|
804
|
+
/**
 * Append-only rollback of the bundle `slug` to `toVersion`.
 * @param actor The principal performing the mutation; the same actor-bar applies.
 * @returns `version` and `restoredFrom`. NOTE(review): presumably the new head's number and the version it was
 *   restored from, mirroring `VersionedRepo.rollback` — confirm.
 */
rollback(tenantId: string, slug: string, toVersion: number, actor: SwarmActor): Promise<{
|
|
805
|
+
version: number;
|
|
806
|
+
restoredFrom: number;
|
|
807
|
+
}>;
|
|
808
|
+
/** The bundle's version history. NOTE(review): ordering is not stated here; presumably as in `VersionedRepo` — confirm. */
listVersions(tenantId: string, slug: string, actor: SwarmActor): Promise<BundleVersionInfo[]>;
|
|
809
|
+
}
|
|
279
810
|
interface StorageAdapter {
|
|
811
|
+
/** Read-only agent definitions (always present). The supabase adapter's `agents` is also a `VersionedRepo`,
|
|
812
|
+
* but the base contract stays `AgentRepo` so a read-only adapter compiles without growing a write path. */
|
|
280
813
|
agents: AgentRepo;
|
|
814
|
+
/** Opt-in WRITABLE agent definitions (SP6). Present on adapters that back a definition store (the supabase
|
|
815
|
+
* adapter); omitted by read-only adapters (storage-local, the in-memory dev store) — a no-code builder
|
|
816
|
+
* requires an adapter that provides it. When present it is the SAME object as `agents` (a `VersionedRepo`
|
|
817
|
+
* IS an `AgentRepo`), surfaced under a distinct field so the writable surface is explicit + type-checkable. */
|
|
818
|
+
agentsWritable?: VersionedRepo;
|
|
819
|
+
/** BN2 — opt-in WRITABLE bundle versions (append-only, same actor-bar). Present on adapters that back a bundle
|
|
820
|
+
* store (the supabase adapter); omitted by read-only/in-memory adapters. `bundles` is the read surface,
|
|
821
|
+
* `bundlesWritable` the same object's write surface — mirrors the `agents`/`agentsWritable` pattern. */
|
|
822
|
+
bundles?: BundleRepo;
|
|
823
|
+
bundlesWritable?: BundleWritableRepo;
|
|
281
824
|
runs: RunStore;
|
|
282
825
|
events: EventStore;
|
|
283
826
|
messages: MessageStore;
|
|
827
|
+
/** Opt-in thread (conversation container) creation (FR-009). When present, the engine idempotently ensures the
|
|
828
|
+
* run's thread row exists before the first message/event write, so a host need not pre-create it with raw SQL.
|
|
829
|
+
* Optional so existing adapters keep compiling; a host whose threads are externally managed may omit it. */
|
|
830
|
+
threads?: ThreadStore;
|
|
284
831
|
/** Opt-in container scratchpad (Part D). Optional so existing adapters keep compiling. */
|
|
285
832
|
scratchpad?: ScratchpadStore;
|
|
286
833
|
/**
|
|
@@ -292,11 +839,16 @@ interface StorageAdapter {
|
|
|
292
839
|
*/
|
|
293
840
|
recordSuspend?(runId: string, tenantId: string, followupId: string, toolCallId: string): void | Promise<void>;
|
|
294
841
|
/**
|
|
295
|
-
* Mark a followup
|
|
296
|
-
*
|
|
297
|
-
*
|
|
842
|
+
* Mark a followup answered so it can no longer be resumed (the engine calls this when a `resume` begins).
|
|
843
|
+
* Closes a replay hole: without it, `findSuspended` keeps returning the followup and the same answer can be
|
|
844
|
+
* replayed indefinitely. Tenant-scoped.
|
|
845
|
+
*
|
|
846
|
+
* **Compare-and-set (K4):** returns `true` if THIS call transitioned the followup unanswered→answered, `false`
|
|
847
|
+
* if it was already answered. An out-of-band reply path uses this as the single answer-once guard — two
|
|
848
|
+
* distinct inbound replies for one followup race here, and only the `true` winner resumes (the loser ACKs).
|
|
849
|
+
* The engine's own resume ignores the return (it already passed `findSuspended`).
|
|
298
850
|
*/
|
|
299
|
-
markFollowupAnswered?(followupId: string, tenantId: string):
|
|
851
|
+
markFollowupAnswered?(followupId: string, tenantId: string): boolean | Promise<boolean>;
|
|
300
852
|
/**
|
|
301
853
|
* Cross-process cache invalidation (R12). Subscribe to agent-republish notifications so an engine on ANY
|
|
302
854
|
* instance can evict its agent-row cache immediately instead of waiting out the TTL. `onInvalidate` receives
|
|
@@ -304,6 +856,12 @@ interface StorageAdapter {
|
|
|
304
856
|
* is the only staleness bound there). The supabase adapter uses Postgres LISTEN/NOTIFY.
|
|
305
857
|
*/
|
|
306
858
|
subscribeInvalidations?(onInvalidate: (key: string) => void): () => void;
|
|
859
|
+
/**
|
|
860
|
+
* FR-016 — enumerate the engine's tenants (org ids) so a host can idempotently backfill every tenant's crew
|
|
861
|
+
* (`engine.run` resolves agents per-tenant from the DB, so each tenant must be seeded before its first run).
|
|
862
|
+
* Optional: a read-only / in-memory adapter may omit it. The supabase adapter reads `nightowls.orgs`.
|
|
863
|
+
*/
|
|
864
|
+
listTenants?(): Promise<string[]>;
|
|
307
865
|
}
|
|
308
866
|
interface ModelProvider {
|
|
309
867
|
resolve(modelId: string, ctx: {
|
|
@@ -357,7 +915,29 @@ interface RunInput {
|
|
|
357
915
|
* (those come from `SwarmContext`); treat its contents as opaque, attacker-controllable input.
|
|
358
916
|
*/
|
|
359
917
|
context?: Record<string, unknown>;
|
|
918
|
+
/**
|
|
919
|
+
* Phase B: run a named STRICT workflow via the step-driver instead of the free-form agent turn. Engine/host-
|
|
920
|
+
* side seam — set by a host calling the runner/engine directly; NOT exposed on the public chat route or MCP
|
|
921
|
+
* (the wall promise). An unknown name throws before the run row is created.
|
|
922
|
+
*/
|
|
923
|
+
workflow?: string;
|
|
360
924
|
}
|
|
925
|
+
/** The verdict from a {@link CompletionVerifier}: was the user's request actually satisfied, and if not, a
|
|
926
|
+
* short description of what's still missing (used to build a targeted continue-nudge). */
|
|
927
|
+
interface CompletionVerdict {
|
|
928
|
+
/** Whether the user's request was actually satisfied. */
complete: boolean;
|
|
929
|
+
/** When not complete: a short description of what is still missing (used to build a targeted continue-nudge). */
missing?: string;
|
|
930
|
+
}
|
|
931
|
+
/**
|
|
932
|
+
* Completion supervisor hook (reliability) — see EngineOpts.verifyCompletion. Given the original request + a
|
|
933
|
+
* transcript of what the run produced, decide whether the task is genuinely done. Host-supplied (typically a
|
|
934
|
+
* cheap LLM judge). FAIL-SAFE at the call site: a throw/timeout is treated as "complete" (never trap a run).
|
|
935
|
+
*/
|
|
936
|
+
type CompletionVerifier = (args: {
|
|
937
|
+
/** The original request. */
request: string;
|
|
938
|
+
/** A transcript of what the run produced. */
transcript: string;
|
|
939
|
+
/** NOTE(review): presumably the `SwarmContext` of the run being verified — confirm. */
ctx: SwarmContext;
|
|
940
|
+
// The verdict may be returned synchronously or as a promise.
}) => Promise<CompletionVerdict> | CompletionVerdict;
|
|
361
941
|
interface Runner {
|
|
362
942
|
run(input: RunInput, ctx: SwarmContext): AsyncIterable<SwarmEvent>;
|
|
363
943
|
enqueue(input: RunInput, ctx: SwarmContext): Promise<{
|
|
@@ -369,6 +949,22 @@ interface Runner {
|
|
|
369
949
|
followupId: string;
|
|
370
950
|
answer: unknown;
|
|
371
951
|
}, ctx: SwarmContext): AsyncIterable<SwarmEvent>;
|
|
952
|
+
/**
|
|
953
|
+
* Durable resume (symmetric with `enqueue`): wake a suspended run and return its `runId` WITHOUT streaming the
|
|
954
|
+
* continuation — the resumed events reach the client over its EXISTING Realtime subscription. A streaming-only
|
|
955
|
+
* runner omits this; the durable runner provides it. Streaming the resume instead would never close (the
|
|
956
|
+
* Realtime subscribe has no terminal), so the client's `answer()` would hang. Also re-wakes a continuation lost
|
|
957
|
+
* to a process restart, from the durable snapshot (see the background runner).
|
|
958
|
+
*/
|
|
959
|
+
resumeEnqueue?(args: {
|
|
960
|
+
runId: string;
|
|
961
|
+
toolCallId: string;
|
|
962
|
+
followupId: string;
|
|
963
|
+
answer: unknown;
|
|
964
|
+
context?: Record<string, unknown>;
|
|
965
|
+
}, ctx: SwarmContext): Promise<{
|
|
966
|
+
runId: string;
|
|
967
|
+
}>;
|
|
372
968
|
}
|
|
373
969
|
|
|
374
970
|
type ByType<T extends SwarmEvent["type"]> = Extract<SwarmEvent, {
|
|
@@ -382,6 +978,23 @@ declare function ev<T extends SwarmEvent["type"]>(type: T, base: {
|
|
|
382
978
|
}, data: ByType<T>["data"]): ByType<T>;
|
|
383
979
|
/** Type guard: when it returns `true`, `e` is narrowed to `ByType<T>`. NOTE(review): presumably decided by comparing `e.type` with `type` — confirm. */
declare function isEvent<T extends SwarmEvent["type"]>(e: SwarmEvent, type: T): e is ByType<T>;
|
|
384
980
|
|
|
981
|
+
/**
|
|
982
|
+
* The per-run mutable store a first-party tool sees on `SwarmToolContext.state`. A simple keyed bag plus an
|
|
983
|
+
* `entries()` snapshot the `onRunEnd` drain reads. Reads of an unset key yield `undefined`; it never throws.
|
|
984
|
+
*/
|
|
985
|
+
interface RunStateHandle {
|
|
986
|
+
/** Read the value stored under `key`; an unset key yields `undefined` (never throws). `T` only types the result for the caller. */
get<T = unknown>(key: string): T | undefined;
|
|
987
|
+
/** Store `value` under `key`. Values must be JSON-serializable (they ride the run snapshot). */
set(key: string, value: unknown): void;
|
|
988
|
+
/** Whether `key` currently has an entry. */
has(key: string): boolean;
|
|
989
|
+
/** Remove `key`. NOTE(review): the boolean presumably reports whether an entry existed (Map-style) — confirm. */
delete(key: string): boolean;
|
|
990
|
+
/** A point-in-time DEEP snapshot of all current entries — what `onRunEnd` flushes and the engine persists into
|
|
991
|
+
* the run snapshot. Deep-copied so a later in-place mutation of a stored object can't corrupt an already-taken
|
|
992
|
+
* snapshot. Values must be JSON-serializable (they ride the run snapshot). */
|
|
993
|
+
entries(): Record<string, unknown>;
|
|
994
|
+
}
|
|
995
|
+
/**
 * Build a fresh, Map-backed run-state handle.
 *
 * @param seed Optional initial entries (e.g. from `onRunStart`) that pre-populate the handle.
 * @returns A new `RunStateHandle`.
 */
|
|
996
|
+
declare function createRunState(seed?: Record<string, unknown>): RunStateHandle;
|
|
997
|
+
|
|
385
998
|
/** Identifies who holds (or is queued for) a container's floor — surfaced in the "waiting for X" indicator. */
|
|
386
999
|
interface FloorHolder {
|
|
387
1000
|
/** Display name of the holding run's lane agent, e.g. "Coordinator". */
|
|
@@ -422,88 +1035,116 @@ declare class InMemoryContainerFloor implements ContainerFloor {
|
|
|
422
1035
|
/** Process-wide singleton. In-memory → single-process only (serverless instances don't share it). */
|
|
423
1036
|
declare const containerFloor: ContainerFloor;
|
|
424
1037
|
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
private prices;
|
|
435
|
-
constructor(opts: {
|
|
436
|
-
maxSteps: number;
|
|
437
|
-
maxCostUsd: number;
|
|
438
|
-
prices?: Record<string, Price>;
|
|
439
|
-
});
|
|
440
|
-
step(): void;
|
|
441
|
-
/** Price a single usage WITHOUT accumulating it (for per-generation telemetry cost). */
|
|
442
|
-
priceOf(modelId: string, u: {
|
|
443
|
-
inputTokens: number;
|
|
444
|
-
outputTokens: number;
|
|
445
|
-
}): number;
|
|
446
|
-
addUsage(modelId: string, u: {
|
|
447
|
-
inputTokens: number;
|
|
448
|
-
outputTokens: number;
|
|
449
|
-
}): void;
|
|
450
|
-
costUsd(): number;
|
|
451
|
-
shouldStop(): {
|
|
452
|
-
stop: boolean;
|
|
453
|
-
reason?: string;
|
|
454
|
-
};
|
|
1038
|
+
/** The two routing tiers: `"swift"` (the cheap default model) and `"genius"` (the frontier model). */
type ModelTier = "swift" | "genius";
|
|
1039
|
+
/** A model reference, represented as a plain string; the type of the `tiers.swift` / `tiers.genius` values in `TierConfig`. */
type ModelRef = string;
|
|
1040
|
+
/** Context handed to the per-task escalation hook so it can decide whether a specific generation needs Genius. */
|
|
1041
|
+
interface TierEscalationContext {
|
|
1042
|
+
/** NOTE(review): presumably the tenant the generation runs for — inferred from the name; confirm. */
tenantId: string;
|
|
1043
|
+
/** The agent the generation is for. */
|
|
1044
|
+
agentSlug: string;
|
|
1045
|
+
/** The agent's declared modelId (a tier sentinel when it opted into routing; the concrete pin otherwise). */
|
|
1046
|
+
pinnedModelId?: string;
|
|
455
1047
|
}
|
|
456
1048
|
/**
|
|
457
|
-
*
|
|
458
|
-
*
|
|
459
|
-
*
|
|
460
|
-
*
|
|
461
|
-
*
|
|
1049
|
+
* The tier configuration (lives on `SwarmConfig.models.tier` → `EngineOpts.tier`). @mastra-free.
|
|
1050
|
+
* - `tiers.swift` — the cheap DEFAULT model. REQUIRED (the floor every non-pinned agent lands on).
|
|
1051
|
+
* - `tiers.genius` — the frontier model. OPTIONAL; reachable ONLY through the premium gate below.
|
|
1052
|
+
* - `default` — the tier the bare `"tier:"` sentinel resolves to. Default `"swift"` (cheap-default).
|
|
1053
|
+
* - `allowGenius` — the SERVER-ENFORCED OPT-IN GATE. Default `false`. This is NOT a user-facing "smart"
|
|
1054
|
+
* slider: it lives in the platform-set EngineOpts so a pack/agent config CANNOT grant itself
|
|
1055
|
+
* Genius. With it false, any Genius request (a `tier:genius` agent OR an escalation) is
|
|
1056
|
+
* DOWNGRADED to Swift so the run still proceeds cheaply (deny-vs-downgrade: we DOWNGRADE).
|
|
1057
|
+
* - `escalate` — optional per-task hook (configurable per pack): given the generation's context it may bump
|
|
1058
|
+
* the chosen tier to `"genius"` (e.g. an ambiguous case). STILL subject to `allowGenius` —
|
|
1059
|
+
* escalation cannot bypass the premium gate.
|
|
462
1060
|
*/
|
|
463
|
-
interface
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
1061
|
+
interface TierConfig {
|
|
1062
|
+
/** The concrete model behind each tier. */
tiers: {
|
|
1063
|
+
/** The cheap DEFAULT model. Required — the floor every non-pinned agent lands on. */
swift: ModelRef;
|
|
1064
|
+
/** The frontier model. Optional; reachable only through the `allowGenius` gate. */
genius?: ModelRef;
|
|
1065
|
+
};
|
|
1066
|
+
/** The tier the bare `"tier:"` sentinel resolves to. Defaults to `"swift"`. */
default?: ModelTier;
|
|
1067
|
+
/** The server-enforced opt-in gate for Genius. Defaults to `false`; while false any Genius request is downgraded to Swift. */
allowGenius?: boolean;
|
|
1068
|
+
/** Optional per-task hook that may bump the chosen tier to `"genius"`; the result is still subject to `allowGenius`. */
escalate?: (ctx: TierEscalationContext) => ModelTier | undefined;
|
|
470
1069
|
}
|
|
471
|
-
/**
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
addUsage(slug: string, modelId: string, u: {
|
|
484
|
-
inputTokens: number;
|
|
485
|
-
outputTokens: number;
|
|
486
|
-
}): void;
|
|
487
|
-
/** The first delegate that has met or exceeded its USD cap, or null. */
|
|
488
|
-
exceeded(): {
|
|
489
|
-
slug: string;
|
|
490
|
-
reason: string;
|
|
491
|
-
} | null;
|
|
1070
|
+
/** The outcome of routing a single generation's model. `modelId` is always the EFFECTIVE id to use next. */
|
|
1071
|
+
interface TierResolution {
|
|
1072
|
+
/** The effective modelId to hand to the allow-list + factory. */
|
|
1073
|
+
modelId: string;
|
|
1074
|
+
/** The tier actually landed on. Undefined when the agent pinned a concrete model (a pin is not a tier). */
|
|
1075
|
+
tier?: ModelTier;
|
|
1076
|
+
/** True when a Genius request was downgraded to Swift (gate closed, or no genius model configured). */
|
|
1077
|
+
downgraded: boolean;
|
|
1078
|
+
/** The tier that was REQUESTED before gating (set when it differs from `tier`, i.e. on a downgrade). */
|
|
1079
|
+
requestedTier?: ModelTier;
|
|
1080
|
+
/** True when the escalation hook bumped the tier (to genius) and that bump survived the gate. */
|
|
1081
|
+
escalated?: boolean;
|
|
492
1082
|
}
|
|
1083
|
+
/**
 * Is this declared modelId a tier sentinel (opting into routing) rather than a concrete pin?
 *
 * @param modelId The agent's declared modelId; may be `undefined`.
 * @returns `true` for a tier sentinel, `false` for a concrete pin. NOTE(review): `undefined` presumably yields
 *   `false` — confirm against the implementation.
 */
|
|
1084
|
+
declare function isTierSentinel(modelId: string | undefined): boolean;
|
|
1085
|
+
/**
|
|
1086
|
+
* Resolve the effective model for ONE generation, applying tier routing + the premium Genius gate + escalation.
|
|
1087
|
+
*
|
|
1088
|
+
* Order of precedence:
|
|
1089
|
+
* 1. A CONCRETE PIN (non-sentinel modelId) is returned verbatim — routing/escalation never touch a pin.
|
|
1090
|
+
* 2. Otherwise the requested tier = the sentinel's tier (or the config default for a bare `"tier:"`).
|
|
1091
|
+
* 3. The optional `escalate` hook may bump the request to `"genius"`.
|
|
1092
|
+
* 4. THE GATE: a `"genius"` request survives ONLY when `allowGenius` is true AND a `genius` model is
|
|
1093
|
+
* configured; otherwise it DOWNGRADES to `"swift"` (the run proceeds cheaply; `downgraded` is flagged).
*
* @param modelId The agent's declared modelId — a tier sentinel or a concrete pin.
* @param cfg The tier configuration (tier models, default tier, `allowGenius` gate, optional `escalate` hook).
* @param ctx The generation's context, handed to the `escalate` hook.
* @returns The routing outcome; its `modelId` is always the effective id to use next.
|
|
1094
|
+
*/
|
|
1095
|
+
declare function resolveTier(modelId: string, cfg: TierConfig, ctx: TierEscalationContext): TierResolution;
|
|
1096
|
+
/**
|
|
1097
|
+
* The engine-facing convenience: given an agent's declared modelId, return the EFFECTIVE modelId after tier
|
|
1098
|
+
* routing. When no tier config is configured this is the identity function (today's behaviour). The resulting
|
|
1099
|
+
* modelId then flows through the SAME allow-list validation + factory mapping as before — a tier model is never
|
|
1100
|
+
* exempt from the allow-list.
*
* @param modelId The agent's declared modelId.
* @param cfg The tier configuration; `undefined` ⇒ `modelId` is returned unchanged.
* @param ctx The generation's escalation context. NOTE(review): presumably forwarded to `resolveTier` — confirm.
* @returns The effective modelId after tier routing.
|
|
1101
|
+
*/
|
|
1102
|
+
declare function tierModelId(modelId: string, cfg: TierConfig | undefined, ctx: TierEscalationContext): string;
|
|
493
1103
|
|
|
494
1104
|
interface EngineOpts {
|
|
495
1105
|
storage: StorageAdapter;
|
|
496
1106
|
model: ModelProvider;
|
|
497
1107
|
modelFactory: (modelId: string, agentSlug?: string) => unknown;
|
|
498
|
-
/**
|
|
499
|
-
*
|
|
1108
|
+
/**
|
|
1109
|
+
* SP10 — the cheap-model router (Swift/Genius tiers). @mastra-free. When set, the per-agent model resolver
|
|
1110
|
+
* routes a tier-sentinel `modelId` (`"tier:swift"` / `"tier:genius"` / bare `"tier:"`) to the configured tier
|
|
1111
|
+
* model; a concrete pinned `modelId` is kept verbatim (routing layers OVER pinning). `tiers.swift` is the
|
|
1112
|
+
* cheap DEFAULT every non-pinned agent lands on; `tiers.genius` is reachable ONLY through the server-enforced
|
|
1113
|
+
* `allowGenius` premium gate (default false) — set HERE in EngineOpts so a pack/agent config cannot grant itself
|
|
1114
|
+
* Genius. An optional per-task `escalate` hook may bump a generation to Genius, still subject to the gate. When
|
|
1115
|
+
* a Genius request is denied by the gate it DOWNGRADES to Swift so the run proceeds cheaply. Omit ⇒ no routing
|
|
1116
|
+
* (identical to today: the agent's own `modelId` flows through, still allow-list-validated). The routed model
|
|
1117
|
+
* is ALWAYS re-validated by the allow-list `model` provider — a tier model is never exempt.
|
|
1118
|
+
*/
|
|
1119
|
+
tier?: TierConfig;
|
|
1120
|
+
/** Global per-run caps + metering config (SP1) + optional per-delegate sub-budgets (R5: `perDelegate` stops
|
|
1121
|
+
* one runaway sub-agent from burning the whole turn before the global `maxCostUsd` notices).
|
|
1122
|
+
* - `prices`: static per-model overrides, merged over PRICE_TABLE.
|
|
1123
|
+
* - `priceFeed`: optional live price seam (numbers only — engine wall). Merged over `prices`.
|
|
1124
|
+
* - `failOnUnknownModel` (default false): an unpriced model THROWS instead of pricing at $0.
|
|
1125
|
+
* - `onCapHit` (SP9-core, default "stop"): the GLOBAL run-level cost/step cap's behaviour when reached.
|
|
1126
|
+
* "stop" (DEFAULT) = today's terminal `run_failed` stage "cost" — byte-identical to before.
|
|
1127
|
+
* "ask" = PAUSE the run and ASK the user "Budget cap reached — continue?" (a CONSUMER-context opt-in),
|
|
1128
|
+
* reusing SP5's suspend→swarm.question→resume machinery. Resume-approve RAISES the budget by
|
|
1129
|
+
* `capIncrementUsd` and continues; resume-reject terminally stops (stage "cost"). The
|
|
1130
|
+
* per-delegate cap (`perDelegate`) is NOT folded into the ask flow — it stays terminal.
|
|
1131
|
+
* - `capIncrementUsd` (SP9-core): the additional USD headroom an "ask" APPROVE grants (`maxCostUsd +=`).
|
|
1132
|
+
* Defaults to the original `maxCostUsd` ("another budget's worth"). Only meaningful with `onCapHit:"ask"`. */
|
|
500
1133
|
cost: {
|
|
501
1134
|
maxSteps: number;
|
|
502
1135
|
maxCostUsd: number;
|
|
503
1136
|
perDelegate?: PerDelegateBudget;
|
|
1137
|
+
prices?: Record<string, Price>;
|
|
1138
|
+
priceFeed?: PriceFeed;
|
|
1139
|
+
failOnUnknownModel?: boolean;
|
|
1140
|
+
onCapHit?: "stop" | "ask";
|
|
1141
|
+
capIncrementUsd?: number;
|
|
504
1142
|
};
|
|
505
1143
|
/** Per-swarm skill resolver. When omitted, agents expose only built-in tools. */
|
|
506
1144
|
resolveSkill?: (name: string) => SwarmSkill | undefined;
|
|
1145
|
+
/** PR2 — opt-in per-request connector-tools resolver (tenant-scoped). Forwarded verbatim to `buildMastraAgent`
|
|
1146
|
+
* for the orchestrator + every sub-agent. Connector-agnostic: core never imports `@nightowlsdev/connectors`. */
|
|
1147
|
+
connectorTools?: (ctx: SwarmContext) => Promise<SwarmTool[]>;
|
|
507
1148
|
/**
|
|
508
1149
|
* Mastra storage backend for suspend/resume snapshots. Resume is storage-gated
|
|
509
1150
|
* (SPIKE-FINDINGS item 5): the in-memory default cannot survive process death.
|
|
@@ -531,9 +1172,79 @@ interface EngineOpts {
|
|
|
531
1172
|
};
|
|
532
1173
|
/** Opt-in `recall_lane` tool (Part E). Read-only peer-lane transcript read. */
|
|
533
1174
|
recallLane?: boolean;
|
|
1175
|
+
/** Phase A soft tier: per-agent soft-policy lines (advise rules + advisory-workflow summaries) appended to
|
|
1176
|
+
* each agent's system prompt. Built by `defineSwarm` from the swarm's rules/workflows. Omit ⇒ no policy. */
|
|
1177
|
+
softPolicy?: (slug: string) => string[];
|
|
1178
|
+
/** Phase B: swarm-level STRICT workflows, runnable by name via `RunInput.workflow`. Built by `defineSwarm`. */
|
|
1179
|
+
workflows?: WorkflowDef[];
|
|
1180
|
+
/** Phase B: per-agent STRICT workflows (keyed by agent slug) — replace that agent's turn when it owns the run. */
|
|
1181
|
+
agentWorkflows?: Record<string, WorkflowDef>;
|
|
534
1182
|
/** Injectable per-lane floor (Part C / E3). Default: the in-memory process singleton. Pass a Postgres-backed
|
|
535
1183
|
* floor (createPostgresFloor) for serverless / multi-instance deploys. */
|
|
536
1184
|
floor?: ContainerFloor;
|
|
1185
|
+
/** Decision/observer hook dispatcher (SP2). `defineSwarm` always supplies one (allow-all when no hooks are
|
|
1186
|
+
* configured). When omitted (e.g. an engine built directly in a unit test), the engine defaults to an
|
|
1187
|
+
* allow-all dispatcher — behaviour identical to today. The engine AWAITS `preGeneration` before every model
|
|
1188
|
+
* launch; a `deny` vetoes the generation (terminal `run_failed` stage `"reserve"`). The same dispatcher's
|
|
1189
|
+
* `preToolCall` powers SP5's action-approval gate. */
|
|
1190
|
+
hooks?: HookDispatcher;
|
|
1191
|
+
/**
|
|
1192
|
+
* SP5 — the NON-REMOVABLE tool-approval policy (a P0 SAFETY control: spend caps limit cost, not harm). Forces
|
|
1193
|
+
* human approval on side-effecting tools regardless of the per-tool `needsApproval` flag, so a consumer pack
|
|
1194
|
+
* can't ship a `needsApproval:false` $0.50 action that causes $50k of damage. `defineSwarm` bakes this into the
|
|
1195
|
+
* `hooks` dispatcher (which combines policy + flag + the `preToolCall` hook), so when `hooks` is supplied THAT
|
|
1196
|
+
* dispatcher's policy is authoritative. This standalone field lets a DIRECT engine builder (e.g. a unit test
|
|
1197
|
+
* that passes no dispatcher) set the policy; the engine then builds an allow-all-hooks dispatcher WITH it.
|
|
1198
|
+
* Default `{ mode: "flag" }` — today's behaviour (only `needsApproval:true` tools gate).
|
|
1199
|
+
*/
|
|
1200
|
+
toolApproval?: ToolApprovalPolicy;
|
|
1201
|
+
/**
|
|
1202
|
+
* SP15 — the optional SecretResolver the platform vault (SP15-platform) implements. When set, the engine
|
|
1203
|
+
* injects it on every run's RequestContext (SAME seam as SP5's ToolGate) so a first-party tool body can
|
|
1204
|
+
* `await ctx.secrets.resolve(ref)` to fetch a tenant-scoped secret at execution time. @mastra-free. Omit ⇒
|
|
1205
|
+
* `ctx.secrets.resolve(...)` yields `undefined` (no vault) and the no-secrets path is unchanged from today.
|
|
1206
|
+
*/
|
|
1207
|
+
secrets?: SecretResolver;
|
|
1208
|
+
/**
|
|
1209
|
+
* SP3 — best-effort per-event OBSERVER, fired by the engine AFTER each event is persisted (in `emit`), for
|
|
1210
|
+
* BOTH `run` and `resume`. Transport-agnostic: it sees every event regardless of how it reaches the client
|
|
1211
|
+
* (interactive SSE vs durable + realtime), so platform metering (debit on `swarm.turn_usage`, settle on a
|
|
1212
|
+
* terminal) can live HERE rather than teeing the route's stream. Awaited but FAIL-SAFE — a throwing observer
|
|
1213
|
+
* is swallowed (the host logs its own errors), NEVER breaking the run, exactly like `telemetry`. Omit ⇒ no-op.
|
|
1214
|
+
*/
|
|
1215
|
+
onEvent?: (ev: SwarmEvent, ctx: SwarmContext) => void | Promise<void>;
|
|
1216
|
+
/**
|
|
1217
|
+
* Completion supervisor (reliability) — an optional host check fired when a turn would END, to decide whether
|
|
1218
|
+
* the user's request was actually SATISFIED. The engine passes the original request + a transcript of what the
|
|
1219
|
+
* run produced; it returns `{ complete, missing? }`. When not complete the engine re-invokes the orchestrator
|
|
1220
|
+
* with a TARGETED nudge built from `missing` (same thread, full context), up to MAX_CONTINUE_NUDGES. If still
|
|
1221
|
+
* incomplete, the run ends `run_failed` stage "incomplete" (retryable) — a clear non-delivery the host can
|
|
1222
|
+
* refund — instead of a silent `done`. FAIL-SAFE: a throwing/rejecting verifier is treated as "complete"
|
|
1223
|
+
* (fail-open — never trap a run in a verify loop). Omit ⇒ the cheap structural "did the root speak last?"
|
|
1224
|
+
* fallback nudge is used instead.
|
|
1225
|
+
*/
|
|
1226
|
+
verifyCompletion?: CompletionVerifier;
|
|
1227
|
+
/**
|
|
1228
|
+
* FR-003 — per-run lifecycle hooks (fired on `run()`). `onRunStart` is awaited once when a run begins (after the
|
|
1229
|
+
* run row + thread are ensured, before the first model launch), receiving the run's `input` and a fresh
|
|
1230
|
+
* `RunStateHandle` to seed (e.g. from `input.context`). `onRunEnd` is awaited once at the run's terminal boundary
|
|
1231
|
+
* (done / failed / suspended / thrown / abandoned — it fires from the run's `finally`), receiving the final
|
|
1232
|
+
* `state` and the `outcome`, so a host can flush/persist deterministically. Both are FAIL-SAFE (a throw is
|
|
1233
|
+
* swallowed + logged, never breaking the run). `state` is the SAME handle the run's tools saw via `ctx.state`.
|
|
1234
|
+
* `onRunEnd` fires at EACH segment's terminal: a suspend ends the run() segment with outcome `"suspended"`, and
|
|
1235
|
+
* the resume segment fires its own `onRunEnd` (`"done"`/`"failed"`/`"suspended"`) — a host keys billing/persist
|
|
1236
|
+
* on the terminal outcome. A `resume` RESTORES the per-run `state` persisted at suspend (so a client-tool /
|
|
1237
|
+
* evolving-document flow keeps state across the answer); `onRunStart` fires on `run()` only. State values must be
|
|
1238
|
+
* JSON-serializable (they ride the run snapshot). Omit ⇒ no-op.
|
|
1239
|
+
*/
|
|
1240
|
+
onRunStart?: (ctx: SwarmContext, info: {
|
|
1241
|
+
input: RunInput;
|
|
1242
|
+
state: RunStateHandle;
|
|
1243
|
+
}) => void | Promise<void>;
|
|
1244
|
+
onRunEnd?: (ctx: SwarmContext, info: {
|
|
1245
|
+
state: RunStateHandle;
|
|
1246
|
+
outcome: "done" | "failed" | "suspended";
|
|
1247
|
+
}) => void | Promise<void>;
|
|
537
1248
|
}
|
|
538
1249
|
declare class SwarmEngine {
|
|
539
1250
|
private opts;
|
|
@@ -541,11 +1252,56 @@ declare class SwarmEngine {
|
|
|
541
1252
|
private rowCache;
|
|
542
1253
|
private memory;
|
|
543
1254
|
private floor;
|
|
1255
|
+
private hooks;
|
|
544
1256
|
constructor(opts: EngineOpts);
|
|
1257
|
+
/** SP1: the swarm's metering config, in the shape DelegateBudgets/priceUsage expect. CostGovernor reads the
|
|
1258
|
+
* same fields directly off `opts.cost`; this packs them for the per-delegate tracker so both caps price
|
|
1259
|
+
* tokens identically (built-in PRICE_TABLE ← static `prices` ← live `priceFeed`, with `failOnUnknownModel`). */
|
|
1260
|
+
private pricingOpts;
|
|
1261
|
+
/** Fire the best-effort per-event observer (`EngineOpts.onEvent`). Awaited so an async observer (e.g. a
|
|
1262
|
+
* metering debit) completes in order, but FAIL-SAFE: a throw is swallowed (the host logs its own), never
|
|
1263
|
+
* breaking the run — same contract as the telemetry exporter. No-op when no observer is configured. */
|
|
1264
|
+
private notifyEvent;
|
|
1265
|
+
/** Run the completion supervisor (`EngineOpts.verifyCompletion`), FAIL-OPEN: no verifier, or a throwing one,
|
|
1266
|
+
* yields `{ complete: true }` so a missing/broken judge never traps a run in a verify loop. */
|
|
1267
|
+
private safeVerify;
|
|
1268
|
+
/** Best-effort recall of the run's ORIGINAL request (first user message on the thread) for the completion
|
|
1269
|
+
* verifier on RESUME, where the engine doesn't hold the opening message. Empty on any failure / no verifier. */
|
|
1270
|
+
private recallRequest;
|
|
545
1271
|
/** Cached agent-row load shared by the three dynamic agent fns AND run/resume. */
|
|
546
1272
|
private loadRow;
|
|
1273
|
+
/** Resolve an agent's STORED modelId — which may be a tier sentinel (`"tier:"` / `"tier:swift"`) — to the
|
|
1274
|
+
* CONCRETE model id the generation actually runs on, so metering/pricing + the preGeneration event see the
|
|
1275
|
+
* real model, not the sentinel (which has no price → every tier-routed turn would meter at $0). Mirrors
|
|
1276
|
+
* mastra-map's modelFor routing; with no tier config it returns the id unchanged. (SP10 pricing follow-up.) */
|
|
1277
|
+
private priceModelId;
|
|
547
1278
|
private agent;
|
|
548
1279
|
private requestContext;
|
|
1280
|
+
/**
|
|
1281
|
+
* SP5 — the action-approval gate handed to every gated tool via the RequestContext. Bound once (stable
|
|
1282
|
+
* reference). Delegates to the dispatcher's `preToolCall`, which is fail-closed (a throwing configured hook ⇒
|
|
1283
|
+
* deny) and applies the non-removable policy. The defineTool wrapper turns the returned `ToolDecision` into:
|
|
1284
|
+
* allow → run; deny → blocked result; ask → suspend-and-ask (the existing `swarm.question`/resume machinery).
|
|
1285
|
+
*/
|
|
1286
|
+
private readonly toolGate;
|
|
1287
|
+
/**
|
|
1288
|
+
* SP5 truth-fix — resolve whether a tool WILL require approval, for the `swarm.tool_call` event's
|
|
1289
|
+
* `needsApproval` (the react reducer reads it to render an approval card). The mapChunk emit currently
|
|
1290
|
+
* hardcodes `false` (the truth-bug). This computes the truthful value from the SAME policy + per-tool flag the
|
|
1291
|
+
* gate uses: the tool's resolved `needsApproval` (its own flag, defaulting by origin) run through the
|
|
1292
|
+
* dispatcher's SYNC `policyDecision` — `ask` ⇒ true (it will gate), else false. The async `preToolCall` hook
|
|
1293
|
+
* can still escalate a specific call at execute time, but the policy-derived baseline is the truthful default
|
|
1294
|
+
* the UI needs without speculatively running the hook for every tool_call event.
|
|
1295
|
+
*/
|
|
1296
|
+
private gatesApproval;
|
|
1297
|
+
/**
|
|
1298
|
+
* SP2: the preGeneration DECISION seam. Awaited immediately before each model launch (run + resume). The
|
|
1299
|
+
* dispatcher is fail-closed (a throwing hook ⇒ deny), so this only ever sees a clean `allow`/`deny`; a `deny`
|
|
1300
|
+
* THROWS `ReserveDenied` so the model call below never happens and the run/resume catch-all maps it to a
|
|
1301
|
+
* terminal `run_failed` stage "reserve" (NOT the generic "exception"). Allow-all + zero-overhead when no
|
|
1302
|
+
* hooks are configured (the default dispatcher returns allow synchronously-ish without invoking anything).
|
|
1303
|
+
*/
|
|
1304
|
+
private guardGeneration;
|
|
549
1305
|
/** Per-call Mastra memory ids + delegation, only when memory is configured (else stream is unchanged). */
|
|
550
1306
|
private memoryOpts;
|
|
551
1307
|
/**
|
|
@@ -606,11 +1362,28 @@ declare class SwarmEngine {
|
|
|
606
1362
|
scratchpadPublic(container: string, ctx: SwarmContext): Promise<ScratchpadEntry[]>;
|
|
607
1363
|
/** In-flight runs (running|suspended) for a container + its lanes — powers cross-lane background presence (E5). */
|
|
608
1364
|
activeRuns(container: string, ctx: SwarmContext): Promise<ActiveRun[]>;
|
|
1365
|
+
/** The full, globally-ordered event log for a thread's CONTAINER (all its runs + lane sub-threads) — lets a host
|
|
1366
|
+
* rebuild the RICH timeline (tool calls + delegation cards) on reload, since message history is text-only.
|
|
1367
|
+
* Returns [] when the store has no events table (`listForContainer` unset). */
|
|
1368
|
+
threadEvents(threadId: string, ctx: SwarmContext): Promise<SwarmEvent[]>;
|
|
609
1369
|
/** The tenant's agent roster (slug, title-cased display name, role, delegate graph) as wall-safe
|
|
610
1370
|
* AgentSummary[]. Sourced from the agent rows; no vendor type in the signature or result. Powers
|
|
611
1371
|
* the multi-agent pile / @mention UI. */
|
|
612
1372
|
listAgents(ctx: SwarmContext): Promise<AgentSummary[]>;
|
|
613
1373
|
run(input: RunInput, ctx: SwarmContext): AsyncIterable<SwarmEvent>;
|
|
1374
|
+
/**
|
|
1375
|
+
* Phase B — drive a STRICT workflow IN PLACE OF the free-form continue-nudge loop. Shared by `run()` (fresh)
|
|
1376
|
+
* and `resume()` (re-entry after a human/approval suspend). An `agent` step reuses `this.agent().stream()`
|
|
1377
|
+
* with a per-step requestContext (agentSlug = the step's agent) so it inherits persona/tools/gate/model/cost;
|
|
1378
|
+
* a `tool` step runs `executeToolWithGate`; a `human`/approval pause suspends SP9-style. Reserve, usage, and
|
|
1379
|
+
* the terminal turn_usage flow through the caller's machinery (`m`). Handles the terminal status/setStatus.
|
|
1380
|
+
*/
|
|
1381
|
+
private driveWorkflow;
|
|
1382
|
+
/** A workflow `agent` step: stream `slug` with `message` (a per-step requestContext so it inherits the agent's
|
|
1383
|
+
* persona/tools/gate/model), reserving + metering through the caller's machinery, returning the final text. */
|
|
1384
|
+
private streamWorkflowAgentStep;
|
|
1385
|
+
/** A workflow `tool` step: run the gate-free tool body through `executeToolWithGate` (the engine-owned gate). */
|
|
1386
|
+
private runWorkflowToolStep;
|
|
614
1387
|
resume(args: {
|
|
615
1388
|
runId: string;
|
|
616
1389
|
toolCallId: string;
|
|
@@ -619,6 +1392,24 @@ declare class SwarmEngine {
|
|
|
619
1392
|
context?: Record<string, unknown>;
|
|
620
1393
|
}, ctx: SwarmContext): AsyncIterable<SwarmEvent>;
|
|
621
1394
|
}
|
|
1395
|
+
/**
|
|
1396
|
+
* SP2: a typed veto thrown when the `preGeneration` decision hook DENIES a model launch. Caught explicitly in
|
|
1397
|
+
* the run/resume catch-all so it maps to a TERMINAL `run_failed` stage `"reserve"` (mirroring the cost cap's
|
|
1398
|
+
* `"cost"` stage) instead of falling through to the generic `"exception"` stage. The model call has NOT
|
|
1399
|
+
* happened when this throws — the seam is BEFORE `stream`/`resumeStream`.
|
|
1400
|
+
*/
|
|
1401
|
+
declare class ReserveDenied extends Error {
|
|
1402
|
+
readonly stage: "reserve";
|
|
1403
|
+
constructor(reason: string);
|
|
1404
|
+
}
|
|
1405
|
+
|
|
1406
|
+
/** The bound, run-scoped resolver handed to a tool body via `ctx.secrets`. `resolve(ref)` carries no ctx arg —
|
|
1407
|
+
* the run's tenant/auth scope is captured by the binding, so a tool can never resolve another tenant's secret
|
|
1408
|
+
* (the scope comes from the trusted RequestContext, NOT from tool args). Returns `undefined` when no resolver is
|
|
1409
|
+
* configured or the ref is unknown. */
|
|
1410
|
+
interface BoundSecrets {
|
|
1411
|
+
resolve(ref: string): Promise<string | undefined>;
|
|
1412
|
+
}
|
|
622
1413
|
|
|
623
1414
|
interface ToolSpec<I, O> {
|
|
624
1415
|
name: string;
|
|
@@ -633,9 +1424,23 @@ interface SwarmToolContext {
|
|
|
633
1424
|
tenantId: string;
|
|
634
1425
|
userId: string;
|
|
635
1426
|
runId: string;
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
1427
|
+
/**
|
|
1428
|
+
* SP15 — a run-scoped secret resolver (always present; bound to THIS run's tenant/auth ctx). A first-party
|
|
1429
|
+
* tool body calls `await ctx.secrets.resolve(ref)` to fetch a scoped secret value at execution time, mirroring
|
|
1430
|
+
* how the MCP connector resolves a credentialRef. The run's tenant scope is captured by the binding (NOT passed
|
|
1431
|
+
* by the tool), so a tool can never resolve another tenant's secret. Resolves to `undefined` when the swarm has
|
|
1432
|
+
* no SecretResolver configured (no vault) or the ref is unknown — never throws. Optional in the type only for
|
|
1433
|
+
* back-compat with code that constructs a bare ctx; the engine always populates it.
|
|
1434
|
+
*/
|
|
1435
|
+
secrets?: BoundSecrets;
|
|
1436
|
+
/**
|
|
1437
|
+
* FR-003 — the per-run mutable state store. The SAME handle across every tool call in this run (orchestrator
|
|
1438
|
+
* AND delegated sub-agents), so a chain of tools (`addPrimitive → addStageZone → …`) can read the previous
|
|
1439
|
+
* tool's result and write the next. Seeded by `SwarmConfig.onRunStart` (e.g. from the client payload), drained
|
|
1440
|
+
* by `onRunEnd`. GC'd with the run — no module-level registry. Optional in the type only for back-compat with a
|
|
1441
|
+
* hand-constructed bare ctx; the engine always populates it.
|
|
1442
|
+
*/
|
|
1443
|
+
state?: RunStateHandle;
|
|
639
1444
|
}
|
|
640
1445
|
interface SwarmTool {
|
|
641
1446
|
name: string;
|
|
@@ -645,6 +1450,20 @@ interface SwarmTool {
|
|
|
645
1450
|
type SwarmSkill = SwarmTool;
|
|
646
1451
|
declare function defineTool<I, O>(spec: ToolSpec<I, O>): SwarmTool;
|
|
647
1452
|
declare function defineSkill(tool: SwarmTool): SwarmSkill;
|
|
1453
|
+
interface ClientToolSpec<I, O> {
|
|
1454
|
+
name: string;
|
|
1455
|
+
description?: string;
|
|
1456
|
+
inputSchema: z.ZodType<I>;
|
|
1457
|
+
outputSchema?: z.ZodType<O>;
|
|
1458
|
+
/** Ask the UI to confirm before the client runs the action (the handler renders an approval step). Default false. */
|
|
1459
|
+
needsApproval?: boolean;
|
|
1460
|
+
}
|
|
1461
|
+
/** Thrown when a client tool's handler reports an error (the browser posted `{ error }`) — surfaces as a failed
|
|
1462
|
+
* tool_result so the model sees the action did not succeed. */
|
|
1463
|
+
declare class ClientToolError extends Error {
|
|
1464
|
+
constructor(toolName: string, reason?: string);
|
|
1465
|
+
}
|
|
1466
|
+
declare function defineClientTool<I, O>(spec: ClientToolSpec<I, O>): SwarmTool;
|
|
648
1467
|
interface AgentSpec {
|
|
649
1468
|
slug: string;
|
|
650
1469
|
role?: "orchestrator" | "specialist";
|
|
@@ -655,22 +1474,100 @@ interface AgentSpec {
|
|
|
655
1474
|
modelId?: string;
|
|
656
1475
|
/** Per-agent memory OPTIONS override (R9), merged over the swarm `memory` config. Infra stays swarm-wide. */
|
|
657
1476
|
memory?: AgentMemoryOverride;
|
|
1477
|
+
/** Per-agent rules (additive over swarm rules for THIS agent). Engine-local in v1 (not persisted/versioned). */
|
|
1478
|
+
rules?: RuleDef[];
|
|
1479
|
+
/** Per-agent workflow/procedure. Engine-local in v1. A strict one is rejected by `defineSwarm` until Phase B. */
|
|
1480
|
+
workflow?: WorkflowDef;
|
|
658
1481
|
}
|
|
659
1482
|
declare function defineAgent(spec: AgentSpec): AgentDef;
|
|
1483
|
+
/**
|
|
1484
|
+
* Normalize + validate a `RuleSpec` into a `RuleDef`. Plain data (no engine types) — the compiled rule the
|
|
1485
|
+
* engine holds. Validation is compile-time (throws): `enforce` requires an `action`; `ask` is TOOL-SEAM ONLY
|
|
1486
|
+
* (preGeneration is binary allow/deny — it cannot suspend); an `ask` cannot explicitly target a delegation
|
|
1487
|
+
* (`agent-*`) because `gateDelegation` defers `ask` to the sub-agent's inner gates (use `deny`).
|
|
1488
|
+
*/
|
|
1489
|
+
declare function defineRule(spec: RuleSpec): RuleDef;
|
|
1490
|
+
/**
|
|
1491
|
+
* Normalize + validate a `WorkflowSpec` into a `WorkflowDef`. Validation is GRAPH-ONLY (pure code): unique step
|
|
1492
|
+
* ids, exactly one kind per step, `start`/`next`/`to`/`$ref` reference known steps, no cycles. Agent/tool
|
|
1493
|
+
* EXISTENCE is NOT validated here (it's runtime — `defineSwarm` doesn't validate delegate slugs and tenant DB
|
|
1494
|
+
* rows make it impossible; an unknown agent/tool surfaces as a runtime `run_failed` stage "workflow").
|
|
1495
|
+
*/
|
|
1496
|
+
declare function defineWorkflow(spec: WorkflowSpec): WorkflowDef;
|
|
660
1497
|
/** Build a per-swarm skill resolver from the agents' attached skill handles. */
|
|
661
1498
|
declare function buildSkillResolver(agents: AgentDef[]): (name: string) => SwarmSkill | undefined;
|
|
1499
|
+
/**
|
|
1500
|
+
* Compose + CLOSURE-VALIDATE a capability bundle from `defineAgent` outputs (BN0 static composition + BN1 connector
|
|
1501
|
+
* grants). Pure normalizer/validator (no storage, no Mastra) — same posture as `defineRule`/`defineWorkflow`.
|
|
1502
|
+
* Validates, at author time, that the bundle is self-contained:
|
|
1503
|
+
* - every member `skillName` resolves to a first-party **handle** present on the bundle, OR is a declared
|
|
1504
|
+
* **connector grant** for that member (BN1 — connector-backed, materialized per-tenant by the host at runtime);
|
|
1505
|
+
* - every member delegate is a bundle member or a declared `requires` dependency;
|
|
1506
|
+
* - every tool-seam rule ref and every workflow `step.tool` resolves to a handle or any declared grant;
|
|
1507
|
+
* - no workflow step embeds a credential/connection ref.
|
|
1508
|
+
* So a missing handle/grant fails LOUD here, not as a runtime `run_failed`. Connector grants fold their action names
|
|
1509
|
+
* into the granted member's `skillNames` so the host's `connectorTools` resolver grants them by membership at call time.
|
|
1510
|
+
*/
|
|
1511
|
+
declare function defineBundle(spec: BundleSpec): BundleDef;
|
|
1512
|
+
/**
|
|
1513
|
+
* Fold a validated bundle into a `SwarmConfig` (its agents + swarm-scoped rules/workflows) so the result is a
|
|
1514
|
+
* drop-in `defineSwarm` input. Per-agent rules/workflows ride on the merged `AgentDef`s and are collected by
|
|
1515
|
+
* `defineSwarm` exactly as for hand-authored agents — the bundle is a FRONT-END to `defineSwarm`, not a parallel
|
|
1516
|
+
* engine, so it adds no new runtime path. A bundle that re-declares an existing agent slug is a conflict (fail loud).
|
|
1517
|
+
*/
|
|
1518
|
+
declare function mergeBundle(cfg: SwarmConfig, bundle: BundleDef): SwarmConfig;
|
|
1519
|
+
/**
|
|
1520
|
+
* Project a `BundleDef` (in-process, carrying skill HANDLES) into its SERIALIZABLE `BundleVersionContent` for
|
|
1521
|
+
* persistence (BN2): each member becomes its head's `AgentVersionContent` (with the `version` and the handles dropped),
|
|
1522
|
+
* and the rules/workflows/connector-grants/deps carry through as the plain data they already are. This is the
|
|
1523
|
+
* bridge from BN0/BN1 (compose in-process) to BN2 (persist + version). The result has `skillNames` but no
|
|
1524
|
+
* handles — re-hydrating it into a live swarm (BN3 apply) needs a host-supplied handle manifest.
|
|
1525
|
+
*/
|
|
1526
|
+
declare function toBundleContent(def: BundleDef): BundleVersionContent;
|
|
662
1527
|
declare const ASK_TOOL_NAME = "ask";
|
|
663
1528
|
interface SwarmConfig {
|
|
664
1529
|
storage: StorageAdapter;
|
|
665
1530
|
agents: AgentDef[];
|
|
1531
|
+
/**
|
|
1532
|
+
* The model allow-list + optional SP10 cheap-model router. `allow` is the per-tenant allow-set every
|
|
1533
|
+
* resolved model (incl. a tier model) must pass. `tier` (optional) enables Swift/Genius routing: a non-pinning
|
|
1534
|
+
* agent (tier-sentinel `modelId`) lands on the cheap `swift` model by default; `genius` is reachable ONLY via
|
|
1535
|
+
* the server-enforced `allowGenius` premium gate (a pack/agent config cannot grant itself Genius). See TierConfig.
|
|
1536
|
+
*/
|
|
666
1537
|
models: {
|
|
667
1538
|
allow: string[];
|
|
1539
|
+
tier?: TierConfig;
|
|
668
1540
|
};
|
|
669
1541
|
modelFactory: (modelId: string, agentSlug?: string) => unknown;
|
|
1542
|
+
/**
|
|
1543
|
+
* PR2 — opt-in connector tools. Build with `materializeConnectors(connectors, backend)` from
|
|
1544
|
+
* `@nightowlsdev/connectors`; an agent gets a connector action only if the action `name` is in its
|
|
1545
|
+
* `skillNames` (i.e. it listed the action among its `skills`). Tenant-scoped + materialized per request.
|
|
1546
|
+
* Omit ⇒ no connector tools.
|
|
1547
|
+
*/
|
|
1548
|
+
connectorTools?: (ctx: SwarmContext) => Promise<SwarmTool[]>;
|
|
1549
|
+
/**
|
|
1550
|
+
* Global per-run caps + metering config (SP1). `prices` statically overrides the built-in PRICE_TABLE;
|
|
1551
|
+
* `priceFeed` is an optional live seam supplying NUMBERS only (engine wall); `failOnUnknownModel` (default
|
|
1552
|
+
* false) makes an unpriced model THROW instead of pricing at $0. `perDelegate` adds optional per-delegate
|
|
1553
|
+
* USD sub-budgets (R5). All metering fields are threaded into the engine's CostGovernor + DelegateBudgets.
|
|
1554
|
+
*
|
|
1555
|
+
* SP9-core — the cap-that-asks (`onCapHit` + `capIncrementUsd`). `onCapHit:"ask"` (DEFAULT "stop") turns a
|
|
1556
|
+
* GLOBAL cost/step-cap hit into a PAUSE-and-ASK ("Budget cap reached — continue?") instead of a terminal
|
|
1557
|
+
* `run_failed`, so a consumer run can be granted more budget mid-task rather than dying. A pack sets this
|
|
1558
|
+
* SERVER-SIDE (it is not a per-agent flag). Resume-approve raises `maxCostUsd` by `capIncrementUsd` (default
|
|
1559
|
+
* = the original `maxCostUsd`) and continues; resume-reject terminally stops (stage "cost"). The per-delegate
|
|
1560
|
+
* cap is unaffected — it stays terminal. Leave `onCapHit` unset for today's behaviour (terminal stop).
|
|
1561
|
+
*/
|
|
670
1562
|
cost: {
|
|
671
1563
|
maxSteps: number;
|
|
672
1564
|
maxCostUsd: number;
|
|
673
1565
|
perDelegate?: PerDelegateBudget;
|
|
1566
|
+
prices?: Record<string, Price>;
|
|
1567
|
+
priceFeed?: PriceFeed;
|
|
1568
|
+
failOnUnknownModel?: boolean;
|
|
1569
|
+
onCapHit?: "stop" | "ask";
|
|
1570
|
+
capIncrementUsd?: number;
|
|
674
1571
|
};
|
|
675
1572
|
/**
|
|
676
1573
|
* Telemetry exporter(s). One or many — many are composed best-effort
|
|
@@ -701,6 +1598,71 @@ interface SwarmConfig {
|
|
|
701
1598
|
/** Opt-in `recall_lane` tool (Part E): lets an agent read a peer agent's lane transcript in the same
|
|
702
1599
|
* conversation. Read-only; reuses the engine's history (best-effort — empty without memory). */
|
|
703
1600
|
recallLane?: boolean;
|
|
1601
|
+
/**
|
|
1602
|
+
* Opt-in decision/observer hooks (SP2). Types-only / engine-free (from `@nightowlsdev/hooks`). Today exposes a
|
|
1603
|
+
* `preGeneration` DECISION hook that the engine AWAITS immediately before EACH model launch (run + resume):
|
|
1604
|
+
* an `allow` proceeds, a `deny` VETOES the generation (terminal `run_failed` stage `"reserve"`, no model call).
|
|
1605
|
+
* Decision hooks are FAIL-CLOSED — a throwing hook is treated as a deny (a billing/safety veto must never
|
|
1606
|
+
* silently allow on error). Omit ⇒ allow-all, identical to prior behaviour with zero overhead. This is the
|
|
1607
|
+
* seam the platform's per-generation billing RESERVE (SP3) plugs into.
|
|
1608
|
+
*/
|
|
1609
|
+
hooks?: SwarmHooks;
|
|
1610
|
+
/**
|
|
1611
|
+
* SP5 — the NON-REMOVABLE action-approval policy (a P0 SAFETY control). Forces human approval on
|
|
1612
|
+
* side-effecting tools regardless of the per-tool `needsApproval` flag, so a consumer pack cannot ship a
|
|
1613
|
+
* `needsApproval:false` $0.50 action that causes $50k of damage (spend caps limit cost, not harm). Two modes:
|
|
1614
|
+
* - `{ mode: "flag" }` (DEFAULT): today's behaviour — only `needsApproval:true` tools gate.
|
|
1615
|
+
* - `{ mode: "all-side-effecting" }`: force-ask EVERY non-read-only tool (every MCP tool + every first-party
|
|
1616
|
+
* tool not on the read-only allowlist), regardless of the per-tool flag. The safe default for an untrusted
|
|
1617
|
+
* consumer pack. Optionally override `readOnly` to customise the exempt set.
|
|
1618
|
+
* Baked into the `hooks` dispatcher, which combines policy + flag + the optional `preToolCall` hook into the
|
|
1619
|
+
* effective decision (allow / deny / ask-the-human). A `preToolCall` hook (in `hooks`) can add a richer gate.
|
|
1620
|
+
*/
|
|
1621
|
+
toolApproval?: ToolApprovalPolicy;
|
|
1622
|
+
/**
|
|
1623
|
+
* SP15 — opt-in secret resolution for first-party tools. Pass a `SecretResolver` (the platform vault,
|
|
1624
|
+
* SP15-platform) and the engine scopes it per-run so a tool body can `await ctx.secrets.resolve(ref)` to fetch
|
|
1625
|
+
* a tenant-scoped secret at execution time — the same security posture as the MCP connector's credentialRef
|
|
1626
|
+
* resolution (resolve at execution, scoped by the live ctx, never from tool args). @mastra-free. Omit for
|
|
1627
|
+
* today's behaviour: `ctx.secrets.resolve(...)` yields `undefined` (no vault).
|
|
1628
|
+
*/
|
|
1629
|
+
secrets?: SecretResolver;
|
|
1630
|
+
/**
|
|
1631
|
+
* SP3 — best-effort per-event OBSERVER, fired by the engine after each event is persisted (run + resume).
|
|
1632
|
+
* Transport-agnostic (sees every event regardless of interactive-SSE vs durable+realtime delivery), so the
|
|
1633
|
+
* platform's metering — debit on `swarm.turn_usage`, settle on a terminal — lives HERE instead of teeing the
|
|
1634
|
+
* route's stream. Awaited but FAIL-SAFE: a throwing observer is swallowed (log your own), never breaking the
|
|
1635
|
+
* run. The `preGeneration` reserve hook (above) + this observer are the two halves of the credit ledger.
|
|
1636
|
+
*/
|
|
1637
|
+
onEvent?: (ev: SwarmEvent, ctx: SwarmContext) => void | Promise<void>;
|
|
1638
|
+
/**
|
|
1639
|
+
* Completion supervisor (reliability) — see EngineOpts.verifyCompletion. When set, the engine asks this at a
|
|
1640
|
+
* turn's end whether the user's request was actually satisfied, nudges the orchestrator with the specific gap
|
|
1641
|
+
* if not, and ends `run_failed:incomplete` (refundable) instead of a silent `done` if it still can't finish.
|
|
1642
|
+
* Omit ⇒ the cheap structural "did the root speak last?" fallback nudge.
|
|
1643
|
+
*/
|
|
1644
|
+
verifyCompletion?: CompletionVerifier;
|
|
1645
|
+
/**
|
|
1646
|
+
* FR-003 — per-run lifecycle hooks. `onRunStart(ctx, { input, state })` seeds the run's mutable `ctx.state`
|
|
1647
|
+
* store (e.g. `state.set("sceneCode", input.context?.currentCode ?? "")`) once before the first generation;
|
|
1648
|
+
* `onRunEnd(ctx, { state, outcome })` drains it deterministically at the run's terminal boundary (done /
|
|
1649
|
+
* failed / suspended / thrown). The SAME `state` handle flows into every tool call of the run (orchestrator +
|
|
1650
|
+
* delegated sub-agents) via `ctx.state`. Both are FAIL-SAFE. Omit ⇒ no per-run state seeding/draining.
|
|
1651
|
+
*/
|
|
1652
|
+
onRunStart?: EngineOpts["onRunStart"];
|
|
1653
|
+
onRunEnd?: EngineOpts["onRunEnd"];
|
|
1654
|
+
/**
|
|
1655
|
+
* Declarative conditional policy (Phase A). `advise` rules are injected into the system prompt; `enforce`
|
|
1656
|
+
* rules compile into the decision hooks (deny/ask), folding in the non-removable SP5 policy floor. Per-agent
|
|
1657
|
+
* rules are authored via `defineAgent({ rules })` and applied additively.
|
|
1658
|
+
*/
|
|
1659
|
+
rules?: RuleDef[];
|
|
1660
|
+
/**
|
|
1661
|
+
* Authorable procedures (Phase A: `advisory` only — an `advisory` workflow's `description` is injected as a
|
|
1662
|
+
* suggested procedure; `compliance: "strict"` is REJECTED until the Phase-B step-driver). Per-agent
|
|
1663
|
+
* procedures are authored via `defineAgent({ workflow })`.
|
|
1664
|
+
*/
|
|
1665
|
+
workflows?: WorkflowDef[];
|
|
704
1666
|
}
|
|
705
1667
|
interface Swarm {
|
|
706
1668
|
engine: SwarmEngine;
|
|
@@ -748,10 +1710,7 @@ declare class SpanCollector {
|
|
|
748
1710
|
* Close the open generation with this step's usage + its own per-call cost (already priced from
|
|
749
1711
|
* the step usage by the engine). `costUsd` is per-generation — never a cumulative running total.
|
|
750
1712
|
*/
|
|
751
|
-
closeGeneration(usage: {
|
|
752
|
-
inputTokens: number;
|
|
753
|
-
outputTokens: number;
|
|
754
|
-
}, costUsd: number): void;
|
|
1713
|
+
closeGeneration(usage: UsageBreakdown, costUsd: number): void;
|
|
755
1714
|
openTool(toolCallId: string, name: string): void;
|
|
756
1715
|
closeTool(toolCallId: string, ok: boolean): void;
|
|
757
1716
|
/**
|
|
@@ -778,6 +1737,64 @@ declare class RowCache<V> {
|
|
|
778
1737
|
invalidate(key: string): void;
|
|
779
1738
|
}
|
|
780
1739
|
|
|
1740
|
+
/** Drain an `engine.run(...)` (an `AsyncIterable<SwarmEvent>`) into the eval-shaped trajectory + final output. The
|
|
1741
|
+
* output is the concatenation of the assistant `swarm.message` texts (the model's visible reply), in stream order. */
|
|
1742
|
+
declare function drainTrajectory(stream: AsyncIterable<SwarmEvent>): Promise<{
|
|
1743
|
+
events: SwarmEvent[];
|
|
1744
|
+
output: string;
|
|
1745
|
+
}>;
|
|
1746
|
+
/** FR-018 — run one input through a built `Swarm`/`SwarmEngine` and return its full trajectory + output. This is
|
|
1747
|
+
* the seam a host hands to `@nightowlsdev/eval` (`RunAgent = (case) => Promise<{ events, output }>`): no SSE,
|
|
1748
|
+
* no engine internals. `ctx` defaults to an ephemeral local context when omitted. */
|
|
1749
|
+
declare function runToTrajectory(target: Swarm | SwarmEngine, input: RunInput | string, ctx?: Partial<SwarmContext> & {
|
|
1750
|
+
agentSlug: string;
|
|
1751
|
+
}): Promise<{
|
|
1752
|
+
events: SwarmEvent[];
|
|
1753
|
+
output: string;
|
|
1754
|
+
}>;
|
|
1755
|
+
/** Options for {@link runAgent}. The model wiring is the only required part (the same `modelFactory` a swarm
|
|
1756
|
+
* uses); everything else mirrors the `SwarmConfig` fields and defaults to the single-agent/ephemeral case. */
|
|
1757
|
+
interface RunAgentOpts {
|
|
1758
|
+
/** Same `(modelId, agentSlug?) => model` factory `defineSwarm` takes (e.g. `openaiModels()`). Required. */
|
|
1759
|
+
modelFactory: SwarmConfig["modelFactory"];
|
|
1760
|
+
/** Model allow-list + optional tier router. Defaults to allowing the agent's own `modelId` (+ tier models). */
|
|
1761
|
+
models?: {
|
|
1762
|
+
allow?: string[];
|
|
1763
|
+
tier?: TierConfig;
|
|
1764
|
+
};
|
|
1765
|
+
/** Per-run caps. Defaults to a generous single-run budget (`{ maxSteps: 50, maxCostUsd: 10 }`). */
|
|
1766
|
+
cost?: Partial<SwarmConfig["cost"]>;
|
|
1767
|
+
/** Bring your own adapter (e.g. to inspect events afterwards). Defaults to a fresh `InMemoryStorage`. */
|
|
1768
|
+
storage?: StorageAdapter;
|
|
1769
|
+
/** Pass-throughs onto the one-agent SwarmConfig — identical semantics to `defineSwarm`. */
|
|
1770
|
+
telemetry?: SwarmConfig["telemetry"];
|
|
1771
|
+
memory?: SwarmConfig["memory"];
|
|
1772
|
+
hooks?: SwarmConfig["hooks"];
|
|
1773
|
+
toolApproval?: SwarmConfig["toolApproval"];
|
|
1774
|
+
secrets?: SwarmConfig["secrets"];
|
|
1775
|
+
onEvent?: SwarmConfig["onEvent"];
|
|
1776
|
+
onRunStart?: SwarmConfig["onRunStart"];
|
|
1777
|
+
onRunEnd?: SwarmConfig["onRunEnd"];
|
|
1778
|
+
pageContext?: SwarmConfig["pageContext"];
|
|
1779
|
+
mastraStore?: SwarmConfig["mastraStore"];
|
|
1780
|
+
/** Override the ephemeral run context (tenantId/userId/run/thread ids). */
|
|
1781
|
+
ctx?: Partial<SwarmContext>;
|
|
1782
|
+
}
|
|
1783
|
+
/** Build a one-agent `Swarm` from an `AgentDef` + options — the standalone equivalent of `defineSwarm` for a
|
|
1784
|
+
* single agent. Useful when you want the built engine (e.g. to call `resume`) rather than a one-shot run. */
|
|
1785
|
+
declare function buildSingleAgentSwarm(def: AgentDef, opts: RunAgentOpts): Swarm;
|
|
1786
|
+
/**
|
|
1787
|
+
* FR-019 — run a single `AgentDef` to completion and return its trajectory + final output, with NO Supabase
|
|
1788
|
+
* adapter and NO publish required. Honors tier resolution + cost cap + tool-approval (it builds the real engine).
|
|
1789
|
+
*
|
|
1790
|
+
* @example
|
|
1791
|
+
* const { output } = await runAgent(titleAgent, "Summarize: …", { modelFactory: openaiModels() });
|
|
1792
|
+
*/
|
|
1793
|
+
declare function runAgent(def: AgentDef, input: RunInput | string, opts: RunAgentOpts): Promise<{
|
|
1794
|
+
events: SwarmEvent[];
|
|
1795
|
+
output: string;
|
|
1796
|
+
}>;
|
|
1797
|
+
|
|
781
1798
|
declare class InMemoryStorage implements StorageAdapter {
|
|
782
1799
|
private evts;
|
|
783
1800
|
private seq;
|
|
@@ -789,13 +1806,22 @@ declare class InMemoryStorage implements StorageAdapter {
|
|
|
789
1806
|
private agentRows;
|
|
790
1807
|
private heads;
|
|
791
1808
|
private pads;
|
|
1809
|
+
private threadRows;
|
|
792
1810
|
seedAgent(v: AgentVersion, tenantId?: string): void;
|
|
793
1811
|
recordSuspend(runId: string, tenantId: string, followupId: string, toolCallId: string): void;
|
|
794
|
-
markFollowupAnswered(followupId: string, tenantId: string):
|
|
1812
|
+
markFollowupAnswered(followupId: string, tenantId: string): boolean;
|
|
795
1813
|
/** Test/host helper: read a run row (the RunStore interface is write-mostly). */
|
|
796
1814
|
getRun(runId: string): RunRow | undefined;
|
|
797
1815
|
events: EventStore;
|
|
798
1816
|
runs: RunStore;
|
|
1817
|
+
threads: ThreadStore;
|
|
1818
|
+
/** Test/host helper: read a recorded thread row. */
|
|
1819
|
+
getThread(id: string): {
|
|
1820
|
+
id: string;
|
|
1821
|
+
orgId: string;
|
|
1822
|
+
userId: string;
|
|
1823
|
+
projectId?: string;
|
|
1824
|
+
} | undefined;
|
|
799
1825
|
messages: MessageStore;
|
|
800
1826
|
scratchpad: ScratchpadStore;
|
|
801
1827
|
agents: AgentRepo;
|
|
@@ -806,9 +1832,57 @@ declare function composeSystemPrompt(row: AgentVersion): {
|
|
|
806
1832
|
role: "system";
|
|
807
1833
|
content: string;
|
|
808
1834
|
}[];
|
|
1835
|
+
/**
|
|
1836
|
+
* Render the soft-policy lines for an agent (advise-rule statements + advisory-workflow summaries, from
|
|
1837
|
+
* `softPolicyFor`) as a single system message. Returns `[]` when there are none (zero overhead / no message).
|
|
1838
|
+
*/
|
|
1839
|
+
declare function composePolicyPrompt(lines: string[]): {
|
|
1840
|
+
role: "system";
|
|
1841
|
+
content: string;
|
|
1842
|
+
}[];
|
|
809
1843
|
|
|
810
1844
|
declare const customAuth: (fn: AuthProvider["authenticate"]) => AuthProvider;
|
|
811
1845
|
|
|
1846
|
+
interface RateLimitConfig {
|
|
1847
|
+
/** Window length in seconds. */
|
|
1848
|
+
windowSec: number;
|
|
1849
|
+
/** Max allowed events per window. */
|
|
1850
|
+
max: number;
|
|
1851
|
+
}
|
|
1852
|
+
interface RateLimitState {
|
|
1853
|
+
count: number;
|
|
1854
|
+
windowStartSec: number;
|
|
1855
|
+
}
|
|
1856
|
+
interface RateLimitDecision {
|
|
1857
|
+
allow: boolean;
|
|
1858
|
+
/** Remaining allowance in the current window (0 when denied). */
|
|
1859
|
+
remaining: number;
|
|
1860
|
+
/** Seconds until the window resets (when the count clears). */
|
|
1861
|
+
resetSec: number;
|
|
1862
|
+
}
|
|
1863
|
+
/**
|
|
1864
|
+
* Pure fixed-window rate-limit decision. If there's no prior state or the window has elapsed, a fresh window
|
|
1865
|
+
* starts at count 1 (this event). Otherwise the count increments. Allow while count ≤ max. Returns the decision
|
|
1866
|
+
* AND the next state to persist. Fixed-window is chosen for simplicity + O(1) state (one counter); its known
|
|
1867
|
+
* burst-at-boundary tradeoff is acceptable for an abuse backstop (not a billing meter).
|
|
1868
|
+
*/
|
|
1869
|
+
declare function decideFixedWindow(prev: RateLimitState | null, cfg: RateLimitConfig, nowSec: number): {
|
|
1870
|
+
decision: RateLimitDecision;
|
|
1871
|
+
state: RateLimitState;
|
|
1872
|
+
};
|
|
1873
|
+
interface RateLimitStore {
|
|
1874
|
+
/** Record one event for `key` under `cfg` and return the decision. */
|
|
1875
|
+
hit(key: string, cfg: RateLimitConfig, nowSec: number): Promise<RateLimitDecision>;
|
|
1876
|
+
}
|
|
1877
|
+
/**
|
|
1878
|
+
* In-memory fixed-window store — a REAL limiter for a SINGLE instance. Keeps one window per key in a Map and
|
|
1879
|
+
* prunes expired keys opportunistically so memory stays bounded. NOT shared across instances: a horizontally
|
|
1880
|
+
* scaled deploy must back this with Redis/Postgres (same interface) or limits are per-instance.
|
|
1881
|
+
*/
|
|
1882
|
+
declare function createInMemoryRateLimitStore(): RateLimitStore;
|
|
1883
|
+
/** Build a "max/window" config from an already-parsed `max` (e.g. an env value of "60") plus a window length, clamped to sane positive values. */
|
|
1884
|
+
declare function rateConfig(max: number | undefined, windowSec: number, fallbackMax: number): RateLimitConfig;
|
|
1885
|
+
|
|
812
1886
|
declare const VERSION = "0.0.0";
|
|
813
1887
|
|
|
814
|
-
export { ASK_TOOL_NAME, type ActiveRun, type AgentDef, type AgentMemoryOverride, type AgentRepo, type AgentSpec, type AgentSummary, type AgentVersion, type AskField, type AskFieldOption, type AuthContext, type AuthProvider, CapturingExporter, type ContainerFloor, CostGovernor, DelegateBudgets, type EngineOpts, type EventStore, type FloorHolder, GUARDRAILS, InMemoryContainerFloor, InMemoryStorage, type MemoryConfig, type MessageStore, type ModelProvider, type NewRun, PRICE_TABLE, type PerDelegateBudget, type Price, type Release, RowCache, type RunInput, type RunRow, type RunStatus, type RunStore, type Runner, SCRATCHPAD_MAX_ENTRY_CHARS, SCRATCHPAD_MAX_KEYS, type ScratchpadEntry, type ScratchpadStore, type SecretResolver, SpanCollector, type StorageAdapter, type Swarm, type SwarmConfig, type SwarmContext, SwarmEngine, type SwarmEvent, type SwarmMessage, type SwarmSkill, type SwarmSpan, type SwarmTool, type SwarmToolContext, type TelemetryExporter, type ThreadSummary, type ToolSpec, VERSION, allowListModelProvider, buildSkillResolver, composeSystemPrompt, compositeTelemetry, containerFloor, customAuth, customTelemetry, defineAgent, defineSkill, defineSwarm, defineTool, ev, isEvent, resolveTelemetry };
|
|
1888
|
+
export { ASK_TOOL_NAME, type ActiveRun, type AgentDef, type AgentMemoryOverride, AgentMutationForbidden, type AgentRepo, type AgentSpec, type AgentSummary, type AgentVersion, type AgentVersionContent, type AgentVersionInfo, type AskField, type AskFieldOption, type AuthContext, type AuthProvider, type BoundSecrets, type BundleDef, type BundleDep, type BundleRepo, type BundleSpec, type BundleVersion, type BundleVersionContent, type BundleVersionInfo, type BundleWritableRepo, CapturingExporter, ClientToolError, type ClientToolSpec, type CompletionVerdict, type CompletionVerifier, type ConnectorGrant, type ContainerFloor, CostGovernor, DelegateBudgets, type EngineOpts, type EventStore, type FloorHolder, GUARDRAILS, InMemoryContainerFloor, InMemoryStorage, type MemoryConfig, type MessageStore, type ModelProvider, type ModelRef, type ModelTier, type NewRun, PRICE_TABLE, type PerDelegateBudget, type Price, type PriceFeed, type PricingOpts, type RateLimitConfig, type RateLimitDecision, type RateLimitState, type RateLimitStore, type Release, ReserveDenied, RowCache, type RuleAction, type RuleCondition, type RuleDef, type RuleLevel, type RuleSpec, type RunAgentOpts, type RunInput, type RunRow, type RunStateHandle, type RunStatus, type RunStore, type Runner, SCRATCHPAD_MAX_ENTRY_CHARS, SCRATCHPAD_MAX_KEYS, type ScratchpadEntry, type ScratchpadStore, type SecretResolver, type SlugUsage, SpanCollector, type StorageAdapter, type Swarm, type SwarmActor, type SwarmConfig, type SwarmContext, SwarmEngine, type SwarmEvent, type SwarmMessage, type SwarmSkill, type SwarmSpan, type SwarmTool, type SwarmToolContext, type TelemetryExporter, type ThreadStore, type ThreadSummary, type TierConfig, type TierEscalationContext, type TierResolution, type ToolSpec, type TurnUsage, type UsageBreakdown, type UsageCost, VERSION, type VersionedRepo, type WorkflowCompliance, type WorkflowDef, type WorkflowRef, type WorkflowRunState, type WorkflowSpec, type WorkflowStep, type WorkflowTransition, 
allowListModelProvider, assertActorMayMutateDefinition, buildSingleAgentSwarm, buildSkillResolver, composePolicyPrompt, composeSystemPrompt, compositeTelemetry, containerFloor, createInMemoryRateLimitStore, createRunState, customAuth, customTelemetry, decideFixedWindow, defineAgent, defineBundle, defineClientTool, defineRule, defineSkill, defineSwarm, defineTool, defineWorkflow, drainTrajectory, ev, isEvent, isTierSentinel, mergeBundle, priceUsage, rateConfig, resolveTelemetry, resolveTier, runAgent, runToTrajectory, sumBreakdowns, sumTurnUsage, tierModelId, toBundleContent };
|