@tangle-network/agent-app 0.7.0 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,698 @@
1
+ /**
2
+ * Durable mission state — the guarded status machine for a multi-step agent run.
3
+ *
4
+ * A mission is a persisted plan (ordered steps), a cursor (count of completed
5
+ * steps), a cost ledger + budget, and a status machine. This module owns the
6
+ * legal transitions and the guarded mutation surface; it does NOT execute steps
7
+ * (the engine in `./engine` does) and it does NOT own persistence — products
8
+ * implement {@link MissionStorePort} over their own tables. Every state change
9
+ * appends a {@link MissionAuditEvent} so the run timeline is a single durable
10
+ * audit trail.
11
+ *
12
+ * Concurrency contract: a mission MUST be driven by a single serialized owner
13
+ * (a Durable Object, a Cloudflare Workflow, a queue consumer — one per
14
+ * mission). The service is the typed guard layer, not a serializer: every
15
+ * mutation re-reads the record and asks the store for a compare-and-set write
16
+ * guarded on the values it read. When the guard misses, the row changed under
17
+ * us and the caller gets `{ succeeded: false, conflict: true }` — never a
18
+ * silent clobber, never a stale overwrite.
19
+ */
20
+ type MissionStatus = 'scheduled' | 'running' | 'paused' | 'waiting_approval' | 'blocked' | 'succeeded' | 'failed' | 'aborted' | 'cancelled';
21
+ type MissionStepStatus = 'pending' | 'running' | 'waiting_approval' | 'done' | 'failed';
22
+ interface MissionStep {
23
+ id: string;
24
+ /** What the step should accomplish — an intent, never an implementation. */
25
+ intent: string;
26
+ /** Product-defined kind label. Labels intent for gating/UX; it never selects
27
+ * a different execution path. */
28
+ kind: string;
29
+ status: MissionStepStatus;
30
+ /** Count of genuine `* -> running` edges (retries inflate this; idempotent
31
+ * re-asserts do not). */
32
+ attempts: number;
33
+ /** One-line live status surfaced on the step row ("7/15 refs"). */
34
+ sublabel?: string;
35
+ /** Small pointer at the produced artifact (vault path, asset id) — never the
36
+ * full payload. */
37
+ resultRef?: string;
38
+ }
39
+ interface MissionCostLedger {
40
+ tokensIn: number;
41
+ tokensOut: number;
42
+ costUsd: number;
43
+ wallMs: number;
44
+ llmCalls: number;
45
+ }
46
+ /** The durable mission row, shape-normalized. Timestamps are epoch ms. */
47
+ interface MissionRecord {
48
+ id: string;
49
+ workspaceId: string;
50
+ status: MissionStatus;
51
+ /** Product-defined origin label ('chat', 'manual', 'cron', …). */
52
+ trigger: string;
53
+ summary: string | null;
54
+ plan: MissionStep[];
55
+ /** Count of durably-completed steps; the next step to run is `plan[cursor]`. */
56
+ cursor: number;
57
+ cost: MissionCostLedger | null;
58
+ budgetUsd: number | null;
59
+ spentUsd: number;
60
+ pauseReason: string | null;
61
+ /** The single owning engine's instance id, write-once (see `setEngineRef`). */
62
+ engineRef: string | null;
63
+ scheduledAt: number | null;
64
+ startedAt: number;
65
+ completedAt: number | null;
66
+ metadata: Record<string, unknown> | null;
67
+ }
68
+ /**
69
+ * Discriminated outcome for guarded operations. Callers MUST inspect
70
+ * `succeeded` before reading `value` — illegal transitions and missing rows
71
+ * surface here, never as a throw-and-swallow or a silent no-op. `conflict`
72
+ * distinguishes a lost guarded race (retryable — re-read and re-apply) from a
73
+ * logic rejection (illegal edge, missing step — deterministic, never retried).
74
+ */
75
+ type MissionOutcome<T> = {
76
+ succeeded: true;
77
+ value: T;
78
+ } | {
79
+ succeeded: false;
80
+ error: string;
81
+ conflict: boolean;
82
+ };
83
+ /** Fields a guarded write compares against the values the caller read. A SQL
84
+ * implementation compares JSON columns as serialized text
85
+ * (`coalesce(col, 'null') = JSON.stringify(expected)`), matching how the
86
+ * in-memory store compares. An absent field is unguarded. */
87
+ interface MissionUpdateGuard {
88
+ status?: MissionStatus;
89
+ cursor?: number;
90
+ plan?: MissionStep[];
91
+ cost?: MissionCostLedger | null;
92
+ metadata?: Record<string, unknown> | null;
93
+ /** Guard that no engine has bound yet (the write-once bind). */
94
+ engineRefIsNull?: true;
95
+ }
96
+ /** Fields a guarded write sets when the guard holds. `null` values are real
97
+ * writes (clear the column), not skips. */
98
+ interface MissionUpdatePatch {
99
+ status?: MissionStatus;
100
+ pauseReason?: string | null;
101
+ summary?: string;
102
+ completedAt?: number | null;
103
+ plan?: MissionStep[];
104
+ cursor?: number;
105
+ cost?: MissionCostLedger;
106
+ spentUsd?: number;
107
+ metadata?: Record<string, unknown>;
108
+ engineRef?: string;
109
+ }
110
+ /** One audit-trail row. Appended after every committed state change, so an
111
+ * event always denotes a real transition (no phantom rows on rejected or
112
+ * no-op calls). */
113
+ interface MissionAuditEvent {
114
+ missionId: string;
115
+ workspaceId: string;
116
+ level: 'info' | 'warn' | 'error';
117
+ /** Machine-readable transition name ('mission.created', 'mission.step.done',
118
+ * 'mission.cursor', 'mission.cost', 'mission.paused', …). */
119
+ step: string;
120
+ message: string;
121
+ metadata: Record<string, unknown>;
122
+ at: number;
123
+ }
124
+ /**
125
+ * Persistence seam — the product implements this over its own tables. The
126
+ * invariant the implementation MUST keep: `update` applies `patch` ONLY when
127
+ * every guard field still equals the stored value, and returns `null` when the
128
+ * guard misses. That null is how a concurrent write surfaces as a typed
129
+ * failure instead of a clobber.
130
+ */
131
+ interface MissionStorePort {
132
+ load(id: string): Promise<MissionRecord | null>;
133
+ insert(record: MissionRecord): Promise<MissionRecord>;
134
+ update(id: string, guard: MissionUpdateGuard, patch: MissionUpdatePatch): Promise<MissionRecord | null>;
135
+ appendEvent(event: MissionAuditEvent): Promise<void>;
136
+ }
137
+ /** Statuses a mission can never leave — the run is done. */
138
+ declare function isMissionTerminal(status: MissionStatus): boolean;
139
+ /** The cooperative kill switch: a stop request rides metadata so it survives
140
+ * any status and is honored by the engine before the next side effect. */
141
+ declare function isMissionStopRequested(mission: MissionRecord): boolean;
142
+ interface CreateMissionInput {
143
+ /** Explicit row id. Omit to use the service's id generator. Pass a
144
+ * DETERMINISTIC id (derived from the originating turn) when the caller may
145
+ * re-create the same mission under at-least-once delivery — the duplicate
146
+ * insert then trips the store's uniqueness instead of spawning a second run. */
147
+ id?: string;
148
+ workspaceId: string;
149
+ /** Becomes the mission summary. */
150
+ title: string;
151
+ /** Plan step ids MUST be unique: the owning workflow keys its durable step
152
+ * cache by step id, so a collision would silently replay the wrong result.
153
+ * A duplicate is rejected here (fail loud). */
154
+ plan: MissionStep[];
155
+ budgetUsd?: number | null;
156
+ /** Epoch ms. Present → the mission starts `scheduled` instead of `running`. */
157
+ scheduledAt?: number | null;
158
+ trigger: string;
159
+ /** Caller-defined context stamped onto the record (thread ids, source turn,
160
+ * model). Read back via `mission.metadata`; the engine only reads
161
+ * `stopRequested` from it. */
162
+ metadata?: Record<string, unknown> | null;
163
+ }
164
+ interface SetStepStatusPatch {
165
+ sublabel?: string;
166
+ resultRef?: string;
167
+ error?: string;
168
+ }
169
+ interface CompleteMissionInput {
170
+ ok: boolean;
171
+ summary?: string;
172
+ }
173
+ interface MissionService {
174
+ createMission(input: CreateMissionInput): Promise<MissionRecord>;
175
+ getMission(id: string): Promise<MissionRecord | null>;
176
+ /** Bind the executing engine's instance id, write-once from the single
177
+ * owner: re-asserting the same ref is a no-op; binding a DIFFERENT ref over
178
+ * an existing one is rejected so a second owner can never steal the run. */
179
+ setEngineRef(id: string, engineRef: string): Promise<MissionOutcome<MissionRecord>>;
180
+ /** Shallow-merge keys into metadata. Guarded on the metadata read, so racing
181
+ * merges surface as conflicts instead of silently dropping keys. */
182
+ mergeMetadata(id: string, patch: Record<string, unknown>): Promise<MissionOutcome<MissionRecord>>;
183
+ /** Mutate one plan step's status (+ optional sublabel/resultRef) and append a
184
+ * transition event. Rejects unknown steps and illegal step edges. Does NOT
185
+ * move the cursor — call `advanceCursor` for that. */
186
+ setStepStatus(id: string, stepId: string, status: MissionStepStatus, patch?: SetStepStatusPatch): Promise<MissionOutcome<MissionRecord>>;
187
+ /** Move the done-count cursor forward by one. Rejects advancing past the end
188
+ * of the plan so the caller learns the mission has no further work. */
189
+ advanceCursor(id: string): Promise<MissionOutcome<MissionRecord>>;
190
+ /** Increment spentUsd and merge a partial ledger into the cumulative ledger.
191
+ * `deltaUsd` is the marginal spend; `ledgerDelta` carries the token/wall/
192
+ * llm-call breakdown for the same unit of work. */
193
+ addCost(id: string, deltaUsd: number, ledgerDelta?: Partial<MissionCostLedger>): Promise<MissionOutcome<MissionRecord>>;
194
+ pause(id: string, reason: string): Promise<MissionOutcome<MissionRecord>>;
195
+ resume(id: string): Promise<MissionOutcome<MissionRecord>>;
196
+ abort(id: string): Promise<MissionOutcome<MissionRecord>>;
197
+ /** Flip one step and the whole mission to waiting_approval together. The
198
+ * mission transition is validated FIRST so an illegal source is rejected
199
+ * without mutating the step — no half-applied state. */
200
+ markWaitingApproval(id: string, stepId: string): Promise<MissionOutcome<MissionRecord>>;
201
+ complete(id: string, input: CompleteMissionInput): Promise<MissionOutcome<MissionRecord>>;
202
+ }
203
+ interface MissionServiceOptions {
204
+ store: MissionStorePort;
205
+ /** Injectable clock (epoch ms). Default `Date.now`. */
206
+ now?: () => number;
207
+ /** Row-id generator when `CreateMissionInput.id` is omitted.
208
+ * Default `crypto.randomUUID`. */
209
+ generateId?: () => string;
210
+ }
211
+ declare function createMissionService(options: MissionServiceOptions): MissionService;
212
+ interface InMemoryMissionStore extends MissionStorePort {
213
+ /** The full audit trail, append order. */
214
+ events(): MissionAuditEvent[];
215
+ /** Unguarded direct write — simulates a concurrent owner or a crash-shaped
216
+ * state in tests. Production writers go through the guarded `update`. */
217
+ put(record: MissionRecord): void;
218
+ }
219
+ /**
220
+ * In-memory {@link MissionStorePort} — the portable backend for tests and
221
+ * sandbox/eval shells. Guard comparison uses JSON serialization of the read
222
+ * value, the same contract a SQL implementation honors by comparing stored
223
+ * JSON text. Records are deep-copied on every boundary so callers can never
224
+ * mutate stored state around the guards.
225
+ */
226
+ declare function createInMemoryMissionStore(): InMemoryMissionStore;
227
+
228
+ /**
229
+ * Shared mission realtime contract — the single source of truth for the typed
230
+ * events the engine BROADCASTS over a live channel and the client REDUCES into
231
+ * live mission state. Server emit and client reduce import the same module so
232
+ * the wire shape can never drift between the two ends.
233
+ *
234
+ * This module is CLIENT-SAFE: no server imports, no platform globals, no DB
235
+ * types. It is pure data + a pure reducer. Keep it that way — a server-only
236
+ * import here would leak into the browser bundle.
237
+ *
238
+ * Sink contract — best-effort UI notification, never load-bearing:
239
+ * - fire-and-forget: the engine never awaits `emit` and a sink failure can
240
+ * never fail a step (the engine wraps every emit). The durable audit-event
241
+ * row is the authoritative timeline; the socket is a convenience.
242
+ * - replay-safe: the engine re-emits on a resume/replay. The reducer below is
243
+ * idempotent + order-tolerant, so a re-sent or duplicated event converges.
244
+ * The sink itself does no dedupe.
245
+ */
246
+ interface MissionEventSink {
247
+ emit(event: MissionStreamEvent): void;
248
+ }
249
+ /** A sink that drops every event — the engine default when no live channel is
250
+ * wired (and the unit-test default). */
251
+ declare const noopEventSink: MissionEventSink;
252
+ /** Workspace-wide channel id missions broadcast on (alongside any per-thread
253
+ * channel the product keys). */
254
+ declare const MISSION_CONTROL_CHANNEL_ID = "missions";
255
+ /** One plan step as it appears on the wire — only what a live UI needs
256
+ * (`sublabel` updates travel separately via `step.updated` so the snapshot
257
+ * stays small). */
258
+ interface MissionStreamStep {
259
+ id: string;
260
+ intent: string;
261
+ kind: string;
262
+ status: MissionStreamStepStatus;
263
+ }
264
+ type MissionStreamStepStatus = 'pending' | 'running' | 'done' | 'failed' | 'waiting_approval';
265
+ type MissionStreamStatus = 'scheduled' | 'running' | 'paused' | 'waiting_approval' | 'succeeded' | 'aborted' | 'cancelled' | 'failed';
266
+ /**
267
+ * Discriminated union of every live mission event. Every member carries
268
+ * `missionId` (one channel may multiplex several missions) and a `type` the
269
+ * client switches on. `at` is the emitter's wall-clock ms — used only for
270
+ * display ordering; the reducer never trusts it for causality.
271
+ */
272
+ type MissionStreamEvent = {
273
+ type: 'mission.created';
274
+ missionId: string;
275
+ at: number;
276
+ title: string;
277
+ status?: MissionStreamStatus;
278
+ steps: MissionStreamStep[];
279
+ budgetUsd?: number | null;
280
+ } | {
281
+ type: 'mission.started';
282
+ missionId: string;
283
+ at: number;
284
+ } | {
285
+ type: 'step.started';
286
+ missionId: string;
287
+ at: number;
288
+ stepId: string;
289
+ } | {
290
+ type: 'step.updated';
291
+ missionId: string;
292
+ at: number;
293
+ stepId: string;
294
+ sublabel: string;
295
+ } | {
296
+ type: 'step.completed';
297
+ missionId: string;
298
+ at: number;
299
+ stepId: string;
300
+ ok: boolean;
301
+ reason?: string;
302
+ durationMs?: number;
303
+ } | {
304
+ type: 'cost.updated';
305
+ missionId: string;
306
+ at: number;
307
+ spentUsd: number;
308
+ capUsd?: number | null;
309
+ } | {
310
+ type: 'mission.paused';
311
+ missionId: string;
312
+ at: number;
313
+ reason?: string;
314
+ } | {
315
+ type: 'mission.waiting_approval';
316
+ missionId: string;
317
+ at: number;
318
+ reason?: string;
319
+ } | {
320
+ type: 'mission.resumed';
321
+ missionId: string;
322
+ at: number;
323
+ } | {
324
+ type: 'mission.plan.updated';
325
+ missionId: string;
326
+ at: number;
327
+ title: string;
328
+ steps: MissionStreamStep[];
329
+ budgetUsd?: number | null;
330
+ } | {
331
+ type: 'mission.completed';
332
+ missionId: string;
333
+ at: number;
334
+ ok: boolean;
335
+ status?: Extract<MissionStreamStatus, 'succeeded' | 'failed' | 'aborted' | 'cancelled'>;
336
+ summary?: string;
337
+ };
338
+ /**
339
+ * Reconstruct the flat MissionStreamEvent from a broadcast envelope of shape
340
+ * `{ type, data: { ...missionFields } }` (transports may also stamp routing
341
+ * fields like workspaceId/threadId into `data`). The envelope `type` is the
342
+ * AUTHORITATIVE discriminant set by the server, so it is spread LAST — a
343
+ * payload that happens to carry a top-level `type` inside `data` cannot shadow
344
+ * it and mis-render as a mission event. Non-mission envelopes and malformed
345
+ * payloads return null and are simply skipped, so one channel can carry both
346
+ * streams.
347
+ */
348
+ declare function parseSessionStreamEnvelope(raw: unknown): MissionStreamEvent | null;
349
+ /** Narrow an arbitrary channel payload to a MissionStreamEvent. Returns null
350
+ * for non-mission events and anything malformed — the reducer skips those. */
351
+ declare function asMissionStreamEvent(value: unknown): MissionStreamEvent | null;
352
+ /** Live per-step view the reducer maintains. `status` only ever moves FORWARD
353
+ * (see STEP_RANK, an implementation detail not exported from this module) so a duplicate/out-of-order event can never regress a step
354
+ * from done back to running. */
355
+ interface MissionStepState {
356
+ id: string;
357
+ intent: string;
358
+ kind: string;
359
+ status: MissionStreamStepStatus;
360
+ sublabel?: string;
361
+ reason?: string;
362
+ durationMs?: number;
363
+ }
364
+ /** Live per-mission view the reducer folds events into. */
365
+ interface MissionState {
366
+ missionId: string;
367
+ title?: string;
368
+ status: MissionStreamStatus;
369
+ steps: MissionStepState[];
370
+ spentUsd: number;
371
+ capUsd?: number | null;
372
+ pauseReason?: string;
373
+ summary?: string;
374
+ /** The largest `at` folded so far — purely for display; never gates folding. */
375
+ lastEventAt: number;
376
+ /** The largest pause/resume control `at` folded — lets a newer resume beat an
377
+ * older pause that arrives late. */
378
+ lastControlAt?: number;
379
+ }
380
+ /**
381
+ * Fold one event into one mission's state. PURE: returns a new state, mutates
382
+ * nothing. Idempotent + order-tolerant — every status move is clamped through
383
+ * the reducer's monotonic status ranks, so duplicates and out-of-order delivery converge
384
+ * to the same terminal state regardless of arrival order.
385
+ */
386
+ declare function applyMissionEvent(prev: MissionState | undefined, event: MissionStreamEvent): MissionState;
387
+ /**
388
+ * Merge a loader SEED into the live state for one mission, advancing through
389
+ * the SAME monotonic clamps the event reducer uses. The durable mission row is
390
+ * the authoritative converged state: while the live channel is down the row
391
+ * advances but the frozen live state does not, and nothing re-fires the gap to
392
+ * a reconnecting client. Folding the seed THROUGH the clamps backfills that gap
393
+ * on reconnect while never regressing a more-advanced live value:
394
+ * - a stale seed for a more-advanced live mission is a no-op,
395
+ * - an advanced seed after an outage fills the gap (status/steps/spend move
396
+ * forward to the row's converged state).
397
+ * `live === undefined` (mission unknown to the client) just adopts the seed.
398
+ */
399
+ declare function mergeMissionState(live: MissionState | undefined, seed: MissionState): MissionState;
400
+ /**
401
+ * Fold a whole event sequence into a Map<missionId, MissionState>. PURE and
402
+ * order-tolerant: feeding the same events in any order (with duplicates)
403
+ * converges to the same map. `seed` lets a reload start from loader-rehydrated
404
+ * state before live events arrive.
405
+ */
406
+ declare function reduceMissionEvents(events: MissionStreamEvent[], seed?: Map<string, MissionState>): Map<string, MissionState>;
407
+
408
+ /**
409
+ * Mission execution engine — drives one mission's plan to completion under a
410
+ * SINGLE serialized owner (a Cloudflare Workflow, a Durable Object alarm, a
411
+ * queue consumer — one per mission). The owner wraps each `runStep` call in its
412
+ * durable-step primitive (e.g. Workflows `step.do(step.id, …)`) so a completed
413
+ * step's result is persisted and replayed instead of re-run after a mid-run
414
+ * restart. This module holds the logic that must be correct independent of any
415
+ * runtime, so it is injectable and unit-testable with the dispatch mocked.
416
+ *
417
+ * Idempotency is layered, belt-and-suspenders:
418
+ * 1. The owner's durable-step cache replays a completed step's result.
419
+ * 2. `runStep` re-reads the mission first; a step already `done` (with a
420
+ * resultRef) returns the cached pointer WITHOUT re-dispatching — this
421
+ * closes the at-least-once window where a callback re-runs after the
422
+ * side effect committed but before the owner durably recorded it.
423
+ * 3. The cursor advances only after a step is `done`, so a fresh run resumes
424
+ * from `mission.cursor` and never re-touches earlier steps.
425
+ *
426
+ * Seams (the product supplies domain; the engine owns mechanism):
427
+ * - {@link SandboxDispatch} — how a step actually executes.
428
+ * - {@link MissionEngineOptions.estimateStepCostUsd} — per-step USD estimate.
429
+ * - {@link MissionGateOptions.classifyStep} — which steps need approval.
430
+ * - {@link MissionApprovalsPort} — where gate proposals live and how they
431
+ * resolve.
432
+ */
433
+
434
+ /**
435
+ * A side-effecting unit of per-step work. The owner supplies the real
436
+ * implementation (e.g. a detached sandbox-session dispatcher); tests supply a
437
+ * mock. MUST return a SMALL pointer — large output is written to the product's
438
+ * storage and only the resultRef is returned.
439
+ */
440
+ type SandboxDispatch = (input: SandboxDispatchInput) => Promise<SandboxDispatchResult>;
441
+ interface SandboxDispatchInput {
442
+ mission: MissionRecord;
443
+ step: MissionStep;
444
+ stepIndex: number;
445
+ }
446
+ interface SandboxDispatchDoneResult {
447
+ kind?: 'done';
448
+ /** Small pointer at the produced artifact/output (vault path, asset id, exec
449
+ * digest). Stored on the step as `resultRef`; never the full payload. */
450
+ resultRef: string;
451
+ /** Optional one-line status surfaced on the step row. */
452
+ sublabel?: string;
453
+ /** Optional marginal spend for this step. `ledgerDelta` carries platform-
454
+ * reported truth (real token counts, wall time); `deltaUsd` is set ONLY when
455
+ * a provider-authored price is known. Omit fields rather than synthesizing
456
+ * zeros — the engine substitutes its injected per-step estimate for a
457
+ * missing deltaUsd and records that estimate in the ledger. */
458
+ cost?: {
459
+ deltaUsd?: number;
460
+ ledgerDelta?: Partial<MissionCostLedger>;
461
+ };
462
+ }
463
+ /** The dispatched step's detached session is still executing on the platform.
464
+ * The owner sleeps `pollAfterMs` and re-invokes the step; the dispatch is
465
+ * idempotent on the session ref, so the re-invocation settles the same session
466
+ * rather than starting a second run. */
467
+ interface SandboxDispatchInProgressResult {
468
+ kind: 'in_progress';
469
+ sessionRef: string;
470
+ pollAfterMs: number;
471
+ sublabel?: string;
472
+ }
473
+ type SandboxDispatchResult = SandboxDispatchDoneResult | SandboxDispatchInProgressResult;
474
+ /** Outcome of running a single step. `cached` distinguishes a replay/skip
475
+ * (step was already done) from a fresh execution so the engine and its tests
476
+ * can assert the dispatch was NOT re-invoked. */
477
+ type StepOutcome = {
478
+ kind: 'done';
479
+ resultRef: string;
480
+ cached: boolean;
481
+ } | {
482
+ kind: 'in_progress';
483
+ sessionRef: string;
484
+ pollAfterMs: number;
485
+ sublabel?: string;
486
+ } | {
487
+ kind: 'skipped-cursor';
488
+ reason: string;
489
+ } | {
490
+ kind: 'failed';
491
+ error: string;
492
+ fatal: boolean;
493
+ };
494
+ /** Outcome of running the whole plan from the cursor to the end. */
495
+ type PlanOutcome = {
496
+ kind: 'completed';
497
+ summary: string;
498
+ } | {
499
+ kind: 'in_progress';
500
+ stepId: string;
501
+ sessionRef: string;
502
+ pollAfterMs: number;
503
+ sublabel?: string;
504
+ } | {
505
+ kind: 'failed';
506
+ failedStepId: string;
507
+ error: string;
508
+ } | {
509
+ kind: 'halted';
510
+ status: MissionStatus;
511
+ reason?: string | null;
512
+ } | {
513
+ kind: 'terminal';
514
+ status: MissionStatus;
515
+ } | {
516
+ kind: 'not-found';
517
+ };
518
+ interface MissionPlanRunOptions {
519
+ /** Pre-step veto (kill switch, schedule window). A non-null return pauses the
520
+ * mission with that reason before the step's side effect starts. */
521
+ beforeStep?: (mission: MissionRecord, step: MissionStep) => Promise<string | null>;
522
+ }
523
+ /** Thrown to make the owner's durable-step wrapper retry. The single-owner
524
+ * invariant makes a genuine concurrent change rare (it means another writer
525
+ * touched the row), so retrying — rather than corrupting state by forcing a
526
+ * stale write — is the correct response. Distinct from a task failure, which
527
+ * is recorded on the step. */
528
+ declare class MissionConcurrencyError extends Error {
529
+ constructor(message: string);
530
+ }
531
+ /** Thrown by a {@link SandboxDispatch} for a TRANSIENT failure (platform blip,
532
+ * exec-time network fault) that should be re-attempted. `runStep` RE-THROWS it
533
+ * so the owner engages its bounded retry+backoff; the step is left `running`
534
+ * and the re-dispatch is made idempotent by the cached-done guard. A
535
+ * deterministic failure must be a plain Error instead — that is recorded as a
536
+ * fatal `failed` step and is never retried (no money-burning loop on a
537
+ * deterministic error). */
538
+ declare class RetryableStepError extends Error {
539
+ constructor(message: string);
540
+ }
541
+ /** Resolution states a gate proposal can be in. `approved`/`executed` unblock
542
+ * the gated step; everything else keeps the mission parked. */
543
+ type MissionProposalResolution = 'pending' | 'approved' | 'rejected' | 'executed' | 'ignored';
544
+ type MissionGateKind = 'step' | 'budget' | 'volume';
545
+ /** Product classification of one step. Returned by
546
+ * {@link MissionGateOptions.classifyStep}; the matching rules (regexes, intent
547
+ * vocabularies, path allowlists) are product domain and never live here. */
548
+ interface StepGateClassification {
549
+ /** Product approval-type label persisted on the proposal ('generate',
550
+ * 'integration_invoke', …). */
551
+ type: string;
552
+ /** Counted against the per-mission external-action volume cap. */
553
+ externalAction?: boolean;
554
+ estCostUsd?: number | null;
555
+ }
556
+ /** A gate proposal the engine asks the product to persist. The id is
557
+ * deterministic per (gate, mission, step) — see the `*ProposalId` helpers —
558
+ * so a replay re-finds the same proposal instead of duplicating it. The
559
+ * product composes its own title/description from the structured fields. */
560
+ interface MissionGateProposal {
561
+ id: string;
562
+ missionId: string;
563
+ stepId: string;
564
+ gate: MissionGateKind;
565
+ mission: MissionRecord;
566
+ step: MissionStep;
567
+ /** Present for `gate: 'step'` — the classification that triggered the gate. */
568
+ classification?: StepGateClassification;
569
+ /** Present for `gate: 'budget'`. */
570
+ budget?: {
571
+ spentUsd: number;
572
+ budgetUsd: number;
573
+ estimatedCostUsd: number;
574
+ };
575
+ /** Present for `gate: 'volume'`. */
576
+ volume?: {
577
+ externalActionCount: number;
578
+ cap: number;
579
+ };
580
+ }
581
+ /** Approval persistence seam — the product implements this over its own
582
+ * proposal table and resolution flow. */
583
+ interface MissionApprovalsPort {
584
+ /** Resolution of the proposal with this id, or null when none exists. */
585
+ findResolution(proposalId: string): Promise<MissionProposalResolution | null>;
586
+ /** Persist a new gate proposal (id is deterministic; called at most once per
587
+ * (gate, mission, step) absent a resolution). */
588
+ createProposal(proposal: MissionGateProposal): Promise<void>;
589
+ /** Count of this mission's `gate: 'step'` proposals whose classification was
590
+ * `externalAction: true` — the tally compared against the volume cap. */
591
+ countExternalActionProposals(missionId: string): Promise<number>;
592
+ }
593
+ interface MissionGateOptions {
594
+ approvals: MissionApprovalsPort;
595
+ /** Which steps need human approval, and as what. Return null for an ungated
596
+ * step. The rules are product domain (intent regexes, kind tables). */
597
+ classifyStep: (step: MissionStep) => StepGateClassification | null;
598
+ /** Max external-action approvals per mission before an approved override is
599
+ * required to request another. Default 5. */
600
+ externalActionCap?: number;
601
+ }
602
+ interface MissionEngineOptions {
603
+ service: MissionService;
604
+ /** Per-step USD estimate. Load-bearing twice: the budget gate parks on it
605
+ * BEFORE a step runs, and the engine records it as the step's spend when the
606
+ * dispatch reports no provider-authored price — using one estimator keeps
607
+ * spend and gate consistent. */
608
+ estimateStepCostUsd: (step: MissionStep) => number;
609
+ /** Best-effort live notifier. Fired AFTER each guarded write commits, so a
610
+ * broadcast always reflects persisted state; re-fired on idempotent replays
611
+ * so a reconnecting client converges. Never awaited; a throwing sink can
612
+ * never fail a step. Default: drop everything. */
613
+ sink?: MissionEventSink;
614
+ /** Approval gating. Omitted → no classification/volume gates, and a budget
615
+ * overrun pauses the mission (fail closed) instead of parking it
616
+ * waiting_approval behind an override proposal. */
617
+ gates?: MissionGateOptions;
618
+ /** Step kinds whose failure does NOT abort the whole mission — enrichment
619
+ * steps the plan can complete without. Every other kind is fatal-on-failure.
620
+ * Default `['optional', 'best-effort']`. */
621
+ nonFatalStepKinds?: readonly string[];
622
+ }
623
+ interface MissionEngine {
624
+ /** Run exactly one plan step. Idempotent: re-invoking for a step already
625
+ * `done` returns the cached pointer without re-dispatching. A lost guarded
626
+ * race throws {@link MissionConcurrencyError} so the owner's durable-step
627
+ * wrapper retries instead of writing a stale value. */
628
+ runStep(missionId: string, stepId: string, dispatch: SandboxDispatch): Promise<StepOutcome>;
629
+ /** Walk the plan from the durable cursor to the end, re-reading the mission
630
+ * between steps so a pause/stop control that lands while a step is running
631
+ * is honored before the next side effect. `runStep` is the owner's boundary:
632
+ * in production `(step) => durableStep.do(step.id, () => engine.runStep(…))`;
633
+ * in tests `engine.runStep` directly. */
634
+ runPlan(missionId: string, runStep: (step: MissionStep, stepIndex: number) => Promise<StepOutcome>, options?: MissionPlanRunOptions): Promise<PlanOutcome>;
635
+ /** Record spend durable-first, live second: the guarded ledger write commits,
636
+ * then the sink sees the new total. A guarded failure returns unchanged. */
637
+ recordCost(missionId: string, deltaUsd: number, ledgerDelta?: Partial<MissionCostLedger>): Promise<MissionOutcome<MissionRecord>>;
638
+ /** Pause durable-first, live second (the paused event fires only on a real
639
+ * edge, not an idempotent re-pause). */
640
+ pauseMission(missionId: string, reason: string): Promise<MissionOutcome<MissionRecord>>;
641
+ }
642
+ /** Deterministic proposal id for a step-classification gate. */
643
+ declare function stepGateProposalId(missionId: string, stepId: string): string;
644
+ /** Deterministic proposal id for a budget-overrun override. */
645
+ declare function budgetGateProposalId(missionId: string, stepId: string): string;
646
+ /** Deterministic proposal id for an external-action volume-cap override. */
647
+ declare function volumeGateProposalId(missionId: string, stepId: string): string;
648
+ declare function createMissionEngine(options: MissionEngineOptions): MissionEngine;
649
+
650
+ /**
651
+ * Parsing for the agent-authored `:::mission` block — the bridge from a chat
652
+ * prompt contract to the engine's MissionStep[] shape. The block format:
653
+ *
654
+ * :::mission
655
+ * title: <mission title>
656
+ * <id>: <kind> | <intent>
657
+ * :::
658
+ *
659
+ * The allowed kind vocabulary is a PARAMETER — products pass their own list to
660
+ * match their prompt directive; {@link DEFAULT_MISSION_STEP_KINDS} is the
661
+ * default. Kinds label intent for gating and UX; they never select a different
662
+ * execution path.
663
+ */
664
+
665
+ /** Default step-kind vocabulary. `best-effort` is one of the engine's default
666
+ * non-fatal kinds (a failure does not abort the mission); the rest are
667
+ * fatal-on-failure agent sub-tasks. */
668
+ declare const DEFAULT_MISSION_STEP_KINDS: readonly string[];
669
+ interface ParsedMissionStep {
670
+ id: string;
671
+ kind: string;
672
+ intent: string;
673
+ }
674
+ interface ParsedMission {
675
+ title: string;
676
+ steps: ParsedMissionStep[];
677
+ }
678
+ interface ParseMissionBlocksOptions {
679
+ /** Allowed step kinds (lowercase). Default {@link DEFAULT_MISSION_STEP_KINDS}. */
680
+ kinds?: readonly string[];
681
+ }
682
+ /**
683
+ * Parse every well-formed `:::mission` block. A block without a title or
684
+ * without at least one valid step yields nothing (it is malformed — never
685
+ * guess a plan from loose prose). Unknown kinds and malformed step lines are
686
+ * dropped; an empty result lets the caller skip the block rather than start an
687
+ * empty mission.
688
+ */
689
+ declare function parseMissionBlocks(fullContent: string, options?: ParseMissionBlocksOptions): ParsedMission[];
690
+ /**
691
+ * Materialize parsed steps into the engine's MissionStep[] shape. Rejects a
692
+ * duplicate step id (fail loud — the owner keys its durable step cache by
693
+ * step id and `createMission` rejects duplicates anyway; catching it here
694
+ * gives a clearer diagnostic). Every step starts `pending` with zero attempts.
695
+ */
696
+ declare function buildAgentMissionPlan(steps: ParsedMissionStep[]): MissionStep[];
697
+
698
+ export { type CompleteMissionInput, type CreateMissionInput, DEFAULT_MISSION_STEP_KINDS, type InMemoryMissionStore, MISSION_CONTROL_CHANNEL_ID, type MissionApprovalsPort, type MissionAuditEvent, MissionConcurrencyError, type MissionCostLedger, type MissionEngine, type MissionEngineOptions, type MissionEventSink, type MissionGateKind, type MissionGateOptions, type MissionGateProposal, type MissionOutcome, type MissionPlanRunOptions, type MissionProposalResolution, type MissionRecord, type MissionService, type MissionServiceOptions, type MissionState, type MissionStatus, type MissionStep, type MissionStepState, type MissionStepStatus, type MissionStorePort, type MissionStreamEvent, type MissionStreamStatus, type MissionStreamStep, type MissionStreamStepStatus, type MissionUpdateGuard, type MissionUpdatePatch, type ParseMissionBlocksOptions, type ParsedMission, type ParsedMissionStep, type PlanOutcome, RetryableStepError, type SandboxDispatch, type SandboxDispatchDoneResult, type SandboxDispatchInProgressResult, type SandboxDispatchInput, type SandboxDispatchResult, type SetStepStatusPatch, type StepGateClassification, type StepOutcome, applyMissionEvent, asMissionStreamEvent, budgetGateProposalId, buildAgentMissionPlan, createInMemoryMissionStore, createMissionEngine, createMissionService, isMissionStopRequested, isMissionTerminal, mergeMissionState, noopEventSink, parseMissionBlocks, parseSessionStreamEnvelope, reduceMissionEvents, stepGateProposalId, volumeGateProposalId };