@voyant-travel/workflows-orchestrator 0.107.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. package/LICENSE +201 -0
  2. package/NOTICE +52 -0
  3. package/README.md +76 -0
  4. package/dist/abort-registry.d.ts +6 -0
  5. package/dist/abort-registry.d.ts.map +1 -0
  6. package/dist/abort-registry.js +37 -0
  7. package/dist/concurrency.d.ts +31 -0
  8. package/dist/concurrency.d.ts.map +1 -0
  9. package/dist/concurrency.js +145 -0
  10. package/dist/drive.d.ts +67 -0
  11. package/dist/drive.d.ts.map +1 -0
  12. package/dist/drive.js +373 -0
  13. package/dist/driver-inmemory.d.ts +30 -0
  14. package/dist/driver-inmemory.d.ts.map +1 -0
  15. package/dist/driver-inmemory.js +394 -0
  16. package/dist/event-router.d.ts +51 -0
  17. package/dist/event-router.d.ts.map +1 -0
  18. package/dist/event-router.js +68 -0
  19. package/dist/http-step-handler.d.ts +25 -0
  20. package/dist/http-step-handler.d.ts.map +1 -0
  21. package/dist/http-step-handler.js +78 -0
  22. package/dist/in-memory-store.d.ts +5 -0
  23. package/dist/in-memory-store.d.ts.map +1 -0
  24. package/dist/in-memory-store.js +41 -0
  25. package/dist/index.d.ts +13 -0
  26. package/dist/index.d.ts.map +1 -0
  27. package/dist/index.js +22 -0
  28. package/dist/journal-helpers.d.ts +3 -0
  29. package/dist/journal-helpers.d.ts.map +1 -0
  30. package/dist/journal-helpers.js +9 -0
  31. package/dist/orchestrator.d.ts +116 -0
  32. package/dist/orchestrator.d.ts.map +1 -0
  33. package/dist/orchestrator.js +411 -0
  34. package/dist/resume-run.d.ts +40 -0
  35. package/dist/resume-run.d.ts.map +1 -0
  36. package/dist/resume-run.js +119 -0
  37. package/dist/schedule.d.ts +51 -0
  38. package/dist/schedule.d.ts.map +1 -0
  39. package/dist/schedule.js +243 -0
  40. package/dist/testing/driver-compliance.d.ts +58 -0
  41. package/dist/testing/driver-compliance.d.ts.map +1 -0
  42. package/dist/testing/driver-compliance.js +667 -0
  43. package/dist/types.d.ts +182 -0
  44. package/dist/types.d.ts.map +1 -0
  45. package/dist/types.js +4 -0
  46. package/package.json +51 -0
  47. package/src/__tests__/orchestrator-test-support.ts +18 -0
  48. package/src/abort-registry.ts +41 -0
  49. package/src/concurrency.ts +217 -0
  50. package/src/drive.ts +477 -0
  51. package/src/driver-inmemory.ts +511 -0
  52. package/src/event-router.ts +120 -0
  53. package/src/http-step-handler.ts +112 -0
  54. package/src/in-memory-store.ts +44 -0
  55. package/src/index.ts +73 -0
  56. package/src/journal-helpers.ts +11 -0
  57. package/src/orchestrator.ts +527 -0
  58. package/src/resume-run.ts +162 -0
  59. package/src/schedule.ts +310 -0
  60. package/src/testing/driver-compliance.ts +800 -0
  61. package/src/types.ts +201 -0
package/src/drive.ts ADDED
@@ -0,0 +1,477 @@
1
+ // The orchestrator's core loop.
2
+ //
3
+ // `driveUntilPaused` calls the tenant step handler repeatedly,
4
+ // merging each response into the run's journal, until the run is
5
+ // either terminal or parked on a waitpoint. It is deliberately free
6
+ // of persistence and transport — callers compose it with a
7
+ // `RunRecordStore` and a `StepHandler` (in-process or HTTP).
8
+ //
9
+ // See docs/runtime-protocol.md §2 + §5 for the wire semantics.
10
+
11
+ import { PROTOCOL_VERSION } from "@voyant-travel/workflows/protocol"
12
+ import type {
13
+ JournalSlice,
14
+ PendingWaitpoint,
15
+ RunRecord,
16
+ StepHandler,
17
+ WaitpointInjection,
18
+ WorkflowStepRequest,
19
+ WorkflowStepResponse,
20
+ } from "./types.js"
21
+
22
/** Hooks and limits accepted by `driveUntilPaused`. Only `handler` is mandatory. */
export interface DriveOptions {
  /** Tenant step handler. Awaited once per loop iteration with the next step request. */
  handler: StepHandler

  /**
   * Ceiling on the record's `invocationCount`. The loop stops once the
   * counter reaches this value; a run that is still "running" at that
   * point is failed with `max_invocations_exceeded`. 128 when omitted.
   */
  maxInvocations?: number

  /** Clock returning milliseconds since the epoch. Falls back to `Date.now`. */
  now?: () => number

  /**
   * Observer for each HTTP-200 handler response (logging / metrics).
   * It runs before the response is merged into the record, so
   * `runRecord` still shows the pre-merge state.
   */
  onStepResponse?: (args: {
    runRecord: Readonly<RunRecord>
    response: WorkflowStepResponse
  }) => void

  /**
   * Starts the child run behind a RUN waitpoint and resolves with the
   * child's record. If this hook is missing and the workflow parks on
   * a RUN waitpoint, the parent is failed with `child_runs_unsupported`.
   * What the child inherits from the parent (for example `tenantMeta`)
   * is decided by the hook itself; the drive loop copies nothing.
   */
  triggerChild?: (args: {
    parent: Readonly<RunRecord>
    waitpoint: Readonly<PendingWaitpoint>
  }) => Promise<RunRecord>

  /**
   * Awaited at the top of every iteration, after the terminal and
   * timeout checks and before `invocationCount` is incremented.
   * Typical uses are persisting mid-flight state or re-reading the
   * store to pick up an external change such as a concurrent cancel.
   *
   * Resolve `false` to stop the loop and hand the record back as it
   * stands; resolve `true` to proceed. The loop also stops if the hook
   * leaves the record in a terminal status.
   */
  beforeInvocation?: (rec: RunRecord) => Promise<boolean>

  /**
   * AbortSignal passed unchanged into every handler call. How an
   * abort surfaces (for example as a cancelled response) is up to the
   * handler; the loop itself never inspects the signal.
   */
  signal?: AbortSignal

  /**
   * Per-chunk stream observer, passed straight through to the handler
   * as `onStreamChunk`. When left undefined, chunks are only seen as
   * part of each invocation's response.
   */
  onStreamChunk?: (chunk: import("./types.js").StreamChunk) => void
}
69
+
70
/**
 * Drive a run forward until it reaches a terminal status or parks on a
 * waitpoint. `rec` is mutated in place and also returned, so callers
 * can persist it in one line:
 * `await store.save(await driveUntilPaused(rec, opts))`.
 *
 * Every orchestrator-side failure (compute-time budget, non-200 handler
 * result, `waiting` without waitpoints, missing or throwing
 * `triggerChild`, invocation cap) leaves the record in the same shape:
 * `status: "failed"`, a `RUNTIME_ERROR` in `error`, `completedAt`
 * stamped and `pendingWaitpoints` emptied, matching what terminal
 * tenant responses produce.
 *
 * Exceptions thrown by `opts.handler` or `opts.beforeInvocation` are
 * not caught here; they reject the returned promise.
 *
 * @param rec - Run record to advance; mutated in place.
 * @param opts - Handler plus optional hooks and limits.
 * @returns The same `rec` instance that was passed in.
 */
export async function driveUntilPaused(rec: RunRecord, opts: DriveOptions): Promise<RunRecord> {
  const maxInvocations = opts.maxInvocations ?? 128
  const now = opts.now ?? (() => Date.now())

  while (rec.invocationCount < maxInvocations) {
    if (isTerminal(rec.status)) break

    // Workflow-level timeout. This compares accumulated handler time,
    // not wall-clock time, so time spent parked on waitpoints does not
    // eat into the budget.
    if (rec.timeoutMs !== undefined && rec.timeoutMs > 0 && rec.computeTimeMs >= rec.timeoutMs) {
      markRunFailed(
        rec,
        "WORKFLOW_TIMEOUT",
        `workflow exceeded its ${rec.timeoutMs}ms compute-time budget (${rec.computeTimeMs}ms used)`,
        now,
      )
      break
    }

    if (opts.beforeInvocation) {
      const go = await opts.beforeInvocation(rec)
      if (!go) break
      // The hook may have observed an external terminal transition.
      if (isTerminal(rec.status)) break
    }

    rec.invocationCount += 1
    const invocationStartedAt = now()
    const req = buildStepRequest(rec)

    const out = await opts.handler(req, {
      signal: opts.signal,
      onStreamChunk: opts.onStreamChunk,
    })
    // Clamp at zero so a clock that steps backwards cannot refund budget.
    rec.computeTimeMs += Math.max(0, now() - invocationStartedAt)

    if (out.status !== 200) {
      markRunFailed(
        rec,
        "handler_error",
        "message" in out.body ? out.body.message : `handler returned HTTP ${out.status}`,
        now,
      )
      break
    }

    const response = out.body as WorkflowStepResponse
    opts.onStepResponse?.({ runRecord: rec, response })
    applyResponse(rec, response, now)

    if (response.status === "waiting") {
      // Waiting with nothing to wait on is a protocol error; fail
      // rather than park a run that nothing can ever resume.
      if (rec.pendingWaitpoints.length === 0) {
        markRunFailed(
          rec,
          "empty_waitpoint_list",
          "tenant returned status=waiting without any registered waitpoints",
          now,
        )
        break
      }

      // RUN waitpoints are resolved inline through the triggerChild
      // hook: run each child, record its result in the parent's
      // journal, drop the resolved waitpoint, then loop.
      const runWaitpoints = rec.pendingWaitpoints.filter((w) => w.kind === "RUN")
      if (runWaitpoints.length > 0) {
        if (!opts.triggerChild) {
          markRunFailed(
            rec,
            "child_runs_unsupported",
            "workflow used ctx.invoke but the driver has no triggerChild hook wired. " +
              "Use orchestrator.trigger() from @voyant-travel/workflows-orchestrator, which wires children automatically.",
            now,
          )
          break
        }

        const resolvedRunIds = new Set<string>()
        try {
          for (const wp of runWaitpoints) {
            const childResolution = await resolveChildRun(rec, wp, opts.triggerChild, now)
            if (childResolution.kind === "resolved") {
              rec.journal.waitpointsResolved[wp.clientWaitpointId] = childResolution.entry
              resolvedRunIds.add(wp.clientWaitpointId)
            }
            // "deferred": the waitpoint stays pending because the
            // child itself is parked.
          }
        } catch (err) {
          markRunFailed(
            rec,
            "child_run_unresolvable",
            err instanceof Error ? err.message : String(err),
            now,
          )
          break
        }

        // Keep non-RUN waitpoints and RUN waitpoints that were deferred.
        rec.pendingWaitpoints = rec.pendingWaitpoints.filter(
          (w) => w.kind !== "RUN" || !resolvedRunIds.has(w.clientWaitpointId),
        )
        if (rec.pendingWaitpoints.length === 0) {
          // Everything resolved inline: re-invoke with the new journal entries.
          rec.status = "running"
          continue
        }
        // Otherwise the run is still parked; fall through to the break below.
      }
    }

    if (rec.status !== "running") break
  }

  if (rec.invocationCount >= maxInvocations && rec.status === "running") {
    markRunFailed(
      rec,
      "max_invocations_exceeded",
      `orchestrator drove the run ${maxInvocations} times without reaching a terminal or waiting state`,
      now,
    )
  }
  return rec
}

/**
 * Put `rec` into the orchestrator-side failure shape: status "failed",
 * a RUNTIME_ERROR with the given code and message, `completedAt` taken
 * from `now()` (called exactly once), and no pending waitpoints.
 */
function markRunFailed(rec: RunRecord, code: string, message: string, now: () => number): void {
  rec.status = "failed"
  rec.error = {
    category: "RUNTIME_ERROR",
    code,
    message,
  }
  rec.completedAt = now()
  // A failed run must not keep advertising waitpoints that can never fire.
  rec.pendingWaitpoints = []
}
202
+
203
/**
 * Feed an external resolution (event, signal or manual token) into a
 * parked run.
 *
 * The injection is matched against the run's pending waitpoints. On a
 * match the resolution is written to `journal.waitpointsResolved`,
 * every pending waitpoint with that `clientWaitpointId` is dropped and
 * the run is switched to "running" so `driveUntilPaused` can pick it
 * up again. The record is left untouched when the run is not in
 * "waiting" status or when nothing matches.
 *
 * @param rec - Parked run record; mutated on success.
 * @param injection - The resolution to deliver.
 * @param now - Clock used for `resolvedAt`; defaults to `Date.now`.
 * @returns `{ ok: true }` on success, otherwise `{ ok: false, message }`.
 */
export function applyWaitpointInjection(
  rec: RunRecord,
  injection: WaitpointInjection,
  now: () => number = () => Date.now(),
): { ok: true } | { ok: false; message: string } {
  if (rec.status !== "waiting") {
    const message = `run ${rec.id} is not parked (status: ${rec.status})`
    return { ok: false, message }
  }

  const target = matchWaitpoint(rec.pendingWaitpoints, injection)
  if (target === undefined) {
    const message = `no pending waitpoint matches kind=${injection.kind}, key=${injectionKey(injection)}`
    return { ok: false, message }
  }

  const targetId = target.clientWaitpointId
  // Only event injections carry a synthetic event id.
  const liveEventId = injection.kind === "EVENT" ? `evt_live_${injection.eventType}` : undefined

  rec.journal.waitpointsResolved[targetId] = {
    kind: target.kind,
    resolvedAt: now(),
    payload: injection.payload,
    source: "live",
    matchedEventId: liveEventId,
  }
  rec.pendingWaitpoints = rec.pendingWaitpoints.filter(
    (pending) => pending.clientWaitpointId !== targetId,
  )
  rec.status = "running"

  return { ok: true }
}
237
+
238
+ // ---- Internals ----
239
+
240
/**
 * Assemble the step request sent to the tenant handler from the run
 * record's current state. The journal, input, tags and meta objects
 * are passed by reference, not copied.
 */
function buildStepRequest(rec: RunRecord): WorkflowStepRequest {
  const { number, attempt } = rec.runMeta

  const request: WorkflowStepRequest = {
    protocolVersion: PROTOCOL_VERSION,
    runId: rec.id,
    workflowId: rec.workflowId,
    workflowVersion: rec.workflowVersion,
    invocationCount: rec.invocationCount,
    input: rec.input,
    journal: rec.journal,
    environment: rec.environment,
    // No real deadline is computed here: the field is always sent as
    // the largest safe integer so the handler's contract is satisfied.
    deadline: Number.MAX_SAFE_INTEGER,
    tenantMeta: rec.tenantMeta,
    runMeta: {
      number,
      attempt,
      triggeredBy: rec.triggeredBy,
      tags: rec.tags,
      startedAt: rec.startedAt,
    },
  }
  return request
}
263
+
264
/**
 * Merge one tenant step response into the run record.
 *
 * Replaces the journal with a deep copy of the response's journal
 * (keeping the record's own metadata state), applies metadata
 * mutations not seen before, appends stream chunks per stream id, and
 * then moves the record to the status the response reports. Any
 * status other than the five terminal ones is treated as "waiting".
 *
 * @param rec - Run record to update in place.
 * @param response - Handler response for the invocation just made.
 * @param now - Clock used for `completedAt` and DATETIME wake times.
 */
function applyResponse(rec: RunRecord, response: WorkflowStepResponse, now: () => number): void {
  // Metadata state is owned by the record: the response's journal copy
  // does not carry the body's mutations (those arrive separately in
  // `metadataUpdates`), so carry the record's state across the swap.
  const carriedMetadata = rec.journal.metadataState

  // Deep-clone so the record's journal shares no references with the
  // response object.
  const nextJournal = structuredClone(response.journal) as JournalSlice
  nextJournal.metadataState = { ...carriedMetadata }
  rec.journal = nextJournal

  // `metadataUpdates` is treated as a cumulative list; only entries
  // past the already-applied count are new.
  const unseenMutations = response.metadataUpdates.slice(rec.metadataAppliedCount)
  applyMetadataUpdates(rec.journal.metadataState, unseenMutations)
  rec.metadataAppliedCount = response.metadataUpdates.length

  // Append each chunk (shallow copy) to the bucket for its stream.
  for (const chunk of response.streamChunks) {
    let bucket = rec.streams[chunk.streamId]
    if (bucket == null) {
      bucket = []
    }
    rec.streams[chunk.streamId] = bucket
    bucket.push({ ...chunk })
  }

  switch (response.status) {
    case "completed":
      rec.status = "completed"
      rec.output = response.output
      break

    case "failed":
      rec.status = "failed"
      rec.error = {
        category: response.error.category,
        code: response.error.code,
        message: response.error.message,
      }
      break

    case "cancelled":
      rec.status = "cancelled"
      break

    case "compensated":
    case "compensation_failed":
      rec.status = response.status
      if (response.error) {
        rec.error = {
          category: response.error.category,
          code: response.error.code,
          message: response.error.message,
        }
      }
      break

    default: {
      // Parked: record what the run is waiting on.
      rec.status = "waiting"
      const parkedAt = now()
      rec.pendingWaitpoints = response.waitpoints.map<PendingWaitpoint>((w) => {
        const meta = { ...w.meta }
        // Give DATETIME waitpoints an absolute wake time at park time
        // unless the tenant already supplied a numeric one.
        if (w.kind === "DATETIME" && typeof meta.wakeAt !== "number") {
          let sleepMs = 0
          if (w.timeoutMs != null) {
            sleepMs = w.timeoutMs
          } else if (typeof meta.durationMs === "number") {
            sleepMs = meta.durationMs
          }
          meta.wakeAt = parkedAt + sleepMs
        }
        return {
          clientWaitpointId: w.clientWaitpointId,
          kind: w.kind,
          meta,
          timeoutMs: w.timeoutMs,
        }
      })
      return
    }
  }

  // Shared tail for every terminal status.
  rec.completedAt = now()
  rec.pendingWaitpoints = []
}
352
+
353
/** True when `status` is one of the five end states a run cannot leave. */
function isTerminal(status: RunRecord["status"]): boolean {
  switch (status) {
    case "completed":
    case "failed":
    case "cancelled":
    case "compensated":
    case "compensation_failed":
      return true
    default:
      return false
  }
}
362
+
363
/**
 * Find the first pending waitpoint, in array order, that the injection
 * resolves: same kind, and the kind-specific key matches
 * (`meta.eventType` for EVENT, `meta.signalName` for SIGNAL,
 * `meta.tokenId` for MANUAL). Returns `undefined` when none match.
 */
function matchWaitpoint(
  pending: readonly PendingWaitpoint[],
  inj: WaitpointInjection,
): PendingWaitpoint | undefined {
  return pending.find((candidate) => {
    if (candidate.kind !== inj.kind) return false
    switch (inj.kind) {
      case "EVENT":
        return candidate.meta.eventType === inj.eventType
      case "SIGNAL":
        return candidate.meta.signalName === inj.name
      case "MANUAL":
        return candidate.meta.tokenId === inj.tokenId
      default:
        return false
    }
  })
}
375
+
376
/**
 * The kind-specific identifier of an injection, used in error
 * messages: the event type, the signal name, or otherwise the token id.
 */
function injectionKey(inj: WaitpointInjection): string {
  switch (inj.kind) {
    case "EVENT":
      return inj.eventType
    case "SIGNAL":
      return inj.name
    default:
      return inj.tokenId
  }
}
381
+
382
/**
 * Outcome of trying to settle one RUN waitpoint inline.
 * - "resolved": `entry` is ready to be written to the parent's journal.
 * - "deferred": nothing to write yet; the waitpoint stays pending.
 */
type ChildResolution =
  | {
      kind: "resolved"
      entry: import("@voyant-travel/workflows/protocol").WaitpointResolutionEntry
    }
  | {
      kind: "deferred"
    }
388
+
389
/**
 * Start the child run for a RUN waitpoint and translate its record
 * into a resolution for the parent.
 *
 * - `wp.meta.detach === true`: resolved immediately with no payload,
 *   whatever state the child is in.
 * - child "completed": resolved with the child's output as payload.
 * - child "waiting": deferred; the parent keeps the waitpoint pending.
 *   (Presumably the orchestrator resumes the parent once the child
 *   finishes; that wiring is not in this function.)
 * - any other child status: resolved with an error entry built from
 *   the child's error, or a generic CHILD_RUN_ENDED one.
 *
 * Rejections from `triggerChild` propagate to the caller.
 */
async function resolveChildRun(
  parent: RunRecord,
  wp: PendingWaitpoint,
  triggerChild: NonNullable<DriveOptions["triggerChild"]>,
  now: () => number,
): Promise<ChildResolution> {
  const child = await triggerChild({ parent, waitpoint: wp })
  const resolvedAt = now()

  const detached = wp.meta.detach === true
  if (detached || child.status === "completed") {
    return {
      kind: "resolved",
      entry: {
        kind: "RUN",
        resolvedAt,
        // Detached invocations never hand a result back to the parent.
        payload: detached ? undefined : child.output,
        source: "replay",
      },
    }
  }

  if (child.status === "waiting") {
    return { kind: "deferred" }
  }

  // The child did not complete: hand its failure to the parent as an
  // error resolution.
  const failure = child.error
  return {
    kind: "resolved",
    entry: {
      kind: "RUN",
      resolvedAt,
      source: "replay",
      error: {
        category:
          (failure?.category as "USER_ERROR" | "RUNTIME_ERROR" | undefined) ?? "USER_ERROR",
        code: failure?.code ?? "CHILD_RUN_ENDED",
        message: failure?.message ?? `child run ended with status ${child.status}`,
      },
    },
  }
}
445
+
446
/**
 * One metadata mutation as consumed by `applyMetadataUpdates`.
 * `target` is part of the shape but is not consulted when applying.
 */
interface MetadataMutation {
  op: "set" | "increment" | "append" | "remove"
  key: string
  value?: unknown
  target?: "self" | "parent" | "root"
}

/**
 * Apply metadata mutations to `state` in order, mutating it in place.
 *
 * - "set": store `value` under `key`.
 * - "increment": add `value` to the current number under `key`. A
 *   missing or non-numeric current value counts as 0; a missing or
 *   non-numeric `value` counts as a step of 1, so the stored counter
 *   always stays a number.
 * - "append": replace the array under `key` with a copy that has
 *   `value` appended. A missing or non-array current value counts as
 *   an empty array.
 * - "remove": delete `key`.
 *
 * Mutations whose `op` is none of the above are ignored.
 *
 * @param state - Metadata object to mutate.
 * @param updates - Mutations to apply, first to last.
 */
function applyMetadataUpdates(
  state: Record<string, unknown>,
  updates: readonly MetadataMutation[],
): void {
  for (const u of updates) {
    switch (u.op) {
      case "set":
        state[u.key] = u.value
        break
      case "increment": {
        const cur = typeof state[u.key] === "number" ? (state[u.key] as number) : 0
        // Check the runtime type instead of asserting it: `+` with a
        // string operand would concatenate and corrupt the counter.
        const delta = typeof u.value === "number" ? u.value : 1
        state[u.key] = cur + delta
        break
      }
      case "append": {
        const cur = Array.isArray(state[u.key]) ? (state[u.key] as unknown[]) : []
        // Build a new array so earlier snapshots are not mutated.
        state[u.key] = [...cur, u.value]
        break
      }
      case "remove":
        delete state[u.key]
        break
    }
  }
}