duron 0.2.1 → 0.3.0-beta.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. package/dist/action-job.d.ts +2 -0
  2. package/dist/action-job.d.ts.map +1 -1
  3. package/dist/action-job.js +20 -1
  4. package/dist/action-manager.d.ts +2 -0
  5. package/dist/action-manager.d.ts.map +1 -1
  6. package/dist/action-manager.js +3 -0
  7. package/dist/action.d.ts +7 -0
  8. package/dist/action.d.ts.map +1 -1
  9. package/dist/action.js +1 -0
  10. package/dist/adapters/adapter.d.ts +10 -2
  11. package/dist/adapters/adapter.d.ts.map +1 -1
  12. package/dist/adapters/adapter.js +59 -1
  13. package/dist/adapters/postgres/base.d.ts +9 -4
  14. package/dist/adapters/postgres/base.d.ts.map +1 -1
  15. package/dist/adapters/postgres/base.js +269 -19
  16. package/dist/adapters/postgres/schema.d.ts +249 -105
  17. package/dist/adapters/postgres/schema.d.ts.map +1 -1
  18. package/dist/adapters/postgres/schema.default.d.ts +249 -106
  19. package/dist/adapters/postgres/schema.default.d.ts.map +1 -1
  20. package/dist/adapters/postgres/schema.default.js +2 -2
  21. package/dist/adapters/postgres/schema.js +29 -1
  22. package/dist/adapters/schemas.d.ts +140 -7
  23. package/dist/adapters/schemas.d.ts.map +1 -1
  24. package/dist/adapters/schemas.js +52 -4
  25. package/dist/client.d.ts +8 -1
  26. package/dist/client.d.ts.map +1 -1
  27. package/dist/client.js +29 -1
  28. package/dist/errors.d.ts +6 -0
  29. package/dist/errors.d.ts.map +1 -1
  30. package/dist/errors.js +16 -1
  31. package/dist/index.d.ts +3 -1
  32. package/dist/index.d.ts.map +1 -1
  33. package/dist/index.js +3 -1
  34. package/dist/server.d.ts +220 -16
  35. package/dist/server.d.ts.map +1 -1
  36. package/dist/server.js +123 -8
  37. package/dist/step-manager.d.ts +8 -2
  38. package/dist/step-manager.d.ts.map +1 -1
  39. package/dist/step-manager.js +138 -15
  40. package/dist/telemetry/adapter.d.ts +85 -0
  41. package/dist/telemetry/adapter.d.ts.map +1 -0
  42. package/dist/telemetry/adapter.js +128 -0
  43. package/dist/telemetry/index.d.ts +5 -0
  44. package/dist/telemetry/index.d.ts.map +1 -0
  45. package/dist/telemetry/index.js +4 -0
  46. package/dist/telemetry/local.d.ts +21 -0
  47. package/dist/telemetry/local.d.ts.map +1 -0
  48. package/dist/telemetry/local.js +180 -0
  49. package/dist/telemetry/noop.d.ts +16 -0
  50. package/dist/telemetry/noop.d.ts.map +1 -0
  51. package/dist/telemetry/noop.js +39 -0
  52. package/dist/telemetry/opentelemetry.d.ts +24 -0
  53. package/dist/telemetry/opentelemetry.d.ts.map +1 -0
  54. package/dist/telemetry/opentelemetry.js +202 -0
  55. package/migrations/postgres/20260117231749_clumsy_penance/migration.sql +3 -0
  56. package/migrations/postgres/20260117231749_clumsy_penance/snapshot.json +988 -0
  57. package/migrations/postgres/20260118202533_wealthy_mysterio/migration.sql +24 -0
  58. package/migrations/postgres/20260118202533_wealthy_mysterio/snapshot.json +1362 -0
  59. package/package.json +6 -4
  60. package/src/action-job.ts +35 -0
  61. package/src/action-manager.ts +5 -0
  62. package/src/action.ts +56 -0
  63. package/src/adapters/adapter.ts +151 -0
  64. package/src/adapters/postgres/base.ts +342 -23
  65. package/src/adapters/postgres/schema.default.ts +2 -2
  66. package/src/adapters/postgres/schema.ts +49 -1
  67. package/src/adapters/schemas.ts +81 -5
  68. package/src/client.ts +80 -2
  69. package/src/errors.ts +45 -1
  70. package/src/index.ts +3 -1
  71. package/src/server.ts +163 -8
  72. package/src/step-manager.ts +232 -13
  73. package/src/telemetry/adapter.ts +468 -0
  74. package/src/telemetry/index.ts +17 -0
  75. package/src/telemetry/local.ts +336 -0
  76. package/src/telemetry/noop.ts +95 -0
  77. package/src/telemetry/opentelemetry.ts +310 -0
@@ -19,7 +19,9 @@ import {
19
19
  StepAlreadyExecutedError,
20
20
  StepTimeoutError,
21
21
  serializeError,
22
+ UnhandledChildStepsError,
22
23
  } from './errors.js'
24
+ import type { ObserveContext, Span, TelemetryAdapter } from './telemetry/adapter.js'
23
25
  import pRetry from './utils/p-retry.js'
24
26
  import waitForAbort from './utils/wait-for-abort.js'
25
27
 
@@ -28,6 +30,8 @@ export interface TaskStep {
28
30
  cb: (ctx: StepHandlerContext) => Promise<any>
29
31
  options: StepOptions
30
32
  abortSignal: AbortSignal
33
+ parentStepId: string | null
34
+ parallel: boolean
31
35
  }
32
36
 
33
37
  /**
@@ -61,16 +65,27 @@ export class StepStore {
61
65
  * @param name - The name of the step
62
66
  * @param timeoutMs - Timeout in milliseconds for the step
63
67
  * @param retriesLimit - Maximum number of retries for the step
68
+ * @param parentStepId - The ID of the parent step (null for root steps)
69
+ * @param parallel - Whether this step runs in parallel (independent from siblings during time travel)
64
70
  * @returns Promise resolving to the created step ID
65
71
  * @throws Error if step creation fails
66
72
  */
67
- async getOrCreate(jobId: string, name: string, timeoutMs: number, retriesLimit: number) {
73
+ async getOrCreate(
74
+ jobId: string,
75
+ name: string,
76
+ timeoutMs: number,
77
+ retriesLimit: number,
78
+ parentStepId: string | null = null,
79
+ parallel: boolean = false,
80
+ ) {
68
81
  try {
69
82
  return await this.#adapter.createOrRecoverJobStep({
70
83
  jobId,
71
84
  name,
72
85
  timeoutMs,
73
86
  retriesLimit,
87
+ parentStepId,
88
+ parallel,
74
89
  })
75
90
  } catch (error) {
76
91
  throw new NonRetriableError(`Failed to get or create step "${name}" for job "${jobId}"`, { cause: error })
@@ -115,6 +130,7 @@ export interface StepManagerOptions {
115
130
  jobId: string
116
131
  actionName: string
117
132
  adapter: Adapter
133
+ telemetry: TelemetryAdapter
118
134
  logger: Logger
119
135
  concurrencyLimit: number
120
136
  }
@@ -127,10 +143,15 @@ export class StepManager {
127
143
  #jobId: string
128
144
  #actionName: string
129
145
  #stepStore: StepStore
146
+ #telemetry: TelemetryAdapter
130
147
  #queue: fastq.queueAsPromised<TaskStep, any>
131
148
  #logger: Logger
132
149
  // each step name should be executed only once per action job
133
150
  #historySteps = new Set<string>()
151
+ // Store step spans for nested step tracking
152
+ #stepSpans = new Map<string, Span>()
153
+ // Store the job span for creating step spans
154
+ #jobSpan: Span | null = null
134
155
 
135
156
  // ============================================================================
136
157
  // Constructor
@@ -145,16 +166,25 @@ export class StepManager {
145
166
  this.#jobId = options.jobId
146
167
  this.#actionName = options.actionName
147
168
  this.#logger = options.logger
169
+ this.#telemetry = options.telemetry
148
170
  this.#stepStore = new StepStore(options.adapter)
149
171
  this.#queue = fastq.promise(async (task: TaskStep) => {
150
172
  if (this.#historySteps.has(task.name)) {
151
173
  throw new StepAlreadyExecutedError(task.name, this.#jobId, this.#actionName)
152
174
  }
153
175
  this.#historySteps.add(task.name)
154
- return this.#executeStep(task.name, task.cb, task.options, task.abortSignal)
176
+ return this.#executeStep(task.name, task.cb, task.options, task.abortSignal, task.parentStepId, task.parallel)
155
177
  }, options.concurrencyLimit)
156
178
  }
157
179
 
180
+ /**
181
+ * Set the job span for this step manager.
182
+ * Called from ActionJob after the job span is created.
183
+ */
184
+ setJobSpan(span: Span): void {
185
+ this.#jobSpan = span
186
+ }
187
+
158
188
  // ============================================================================
159
189
  // Public API Methods
160
190
  // ============================================================================
@@ -168,6 +198,7 @@ export class StepManager {
168
198
  * @param variables - Variables available to the action
169
199
  * @param abortSignal - Abort signal for cancelling the action
170
200
  * @param logger - Pino child logger for this job
201
+ * @param observeContext - Observability context for telemetry
171
202
  * @returns ActionHandlerContext instance
172
203
  */
173
204
  createActionContext<TInput extends z.ZodObject, TOutput extends z.ZodObject, TVariables = Record<string, unknown>>(
@@ -176,8 +207,35 @@ export class StepManager {
176
207
  variables: TVariables,
177
208
  abortSignal: AbortSignal,
178
209
  logger: Logger,
210
+ observeContext: ObserveContext,
179
211
  ): ActionHandlerContext<TInput, TVariables> {
180
- return new ActionContext(this, job, action, variables, abortSignal, logger)
212
+ return new ActionContext(this, job, action, variables, abortSignal, logger, observeContext)
213
+ }
214
+
215
+ /**
216
+ * Create an observe context for a step.
217
+ */
218
+ createStepObserveContext(stepId: string): ObserveContext {
219
+ const stepSpan = this.#stepSpans.get(stepId)
220
+ if (stepSpan) {
221
+ return this.#telemetry.createObserveContext(this.#jobId, stepId, stepSpan)
222
+ }
223
+ // Fallback to job span if step span not found
224
+ if (this.#jobSpan) {
225
+ return this.#telemetry.createObserveContext(this.#jobId, stepId, this.#jobSpan)
226
+ }
227
+ // No-op observe context
228
+ return {
229
+ recordMetric: () => {
230
+ // No-op
231
+ },
232
+ addSpanAttribute: () => {
233
+ // No-op
234
+ },
235
+ addSpanEvent: () => {
236
+ // No-op
237
+ },
238
+ }
181
239
  }
182
240
 
183
241
  /**
@@ -206,9 +264,11 @@ export class StepManager {
206
264
  * @param cb - The step handler function
207
265
  * @param options - Step options including concurrency, retry, and expire settings
208
266
  * @param abortSignal - Abort signal for cancelling the step
267
+ * @param parentStepId - The ID of the parent step (null for root steps)
209
268
  * @returns Promise resolving to the step result
210
269
  * @throws StepTimeoutError if the step times out
211
270
  * @throws StepCancelError if the step is cancelled
271
+ * @throws UnhandledChildStepsError if child steps are not awaited
212
272
  * @throws Error if the step fails
213
273
  */
214
274
  async #executeStep<TResult>(
@@ -216,6 +276,8 @@ export class StepManager {
216
276
  cb: (ctx: StepHandlerContext) => Promise<TResult>,
217
277
  options: StepOptions,
218
278
  abortSignal: AbortSignal,
279
+ parentStepId: string | null,
280
+ parallel: boolean,
219
281
  ): Promise<TResult> {
220
282
  const expire = options.expire
221
283
  const retryOptions = options.retry
@@ -227,8 +289,15 @@ export class StepManager {
227
289
  throw new ActionCancelError(this.#actionName, this.#jobId, { cause: 'step cancelled before create step' })
228
290
  }
229
291
 
230
- // Create step record
231
- const newStep = await this.#stepStore.getOrCreate(this.#jobId, name, expire, retryOptions.limit)
292
+ // Create step record with parentStepId and parallel
293
+ const newStep = await this.#stepStore.getOrCreate(
294
+ this.#jobId,
295
+ name,
296
+ expire,
297
+ retryOptions.limit,
298
+ parentStepId,
299
+ parallel,
300
+ )
232
301
  if (!newStep) {
233
302
  throw new NonRetriableError(
234
303
  `Failed to create step "${name}" for job "${this.#jobId}" action "${this.#actionName}"`,
@@ -238,6 +307,17 @@ export class StepManager {
238
307
 
239
308
  step = newStep
240
309
 
310
+ // Start step telemetry span
311
+ const parentSpan = parentStepId ? this.#stepSpans.get(parentStepId) : this.#jobSpan
312
+ const stepSpan = await this.#telemetry.startStepSpan({
313
+ jobId: this.#jobId,
314
+ stepId: step.id,
315
+ stepName: name,
316
+ parentSpan: parentSpan ?? undefined,
317
+ parentStepId,
318
+ })
319
+ this.#stepSpans.set(step.id, stepSpan)
320
+
241
321
  if (abortSignal.aborted) {
242
322
  throw new ActionCancelError(this.#actionName, this.#jobId, { cause: 'step cancelled after create step' })
243
323
  }
@@ -245,7 +325,7 @@ export class StepManager {
245
325
  if (step.status === STEP_STATUS_COMPLETED) {
246
326
  // this is how we recover a completed step
247
327
  this.#logger.debug(
248
- { jobId: this.#jobId, actionName: this.#actionName, stepName: name, stepId: step.id },
328
+ { jobId: this.#jobId, actionName: this.#actionName, stepName: name, stepId: step.id, parentStepId },
249
329
  '[StepManager] Step recovered (already completed)',
250
330
  )
251
331
  return step.output as TResult
@@ -265,11 +345,12 @@ export class StepManager {
265
345
 
266
346
  // Log step start
267
347
  this.#logger.debug(
268
- { jobId: this.#jobId, actionName: this.#actionName, stepName: name, stepId: step.id },
348
+ { jobId: this.#jobId, actionName: this.#actionName, stepName: name, stepId: step.id, parentStepId },
269
349
  '[StepManager] Step started executing',
270
350
  )
271
351
  }
272
352
 
353
+ // Create abort controller for this step's timeout
273
354
  const stepAbortController = new AbortController()
274
355
  const timeoutId = setTimeout(() => {
275
356
  const timeoutError = new StepTimeoutError(name, this.#jobId, expire)
@@ -278,18 +359,85 @@ export class StepManager {
278
359
 
279
360
  timeoutId?.unref?.()
280
361
 
281
- // Combine abort signals
282
- const signal = AbortSignal.any([abortSignal, stepAbortController.signal])
362
+ // Combine abort signals: parent chain + this step's timeout
363
+ const stepSignal = AbortSignal.any([abortSignal, stepAbortController.signal])
364
+
365
+ // Track child steps for enforcement
366
+ interface TrackedChildStep {
367
+ promise: Promise<any>
368
+ settled: boolean
369
+ }
370
+ const childSteps: TrackedChildStep[] = []
371
+
372
+ // Create abort controller for child steps (used when parent returns with pending children)
373
+ const childAbortController = new AbortController()
374
+ const childSignal = AbortSignal.any([stepSignal, childAbortController.signal])
375
+
376
+ // Create observe context for this step
377
+ const stepObserveContext = this.createStepObserveContext(step.id)
378
+
379
+ // Create StepHandlerContext with nested step support
380
+ const stepContext: StepHandlerContext = {
381
+ signal: stepSignal,
382
+ stepId: step.id,
383
+ parentStepId,
384
+ observe: stepObserveContext,
385
+ step: <TChildResult>(
386
+ childName: string,
387
+ childCb: (ctx: StepHandlerContext) => Promise<TChildResult>,
388
+ childOptions: z.input<typeof StepOptionsSchema> = {},
389
+ ): Promise<TChildResult> => {
390
+ // Inherit parent step options EXCEPT parallel (each step's parallel status is independent)
391
+ const { parallel: _parentParallel, ...inheritableOptions } = options
392
+ const parsedChildOptions = StepOptionsSchema.parse({
393
+ ...inheritableOptions,
394
+ ...childOptions,
395
+ })
396
+
397
+ // Push child step with this step as parent
398
+ const childPromise = this.push({
399
+ name: childName,
400
+ cb: childCb,
401
+ options: parsedChildOptions,
402
+ abortSignal: childSignal, // Child uses composed signal
403
+ parentStepId: step!.id, // This step is the parent
404
+ parallel: parsedChildOptions.parallel, // Pass parallel option
405
+ })
406
+
407
+ // Track the child promise
408
+ const trackedChild: TrackedChildStep = {
409
+ promise: childPromise,
410
+ settled: false,
411
+ }
412
+ childSteps.push(trackedChild)
413
+
414
+ // Mark as settled when done (success or failure)
415
+ // Use .then/.catch instead of .finally to properly handle rejections
416
+ childPromise
417
+ .then(() => {
418
+ trackedChild.settled = true
419
+ })
420
+ .catch(() => {
421
+ trackedChild.settled = true
422
+ // Swallow the error here - it will be re-thrown to the caller via the returned promise
423
+ })
424
+
425
+ return childPromise
426
+ },
427
+ }
283
428
 
284
429
  try {
285
430
  // Race between abort signal and callback execution
286
- const abortPromise = waitForAbort(signal)
287
- const callbackPromise = cb({ signal })
431
+ const abortPromise = waitForAbort(stepSignal)
432
+ const callbackPromise = cb(stepContext)
288
433
 
289
434
  let result: any = null
435
+ let aborted = false
290
436
 
291
437
  await Promise.race([
292
- abortPromise.promise,
438
+ abortPromise.promise.then(() => {
439
+ aborted = true
440
+ }),
293
441
  callbackPromise
294
442
  .then((res) => {
295
443
  if (res !== undefined && res !== null) {
@@ -301,12 +449,54 @@ export class StepManager {
301
449
  }),
302
450
  ])
303
451
 
452
+ // If aborted, wait for child steps to settle before propagating
453
+ if (aborted) {
454
+ // Wait for all child steps to settle (they'll be aborted via signal propagation)
455
+ if (childSteps.length > 0) {
456
+ await Promise.allSettled(childSteps.map((c) => c.promise))
457
+ }
458
+ // Re-throw the abort reason
459
+ throw stepSignal.reason
460
+ }
461
+
462
+ // After parent callback returns, check for pending children
463
+ const unsettledChildren = childSteps.filter((c) => !c.settled)
464
+ if (unsettledChildren.length > 0) {
465
+ this.#logger.warn(
466
+ {
467
+ jobId: this.#jobId,
468
+ actionName: this.#actionName,
469
+ stepName: name,
470
+ stepId: step.id,
471
+ pendingCount: unsettledChildren.length,
472
+ },
473
+ '[StepManager] Parent step completed with unhandled child steps - aborting children',
474
+ )
475
+
476
+ // Abort all pending children
477
+ const unhandledError = new UnhandledChildStepsError(name, unsettledChildren.length)
478
+ childAbortController.abort(unhandledError)
479
+
480
+ // Wait for all children to settle (they'll reject with cancellation)
481
+ await Promise.allSettled(unsettledChildren.map((c) => c.promise))
482
+
483
+ // Now throw the error
484
+ throw unhandledError
485
+ }
486
+
304
487
  // Update step as completed
305
488
  const completed = await this.#stepStore.updateStatus(step.id, 'completed', result)
306
489
  if (!completed) {
307
490
  throw new Error(`Failed to complete step "${name}" for job "${this.#jobId}" action "${this.#actionName}"`)
308
491
  }
309
492
 
493
+ // End step telemetry span successfully
494
+ const stepSpan = this.#stepSpans.get(step.id)
495
+ if (stepSpan) {
496
+ await this.#telemetry.endStepSpan(stepSpan, { status: 'ok' })
497
+ this.#stepSpans.delete(step.id)
498
+ }
499
+
310
500
  // Log step completion
311
501
  this.#logger.debug(
312
502
  { jobId: this.#jobId, actionName: this.#actionName, stepName: name, stepId: step.id },
@@ -347,6 +537,17 @@ export class StepManager {
347
537
  },
348
538
  }).catch(async (error) => {
349
539
  if (step) {
540
+ // End step telemetry span with error/cancelled status
541
+ const stepSpan = this.#stepSpans.get(step.id)
542
+ if (stepSpan) {
543
+ if (isCancelError(error)) {
544
+ await this.#telemetry.endStepSpan(stepSpan, { status: 'cancelled' })
545
+ } else {
546
+ await this.#telemetry.endStepSpan(stepSpan, { status: 'error', error })
547
+ }
548
+ this.#stepSpans.delete(step.id)
549
+ }
550
+
350
551
  if (isCancelError(error)) {
351
552
  await this.#stepStore.updateStatus(step.id, 'cancelled')
352
553
  } else {
@@ -377,6 +578,7 @@ class ActionContext<TInput extends z.ZodObject, TOutput extends z.ZodObject, TVa
377
578
  #jobId: string
378
579
  #groupKey: string = '@default'
379
580
  #action: Action<TInput, TOutput, TVariables>
581
+ #observeContext: ObserveContext
380
582
 
381
583
  // ============================================================================
382
584
  // Constructor
@@ -389,6 +591,7 @@ class ActionContext<TInput extends z.ZodObject, TOutput extends z.ZodObject, TVa
389
591
  variables: TVariables,
390
592
  abortSignal: AbortSignal,
391
593
  logger: Logger,
594
+ observeContext: ObserveContext,
392
595
  ) {
393
596
  this.#stepManager = stepManager
394
597
  this.#variables = variables
@@ -397,6 +600,7 @@ class ActionContext<TInput extends z.ZodObject, TOutput extends z.ZodObject, TVa
397
600
  this.#action = action
398
601
  this.#jobId = job.id
399
602
  this.#groupKey = job.groupKey ?? '@default'
603
+ this.#observeContext = observeContext
400
604
  if (action.input) {
401
605
  this.#input = action.input.parse(job.input, {
402
606
  error: () => 'Error parsing action input',
@@ -450,8 +654,16 @@ class ActionContext<TInput extends z.ZodObject, TOutput extends z.ZodObject, TVa
450
654
  return this.#logger
451
655
  }
452
656
 
657
+ /**
658
+ * Get the observability context for recording metrics and span data.
659
+ */
660
+ get observe(): ObserveContext {
661
+ return this.#observeContext
662
+ }
663
+
453
664
  /**
454
665
  * Execute a step within the action.
666
+ * This creates a root step (no parent).
455
667
  *
456
668
  * @param name - The name of the step
457
669
  * @param cb - The step handler function
@@ -467,6 +679,13 @@ class ActionContext<TInput extends z.ZodObject, TOutput extends z.ZodObject, TVa
467
679
  ...this.#action.steps,
468
680
  ...options,
469
681
  })
470
- return this.#stepManager.push({ name, cb, options: parsedOptions, abortSignal: this.#abortSignal })
682
+ return this.#stepManager.push({
683
+ name,
684
+ cb,
685
+ options: parsedOptions,
686
+ abortSignal: this.#abortSignal,
687
+ parentStepId: null, // Root steps have no parent
688
+ parallel: parsedOptions.parallel, // Pass parallel option
689
+ })
471
690
  }
472
691
  }