duron 0.3.0-beta.8 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91) hide show
  1. package/dist/action-job.d.ts +33 -2
  2. package/dist/action-job.d.ts.map +1 -1
  3. package/dist/action-job.js +93 -26
  4. package/dist/action-manager.d.ts +44 -2
  5. package/dist/action-manager.d.ts.map +1 -1
  6. package/dist/action-manager.js +64 -3
  7. package/dist/action.d.ts +388 -7
  8. package/dist/action.d.ts.map +1 -1
  9. package/dist/action.js +44 -23
  10. package/dist/adapters/adapter.d.ts +365 -8
  11. package/dist/adapters/adapter.d.ts.map +1 -1
  12. package/dist/adapters/adapter.js +221 -15
  13. package/dist/adapters/postgres/base.d.ts +184 -6
  14. package/dist/adapters/postgres/base.d.ts.map +1 -1
  15. package/dist/adapters/postgres/base.js +436 -75
  16. package/dist/adapters/postgres/pglite.d.ts +37 -0
  17. package/dist/adapters/postgres/pglite.d.ts.map +1 -1
  18. package/dist/adapters/postgres/pglite.js +38 -0
  19. package/dist/adapters/postgres/postgres.d.ts +35 -0
  20. package/dist/adapters/postgres/postgres.d.ts.map +1 -1
  21. package/dist/adapters/postgres/postgres.js +42 -0
  22. package/dist/adapters/postgres/schema.d.ts +150 -37
  23. package/dist/adapters/postgres/schema.d.ts.map +1 -1
  24. package/dist/adapters/postgres/schema.default.d.ts +151 -38
  25. package/dist/adapters/postgres/schema.default.d.ts.map +1 -1
  26. package/dist/adapters/postgres/schema.default.js +2 -2
  27. package/dist/adapters/postgres/schema.js +60 -23
  28. package/dist/adapters/schemas.d.ts +124 -80
  29. package/dist/adapters/schemas.d.ts.map +1 -1
  30. package/dist/adapters/schemas.js +139 -26
  31. package/dist/client.d.ts +426 -22
  32. package/dist/client.d.ts.map +1 -1
  33. package/dist/client.js +370 -20
  34. package/dist/constants.js +6 -0
  35. package/dist/errors.d.ts +166 -9
  36. package/dist/errors.d.ts.map +1 -1
  37. package/dist/errors.js +189 -19
  38. package/dist/index.d.ts +2 -1
  39. package/dist/index.d.ts.map +1 -1
  40. package/dist/server.d.ts +99 -37
  41. package/dist/server.d.ts.map +1 -1
  42. package/dist/server.js +84 -25
  43. package/dist/step-manager.d.ts +111 -4
  44. package/dist/step-manager.d.ts.map +1 -1
  45. package/dist/step-manager.js +411 -75
  46. package/dist/telemetry/index.d.ts +1 -4
  47. package/dist/telemetry/index.d.ts.map +1 -1
  48. package/dist/telemetry/index.js +2 -4
  49. package/dist/telemetry/local-span-exporter.d.ts +56 -0
  50. package/dist/telemetry/local-span-exporter.d.ts.map +1 -0
  51. package/dist/telemetry/local-span-exporter.js +118 -0
  52. package/dist/utils/p-retry.d.ts +5 -0
  53. package/dist/utils/p-retry.d.ts.map +1 -1
  54. package/dist/utils/p-retry.js +8 -0
  55. package/dist/utils/wait-for-abort.d.ts +1 -0
  56. package/dist/utils/wait-for-abort.d.ts.map +1 -1
  57. package/dist/utils/wait-for-abort.js +1 -0
  58. package/migrations/postgres/{20260119153838_flimsy_thor_girl → 20260121160012_normal_bloodstrike}/migration.sql +32 -20
  59. package/migrations/postgres/{20260119153838_flimsy_thor_girl → 20260121160012_normal_bloodstrike}/snapshot.json +241 -66
  60. package/package.json +42 -26
  61. package/src/action-job.ts +43 -32
  62. package/src/action-manager.ts +5 -5
  63. package/src/action.ts +317 -149
  64. package/src/adapters/adapter.ts +54 -54
  65. package/src/adapters/postgres/base.ts +266 -86
  66. package/src/adapters/postgres/schema.default.ts +2 -2
  67. package/src/adapters/postgres/schema.ts +52 -24
  68. package/src/adapters/schemas.ts +91 -36
  69. package/src/client.ts +322 -68
  70. package/src/errors.ts +141 -30
  71. package/src/index.ts +2 -0
  72. package/src/server.ts +39 -37
  73. package/src/step-manager.ts +254 -91
  74. package/src/telemetry/index.ts +2 -20
  75. package/src/telemetry/local-span-exporter.ts +148 -0
  76. package/dist/telemetry/adapter.d.ts +0 -107
  77. package/dist/telemetry/adapter.d.ts.map +0 -1
  78. package/dist/telemetry/adapter.js +0 -134
  79. package/dist/telemetry/local.d.ts +0 -22
  80. package/dist/telemetry/local.d.ts.map +0 -1
  81. package/dist/telemetry/local.js +0 -243
  82. package/dist/telemetry/noop.d.ts +0 -17
  83. package/dist/telemetry/noop.d.ts.map +0 -1
  84. package/dist/telemetry/noop.js +0 -66
  85. package/dist/telemetry/opentelemetry.d.ts +0 -25
  86. package/dist/telemetry/opentelemetry.d.ts.map +0 -1
  87. package/dist/telemetry/opentelemetry.js +0 -312
  88. package/src/telemetry/adapter.ts +0 -642
  89. package/src/telemetry/local.ts +0 -429
  90. package/src/telemetry/noop.ts +0 -141
  91. package/src/telemetry/opentelemetry.ts +0 -453
@@ -1,39 +1,118 @@
1
+ import { context, SpanKind, SpanStatusCode, trace, } from '@opentelemetry/api';
1
2
  import fastq from 'fastq';
2
3
  import { StepOptionsSchema, } from './action.js';
3
4
  import { STEP_STATUS_CANCELLED, STEP_STATUS_COMPLETED, STEP_STATUS_FAILED } from './constants.js';
4
- import { ActionCancelError, isCancelError, isNonRetriableError, NonRetriableError, StepAlreadyExecutedError, StepTimeoutError, serializeError, UnhandledChildStepsError, } from './errors.js';
5
- const noopTracerSpan = {
6
- setAttribute() {
7
- },
8
- setAttributes() {
9
- },
10
- addEvent() {
11
- },
12
- recordException() {
13
- },
14
- setStatusOk() {
15
- },
16
- setStatusError() {
17
- },
18
- end() {
19
- },
20
- isRecording() {
21
- return false;
22
- },
23
- };
24
- const createNoopTracer = (name) => ({
25
- name,
26
- startSpan() {
27
- return noopTracerSpan;
28
- },
29
- });
5
+ import { ActionCancelError, isCancelError, isNonRetriableError, isTimeoutError, NonRetriableError, StepAlreadyExecutedError, StepTimeoutError, serializeError, UnhandledChildStepsError, } from './errors.js';
6
+ /**
7
+ * Inject parent span into a context if we have one.
8
+ */
9
+ function injectParentSpan(ctx, parentSpan) {
10
+ return parentSpan ? trace.setSpan(ctx, parentSpan) : ctx;
11
+ }
12
+ /**
13
+ * Create a context-aware tracer wrapper that automatically injects the parent span.
14
+ * This ensures spans created by external libraries (like AI SDK) are properly linked
15
+ * to the current job/step trace hierarchy.
16
+ */
17
+ function createContextAwareTracer(tracer, parentSpan) {
18
+ return {
19
+ startSpan(name, options, ctx) {
20
+ // Always inject our parent span into the context, regardless of what context is passed.
21
+ // This is necessary because without global registration, context.active() returns
22
+ // ROOT_CONTEXT, so external libraries (like AI SDK) that pass context.active()
23
+ // would otherwise create orphan spans.
24
+ const baseContext = ctx ?? context.active();
25
+ const effectiveContext = injectParentSpan(baseContext, parentSpan);
26
+ return tracer.startSpan(name, options, effectiveContext);
27
+ },
28
+ // startActiveSpan has multiple overloads, we need to handle them all
29
+ startActiveSpan(name, optionsOrFn, ctxOrFn, fn) {
30
+ // Parse the overloaded arguments
31
+ let options;
32
+ let ctx;
33
+ let callback;
34
+ if (typeof optionsOrFn === 'function') {
35
+ // startActiveSpan(name, fn)
36
+ callback = optionsOrFn;
37
+ }
38
+ else if (typeof ctxOrFn === 'function') {
39
+ // startActiveSpan(name, options, fn)
40
+ options = optionsOrFn;
41
+ callback = ctxOrFn;
42
+ }
43
+ else {
44
+ // startActiveSpan(name, options, context, fn)
45
+ options = optionsOrFn;
46
+ ctx = ctxOrFn;
47
+ callback = fn;
48
+ }
49
+ const baseContext = ctx ?? context.active();
50
+ const effectiveContext = injectParentSpan(baseContext, parentSpan);
51
+ return tracer.startActiveSpan(name, options ?? {}, effectiveContext, callback);
52
+ },
53
+ };
54
+ }
55
+ /**
56
+ * Create a TelemetryContext that wraps an OTel span.
57
+ */
58
+ function createTelemetryContext(span, tracer) {
59
+ return {
60
+ getActiveSpan() {
61
+ return span ?? undefined;
62
+ },
63
+ getTracer(_name) {
64
+ // Return a context-aware tracer that automatically links spans to the current trace
65
+ return createContextAwareTracer(tracer, span);
66
+ },
67
+ startSpan(name, options) {
68
+ // Create a child span linked to the current span (job or step)
69
+ const parentContext = span ? trace.setSpan(context.active(), span) : context.active();
70
+ return tracer.startSpan(name, { attributes: options?.attributes }, parentContext);
71
+ },
72
+ recordMetric(name, value, attributes) {
73
+ if (span) {
74
+ span.addEvent(`metric:${name}`, {
75
+ 'metric.value': value,
76
+ ...attributes,
77
+ });
78
+ }
79
+ },
80
+ };
81
+ }
30
82
  import pRetry from './utils/p-retry.js';
31
83
  import waitForAbort from './utils/wait-for-abort.js';
84
+ /**
85
+ * StepStore manages step records in the database.
86
+ * Provides methods to create, update, and delay steps.
87
+ */
32
88
  export class StepStore {
33
89
  #adapter;
90
+ // ============================================================================
91
+ // Constructor
92
+ // ============================================================================
93
+ /**
94
+ * Create a new StepStore instance.
95
+ *
96
+ * @param adapter - The database adapter to use for step operations
97
+ */
34
98
  constructor(adapter) {
35
99
  this.#adapter = adapter;
36
100
  }
101
+ // ============================================================================
102
+ // Public API Methods
103
+ // ============================================================================
104
+ /**
105
+ * Get or create a step record in the database.
106
+ *
107
+ * @param jobId - The ID of the job this step belongs to
108
+ * @param name - The name of the step
109
+ * @param timeoutMs - Timeout in milliseconds for the step
110
+ * @param retriesLimit - Maximum number of retries for the step
111
+ * @param parentStepId - The ID of the parent step (null for root steps)
112
+ * @param parallel - Whether this step runs in parallel (independent from siblings during time travel)
113
+ * @returns Promise resolving to the created (or recovered) step record
114
+ * @throws Error if step creation fails
115
+ */
37
116
  async getOrCreate(jobId, name, timeoutMs, retriesLimit, parentStepId = null, parallel = false) {
38
117
  try {
39
118
  return await this.#adapter.createOrRecoverJobStep({
@@ -49,6 +128,15 @@ export class StepStore {
49
128
  throw new NonRetriableError(`Failed to get or create step "${name}" for job "${jobId}"`, { cause: error });
50
129
  }
51
130
  }
131
+ /**
132
+ * Update the status of a step in the database.
133
+ *
134
+ * @param stepId - The ID of the step to update
135
+ * @param status - The new status (completed, failed, or cancelled)
136
+ * @param output - Optional output data for completed steps
137
+ * @param error - Optional error data for failed steps
138
+ * @returns Promise resolving to `true` if update succeeded, `false` otherwise
139
+ */
52
140
  async updateStatus(stepId, status, output, error) {
53
141
  if (status === STEP_STATUS_COMPLETED) {
54
142
  return this.#adapter.completeJobStep({ stepId, output });
@@ -61,28 +149,54 @@ export class StepStore {
61
149
  }
62
150
  return false;
63
151
  }
152
+ /**
153
+ * Delay a step execution.
154
+ * Used when a step fails and needs to be retried after a delay.
155
+ *
156
+ * @param stepId - The ID of the step to delay
157
+ * @param delayMs - The delay in milliseconds before retrying
158
+ * @param error - The error that caused the delay
159
+ * @returns Promise resolving to `true` if delayed successfully, `false` otherwise
160
+ */
64
161
  async delay(stepId, delayMs, error) {
65
162
  return this.#adapter.delayJobStep({ stepId, delayMs, error });
66
163
  }
67
164
  }
165
+ /**
166
+ * StepManager manages steps for a single ActionJob.
167
+ * Each ActionJob has its own StepManager instance.
168
+ */
68
169
  export class StepManager {
69
170
  #jobId;
70
171
  #actionName;
71
172
  #stepStore;
72
- #telemetry;
173
+ #tracer;
73
174
  #queue;
74
175
  #logger;
176
+ // each step name should be executed only once per parent (name + parentStepId)
75
177
  #historySteps = new Set();
178
+ // Store step spans for nested step tracking
76
179
  #stepSpans = new Map();
77
- #jobSpan = null;
180
+ // Store the job span for creating step spans
181
+ #jobSpan;
182
+ // Factory function to create run functions with the correct parent step ID and abort signal
78
183
  #runFnFactory = null;
184
+ // ============================================================================
185
+ // Constructor
186
+ // ============================================================================
187
+ /**
188
+ * Create a new StepManager instance.
189
+ *
190
+ * @param options - Configuration options for the step manager
191
+ */
79
192
  constructor(options) {
80
193
  this.#jobId = options.jobId;
81
194
  this.#actionName = options.actionName;
82
195
  this.#logger = options.logger;
83
- this.#telemetry = options.telemetry;
196
+ this.#tracer = options.tracer;
84
197
  this.#stepStore = new StepStore(options.adapter);
85
198
  this.#queue = fastq.promise(async (task) => {
199
+ // Create composite key: name + parentStepId (allows same name under different parents)
86
200
  const stepKey = `${task.parentStepId ?? 'root'}:${task.name}`;
87
201
  if (this.#historySteps.has(stepKey)) {
88
202
  throw new StepAlreadyExecutedError(task.name, this.#jobId, this.#actionName);
@@ -91,36 +205,59 @@ export class StepManager {
91
205
  return this.#executeStep(task.name, task.cb, task.options, task.abortSignal, task.parentStepId, task.parallel);
92
206
  }, options.concurrencyLimit);
93
207
  }
208
+ /**
209
+ * Set the job span for this step manager.
210
+ * Called from ActionJob after the job span is created.
211
+ */
94
212
  setJobSpan(span) {
95
213
  this.#jobSpan = span;
96
214
  }
215
+ /**
216
+ * Set the run function factory for executing step definitions from inline steps.
217
+ * Called from ActionContext after it's initialized.
218
+ *
219
+ * @param factory - A function that creates run functions with the correct parent step ID and abort signal
220
+ */
97
221
  setRunFnFactory(factory) {
98
222
  this.#runFnFactory = factory;
99
223
  }
100
- createActionContext(job, action, variables, abortSignal, logger, observeContext) {
101
- return new ActionContext(this, job, action, variables, abortSignal, logger, observeContext);
224
+ // ============================================================================
225
+ // Public API Methods
226
+ // ============================================================================
227
+ /**
228
+ * Create an ActionContext for the action handler.
229
+ * The context provides access to input, variables, logger, and the step function.
230
+ *
231
+ * @param job - The job data including ID, input, and optional group key
232
+ * @param action - The action definition
233
+ * @param variables - Variables available to the action
234
+ * @param abortSignal - Abort signal for cancelling the action
235
+ * @param logger - Pino child logger for this job
236
+ * @returns ActionHandlerContext instance
237
+ */
238
+ createActionContext(job, action, variables, abortSignal, logger) {
239
+ const telemetryContext = createTelemetryContext(this.#jobSpan, this.#tracer);
240
+ return new ActionContext(this, job, action, variables, abortSignal, logger, telemetryContext);
102
241
  }
103
- createStepObserveContext(stepId) {
242
+ /**
243
+ * Create a telemetry context for a step.
244
+ */
245
+ createStepTelemetryContext(stepId) {
104
246
  const stepSpan = this.#stepSpans.get(stepId);
105
247
  if (stepSpan) {
106
- return this.#telemetry.createObserveContext(this.#jobId, stepId, stepSpan);
248
+ return createTelemetryContext(stepSpan, this.#tracer);
107
249
  }
108
- if (this.#jobSpan) {
109
- return this.#telemetry.createObserveContext(this.#jobId, stepId, this.#jobSpan);
110
- }
111
- return {
112
- recordMetric: () => {
113
- },
114
- addSpanAttribute: () => {
115
- },
116
- addSpanEvent: () => {
117
- },
118
- getTracer: (name) => {
119
- return createNoopTracer(name);
120
- },
121
- };
250
+ // Fallback to job span if step span not found
251
+ return createTelemetryContext(this.#jobSpan, this.#tracer);
122
252
  }
253
+ /**
254
+ * Queue a step task for execution.
255
+ *
256
+ * @param task - The step task to queue
257
+ * @returns Promise resolving to the step result
258
+ */
123
259
  async push(task) {
260
+ // Warn about potential starvation when child steps are queued and all slots are occupied
124
261
  if (task.parentStepId !== null && this.#queue.running() >= this.#queue.concurrency) {
125
262
  this.#logger.warn({
126
263
  jobId: this.#jobId,
@@ -134,9 +271,28 @@ export class StepManager {
134
271
  }
135
272
  return this.#queue.push(task);
136
273
  }
274
+ /**
275
+ * Clean up step queues by waiting for them to drain.
276
+ * Should be called when the job completes or is cancelled.
277
+ */
137
278
  async drain() {
138
279
  await this.#queue.drain();
139
280
  }
281
+ /**
282
+ * Execute a step with retry logic and timeout handling.
283
+ * Creates a step record, queues the execution, and handles errors appropriately.
284
+ *
285
+ * @param name - The name of the step
286
+ * @param cb - The step handler function
287
+ * @param options - Step options including concurrency, retry, and expire settings
288
+ * @param abortSignal - Abort signal for cancelling the step
289
+ * @param parentStepId - The ID of the parent step (null for root steps)
290
+ * @returns Promise resolving to the step result
291
+ * @throws StepTimeoutError if the step times out
292
+ * @throws ActionCancelError if the step is cancelled
293
+ * @throws UnhandledChildStepsError if child steps are not awaited
294
+ * @throws Error if the step fails
295
+ */
140
296
  async #executeStep(name, cb, options, abortSignal, parentStepId, parallel) {
141
297
  const expire = options.expire;
142
298
  const retryOptions = options.retry;
@@ -146,24 +302,30 @@ export class StepManager {
146
302
  if (abortSignal.aborted) {
147
303
  throw new ActionCancelError(this.#actionName, this.#jobId, { cause: 'step cancelled before create step' });
148
304
  }
305
+ // Create step record with parentStepId and parallel
149
306
  const newStep = await this.#stepStore.getOrCreate(this.#jobId, name, expire, retryOptions.limit, parentStepId, parallel);
150
307
  if (!newStep) {
151
308
  throw new NonRetriableError(`Failed to create step "${name}" for job "${this.#jobId}" action "${this.#actionName}"`, { cause: 'step not created' });
152
309
  }
153
310
  step = newStep;
311
+ // Start step span - uses no-op tracer if no SDK is configured
154
312
  const parentSpan = parentStepId ? this.#stepSpans.get(parentStepId) : this.#jobSpan;
155
- const stepSpan = await this.#telemetry.startStepSpan({
156
- jobId: this.#jobId,
157
- stepId: step.id,
158
- stepName: name,
159
- parentSpan: parentSpan ?? undefined,
160
- parentStepId,
161
- });
313
+ const parentContext = parentSpan ? trace.setSpan(context.active(), parentSpan) : context.active();
314
+ const stepSpan = this.#tracer.startSpan(`step:${name}`, {
315
+ kind: SpanKind.INTERNAL,
316
+ attributes: {
317
+ 'duron.job.id': this.#jobId,
318
+ 'duron.step.id': step.id,
319
+ 'duron.step.name': name,
320
+ 'duron.step.parent_id': parentStepId ?? undefined,
321
+ },
322
+ }, parentContext);
162
323
  this.#stepSpans.set(step.id, stepSpan);
163
324
  if (abortSignal.aborted) {
164
325
  throw new ActionCancelError(this.#actionName, this.#jobId, { cause: 'step cancelled after create step' });
165
326
  }
166
327
  if (step.status === STEP_STATUS_COMPLETED) {
328
+ // this is how we recover a completed step
167
329
  this.#logger.debug({ jobId: this.#jobId, actionName: this.#actionName, stepName: name, stepId: step.id, parentStepId }, '[StepManager] Step recovered (already completed)');
168
330
  return step.output;
169
331
  }
@@ -175,59 +337,81 @@ export class StepManager {
175
337
  else if (step.status === STEP_STATUS_CANCELLED) {
176
338
  throw new NonRetriableError(`Cannot recover a cancelled step "${name}" for job "${this.#jobId}" action "${this.#actionName}"`, { cause: step.error });
177
339
  }
340
+ // Log step start
178
341
  this.#logger.debug({ jobId: this.#jobId, actionName: this.#actionName, stepName: name, stepId: step.id, parentStepId }, '[StepManager] Step started executing');
179
342
  }
343
+ // Create abort controller for this step's timeout
180
344
  const stepAbortController = new AbortController();
181
345
  const timeoutId = setTimeout(() => {
182
- const timeoutError = new StepTimeoutError(name, this.#jobId, expire);
346
+ const timeoutError = new StepTimeoutError(name, this.#jobId, expire, {
347
+ stepId: step?.id,
348
+ parentStepId,
349
+ actionName: this.#actionName,
350
+ });
183
351
  stepAbortController.abort(timeoutError);
184
352
  }, expire);
185
353
  timeoutId?.unref?.();
354
+ // Combine abort signals: parent chain + this step's timeout
186
355
  const stepSignal = AbortSignal.any([abortSignal, stepAbortController.signal]);
187
356
  const childSteps = [];
357
+ // Create abort controller for child steps (used when parent returns with pending children)
188
358
  const childAbortController = new AbortController();
189
359
  const childSignal = AbortSignal.any([stepSignal, childAbortController.signal]);
190
- const stepObserveContext = this.createStepObserveContext(step.id);
360
+ // Create telemetry context for this step
361
+ const stepTelemetryContext = this.createStepTelemetryContext(step.id);
362
+ // Create StepHandlerContext with nested step support
191
363
  const stepContext = {
192
364
  signal: stepSignal,
193
365
  stepId: step.id,
194
366
  parentStepId,
195
- observe: stepObserveContext,
367
+ telemetry: stepTelemetryContext,
196
368
  step: (childName, childCb, childOptions = {}) => {
369
+ // Inherit parent step options EXCEPT parallel (each step's parallel status is independent)
197
370
  const { parallel: _parentParallel, ...inheritableOptions } = options;
198
371
  const parsedChildOptions = StepOptionsSchema.parse({
199
372
  ...inheritableOptions,
200
373
  ...childOptions,
201
374
  });
375
+ // Push child step with this step as parent
202
376
  const childPromise = this.push({
203
377
  name: childName,
204
378
  cb: childCb,
205
379
  options: parsedChildOptions,
206
- abortSignal: childSignal,
207
- parentStepId: step.id,
208
- parallel: parsedChildOptions.parallel,
380
+ abortSignal: childSignal, // Child uses composed signal
381
+ parentStepId: step.id, // This step is the parent
382
+ parallel: parsedChildOptions.parallel, // Pass parallel option
209
383
  });
384
+ // Track the child promise
210
385
  const trackedChild = {
211
386
  promise: childPromise,
212
387
  settled: false,
213
388
  };
214
389
  childSteps.push(trackedChild);
390
+ // Mark as settled when done (success or failure)
391
+ // Use .then/.catch instead of .finally to properly handle rejections
215
392
  childPromise
216
393
  .then(() => {
217
394
  trackedChild.settled = true;
218
395
  })
219
396
  .catch(() => {
220
397
  trackedChild.settled = true;
398
+ // Swallow the error here - it will be re-thrown to the caller via the returned promise
399
+ // Note: sibling steps will be aborted when the error propagates to the action level
221
400
  });
222
401
  return childPromise;
223
402
  },
224
403
  run: this.#runFnFactory(step.id, childSignal),
225
404
  };
226
405
  try {
406
+ // Race between abort signal and callback execution
227
407
  const abortPromise = waitForAbort(stepSignal);
228
- const callbackPromise = cb(stepContext);
408
+ // Execute callback within the span context so that child spans inherit the trace
409
+ const currentStepSpan = step?.id ? this.#stepSpans.get(step.id) : undefined;
410
+ const spanContext = currentStepSpan ? trace.setSpan(context.active(), currentStepSpan) : context.active();
411
+ const callbackPromise = context.with(spanContext, () => cb(stepContext));
229
412
  let result = null;
230
413
  let aborted = false;
414
+ let callbackError = null;
231
415
  await Promise.race([
232
416
  abortPromise.promise.then(() => {
233
417
  aborted = true;
@@ -237,17 +421,34 @@ export class StepManager {
237
421
  if (res !== undefined && res !== null) {
238
422
  result = res;
239
423
  }
424
+ })
425
+ .catch((err) => {
426
+ callbackError = err;
240
427
  })
241
428
  .finally(() => {
242
429
  abortPromise.release();
243
430
  }),
244
431
  ]);
432
+ // If callback threw an error, abort children and wait for them before re-throwing
433
+ if (callbackError) {
434
+ if (childSteps.length > 0) {
435
+ // Abort all children with the callback error as reason
436
+ childAbortController.abort(callbackError);
437
+ // Wait for all children to settle
438
+ await Promise.allSettled(childSteps.map((c) => c.promise));
439
+ }
440
+ throw callbackError;
441
+ }
442
+ // If aborted, wait for child steps to settle before propagating
245
443
  if (aborted) {
444
+ // Wait for all child steps to settle (they'll be aborted via signal propagation)
246
445
  if (childSteps.length > 0) {
247
446
  await Promise.allSettled(childSteps.map((c) => c.promise));
248
447
  }
448
+ // Re-throw the abort reason
249
449
  throw stepSignal.reason;
250
450
  }
451
+ // After parent callback returns, check for pending children
251
452
  const unsettledChildren = childSteps.filter((c) => !c.settled);
252
453
  if (unsettledChildren.length > 0) {
253
454
  this.#logger.warn({
@@ -257,20 +458,32 @@ export class StepManager {
257
458
  stepId: step.id,
258
459
  pendingCount: unsettledChildren.length,
259
460
  }, '[StepManager] Parent step completed with unhandled child steps - aborting children');
260
- const unhandledError = new UnhandledChildStepsError(name, unsettledChildren.length);
461
+ // Abort all pending children
462
+ const unhandledError = new UnhandledChildStepsError(name, unsettledChildren.length, {
463
+ stepId: step.id,
464
+ parentStepId,
465
+ jobId: this.#jobId,
466
+ actionName: this.#actionName,
467
+ });
261
468
  childAbortController.abort(unhandledError);
469
+ // Wait for all children to settle (they'll reject with cancellation)
262
470
  await Promise.allSettled(unsettledChildren.map((c) => c.promise));
471
+ // Now throw the error
263
472
  throw unhandledError;
264
473
  }
474
+ // Update step as completed
265
475
  const completed = await this.#stepStore.updateStatus(step.id, 'completed', result);
266
476
  if (!completed) {
267
477
  throw new Error(`Failed to complete step "${name}" for job "${this.#jobId}" action "${this.#actionName}"`);
268
478
  }
479
+ // End step span successfully
269
480
  const stepSpan = this.#stepSpans.get(step.id);
270
481
  if (stepSpan) {
271
- await this.#telemetry.endStepSpan(stepSpan, { status: 'ok' });
482
+ stepSpan.setStatus({ code: SpanStatusCode.OK });
483
+ stepSpan.end();
272
484
  this.#stepSpans.delete(step.id);
273
485
  }
486
+ // Log step completion
274
487
  this.#logger.debug({ jobId: this.#jobId, actionName: this.#actionName, stepName: name, stepId: step.id }, '[StepManager] Step finished executing');
275
488
  return result;
276
489
  }
@@ -278,6 +491,7 @@ export class StepManager {
278
491
  clearTimeout(timeoutId);
279
492
  }
280
493
  };
494
+ // Apply retry logic - skip retries for NonRetriableError
281
495
  return pRetry(executeStep, {
282
496
  retries: retryOptions.limit,
283
497
  factor: retryOptions.factor,
@@ -287,28 +501,67 @@ export class StepManager {
287
501
  maxTimeout: retryOptions.maxTimeout,
288
502
  onFailedAttempt: async (ctx) => {
289
503
  const error = ctx.error;
504
+ // Don't retry if error is non-retriable
290
505
  if (isNonRetriableError(error) ||
291
506
  (error.cause && isNonRetriableError(error.cause)) ||
292
- (error instanceof Error && error.name === 'AbortError' && isNonRetriableError(error.cause))) {
293
- throw error;
507
+ (error instanceof Error && error.name === 'AbortError')) {
508
+ const err = isNonRetriableError(error)
509
+ ? error
510
+ : error instanceof Error && error.name === 'AbortError'
511
+ ? new NonRetriableError(error.message, { cause: error.cause })
512
+ : error.cause;
513
+ if (Object.keys(err.metadata).length === 0) {
514
+ err.setMetadata({
515
+ stepId: step?.id,
516
+ parentStepId,
517
+ jobId: this.#jobId,
518
+ actionName: this.#actionName,
519
+ });
520
+ }
521
+ throw err;
294
522
  }
295
523
  if (ctx.retriesLeft > 0 && step) {
524
+ this.#clearHistoryForStep(step.id);
296
525
  const delayed = await this.#stepStore.delay(step.id, ctx.finalDelay, serializeError(error));
297
526
  if (!delayed) {
298
527
  throw new Error(`Failed to delay step "${name}" for job "${this.#jobId}" action "${this.#actionName}"`);
299
528
  }
300
529
  }
530
+ else {
531
+ if (isTimeoutError(error)) {
532
+ ;
533
+ error.nonRetriable = true;
534
+ throw error;
535
+ }
536
+ const errorMessage = error instanceof Error ? error.message : String(error);
537
+ const err = new NonRetriableError(errorMessage, { cause: error });
538
+ err.setMetadata({
539
+ stepId: step?.id,
540
+ parentStepId,
541
+ jobId: this.#jobId,
542
+ actionName: this.#actionName,
543
+ });
544
+ throw err;
545
+ }
301
546
  },
302
547
  }).catch(async (error) => {
303
548
  if (step) {
549
+ // End step span with error/cancelled status
304
550
  const stepSpan = this.#stepSpans.get(step.id);
305
551
  if (stepSpan) {
306
552
  if (isCancelError(error)) {
307
- await this.#telemetry.endStepSpan(stepSpan, { status: 'cancelled' });
553
+ stepSpan.setStatus({ code: SpanStatusCode.ERROR, message: 'Step cancelled' });
308
554
  }
309
555
  else {
310
- await this.#telemetry.endStepSpan(stepSpan, { status: 'error', error });
556
+ stepSpan.setStatus({
557
+ code: SpanStatusCode.ERROR,
558
+ message: error instanceof Error ? error.message : String(error),
559
+ });
560
+ if (error instanceof Error) {
561
+ stepSpan.recordException(error);
562
+ }
311
563
  }
564
+ stepSpan.end();
312
565
  this.#stepSpans.delete(step.id);
313
566
  }
314
567
  if (isCancelError(error)) {
@@ -321,7 +574,26 @@ export class StepManager {
321
574
  throw error;
322
575
  });
323
576
  }
577
+ /**
578
+ * Clear the history of nested steps for a given step.
579
+ * We don't need to clear the history for the root step because it's not a parent step; it's the action itself.
580
+ * @param stepId - The ID of the step to clear the history for
581
+ */
582
+ #clearHistoryForStep(stepId) {
583
+ this.#historySteps.forEach((stepKey) => {
584
+ if (stepKey.startsWith(stepId)) {
585
+ this.#historySteps.delete(stepKey);
586
+ }
587
+ });
588
+ }
324
589
  }
590
+ // ============================================================================
591
+ // ActionContext Class
592
+ // ============================================================================
593
+ /**
594
+ * ActionContext provides the context for action handlers.
595
+ * It implements ActionHandlerContext and provides access to input, variables, logger, and the step function.
596
+ */
325
597
  class ActionContext {
326
598
  #stepManager;
327
599
  #variables;
@@ -331,8 +603,11 @@ class ActionContext {
331
603
  #jobId;
332
604
  #groupKey = '@default';
333
605
  #action;
334
- #observeContext;
335
- constructor(stepManager, job, action, variables, abortSignal, logger, observeContext) {
606
+ #telemetryContext;
607
+ // ============================================================================
608
+ // Constructor
609
+ // ============================================================================
610
+ constructor(stepManager, job, action, variables, abortSignal, logger, telemetryContext) {
336
611
  this.#stepManager = stepManager;
337
612
  this.#variables = variables;
338
613
  this.#abortSignal = abortSignal;
@@ -340,7 +615,7 @@ class ActionContext {
340
615
  this.#action = action;
341
616
  this.#jobId = job.id;
342
617
  this.#groupKey = job.groupKey ?? '@default';
343
- this.#observeContext = observeContext;
618
+ this.#telemetryContext = telemetryContext;
344
619
  if (action.input) {
345
620
  this.#input = action.input.parse(job.input, {
346
621
  error: () => 'Error parsing action input',
@@ -350,28 +625,63 @@ class ActionContext {
350
625
  this.#input = job.input ?? {};
351
626
  this.step = this.step.bind(this);
352
627
  this.run = this.run.bind(this);
628
+ // Set the run function factory so inline steps can call step definitions with correct parent
353
629
  this.#stepManager.setRunFnFactory((parentStepId, abortSignal) => {
354
630
  return (stepDef, input, options) => this.#runInternal(stepDef, input, options, parentStepId, abortSignal);
355
631
  });
356
632
  }
633
+ // ============================================================================
634
+ // Public API Methods
635
+ // ============================================================================
636
+ /**
637
+ * Get the input data for this action.
638
+ */
357
639
  get input() {
358
640
  return this.#input;
359
641
  }
642
+ /**
643
+ * Get the job ID for this action context.
644
+ *
645
+ * @returns The job ID
646
+ */
360
647
  get jobId() {
361
648
  return this.#jobId;
362
649
  }
650
+ /**
651
+ * Get the group key for this action context.
652
+ *
653
+ * @returns The group key
654
+ */
363
655
  get groupKey() {
364
656
  return this.#groupKey;
365
657
  }
658
+ /**
659
+ * Get the variables available to this action.
660
+ */
366
661
  get var() {
367
662
  return this.#variables;
368
663
  }
664
+ /**
665
+ * Get the logger for this action job.
666
+ */
369
667
  get logger() {
370
668
  return this.#logger;
371
669
  }
372
- get observe() {
373
- return this.#observeContext;
670
+ /**
671
+ * Get the telemetry context for recording metrics and span data.
672
+ */
673
+ get telemetry() {
674
+ return this.#telemetryContext;
374
675
  }
676
+ /**
677
+ * Execute a step within the action.
678
+ * This creates a root step (no parent).
679
+ *
680
+ * @param name - The name of the step
681
+ * @param cb - The step handler function
682
+ * @param options - Optional step options (will be merged with defaults)
683
+ * @returns Promise resolving to the step result
684
+ */
375
685
  async step(name, cb, options = {}) {
376
686
  const parsedOptions = StepOptionsSchema.parse({
377
687
  ...this.#action.steps,
@@ -382,21 +692,46 @@ class ActionContext {
382
692
  cb,
383
693
  options: parsedOptions,
384
694
  abortSignal: this.#abortSignal,
385
- parentStepId: null,
386
- parallel: parsedOptions.parallel,
695
+ parentStepId: null, // Root steps have no parent
696
+ parallel: parsedOptions.parallel, // Pass parallel option
387
697
  });
388
698
  }
699
+ /**
700
+ * Execute a reusable step definition created with createStep().
701
+ * This is the public method called from action handlers.
702
+ *
703
+ * @param stepDef - The step definition to execute
704
+ * @param input - The input data for the step (validated against the step's input schema)
705
+ * @param options - Optional step configuration overrides
706
+ * @returns Promise resolving to the step result
707
+ */
389
708
  async run(stepDef, input, options = {}) {
390
709
  return this.#runInternal(stepDef, input, options, null, this.#abortSignal);
391
710
  }
711
+ /**
712
+ * Internal method to execute a step definition with explicit parent step ID and abort signal.
713
+ * Used by both the public run method and the run functions passed to step contexts.
714
+ */
392
715
  async #runInternal(stepDef, input, options = {}, parentStepId, abortSignal) {
716
+ // Validate input against the step's schema if provided
717
+ // After parsing, validatedInput is z.output<TStepInput> (same as z.infer<TStepInput>)
393
718
  const validatedInput = stepDef.input
394
719
  ? stepDef.input.parse(input, {
395
720
  error: () => 'Error parsing step input',
396
721
  reportInput: true,
397
722
  })
398
723
  : input;
399
- const stepName = typeof stepDef.name === 'function' ? stepDef.name({ input: validatedInput }) : stepDef.name;
724
+ // Resolve step name (static or dynamic)
725
+ // If it's a function, pass the full context including input, variables, jobId, and parentStepId
726
+ const stepName = typeof stepDef.name === 'function'
727
+ ? stepDef.name({
728
+ input: validatedInput,
729
+ var: this.#variables,
730
+ jobId: this.#jobId,
731
+ parentStepId,
732
+ })
733
+ : stepDef.name;
734
+ // Merge options: action defaults -> step definition -> call-time overrides
400
735
  const mergedOptions = {
401
736
  ...this.#action.steps,
402
737
  ...(stepDef.retry !== undefined && { retry: stepDef.retry }),
@@ -405,6 +740,7 @@ class ActionContext {
405
740
  ...options,
406
741
  };
407
742
  const parsedOptions = StepOptionsSchema.parse(mergedOptions);
743
+ // Create a wrapper callback that provides the extended context
408
744
  const wrappedCb = async (baseCtx) => {
409
745
  const extendedCtx = {
410
746
  ...baseCtx,