duron 0.3.0-beta.8 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/action-job.d.ts +33 -2
- package/dist/action-job.d.ts.map +1 -1
- package/dist/action-job.js +93 -26
- package/dist/action-manager.d.ts +44 -2
- package/dist/action-manager.d.ts.map +1 -1
- package/dist/action-manager.js +64 -3
- package/dist/action.d.ts +388 -7
- package/dist/action.d.ts.map +1 -1
- package/dist/action.js +44 -23
- package/dist/adapters/adapter.d.ts +365 -8
- package/dist/adapters/adapter.d.ts.map +1 -1
- package/dist/adapters/adapter.js +221 -15
- package/dist/adapters/postgres/base.d.ts +184 -6
- package/dist/adapters/postgres/base.d.ts.map +1 -1
- package/dist/adapters/postgres/base.js +436 -75
- package/dist/adapters/postgres/pglite.d.ts +37 -0
- package/dist/adapters/postgres/pglite.d.ts.map +1 -1
- package/dist/adapters/postgres/pglite.js +38 -0
- package/dist/adapters/postgres/postgres.d.ts +35 -0
- package/dist/adapters/postgres/postgres.d.ts.map +1 -1
- package/dist/adapters/postgres/postgres.js +42 -0
- package/dist/adapters/postgres/schema.d.ts +150 -37
- package/dist/adapters/postgres/schema.d.ts.map +1 -1
- package/dist/adapters/postgres/schema.default.d.ts +151 -38
- package/dist/adapters/postgres/schema.default.d.ts.map +1 -1
- package/dist/adapters/postgres/schema.default.js +2 -2
- package/dist/adapters/postgres/schema.js +60 -23
- package/dist/adapters/schemas.d.ts +124 -80
- package/dist/adapters/schemas.d.ts.map +1 -1
- package/dist/adapters/schemas.js +139 -26
- package/dist/client.d.ts +426 -22
- package/dist/client.d.ts.map +1 -1
- package/dist/client.js +370 -20
- package/dist/constants.js +6 -0
- package/dist/errors.d.ts +166 -9
- package/dist/errors.d.ts.map +1 -1
- package/dist/errors.js +189 -19
- package/dist/index.d.ts +2 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/server.d.ts +99 -37
- package/dist/server.d.ts.map +1 -1
- package/dist/server.js +84 -25
- package/dist/step-manager.d.ts +111 -4
- package/dist/step-manager.d.ts.map +1 -1
- package/dist/step-manager.js +411 -75
- package/dist/telemetry/index.d.ts +1 -4
- package/dist/telemetry/index.d.ts.map +1 -1
- package/dist/telemetry/index.js +2 -4
- package/dist/telemetry/local-span-exporter.d.ts +56 -0
- package/dist/telemetry/local-span-exporter.d.ts.map +1 -0
- package/dist/telemetry/local-span-exporter.js +118 -0
- package/dist/utils/p-retry.d.ts +5 -0
- package/dist/utils/p-retry.d.ts.map +1 -1
- package/dist/utils/p-retry.js +8 -0
- package/dist/utils/wait-for-abort.d.ts +1 -0
- package/dist/utils/wait-for-abort.d.ts.map +1 -1
- package/dist/utils/wait-for-abort.js +1 -0
- package/migrations/postgres/{20260119153838_flimsy_thor_girl → 20260121160012_normal_bloodstrike}/migration.sql +32 -20
- package/migrations/postgres/{20260119153838_flimsy_thor_girl → 20260121160012_normal_bloodstrike}/snapshot.json +241 -66
- package/package.json +42 -26
- package/src/action-job.ts +43 -32
- package/src/action-manager.ts +5 -5
- package/src/action.ts +317 -149
- package/src/adapters/adapter.ts +54 -54
- package/src/adapters/postgres/base.ts +266 -86
- package/src/adapters/postgres/schema.default.ts +2 -2
- package/src/adapters/postgres/schema.ts +52 -24
- package/src/adapters/schemas.ts +91 -36
- package/src/client.ts +322 -68
- package/src/errors.ts +141 -30
- package/src/index.ts +2 -0
- package/src/server.ts +39 -37
- package/src/step-manager.ts +254 -91
- package/src/telemetry/index.ts +2 -20
- package/src/telemetry/local-span-exporter.ts +148 -0
- package/dist/telemetry/adapter.d.ts +0 -107
- package/dist/telemetry/adapter.d.ts.map +0 -1
- package/dist/telemetry/adapter.js +0 -134
- package/dist/telemetry/local.d.ts +0 -22
- package/dist/telemetry/local.d.ts.map +0 -1
- package/dist/telemetry/local.js +0 -243
- package/dist/telemetry/noop.d.ts +0 -17
- package/dist/telemetry/noop.d.ts.map +0 -1
- package/dist/telemetry/noop.js +0 -66
- package/dist/telemetry/opentelemetry.d.ts +0 -25
- package/dist/telemetry/opentelemetry.d.ts.map +0 -1
- package/dist/telemetry/opentelemetry.js +0 -312
- package/src/telemetry/adapter.ts +0 -642
- package/src/telemetry/local.ts +0 -429
- package/src/telemetry/noop.ts +0 -141
- package/src/telemetry/opentelemetry.ts +0 -453
package/dist/step-manager.js
CHANGED
|
@@ -1,39 +1,118 @@
|
|
|
1
|
+
import { context, SpanKind, SpanStatusCode, trace, } from '@opentelemetry/api';
|
|
1
2
|
import fastq from 'fastq';
|
|
2
3
|
import { StepOptionsSchema, } from './action.js';
|
|
3
4
|
import { STEP_STATUS_CANCELLED, STEP_STATUS_COMPLETED, STEP_STATUS_FAILED } from './constants.js';
|
|
4
|
-
import { ActionCancelError, isCancelError, isNonRetriableError, NonRetriableError, StepAlreadyExecutedError, StepTimeoutError, serializeError, UnhandledChildStepsError, } from './errors.js';
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
const
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
5
|
+
import { ActionCancelError, isCancelError, isNonRetriableError, isTimeoutError, NonRetriableError, StepAlreadyExecutedError, StepTimeoutError, serializeError, UnhandledChildStepsError, } from './errors.js';
|
|
6
|
+
/**
 * Attach a parent span to a context when one is available.
 *
 * @param ctx - The context to extend
 * @param parentSpan - The span to install on the context; may be null or undefined
 * @returns `ctx` itself when `parentSpan` is falsy, otherwise the value of `trace.setSpan(ctx, parentSpan)`
 */
function injectParentSpan(ctx, parentSpan) {
    // Nothing to attach: hand the context back untouched.
    if (!parentSpan) {
        return ctx;
    }
    return trace.setSpan(ctx, parentSpan);
}
|
|
12
|
+
/**
 * Wrap a tracer so that every span it starts is parented to `parentSpan`.
 *
 * The returned object exposes `startSpan` and `startActiveSpan`. Both take the
 * context supplied by the caller (or `context.active()` when none is supplied),
 * install `parentSpan` on it via `injectParentSpan`, and delegate to `tracer`.
 * The parent span is injected even when the caller passes its own context, so
 * spans started by external libraries through this wrapper stay attached to the
 * current job/step span.
 *
 * @param tracer - The underlying tracer that actually creates spans
 * @param parentSpan - The span to use as parent; when falsy the context is passed through unchanged
 * @returns An object with `startSpan` and `startActiveSpan` methods
 */
function createContextAwareTracer(tracer, parentSpan) {
    // Resolve the context a new span must be started in.
    const resolveContext = (ctx) => injectParentSpan(ctx ?? context.active(), parentSpan);
    return {
        startSpan(name, options, ctx) {
            return tracer.startSpan(name, options, resolveContext(ctx));
        },
        // startActiveSpan is overloaded; each supported call shape is handled below.
        startActiveSpan(name, optionsOrFn, ctxOrFn, fn) {
            // startActiveSpan(name, fn)
            if (typeof optionsOrFn === 'function') {
                return tracer.startActiveSpan(name, {}, resolveContext(undefined), optionsOrFn);
            }
            // startActiveSpan(name, options, fn)
            if (typeof ctxOrFn === 'function') {
                return tracer.startActiveSpan(name, optionsOrFn ?? {}, resolveContext(undefined), ctxOrFn);
            }
            // startActiveSpan(name, options, context, fn)
            return tracer.startActiveSpan(name, optionsOrFn ?? {}, resolveContext(ctxOrFn), fn);
        },
    };
}
|
|
55
|
+
/**
 * Create a telemetry context bound to a span and a tracer.
 *
 * @param span - The span this context reports to (job or step span); may be null or undefined
 * @param tracer - The tracer used to start child spans
 * @returns An object exposing `getActiveSpan`, `getTracer`, `startSpan` and `recordMetric`
 */
function createTelemetryContext(span, tracer) {
    return {
        /** Return the bound span, or `undefined` when there is none. */
        getActiveSpan() {
            return span ?? undefined;
        },
        /** Return a tracer wrapper whose spans are parented to the bound span. The name argument is ignored. */
        getTracer(_name) {
            return createContextAwareTracer(tracer, span);
        },
        /** Start a child span of the bound span; only `options.attributes` is forwarded to the tracer. */
        startSpan(name, options) {
            const activeContext = context.active();
            const parentContext = span ? trace.setSpan(activeContext, span) : activeContext;
            return tracer.startSpan(name, { attributes: options?.attributes }, parentContext);
        },
        /** Record a metric as a `metric:<name>` event on the bound span; a no-op when there is no span. */
        recordMetric(name, value, attributes) {
            if (!span) {
                return;
            }
            // Spread caller attributes first so a caller-supplied 'metric.value' key
            // can never overwrite the value actually being recorded.
            span.addEvent(`metric:${name}`, {
                ...attributes,
                'metric.value': value,
            });
        },
    };
}
|
|
30
82
|
import pRetry from './utils/p-retry.js';
|
|
31
83
|
import waitForAbort from './utils/wait-for-abort.js';
|
|
84
|
+
/**
|
|
85
|
+
* StepStore manages step records in the database.
|
|
86
|
+
* Provides methods to create, update, and delay steps.
|
|
87
|
+
*/
|
|
32
88
|
export class StepStore {
|
|
33
89
|
#adapter;
|
|
90
|
+
// ============================================================================
|
|
91
|
+
// Constructor
|
|
92
|
+
// ============================================================================
|
|
93
|
+
/**
|
|
94
|
+
* Create a new StepStore instance.
|
|
95
|
+
*
|
|
96
|
+
* @param adapter - The database adapter to use for step operations
|
|
97
|
+
*/
|
|
34
98
|
constructor(adapter) {
|
|
35
99
|
this.#adapter = adapter;
|
|
36
100
|
}
|
|
101
|
+
// ============================================================================
|
|
102
|
+
// Public API Methods
|
|
103
|
+
// ============================================================================
|
|
104
|
+
/**
|
|
105
|
+
* Get or create a step record in the database.
|
|
106
|
+
*
|
|
107
|
+
* @param jobId - The ID of the job this step belongs to
|
|
108
|
+
* @param name - The name of the step
|
|
109
|
+
* @param timeoutMs - Timeout in milliseconds for the step
|
|
110
|
+
* @param retriesLimit - Maximum number of retries for the step
|
|
111
|
+
* @param parentStepId - The ID of the parent step (null for root steps)
|
|
112
|
+
* @param parallel - Whether this step runs in parallel (independent from siblings during time travel)
|
|
113
|
+
* @returns Promise resolving to the created step ID
|
|
114
|
+
* @throws Error if step creation fails
|
|
115
|
+
*/
|
|
37
116
|
async getOrCreate(jobId, name, timeoutMs, retriesLimit, parentStepId = null, parallel = false) {
|
|
38
117
|
try {
|
|
39
118
|
return await this.#adapter.createOrRecoverJobStep({
|
|
@@ -49,6 +128,15 @@ export class StepStore {
|
|
|
49
128
|
throw new NonRetriableError(`Failed to get or create step "${name}" for job "${jobId}"`, { cause: error });
|
|
50
129
|
}
|
|
51
130
|
}
|
|
131
|
+
/**
|
|
132
|
+
* Update the status of a step in the database.
|
|
133
|
+
*
|
|
134
|
+
* @param stepId - The ID of the step to update
|
|
135
|
+
* @param status - The new status (completed, failed, or cancelled)
|
|
136
|
+
* @param output - Optional output data for completed steps
|
|
137
|
+
* @param error - Optional error data for failed steps
|
|
138
|
+
* @returns Promise resolving to `true` if update succeeded, `false` otherwise
|
|
139
|
+
*/
|
|
52
140
|
async updateStatus(stepId, status, output, error) {
|
|
53
141
|
if (status === STEP_STATUS_COMPLETED) {
|
|
54
142
|
return this.#adapter.completeJobStep({ stepId, output });
|
|
@@ -61,28 +149,54 @@ export class StepStore {
|
|
|
61
149
|
}
|
|
62
150
|
return false;
|
|
63
151
|
}
|
|
152
|
+
/**
|
|
153
|
+
* Delay a step execution.
|
|
154
|
+
* Used when a step fails and needs to be retried after a delay.
|
|
155
|
+
*
|
|
156
|
+
* @param stepId - The ID of the step to delay
|
|
157
|
+
* @param delayMs - The delay in milliseconds before retrying
|
|
158
|
+
* @param error - The error that caused the delay
|
|
159
|
+
* @returns Promise resolving to `true` if delayed successfully, `false` otherwise
|
|
160
|
+
*/
|
|
64
161
|
async delay(stepId, delayMs, error) {
|
|
65
162
|
return this.#adapter.delayJobStep({ stepId, delayMs, error });
|
|
66
163
|
}
|
|
67
164
|
}
|
|
165
|
+
/**
|
|
166
|
+
* StepManager manages steps for a single ActionJob.
|
|
167
|
+
* Each ActionJob has its own StepManager instance.
|
|
168
|
+
*/
|
|
68
169
|
export class StepManager {
|
|
69
170
|
#jobId;
|
|
70
171
|
#actionName;
|
|
71
172
|
#stepStore;
|
|
72
|
-
#
|
|
173
|
+
#tracer;
|
|
73
174
|
#queue;
|
|
74
175
|
#logger;
|
|
176
|
+
// each step name should be executed only once per parent (name + parentStepId)
|
|
75
177
|
#historySteps = new Set();
|
|
178
|
+
// Store step spans for nested step tracking
|
|
76
179
|
#stepSpans = new Map();
|
|
77
|
-
|
|
180
|
+
// Store the job span for creating step spans
|
|
181
|
+
#jobSpan;
|
|
182
|
+
// Factory function to create run functions with the correct parent step ID and abort signal
|
|
78
183
|
#runFnFactory = null;
|
|
184
|
+
// ============================================================================
|
|
185
|
+
// Constructor
|
|
186
|
+
// ============================================================================
|
|
187
|
+
/**
|
|
188
|
+
* Create a new StepManager instance.
|
|
189
|
+
*
|
|
190
|
+
* @param options - Configuration options for the step manager
|
|
191
|
+
*/
|
|
79
192
|
constructor(options) {
|
|
80
193
|
this.#jobId = options.jobId;
|
|
81
194
|
this.#actionName = options.actionName;
|
|
82
195
|
this.#logger = options.logger;
|
|
83
|
-
this.#
|
|
196
|
+
this.#tracer = options.tracer;
|
|
84
197
|
this.#stepStore = new StepStore(options.adapter);
|
|
85
198
|
this.#queue = fastq.promise(async (task) => {
|
|
199
|
+
// Create composite key: name + parentStepId (allows same name under different parents)
|
|
86
200
|
const stepKey = `${task.parentStepId ?? 'root'}:${task.name}`;
|
|
87
201
|
if (this.#historySteps.has(stepKey)) {
|
|
88
202
|
throw new StepAlreadyExecutedError(task.name, this.#jobId, this.#actionName);
|
|
@@ -91,36 +205,59 @@ export class StepManager {
|
|
|
91
205
|
return this.#executeStep(task.name, task.cb, task.options, task.abortSignal, task.parentStepId, task.parallel);
|
|
92
206
|
}, options.concurrencyLimit);
|
|
93
207
|
}
|
|
208
|
+
/**
|
|
209
|
+
* Set the job span for this step manager.
|
|
210
|
+
* Called from ActionJob after the job span is created.
|
|
211
|
+
*/
|
|
94
212
|
setJobSpan(span) {
|
|
95
213
|
this.#jobSpan = span;
|
|
96
214
|
}
|
|
215
|
+
/**
|
|
216
|
+
* Set the run function factory for executing step definitions from inline steps.
|
|
217
|
+
* Called from ActionContext after it's initialized.
|
|
218
|
+
*
|
|
219
|
+
* @param factory - A function that creates run functions with the correct parent step ID and abort signal
|
|
220
|
+
*/
|
|
97
221
|
setRunFnFactory(factory) {
|
|
98
222
|
this.#runFnFactory = factory;
|
|
99
223
|
}
|
|
100
|
-
|
|
101
|
-
|
|
224
|
+
// ============================================================================
|
|
225
|
+
// Public API Methods
|
|
226
|
+
// ============================================================================
|
|
227
|
+
/**
|
|
228
|
+
* Create an ActionContext for the action handler.
|
|
229
|
+
* The context provides access to input, variables, logger, and the step function.
|
|
230
|
+
*
|
|
231
|
+
* @param job - The job data including ID, input, and optional group key
|
|
232
|
+
* @param action - The action definition
|
|
233
|
+
* @param variables - Variables available to the action
|
|
234
|
+
* @param abortSignal - Abort signal for cancelling the action
|
|
235
|
+
* @param logger - Pino child logger for this job
|
|
236
|
+
* @returns ActionHandlerContext instance
|
|
237
|
+
*/
|
|
238
|
+
createActionContext(job, action, variables, abortSignal, logger) {
|
|
239
|
+
const telemetryContext = createTelemetryContext(this.#jobSpan, this.#tracer);
|
|
240
|
+
return new ActionContext(this, job, action, variables, abortSignal, logger, telemetryContext);
|
|
102
241
|
}
|
|
103
|
-
|
|
242
|
+
/**
|
|
243
|
+
* Create a telemetry context for a step.
|
|
244
|
+
*/
|
|
245
|
+
createStepTelemetryContext(stepId) {
|
|
104
246
|
const stepSpan = this.#stepSpans.get(stepId);
|
|
105
247
|
if (stepSpan) {
|
|
106
|
-
return
|
|
248
|
+
return createTelemetryContext(stepSpan, this.#tracer);
|
|
107
249
|
}
|
|
108
|
-
if
|
|
109
|
-
|
|
110
|
-
}
|
|
111
|
-
return {
|
|
112
|
-
recordMetric: () => {
|
|
113
|
-
},
|
|
114
|
-
addSpanAttribute: () => {
|
|
115
|
-
},
|
|
116
|
-
addSpanEvent: () => {
|
|
117
|
-
},
|
|
118
|
-
getTracer: (name) => {
|
|
119
|
-
return createNoopTracer(name);
|
|
120
|
-
},
|
|
121
|
-
};
|
|
250
|
+
// Fallback to job span if step span not found
|
|
251
|
+
return createTelemetryContext(this.#jobSpan, this.#tracer);
|
|
122
252
|
}
|
|
253
|
+
/**
|
|
254
|
+
* Queue a step task for execution.
|
|
255
|
+
*
|
|
256
|
+
* @param task - The step task to queue
|
|
257
|
+
* @returns Promise resolving to the step result
|
|
258
|
+
*/
|
|
123
259
|
async push(task) {
|
|
260
|
+
// Warn about potential starvation when child steps are queued and all slots are occupied
|
|
124
261
|
if (task.parentStepId !== null && this.#queue.running() >= this.#queue.concurrency) {
|
|
125
262
|
this.#logger.warn({
|
|
126
263
|
jobId: this.#jobId,
|
|
@@ -134,9 +271,28 @@ export class StepManager {
|
|
|
134
271
|
}
|
|
135
272
|
return this.#queue.push(task);
|
|
136
273
|
}
|
|
274
|
+
/**
|
|
275
|
+
* Clean up step queues by waiting for them to drain.
|
|
276
|
+
* Should be called when the job completes or is cancelled.
|
|
277
|
+
*/
|
|
137
278
|
async drain() {
|
|
138
279
|
await this.#queue.drain();
|
|
139
280
|
}
|
|
281
|
+
/**
|
|
282
|
+
* Execute a step with retry logic and timeout handling.
|
|
283
|
+
* Creates a step record, queues the execution, and handles errors appropriately.
|
|
284
|
+
*
|
|
285
|
+
* @param name - The name of the step
|
|
286
|
+
* @param cb - The step handler function
|
|
287
|
+
* @param options - Step options including concurrency, retry, and expire settings
|
|
288
|
+
* @param abortSignal - Abort signal for cancelling the step
|
|
289
|
+
* @param parentStepId - The ID of the parent step (null for root steps)
|
|
290
|
+
* @returns Promise resolving to the step result
|
|
291
|
+
* @throws StepTimeoutError if the step times out
|
|
292
|
+
* @throws StepCancelError if the step is cancelled
|
|
293
|
+
* @throws UnhandledChildStepsError if child steps are not awaited
|
|
294
|
+
* @throws Error if the step fails
|
|
295
|
+
*/
|
|
140
296
|
async #executeStep(name, cb, options, abortSignal, parentStepId, parallel) {
|
|
141
297
|
const expire = options.expire;
|
|
142
298
|
const retryOptions = options.retry;
|
|
@@ -146,24 +302,30 @@ export class StepManager {
|
|
|
146
302
|
if (abortSignal.aborted) {
|
|
147
303
|
throw new ActionCancelError(this.#actionName, this.#jobId, { cause: 'step cancelled before create step' });
|
|
148
304
|
}
|
|
305
|
+
// Create step record with parentStepId and parallel
|
|
149
306
|
const newStep = await this.#stepStore.getOrCreate(this.#jobId, name, expire, retryOptions.limit, parentStepId, parallel);
|
|
150
307
|
if (!newStep) {
|
|
151
308
|
throw new NonRetriableError(`Failed to create step "${name}" for job "${this.#jobId}" action "${this.#actionName}"`, { cause: 'step not created' });
|
|
152
309
|
}
|
|
153
310
|
step = newStep;
|
|
311
|
+
// Start step span - uses no-op tracer if no SDK is configured
|
|
154
312
|
const parentSpan = parentStepId ? this.#stepSpans.get(parentStepId) : this.#jobSpan;
|
|
155
|
-
const
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
313
|
+
const parentContext = parentSpan ? trace.setSpan(context.active(), parentSpan) : context.active();
|
|
314
|
+
const stepSpan = this.#tracer.startSpan(`step:${name}`, {
|
|
315
|
+
kind: SpanKind.INTERNAL,
|
|
316
|
+
attributes: {
|
|
317
|
+
'duron.job.id': this.#jobId,
|
|
318
|
+
'duron.step.id': step.id,
|
|
319
|
+
'duron.step.name': name,
|
|
320
|
+
'duron.step.parent_id': parentStepId ?? undefined,
|
|
321
|
+
},
|
|
322
|
+
}, parentContext);
|
|
162
323
|
this.#stepSpans.set(step.id, stepSpan);
|
|
163
324
|
if (abortSignal.aborted) {
|
|
164
325
|
throw new ActionCancelError(this.#actionName, this.#jobId, { cause: 'step cancelled after create step' });
|
|
165
326
|
}
|
|
166
327
|
if (step.status === STEP_STATUS_COMPLETED) {
|
|
328
|
+
// this is how we recover a completed step
|
|
167
329
|
this.#logger.debug({ jobId: this.#jobId, actionName: this.#actionName, stepName: name, stepId: step.id, parentStepId }, '[StepManager] Step recovered (already completed)');
|
|
168
330
|
return step.output;
|
|
169
331
|
}
|
|
@@ -175,59 +337,81 @@ export class StepManager {
|
|
|
175
337
|
else if (step.status === STEP_STATUS_CANCELLED) {
|
|
176
338
|
throw new NonRetriableError(`Cannot recover a cancelled step "${name}" for job "${this.#jobId}" action "${this.#actionName}"`, { cause: step.error });
|
|
177
339
|
}
|
|
340
|
+
// Log step start
|
|
178
341
|
this.#logger.debug({ jobId: this.#jobId, actionName: this.#actionName, stepName: name, stepId: step.id, parentStepId }, '[StepManager] Step started executing');
|
|
179
342
|
}
|
|
343
|
+
// Create abort controller for this step's timeout
|
|
180
344
|
const stepAbortController = new AbortController();
|
|
181
345
|
const timeoutId = setTimeout(() => {
|
|
182
|
-
const timeoutError = new StepTimeoutError(name, this.#jobId, expire
|
|
346
|
+
const timeoutError = new StepTimeoutError(name, this.#jobId, expire, {
|
|
347
|
+
stepId: step?.id,
|
|
348
|
+
parentStepId,
|
|
349
|
+
actionName: this.#actionName,
|
|
350
|
+
});
|
|
183
351
|
stepAbortController.abort(timeoutError);
|
|
184
352
|
}, expire);
|
|
185
353
|
timeoutId?.unref?.();
|
|
354
|
+
// Combine abort signals: parent chain + this step's timeout
|
|
186
355
|
const stepSignal = AbortSignal.any([abortSignal, stepAbortController.signal]);
|
|
187
356
|
const childSteps = [];
|
|
357
|
+
// Create abort controller for child steps (used when parent returns with pending children)
|
|
188
358
|
const childAbortController = new AbortController();
|
|
189
359
|
const childSignal = AbortSignal.any([stepSignal, childAbortController.signal]);
|
|
190
|
-
|
|
360
|
+
// Create telemetry context for this step
|
|
361
|
+
const stepTelemetryContext = this.createStepTelemetryContext(step.id);
|
|
362
|
+
// Create StepHandlerContext with nested step support
|
|
191
363
|
const stepContext = {
|
|
192
364
|
signal: stepSignal,
|
|
193
365
|
stepId: step.id,
|
|
194
366
|
parentStepId,
|
|
195
|
-
|
|
367
|
+
telemetry: stepTelemetryContext,
|
|
196
368
|
step: (childName, childCb, childOptions = {}) => {
|
|
369
|
+
// Inherit parent step options EXCEPT parallel (each step's parallel status is independent)
|
|
197
370
|
const { parallel: _parentParallel, ...inheritableOptions } = options;
|
|
198
371
|
const parsedChildOptions = StepOptionsSchema.parse({
|
|
199
372
|
...inheritableOptions,
|
|
200
373
|
...childOptions,
|
|
201
374
|
});
|
|
375
|
+
// Push child step with this step as parent
|
|
202
376
|
const childPromise = this.push({
|
|
203
377
|
name: childName,
|
|
204
378
|
cb: childCb,
|
|
205
379
|
options: parsedChildOptions,
|
|
206
|
-
abortSignal: childSignal,
|
|
207
|
-
parentStepId: step.id,
|
|
208
|
-
parallel: parsedChildOptions.parallel,
|
|
380
|
+
abortSignal: childSignal, // Child uses composed signal
|
|
381
|
+
parentStepId: step.id, // This step is the parent
|
|
382
|
+
parallel: parsedChildOptions.parallel, // Pass parallel option
|
|
209
383
|
});
|
|
384
|
+
// Track the child promise
|
|
210
385
|
const trackedChild = {
|
|
211
386
|
promise: childPromise,
|
|
212
387
|
settled: false,
|
|
213
388
|
};
|
|
214
389
|
childSteps.push(trackedChild);
|
|
390
|
+
// Mark as settled when done (success or failure)
|
|
391
|
+
// Use .then/.catch instead of .finally to properly handle rejections
|
|
215
392
|
childPromise
|
|
216
393
|
.then(() => {
|
|
217
394
|
trackedChild.settled = true;
|
|
218
395
|
})
|
|
219
396
|
.catch(() => {
|
|
220
397
|
trackedChild.settled = true;
|
|
398
|
+
// Swallow the error here - it will be re-thrown to the caller via the returned promise
|
|
399
|
+
// Note: sibling steps will be aborted when the error propagates to the action level
|
|
221
400
|
});
|
|
222
401
|
return childPromise;
|
|
223
402
|
},
|
|
224
403
|
run: this.#runFnFactory(step.id, childSignal),
|
|
225
404
|
};
|
|
226
405
|
try {
|
|
406
|
+
// Race between abort signal and callback execution
|
|
227
407
|
const abortPromise = waitForAbort(stepSignal);
|
|
228
|
-
|
|
408
|
+
// Execute callback within the span context so that child spans inherit the trace
|
|
409
|
+
const currentStepSpan = step?.id ? this.#stepSpans.get(step.id) : undefined;
|
|
410
|
+
const spanContext = currentStepSpan ? trace.setSpan(context.active(), currentStepSpan) : context.active();
|
|
411
|
+
const callbackPromise = context.with(spanContext, () => cb(stepContext));
|
|
229
412
|
let result = null;
|
|
230
413
|
let aborted = false;
|
|
414
|
+
let callbackError = null;
|
|
231
415
|
await Promise.race([
|
|
232
416
|
abortPromise.promise.then(() => {
|
|
233
417
|
aborted = true;
|
|
@@ -237,17 +421,34 @@ export class StepManager {
|
|
|
237
421
|
if (res !== undefined && res !== null) {
|
|
238
422
|
result = res;
|
|
239
423
|
}
|
|
424
|
+
})
|
|
425
|
+
.catch((err) => {
|
|
426
|
+
callbackError = err;
|
|
240
427
|
})
|
|
241
428
|
.finally(() => {
|
|
242
429
|
abortPromise.release();
|
|
243
430
|
}),
|
|
244
431
|
]);
|
|
432
|
+
// If callback threw an error, abort children and wait for them before re-throwing
|
|
433
|
+
if (callbackError) {
|
|
434
|
+
if (childSteps.length > 0) {
|
|
435
|
+
// Abort all children with the callback error as reason
|
|
436
|
+
childAbortController.abort(callbackError);
|
|
437
|
+
// Wait for all children to settle
|
|
438
|
+
await Promise.allSettled(childSteps.map((c) => c.promise));
|
|
439
|
+
}
|
|
440
|
+
throw callbackError;
|
|
441
|
+
}
|
|
442
|
+
// If aborted, wait for child steps to settle before propagating
|
|
245
443
|
if (aborted) {
|
|
444
|
+
// Wait for all child steps to settle (they'll be aborted via signal propagation)
|
|
246
445
|
if (childSteps.length > 0) {
|
|
247
446
|
await Promise.allSettled(childSteps.map((c) => c.promise));
|
|
248
447
|
}
|
|
448
|
+
// Re-throw the abort reason
|
|
249
449
|
throw stepSignal.reason;
|
|
250
450
|
}
|
|
451
|
+
// After parent callback returns, check for pending children
|
|
251
452
|
const unsettledChildren = childSteps.filter((c) => !c.settled);
|
|
252
453
|
if (unsettledChildren.length > 0) {
|
|
253
454
|
this.#logger.warn({
|
|
@@ -257,20 +458,32 @@ export class StepManager {
|
|
|
257
458
|
stepId: step.id,
|
|
258
459
|
pendingCount: unsettledChildren.length,
|
|
259
460
|
}, '[StepManager] Parent step completed with unhandled child steps - aborting children');
|
|
260
|
-
|
|
461
|
+
// Abort all pending children
|
|
462
|
+
const unhandledError = new UnhandledChildStepsError(name, unsettledChildren.length, {
|
|
463
|
+
stepId: step.id,
|
|
464
|
+
parentStepId,
|
|
465
|
+
jobId: this.#jobId,
|
|
466
|
+
actionName: this.#actionName,
|
|
467
|
+
});
|
|
261
468
|
childAbortController.abort(unhandledError);
|
|
469
|
+
// Wait for all children to settle (they'll reject with cancellation)
|
|
262
470
|
await Promise.allSettled(unsettledChildren.map((c) => c.promise));
|
|
471
|
+
// Now throw the error
|
|
263
472
|
throw unhandledError;
|
|
264
473
|
}
|
|
474
|
+
// Update step as completed
|
|
265
475
|
const completed = await this.#stepStore.updateStatus(step.id, 'completed', result);
|
|
266
476
|
if (!completed) {
|
|
267
477
|
throw new Error(`Failed to complete step "${name}" for job "${this.#jobId}" action "${this.#actionName}"`);
|
|
268
478
|
}
|
|
479
|
+
// End step span successfully
|
|
269
480
|
const stepSpan = this.#stepSpans.get(step.id);
|
|
270
481
|
if (stepSpan) {
|
|
271
|
-
|
|
482
|
+
stepSpan.setStatus({ code: SpanStatusCode.OK });
|
|
483
|
+
stepSpan.end();
|
|
272
484
|
this.#stepSpans.delete(step.id);
|
|
273
485
|
}
|
|
486
|
+
// Log step completion
|
|
274
487
|
this.#logger.debug({ jobId: this.#jobId, actionName: this.#actionName, stepName: name, stepId: step.id }, '[StepManager] Step finished executing');
|
|
275
488
|
return result;
|
|
276
489
|
}
|
|
@@ -278,6 +491,7 @@ export class StepManager {
|
|
|
278
491
|
clearTimeout(timeoutId);
|
|
279
492
|
}
|
|
280
493
|
};
|
|
494
|
+
// Apply retry logic - skip retries for NonRetriableError
|
|
281
495
|
return pRetry(executeStep, {
|
|
282
496
|
retries: retryOptions.limit,
|
|
283
497
|
factor: retryOptions.factor,
|
|
@@ -287,28 +501,67 @@ export class StepManager {
|
|
|
287
501
|
maxTimeout: retryOptions.maxTimeout,
|
|
288
502
|
onFailedAttempt: async (ctx) => {
|
|
289
503
|
const error = ctx.error;
|
|
504
|
+
// Don't retry if error is non-retriable
|
|
290
505
|
if (isNonRetriableError(error) ||
|
|
291
506
|
(error.cause && isNonRetriableError(error.cause)) ||
|
|
292
|
-
(error instanceof Error && error.name === 'AbortError'
|
|
293
|
-
|
|
507
|
+
(error instanceof Error && error.name === 'AbortError')) {
|
|
508
|
+
const err = isNonRetriableError(error)
|
|
509
|
+
? error
|
|
510
|
+
: error instanceof Error && error.name === 'AbortError'
|
|
511
|
+
? new NonRetriableError(error.message, { cause: error.cause })
|
|
512
|
+
: error.cause;
|
|
513
|
+
if (Object.keys(err.metadata).length === 0) {
|
|
514
|
+
err.setMetadata({
|
|
515
|
+
stepId: step?.id,
|
|
516
|
+
parentStepId,
|
|
517
|
+
jobId: this.#jobId,
|
|
518
|
+
actionName: this.#actionName,
|
|
519
|
+
});
|
|
520
|
+
}
|
|
521
|
+
throw err;
|
|
294
522
|
}
|
|
295
523
|
if (ctx.retriesLeft > 0 && step) {
|
|
524
|
+
this.#clearHistoryForStep(step.id);
|
|
296
525
|
const delayed = await this.#stepStore.delay(step.id, ctx.finalDelay, serializeError(error));
|
|
297
526
|
if (!delayed) {
|
|
298
527
|
throw new Error(`Failed to delay step "${name}" for job "${this.#jobId}" action "${this.#actionName}"`);
|
|
299
528
|
}
|
|
300
529
|
}
|
|
530
|
+
else {
|
|
531
|
+
if (isTimeoutError(error)) {
|
|
532
|
+
;
|
|
533
|
+
error.nonRetriable = true;
|
|
534
|
+
throw error;
|
|
535
|
+
}
|
|
536
|
+
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
537
|
+
const err = new NonRetriableError(errorMessage, { cause: error });
|
|
538
|
+
err.setMetadata({
|
|
539
|
+
stepId: step?.id,
|
|
540
|
+
parentStepId,
|
|
541
|
+
jobId: this.#jobId,
|
|
542
|
+
actionName: this.#actionName,
|
|
543
|
+
});
|
|
544
|
+
throw err;
|
|
545
|
+
}
|
|
301
546
|
},
|
|
302
547
|
}).catch(async (error) => {
|
|
303
548
|
if (step) {
|
|
549
|
+
// End step span with error/cancelled status
|
|
304
550
|
const stepSpan = this.#stepSpans.get(step.id);
|
|
305
551
|
if (stepSpan) {
|
|
306
552
|
if (isCancelError(error)) {
|
|
307
|
-
|
|
553
|
+
stepSpan.setStatus({ code: SpanStatusCode.ERROR, message: 'Step cancelled' });
|
|
308
554
|
}
|
|
309
555
|
else {
|
|
310
|
-
|
|
556
|
+
stepSpan.setStatus({
|
|
557
|
+
code: SpanStatusCode.ERROR,
|
|
558
|
+
message: error instanceof Error ? error.message : String(error),
|
|
559
|
+
});
|
|
560
|
+
if (error instanceof Error) {
|
|
561
|
+
stepSpan.recordException(error);
|
|
562
|
+
}
|
|
311
563
|
}
|
|
564
|
+
stepSpan.end();
|
|
312
565
|
this.#stepSpans.delete(step.id);
|
|
313
566
|
}
|
|
314
567
|
if (isCancelError(error)) {
|
|
@@ -321,7 +574,26 @@ export class StepManager {
|
|
|
321
574
|
throw error;
|
|
322
575
|
});
|
|
323
576
|
}
|
|
577
|
+
/**
|
|
578
|
+
* Clear the history of nested steps for a given step.
|
|
579
|
+
* We don't need to clear the history for the root step because it's not a parent step, it's the action itself.
|
|
580
|
+
* @param stepId - The ID of the step to clear the history for
|
|
581
|
+
*/
|
|
582
|
+
#clearHistoryForStep(stepId) {
|
|
583
|
+
this.#historySteps.forEach((stepKey) => {
|
|
584
|
+
if (stepKey.startsWith(stepId)) {
|
|
585
|
+
this.#historySteps.delete(stepKey);
|
|
586
|
+
}
|
|
587
|
+
});
|
|
588
|
+
}
|
|
324
589
|
}
|
|
590
|
+
// ============================================================================
|
|
591
|
+
// ActionContext Class
|
|
592
|
+
// ============================================================================
|
|
593
|
+
/**
|
|
594
|
+
* ActionContext provides the context for action handlers.
|
|
595
|
+
* It implements ActionHandlerContext and provides access to input, variables, logger, and the step function.
|
|
596
|
+
*/
|
|
325
597
|
class ActionContext {
|
|
326
598
|
#stepManager;
|
|
327
599
|
#variables;
|
|
@@ -331,8 +603,11 @@ class ActionContext {
|
|
|
331
603
|
#jobId;
|
|
332
604
|
#groupKey = '@default';
|
|
333
605
|
#action;
|
|
334
|
-
#
|
|
335
|
-
|
|
606
|
+
#telemetryContext;
|
|
607
|
+
// ============================================================================
|
|
608
|
+
// Constructor
|
|
609
|
+
// ============================================================================
|
|
610
|
+
constructor(stepManager, job, action, variables, abortSignal, logger, telemetryContext) {
|
|
336
611
|
this.#stepManager = stepManager;
|
|
337
612
|
this.#variables = variables;
|
|
338
613
|
this.#abortSignal = abortSignal;
|
|
@@ -340,7 +615,7 @@ class ActionContext {
|
|
|
340
615
|
this.#action = action;
|
|
341
616
|
this.#jobId = job.id;
|
|
342
617
|
this.#groupKey = job.groupKey ?? '@default';
|
|
343
|
-
this.#
|
|
618
|
+
this.#telemetryContext = telemetryContext;
|
|
344
619
|
if (action.input) {
|
|
345
620
|
this.#input = action.input.parse(job.input, {
|
|
346
621
|
error: () => 'Error parsing action input',
|
|
@@ -350,28 +625,63 @@ class ActionContext {
|
|
|
350
625
|
this.#input = job.input ?? {};
|
|
351
626
|
this.step = this.step.bind(this);
|
|
352
627
|
this.run = this.run.bind(this);
|
|
628
|
+
// Set the run function factory so inline steps can call step definitions with correct parent
|
|
353
629
|
this.#stepManager.setRunFnFactory((parentStepId, abortSignal) => {
|
|
354
630
|
return (stepDef, input, options) => this.#runInternal(stepDef, input, options, parentStepId, abortSignal);
|
|
355
631
|
});
|
|
356
632
|
}
|
|
633
|
+
// ============================================================================
|
|
634
|
+
// Public API Methods
|
|
635
|
+
// ============================================================================
|
|
636
|
+
/**
|
|
637
|
+
* Get the input data for this action.
|
|
638
|
+
*/
|
|
357
639
|
get input() {
|
|
358
640
|
return this.#input;
|
|
359
641
|
}
|
|
642
|
+
/**
|
|
643
|
+
* Get the job ID for this action context.
|
|
644
|
+
*
|
|
645
|
+
* @returns The job ID
|
|
646
|
+
*/
|
|
360
647
|
get jobId() {
|
|
361
648
|
return this.#jobId;
|
|
362
649
|
}
|
|
650
|
+
/**
|
|
651
|
+
* Get the group key for this action context.
|
|
652
|
+
*
|
|
653
|
+
* @returns The group key
|
|
654
|
+
*/
|
|
363
655
|
get groupKey() {
|
|
364
656
|
return this.#groupKey;
|
|
365
657
|
}
|
|
658
|
+
/**
|
|
659
|
+
* Get the variables available to this action.
|
|
660
|
+
*/
|
|
366
661
|
get var() {
|
|
367
662
|
return this.#variables;
|
|
368
663
|
}
|
|
664
|
+
/**
|
|
665
|
+
* Get the logger for this action job.
|
|
666
|
+
*/
|
|
369
667
|
get logger() {
|
|
370
668
|
return this.#logger;
|
|
371
669
|
}
|
|
372
|
-
|
|
373
|
-
|
|
670
|
+
/**
|
|
671
|
+
* Get the telemetry context for recording metrics and span data.
|
|
672
|
+
*/
|
|
673
|
+
get telemetry() {
|
|
674
|
+
return this.#telemetryContext;
|
|
374
675
|
}
|
|
676
|
+
/**
|
|
677
|
+
* Execute a step within the action.
|
|
678
|
+
* This creates a root step (no parent).
|
|
679
|
+
*
|
|
680
|
+
* @param name - The name of the step
|
|
681
|
+
* @param cb - The step handler function
|
|
682
|
+
* @param options - Optional step options (will be merged with defaults)
|
|
683
|
+
* @returns Promise resolving to the step result
|
|
684
|
+
*/
|
|
375
685
|
async step(name, cb, options = {}) {
|
|
376
686
|
const parsedOptions = StepOptionsSchema.parse({
|
|
377
687
|
...this.#action.steps,
|
|
@@ -382,21 +692,46 @@ class ActionContext {
|
|
|
382
692
|
cb,
|
|
383
693
|
options: parsedOptions,
|
|
384
694
|
abortSignal: this.#abortSignal,
|
|
385
|
-
parentStepId: null,
|
|
386
|
-
parallel: parsedOptions.parallel,
|
|
695
|
+
parentStepId: null, // Root steps have no parent
|
|
696
|
+
parallel: parsedOptions.parallel, // Pass parallel option
|
|
387
697
|
});
|
|
388
698
|
}
|
|
699
|
+
/**
|
|
700
|
+
* Execute a reusable step definition created with createStep().
|
|
701
|
+
* This is the public method called from action handlers.
|
|
702
|
+
*
|
|
703
|
+
* @param stepDef - The step definition to execute
|
|
704
|
+
* @param input - The input data for the step (validated against the step's input schema)
|
|
705
|
+
* @param options - Optional step configuration overrides
|
|
706
|
+
* @returns Promise resolving to the step result
|
|
707
|
+
*/
|
|
389
708
|
async run(stepDef, input, options = {}) {
|
|
390
709
|
return this.#runInternal(stepDef, input, options, null, this.#abortSignal);
|
|
391
710
|
}
|
|
711
|
+
/**
|
|
712
|
+
* Internal method to execute a step definition with explicit parent step ID and abort signal.
|
|
713
|
+
* Used by both the public run method and the run functions passed to step contexts.
|
|
714
|
+
*/
|
|
392
715
|
async #runInternal(stepDef, input, options = {}, parentStepId, abortSignal) {
|
|
716
|
+
// Validate input against the step's schema if provided
|
|
717
|
+
// After parsing, validatedInput is z.output<TStepInput> (same as z.infer<TStepInput>)
|
|
393
718
|
const validatedInput = stepDef.input
|
|
394
719
|
? stepDef.input.parse(input, {
|
|
395
720
|
error: () => 'Error parsing step input',
|
|
396
721
|
reportInput: true,
|
|
397
722
|
})
|
|
398
723
|
: input;
|
|
399
|
-
|
|
724
|
+
// Resolve step name (static or dynamic)
|
|
725
|
+
// If it's a function, pass the full context including input, variables, jobId, and parentStepId
|
|
726
|
+
const stepName = typeof stepDef.name === 'function'
|
|
727
|
+
? stepDef.name({
|
|
728
|
+
input: validatedInput,
|
|
729
|
+
var: this.#variables,
|
|
730
|
+
jobId: this.#jobId,
|
|
731
|
+
parentStepId,
|
|
732
|
+
})
|
|
733
|
+
: stepDef.name;
|
|
734
|
+
// Merge options: action defaults -> step definition -> call-time overrides
|
|
400
735
|
const mergedOptions = {
|
|
401
736
|
...this.#action.steps,
|
|
402
737
|
...(stepDef.retry !== undefined && { retry: stepDef.retry }),
|
|
@@ -405,6 +740,7 @@ class ActionContext {
|
|
|
405
740
|
...options,
|
|
406
741
|
};
|
|
407
742
|
const parsedOptions = StepOptionsSchema.parse(mergedOptions);
|
|
743
|
+
// Create a wrapper callback that provides the extended context
|
|
408
744
|
const wrappedCb = async (baseCtx) => {
|
|
409
745
|
const extendedCtx = {
|
|
410
746
|
...baseCtx,
|