@workflow/core 4.0.1-beta.27 → 4.0.1-beta.28
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/runtime/helpers.d.ts +25 -0
- package/dist/runtime/helpers.d.ts.map +1 -0
- package/dist/runtime/helpers.js +71 -0
- package/dist/runtime/step-handler.d.ts +7 -0
- package/dist/runtime/step-handler.d.ts.map +1 -0
- package/dist/runtime/step-handler.js +335 -0
- package/dist/runtime/suspension-handler.d.ts +20 -0
- package/dist/runtime/suspension-handler.d.ts.map +1 -0
- package/dist/runtime/suspension-handler.js +167 -0
- package/dist/runtime.d.ts +1 -6
- package/dist/runtime.d.ts.map +1 -1
- package/dist/runtime.js +18 -514
- package/dist/serialization.d.ts.map +1 -1
- package/dist/serialization.js +17 -6
- package/dist/symbols.d.ts +1 -0
- package/dist/symbols.d.ts.map +1 -1
- package/dist/symbols.js +2 -1
- package/dist/telemetry/semantic-conventions.d.ts +10 -2
- package/dist/telemetry/semantic-conventions.d.ts.map +1 -1
- package/dist/telemetry/semantic-conventions.js +5 -1
- package/dist/workflow.d.ts.map +1 -1
- package/dist/workflow.js +4 -2
- package/package.json +4 -4
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import type { Event, World } from '@workflow/world';
|
|
2
|
+
/**
|
|
3
|
+
* Loads all workflow run events by iterating through all pages of paginated results.
|
|
4
|
+
* This ensures that *all* events are loaded into memory before running the workflow.
|
|
5
|
+
* Events must be in chronological order (ascending) for proper workflow replay.
|
|
6
|
+
*/
|
|
7
|
+
export declare function getAllWorkflowRunEvents(runId: string): Promise<Event[]>;
|
|
8
|
+
/**
|
|
9
|
+
* Wraps a request/response handler and adds a health check "mode"
|
|
10
|
+
* based on the presence of a `__health` query parameter.
|
|
11
|
+
*/
|
|
12
|
+
export declare function withHealthCheck(handler: (req: Request) => Promise<Response>): (req: Request) => Promise<Response>;
|
|
13
|
+
/**
|
|
14
|
+
* Queues a message to the specified queue with tracing.
|
|
15
|
+
*/
|
|
16
|
+
export declare function queueMessage(world: World, ...args: Parameters<typeof world.queue>): Promise<void>;
|
|
17
|
+
/**
|
|
18
|
+
* Calculates the queue overhead time in milliseconds for a given message.
|
|
19
|
+
*/
|
|
20
|
+
export declare function getQueueOverhead(message: {
|
|
21
|
+
requestedAt?: Date;
|
|
22
|
+
}): {
|
|
23
|
+
[k: string]: number;
|
|
24
|
+
} | undefined;
|
|
25
|
+
//# sourceMappingURL=helpers.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"helpers.d.ts","sourceRoot":"","sources":["../../src/runtime/helpers.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,MAAM,iBAAiB,CAAC;AAKpD;;;;GAIG;AACH,wBAAsB,uBAAuB,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,KAAK,EAAE,CAAC,CAwB7E;AAED;;;GAGG;AACH,wBAAgB,eAAe,CAC7B,OAAO,EAAE,CAAC,GAAG,EAAE,OAAO,KAAK,OAAO,CAAC,QAAQ,CAAC,GAC3C,CAAC,GAAG,EAAE,OAAO,KAAK,OAAO,CAAC,QAAQ,CAAC,CAYrC;AAED;;GAEG;AACH,wBAAsB,YAAY,CAChC,KAAK,EAAE,KAAK,EACZ,GAAG,IAAI,EAAE,UAAU,CAAC,OAAO,KAAK,CAAC,KAAK,CAAC,iBAcxC;AAED;;GAEG;AACH,wBAAgB,gBAAgB,CAAC,OAAO,EAAE;IAAE,WAAW,CAAC,EAAE,IAAI,CAAA;CAAE;;cAS/D"}
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
import * as Attribute from '../telemetry/semantic-conventions.js';
|
|
2
|
+
import { getSpanKind, trace } from '../telemetry.js';
|
|
3
|
+
import { getWorld } from './world.js';
|
|
4
|
+
/**
|
|
5
|
+
* Loads all workflow run events by iterating through all pages of paginated results.
|
|
6
|
+
* This ensures that *all* events are loaded into memory before running the workflow.
|
|
7
|
+
* Events must be in chronological order (ascending) for proper workflow replay.
|
|
8
|
+
*/
|
|
9
|
+
export async function getAllWorkflowRunEvents(runId) {
|
|
10
|
+
const allEvents = [];
|
|
11
|
+
let cursor = null;
|
|
12
|
+
let hasMore = true;
|
|
13
|
+
const world = getWorld();
|
|
14
|
+
while (hasMore) {
|
|
15
|
+
// TODO: we're currently loading all the data with resolveRef behaviour. We need to update this
|
|
16
|
+
// to lazy-load the data from the world instead so that we can optimize and make the event log loading
|
|
17
|
+
// much faster and memory efficient
|
|
18
|
+
const response = await world.events.list({
|
|
19
|
+
runId,
|
|
20
|
+
pagination: {
|
|
21
|
+
sortOrder: 'asc', // Required: events must be in chronological order for replay
|
|
22
|
+
cursor: cursor ?? undefined,
|
|
23
|
+
},
|
|
24
|
+
});
|
|
25
|
+
allEvents.push(...response.data);
|
|
26
|
+
hasMore = response.hasMore;
|
|
27
|
+
cursor = response.cursor;
|
|
28
|
+
}
|
|
29
|
+
return allEvents;
|
|
30
|
+
}
|
|
31
|
+
/**
|
|
32
|
+
* Wraps a request/response handler and adds a health check "mode"
|
|
33
|
+
* based on the presence of a `__health` query parameter.
|
|
34
|
+
*/
|
|
35
|
+
export function withHealthCheck(handler) {
|
|
36
|
+
return async (req) => {
|
|
37
|
+
const url = new URL(req.url);
|
|
38
|
+
const isHealthCheck = url.searchParams.has('__health');
|
|
39
|
+
if (isHealthCheck) {
|
|
40
|
+
return new Response(`Workflow DevKit "${url.pathname}" endpoint is healthy`, { status: 200, headers: { 'Content-Type': 'text/plain' } });
|
|
41
|
+
}
|
|
42
|
+
return await handler(req);
|
|
43
|
+
};
|
|
44
|
+
}
|
|
45
|
+
/**
|
|
46
|
+
* Queues a message to the specified queue with tracing.
|
|
47
|
+
*/
|
|
48
|
+
export async function queueMessage(world, ...args) {
|
|
49
|
+
const queueName = args[0];
|
|
50
|
+
await trace('queueMessage', {
|
|
51
|
+
attributes: Attribute.QueueName(queueName),
|
|
52
|
+
kind: await getSpanKind('PRODUCER'),
|
|
53
|
+
}, async (span) => {
|
|
54
|
+
const { messageId } = await world.queue(...args);
|
|
55
|
+
span?.setAttributes(Attribute.QueueMessageId(messageId));
|
|
56
|
+
});
|
|
57
|
+
}
|
|
58
|
+
/**
|
|
59
|
+
* Calculates the queue overhead time in milliseconds for a given message.
|
|
60
|
+
*/
|
|
61
|
+
export function getQueueOverhead(message) {
|
|
62
|
+
if (!message.requestedAt)
|
|
63
|
+
return;
|
|
64
|
+
try {
|
|
65
|
+
return Attribute.QueueOverheadMs(Date.now() - message.requestedAt.getTime());
|
|
66
|
+
}
|
|
67
|
+
catch {
|
|
68
|
+
return;
|
|
69
|
+
}
|
|
70
|
+
}
|
|
71
|
+
//# sourceMappingURL=data:application/json;base64,eyJ2ZXJzaW9uIjozLCJmaWxlIjoiaGVscGVycy5qcyIsInNvdXJjZVJvb3QiOiIiLCJzb3VyY2VzIjpbIi4uLy4uL3NyYy9ydW50aW1lL2hlbHBlcnMudHMiXSwibmFtZXMiOltdLCJtYXBwaW5ncyI6IkFBQ0EsT0FBTyxLQUFLLFNBQVMsTUFBTSxzQ0FBc0MsQ0FBQztBQUNsRSxPQUFPLEVBQUUsV0FBVyxFQUFFLEtBQUssRUFBRSxNQUFNLGlCQUFpQixDQUFDO0FBQ3JELE9BQU8sRUFBRSxRQUFRLEVBQUUsTUFBTSxZQUFZLENBQUM7QUFFdEM7Ozs7R0FJRztBQUNILE1BQU0sQ0FBQyxLQUFLLFVBQVUsdUJBQXVCLENBQUMsS0FBYTtJQUN6RCxNQUFNLFNBQVMsR0FBWSxFQUFFLENBQUM7SUFDOUIsSUFBSSxNQUFNLEdBQWtCLElBQUksQ0FBQztJQUNqQyxJQUFJLE9BQU8sR0FBRyxJQUFJLENBQUM7SUFFbkIsTUFBTSxLQUFLLEdBQUcsUUFBUSxFQUFFLENBQUM7SUFDekIsT0FBTyxPQUFPLEVBQUUsQ0FBQztRQUNmLCtGQUErRjtRQUMvRixxR0FBcUc7UUFDckcsbUNBQW1DO1FBQ25DLE1BQU0sUUFBUSxHQUFHLE1BQU0sS0FBSyxDQUFDLE1BQU0sQ0FBQyxJQUFJLENBQUM7WUFDdkMsS0FBSztZQUNMLFVBQVUsRUFBRTtnQkFDVixTQUFTLEVBQUUsS0FBSyxFQUFFLDZEQUE2RDtnQkFDL0UsTUFBTSxFQUFFLE1BQU0sSUFBSSxTQUFTO2FBQzVCO1NBQ0YsQ0FBQyxDQUFDO1FBRUgsU0FBUyxDQUFDLElBQUksQ0FBQyxHQUFHLFFBQVEsQ0FBQyxJQUFJLENBQUMsQ0FBQztRQUNqQyxPQUFPLEdBQUcsUUFBUSxDQUFDLE9BQU8sQ0FBQztRQUMzQixNQUFNLEdBQUcsUUFBUSxDQUFDLE1BQU0sQ0FBQztJQUMzQixDQUFDO0lBRUQsT0FBTyxTQUFTLENBQUM7QUFDbkIsQ0FBQztBQUVEOzs7R0FHRztBQUNILE1BQU0sVUFBVSxlQUFlLENBQzdCLE9BQTRDO0lBRTVDLE9BQU8sS0FBSyxFQUFFLEdBQVksRUFBRSxFQUFFO1FBQzVCLE1BQU0sR0FBRyxHQUFHLElBQUksR0FBRyxDQUFDLEdBQUcsQ0FBQyxHQUFHLENBQUMsQ0FBQztRQUM3QixNQUFNLGFBQWEsR0FBRyxHQUFHLENBQUMsWUFBWSxDQUFDLEdBQUcsQ0FBQyxVQUFVLENBQUMsQ0FBQztRQUN2RCxJQUFJLGFBQWEsRUFBRSxDQUFDO1lBQ2xCLE9BQU8sSUFBSSxRQUFRLENBQ2pCLG9CQUFvQixHQUFHLENBQUMsUUFBUSx1QkFBdUIsRUFDdkQsRUFBRSxNQUFNLEVBQUUsR0FBRyxFQUFFLE9BQU8sRUFBRSxFQUFFLGNBQWMsRUFBRSxZQUFZLEVBQUUsRUFBRSxDQUMzRCxDQUFDO1FBQ0osQ0FBQztRQUNELE9BQU8sTUFBTSxPQUFPLENBQUMsR0FBRyxDQUFDLENBQUM7SUFDNUIsQ0FBQyxDQUFDO0FBQ0osQ0FBQztBQUVEOztHQUVHO0FBQ0gsTUFBTSxDQUFDLEtBQUssVUFBVSxZQUFZLENBQ2hDLEtBQVksRUFDWixHQUFHLElBQW9DO0lBRXZDLE1BQU0sU0FBUyxHQUFHLElBQUksQ0FBQyxDQUFDLENBQUMsQ0FBQztJQUMxQixNQUFNLEtBQUssQ0FDVCxjQUFjLEVBQ2Q7UUFDRSxVQUFVLEVBQUUsU0FBUyxDQUFDLFNBQVMsQ0FBQyxTQU
FTLENBQUM7UUFDMUMsSUFBSSxFQUFFLE1BQU0sV0FBVyxDQUFDLFVBQVUsQ0FBQztLQUNwQyxFQUNELEtBQUssRUFBRSxJQUFJLEVBQUUsRUFBRTtRQUNiLE1BQU0sRUFBRSxTQUFTLEVBQUUsR0FBRyxNQUFNLEtBQUssQ0FBQyxLQUFLLENBQUMsR0FBRyxJQUFJLENBQUMsQ0FBQztRQUNqRCxJQUFJLEVBQUUsYUFBYSxDQUFDLFNBQVMsQ0FBQyxjQUFjLENBQUMsU0FBUyxDQUFDLENBQUMsQ0FBQztJQUMzRCxDQUFDLENBQ0YsQ0FBQztBQUNKLENBQUM7QUFFRDs7R0FFRztBQUNILE1BQU0sVUFBVSxnQkFBZ0IsQ0FBQyxPQUErQjtJQUM5RCxJQUFJLENBQUMsT0FBTyxDQUFDLFdBQVc7UUFBRSxPQUFPO0lBQ2pDLElBQUksQ0FBQztRQUNILE9BQU8sU0FBUyxDQUFDLGVBQWUsQ0FDOUIsSUFBSSxDQUFDLEdBQUcsRUFBRSxHQUFHLE9BQU8sQ0FBQyxXQUFXLENBQUMsT0FBTyxFQUFFLENBQzNDLENBQUM7SUFDSixDQUFDO0lBQUMsTUFBTSxDQUFDO1FBQ1AsT0FBTztJQUNULENBQUM7QUFDSCxDQUFDIn0=
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* A single route that handles any step execution request and routes to the
|
|
3
|
+
* appropriate step function. We may eventually want to create different bundles
|
|
4
|
+
* for each step, this is temporary.
|
|
5
|
+
*/
|
|
6
|
+
export declare const stepEntrypoint: (req: Request) => Promise<Response>;
|
|
7
|
+
//# sourceMappingURL=step-handler.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"step-handler.d.ts","sourceRoot":"","sources":["../../src/runtime/step-handler.ts"],"names":[],"mappings":"AAqaA;;;;GAIG;AACH,eAAO,MAAM,cAAc,EAAE,CAAC,GAAG,EAAE,OAAO,KAAK,OAAO,CAAC,QAAQ,CACjB,CAAC"}
|
|
@@ -0,0 +1,335 @@
|
|
|
1
|
+
import { waitUntil } from '@vercel/functions';
|
|
2
|
+
import { FatalError, RetryableError, WorkflowAPIError, WorkflowRuntimeError, } from '@workflow/errors';
|
|
3
|
+
import { getPort } from '@workflow/utils/get-port';
|
|
4
|
+
import { StepInvokePayloadSchema } from '@workflow/world';
|
|
5
|
+
import { runtimeLogger } from '../logger.js';
|
|
6
|
+
import { getStepFunction } from '../private.js';
|
|
7
|
+
import { dehydrateStepReturnValue, hydrateStepArguments, } from '../serialization.js';
|
|
8
|
+
import { contextStorage } from '../step/context-storage.js';
|
|
9
|
+
import * as Attribute from '../telemetry/semantic-conventions.js';
|
|
10
|
+
import { getSpanKind, linkToCurrentContext, serializeTraceCarrier, trace, withTraceContext, } from '../telemetry.js';
|
|
11
|
+
import { getErrorName, getErrorStack } from '../types.js';
|
|
12
|
+
import { getQueueOverhead, queueMessage, withHealthCheck } from './helpers.js';
|
|
13
|
+
import { getWorld, getWorldHandlers } from './world.js';
|
|
14
|
+
const DEFAULT_STEP_MAX_RETRIES = 3;
|
|
15
|
+
const stepHandler = getWorldHandlers().createQueueHandler('__wkf_step_', async (message_, metadata) => {
|
|
16
|
+
const { workflowName, workflowRunId, workflowStartedAt, stepId, traceCarrier: traceContext, requestedAt, } = StepInvokePayloadSchema.parse(message_);
|
|
17
|
+
const spanLinks = await linkToCurrentContext();
|
|
18
|
+
// Execute step within the propagated trace context
|
|
19
|
+
return await withTraceContext(traceContext, async () => {
|
|
20
|
+
// Extract the step name from the topic name
|
|
21
|
+
const stepName = metadata.queueName.slice('__wkf_step_'.length);
|
|
22
|
+
const world = getWorld();
|
|
23
|
+
// Get the port early to avoid async operations during step execution
|
|
24
|
+
const port = await getPort();
|
|
25
|
+
return trace(`STEP ${stepName}`, { kind: await getSpanKind('CONSUMER'), links: spanLinks }, async (span) => {
|
|
26
|
+
span?.setAttributes({
|
|
27
|
+
...Attribute.StepName(stepName),
|
|
28
|
+
...Attribute.StepAttempt(metadata.attempt),
|
|
29
|
+
...Attribute.QueueName(metadata.queueName),
|
|
30
|
+
...Attribute.QueueMessageId(metadata.messageId),
|
|
31
|
+
...getQueueOverhead({ requestedAt }),
|
|
32
|
+
});
|
|
33
|
+
const stepFn = getStepFunction(stepName);
|
|
34
|
+
if (!stepFn) {
|
|
35
|
+
throw new Error(`Step "${stepName}" not found`);
|
|
36
|
+
}
|
|
37
|
+
if (typeof stepFn !== 'function') {
|
|
38
|
+
throw new Error(`Step "${stepName}" is not a function (got ${typeof stepFn})`);
|
|
39
|
+
}
|
|
40
|
+
const maxRetries = stepFn.maxRetries ?? DEFAULT_STEP_MAX_RETRIES;
|
|
41
|
+
span?.setAttributes({
|
|
42
|
+
...Attribute.WorkflowName(workflowName),
|
|
43
|
+
...Attribute.WorkflowRunId(workflowRunId),
|
|
44
|
+
...Attribute.StepId(stepId),
|
|
45
|
+
...Attribute.StepMaxRetries(maxRetries),
|
|
46
|
+
...Attribute.StepTracePropagated(!!traceContext),
|
|
47
|
+
});
|
|
48
|
+
let step = await world.steps.get(workflowRunId, stepId);
|
|
49
|
+
runtimeLogger.debug('Step execution details', {
|
|
50
|
+
stepName,
|
|
51
|
+
stepId: step.stepId,
|
|
52
|
+
status: step.status,
|
|
53
|
+
attempt: step.attempt,
|
|
54
|
+
});
|
|
55
|
+
span?.setAttributes({
|
|
56
|
+
...Attribute.StepStatus(step.status),
|
|
57
|
+
});
|
|
58
|
+
// Check if the step has a `retryAfter` timestamp that hasn't been reached yet
|
|
59
|
+
const now = Date.now();
|
|
60
|
+
if (step.retryAfter && step.retryAfter.getTime() > now) {
|
|
61
|
+
const timeoutSeconds = Math.ceil((step.retryAfter.getTime() - now) / 1000);
|
|
62
|
+
span?.setAttributes({
|
|
63
|
+
...Attribute.StepRetryTimeoutSeconds(timeoutSeconds),
|
|
64
|
+
});
|
|
65
|
+
runtimeLogger.debug('Step retryAfter timestamp not yet reached', {
|
|
66
|
+
stepName,
|
|
67
|
+
stepId: step.stepId,
|
|
68
|
+
retryAfter: step.retryAfter,
|
|
69
|
+
timeoutSeconds,
|
|
70
|
+
});
|
|
71
|
+
return { timeoutSeconds };
|
|
72
|
+
}
|
|
73
|
+
let result;
|
|
74
|
+
const attempt = step.attempt + 1;
|
|
75
|
+
// Check max retries FIRST before any state changes.
|
|
76
|
+
// This handles edge cases where the step handler is invoked after max retries have been exceeded
|
|
77
|
+
// (e.g., when the step repeatedly times out or fails before reaching the `catch` handler further down).
|
|
78
|
+
// Without this check, the step would retry forever.
|
|
79
|
+
if (attempt > maxRetries) {
|
|
80
|
+
const errorMessage = `Step "${stepName}" exceeded max retries (${attempt} attempts)`;
|
|
81
|
+
console.error(`[Workflows] "${workflowRunId}" - ${errorMessage}`);
|
|
82
|
+
// Update step status first (idempotent), then create event
|
|
83
|
+
await world.steps.update(workflowRunId, stepId, {
|
|
84
|
+
status: 'failed',
|
|
85
|
+
error: {
|
|
86
|
+
message: errorMessage,
|
|
87
|
+
stack: undefined,
|
|
88
|
+
},
|
|
89
|
+
});
|
|
90
|
+
await world.events.create(workflowRunId, {
|
|
91
|
+
eventType: 'step_failed',
|
|
92
|
+
correlationId: stepId,
|
|
93
|
+
eventData: {
|
|
94
|
+
error: errorMessage,
|
|
95
|
+
fatal: true,
|
|
96
|
+
},
|
|
97
|
+
});
|
|
98
|
+
span?.setAttributes({
|
|
99
|
+
...Attribute.StepStatus('failed'),
|
|
100
|
+
...Attribute.StepRetryExhausted(true),
|
|
101
|
+
});
|
|
102
|
+
// Re-invoke the workflow to handle the failed step
|
|
103
|
+
await queueMessage(world, `__wkf_workflow_${workflowName}`, {
|
|
104
|
+
runId: workflowRunId,
|
|
105
|
+
traceCarrier: await serializeTraceCarrier(),
|
|
106
|
+
requestedAt: new Date(),
|
|
107
|
+
});
|
|
108
|
+
return;
|
|
109
|
+
}
|
|
110
|
+
try {
|
|
111
|
+
if (!['pending', 'running'].includes(step.status)) {
|
|
112
|
+
// We should only be running the step if it's either
|
|
113
|
+
// a) pending - initial state, or state set on re-try
|
|
114
|
+
// b) running - if a step fails mid-execution, like a function timeout
|
|
115
|
+
// otherwise, the step has been invoked erroneously
|
|
116
|
+
console.error(`[Workflows] "${workflowRunId}" - Step invoked erroneously, expected status "pending" or "running", got "${step.status}" instead, skipping execution`);
|
|
117
|
+
span?.setAttributes({
|
|
118
|
+
...Attribute.StepSkipped(true),
|
|
119
|
+
...Attribute.StepSkipReason(step.status),
|
|
120
|
+
});
|
|
121
|
+
// There's a chance that a step terminates correctly, but the underlying process
|
|
122
|
+
// fails or gets killed before the stepEntrypoint has a chance to re-enqueue the run.
|
|
123
|
+
// The queue lease then expires and stepEntrypoint is invoked again, which leads us here, so
|
|
124
|
+
// we optimistically re-enqueue the workflow if the step is in a terminal state,
|
|
125
|
+
// under the assumption that this edge case happened.
|
|
126
|
+
// Until we move to atomic entity/event updates (World V2), there _could_ be an edge case
|
|
127
|
+
// where we execute this code based on the `step` entity status, but the runtime
|
|
128
|
+
// failed to create the `step_completed` event (due to failing between step and event update),
|
|
129
|
+
// in which case, this might lead to an infinite loop.
|
|
130
|
+
// https://vercel.slack.com/archives/C09125LC4AX/p1765313809066679
|
|
131
|
+
const isTerminalStep = [
|
|
132
|
+
'completed',
|
|
133
|
+
'failed',
|
|
134
|
+
'cancelled',
|
|
135
|
+
].includes(step.status);
|
|
136
|
+
if (isTerminalStep) {
|
|
137
|
+
await queueMessage(world, `__wkf_workflow_${workflowName}`, {
|
|
138
|
+
runId: workflowRunId,
|
|
139
|
+
traceCarrier: await serializeTraceCarrier(),
|
|
140
|
+
requestedAt: new Date(),
|
|
141
|
+
});
|
|
142
|
+
}
|
|
143
|
+
return;
|
|
144
|
+
}
|
|
145
|
+
await world.events.create(workflowRunId, {
|
|
146
|
+
eventType: 'step_started', // TODO: Replace with 'step_retrying'
|
|
147
|
+
correlationId: stepId,
|
|
148
|
+
});
|
|
149
|
+
step = await world.steps.update(workflowRunId, stepId, {
|
|
150
|
+
attempt,
|
|
151
|
+
status: 'running',
|
|
152
|
+
});
|
|
153
|
+
if (!step.startedAt) {
|
|
154
|
+
throw new WorkflowRuntimeError(`Step "${stepId}" has no "startedAt" timestamp`);
|
|
155
|
+
}
|
|
156
|
+
// Hydrate the step input arguments and closure variables
|
|
157
|
+
const ops = [];
|
|
158
|
+
const hydratedInput = hydrateStepArguments(step.input, ops, workflowRunId);
|
|
159
|
+
const args = hydratedInput.args;
|
|
160
|
+
span?.setAttributes({
|
|
161
|
+
...Attribute.StepArgumentsCount(args.length),
|
|
162
|
+
});
|
|
163
|
+
result = await contextStorage.run({
|
|
164
|
+
stepMetadata: {
|
|
165
|
+
stepId,
|
|
166
|
+
stepStartedAt: new Date(+step.startedAt),
|
|
167
|
+
attempt,
|
|
168
|
+
},
|
|
169
|
+
workflowMetadata: {
|
|
170
|
+
workflowRunId,
|
|
171
|
+
workflowStartedAt: new Date(+workflowStartedAt),
|
|
172
|
+
// TODO: there should be a getUrl method on the world interface itself. This
|
|
173
|
+
// solution only works for vercel + local worlds.
|
|
174
|
+
url: process.env.VERCEL_URL
|
|
175
|
+
? `https://${process.env.VERCEL_URL}`
|
|
176
|
+
: `http://localhost:${port ?? 3000}`,
|
|
177
|
+
},
|
|
178
|
+
ops,
|
|
179
|
+
closureVars: hydratedInput.closureVars,
|
|
180
|
+
}, () => stepFn.apply(null, args));
|
|
181
|
+
// NOTE: None of the code from this point is guaranteed to run
|
|
182
|
+
// Since the step might fail or cause a function timeout and the process might be SIGKILL'd
|
|
183
|
+
// The workflow runtime must be resilient to the below code not executing on a failed step
|
|
184
|
+
result = dehydrateStepReturnValue(result, ops, workflowRunId);
|
|
185
|
+
waitUntil(Promise.all(ops).catch((err) => {
|
|
186
|
+
// Ignore expected client disconnect errors (e.g., browser refresh during streaming)
|
|
187
|
+
const isAbortError = err?.name === 'AbortError' || err?.name === 'ResponseAborted';
|
|
188
|
+
if (!isAbortError)
|
|
189
|
+
throw err;
|
|
190
|
+
}));
|
|
191
|
+
// Mark the step as completed first. This order is important. If a concurrent
|
|
192
|
+
// execution marked the step as complete, this request should throw, and
|
|
193
|
+
// this prevents the step_completed event from being written to the event log
|
|
194
|
+
// TODO: this should really be atomic and handled by the world
|
|
195
|
+
await world.steps.update(workflowRunId, stepId, {
|
|
196
|
+
status: 'completed',
|
|
197
|
+
output: result,
|
|
198
|
+
});
|
|
199
|
+
// Then, append the event log with the step result
|
|
200
|
+
await world.events.create(workflowRunId, {
|
|
201
|
+
eventType: 'step_completed',
|
|
202
|
+
correlationId: stepId,
|
|
203
|
+
eventData: {
|
|
204
|
+
result: result,
|
|
205
|
+
},
|
|
206
|
+
});
|
|
207
|
+
span?.setAttributes({
|
|
208
|
+
...Attribute.StepStatus('completed'),
|
|
209
|
+
...Attribute.StepResultType(typeof result),
|
|
210
|
+
});
|
|
211
|
+
}
|
|
212
|
+
catch (err) {
|
|
213
|
+
span?.setAttributes({
|
|
214
|
+
...Attribute.StepErrorName(getErrorName(err)),
|
|
215
|
+
...Attribute.StepErrorMessage(String(err)),
|
|
216
|
+
});
|
|
217
|
+
if (WorkflowAPIError.is(err)) {
|
|
218
|
+
if (err.status === 410) {
|
|
219
|
+
// Workflow has already completed, so no-op
|
|
220
|
+
console.warn(`Workflow run "${workflowRunId}" has already completed, skipping step "${stepId}": ${err.message}`);
|
|
221
|
+
return;
|
|
222
|
+
}
|
|
223
|
+
}
|
|
224
|
+
if (FatalError.is(err)) {
|
|
225
|
+
const errorStack = getErrorStack(err);
|
|
226
|
+
const stackLines = errorStack.split('\n').slice(0, 4);
|
|
227
|
+
console.error(`[Workflows] "${workflowRunId}" - Encountered \`FatalError\` while executing step "${stepName}":\n > ${stackLines.join('\n > ')}\n\nBubbling up error to parent workflow`);
|
|
228
|
+
// Fatal error - store the error in the event log and re-invoke the workflow
|
|
229
|
+
await world.events.create(workflowRunId, {
|
|
230
|
+
eventType: 'step_failed',
|
|
231
|
+
correlationId: stepId,
|
|
232
|
+
eventData: {
|
|
233
|
+
error: String(err),
|
|
234
|
+
stack: errorStack,
|
|
235
|
+
fatal: true,
|
|
236
|
+
},
|
|
237
|
+
});
|
|
238
|
+
await world.steps.update(workflowRunId, stepId, {
|
|
239
|
+
status: 'failed',
|
|
240
|
+
error: {
|
|
241
|
+
message: err.message || String(err),
|
|
242
|
+
stack: errorStack,
|
|
243
|
+
// TODO: include error codes when we define them
|
|
244
|
+
},
|
|
245
|
+
});
|
|
246
|
+
span?.setAttributes({
|
|
247
|
+
...Attribute.StepStatus('failed'),
|
|
248
|
+
...Attribute.StepFatalError(true),
|
|
249
|
+
});
|
|
250
|
+
}
|
|
251
|
+
else {
|
|
252
|
+
const maxRetries = stepFn.maxRetries ?? DEFAULT_STEP_MAX_RETRIES;
|
|
253
|
+
span?.setAttributes({
|
|
254
|
+
...Attribute.StepAttempt(attempt),
|
|
255
|
+
...Attribute.StepMaxRetries(maxRetries),
|
|
256
|
+
});
|
|
257
|
+
if (attempt > maxRetries) {
|
|
258
|
+
// Max retries reached
|
|
259
|
+
const errorStack = getErrorStack(err);
|
|
260
|
+
const stackLines = errorStack.split('\n').slice(0, 4);
|
|
261
|
+
console.error(`[Workflows] "${workflowRunId}" - Encountered \`Error\` while executing step "${stepName}" (attempt ${attempt}):\n > ${stackLines.join('\n > ')}\n\n Max retries reached\n Bubbling error to parent workflow`);
|
|
262
|
+
const errorMessage = `Step "${stepName}" failed after max retries: ${String(err)}`;
|
|
263
|
+
await world.events.create(workflowRunId, {
|
|
264
|
+
eventType: 'step_failed',
|
|
265
|
+
correlationId: stepId,
|
|
266
|
+
eventData: {
|
|
267
|
+
error: errorMessage,
|
|
268
|
+
stack: errorStack,
|
|
269
|
+
fatal: true,
|
|
270
|
+
},
|
|
271
|
+
});
|
|
272
|
+
await world.steps.update(workflowRunId, stepId, {
|
|
273
|
+
status: 'failed',
|
|
274
|
+
error: {
|
|
275
|
+
message: errorMessage,
|
|
276
|
+
stack: errorStack,
|
|
277
|
+
},
|
|
278
|
+
});
|
|
279
|
+
span?.setAttributes({
|
|
280
|
+
...Attribute.StepStatus('failed'),
|
|
281
|
+
...Attribute.StepRetryExhausted(true),
|
|
282
|
+
});
|
|
283
|
+
}
|
|
284
|
+
else {
|
|
285
|
+
// Not at max retries yet - log as a retryable error
|
|
286
|
+
if (RetryableError.is(err)) {
|
|
287
|
+
console.warn(`[Workflows] "${workflowRunId}" - Encountered \`RetryableError\` while executing step "${stepName}" (attempt ${attempt}):\n > ${String(err.message)}\n\n This step has failed but will be retried`);
|
|
288
|
+
}
|
|
289
|
+
else {
|
|
290
|
+
const stackLines = getErrorStack(err).split('\n').slice(0, 4);
|
|
291
|
+
console.error(`[Workflows] "${workflowRunId}" - Encountered \`Error\` while executing step "${stepName}" (attempt ${attempt}):\n > ${stackLines.join('\n > ')}\n\n This step has failed but will be retried`);
|
|
292
|
+
}
|
|
293
|
+
await world.events.create(workflowRunId, {
|
|
294
|
+
eventType: 'step_failed',
|
|
295
|
+
correlationId: stepId,
|
|
296
|
+
eventData: {
|
|
297
|
+
error: String(err),
|
|
298
|
+
stack: getErrorStack(err),
|
|
299
|
+
},
|
|
300
|
+
});
|
|
301
|
+
await world.steps.update(workflowRunId, stepId, {
|
|
302
|
+
status: 'pending', // TODO: Should be "retrying" once we have that status
|
|
303
|
+
...(RetryableError.is(err) && {
|
|
304
|
+
retryAfter: err.retryAfter,
|
|
305
|
+
}),
|
|
306
|
+
});
|
|
307
|
+
const timeoutSeconds = Math.max(1, RetryableError.is(err)
|
|
308
|
+
? Math.ceil((+err.retryAfter.getTime() - Date.now()) / 1000)
|
|
309
|
+
: 1);
|
|
310
|
+
span?.setAttributes({
|
|
311
|
+
...Attribute.StepRetryTimeoutSeconds(timeoutSeconds),
|
|
312
|
+
...Attribute.StepRetryWillRetry(true),
|
|
313
|
+
});
|
|
314
|
+
// It's a retryable error - so have the queue keep the message visible
|
|
315
|
+
// so that it gets retried.
|
|
316
|
+
return { timeoutSeconds };
|
|
317
|
+
}
|
|
318
|
+
}
|
|
319
|
+
}
|
|
320
|
+
await queueMessage(world, `__wkf_workflow_${workflowName}`, {
|
|
321
|
+
runId: workflowRunId,
|
|
322
|
+
traceCarrier: await serializeTraceCarrier(),
|
|
323
|
+
requestedAt: new Date(),
|
|
324
|
+
});
|
|
325
|
+
});
|
|
326
|
+
});
|
|
327
|
+
});
|
|
328
|
+
/**
|
|
329
|
+
* A single route that handles any step execution request and routes to the
|
|
330
|
+
* appropriate step function. We may eventually want to create different bundles
|
|
331
|
+
* for each step, this is temporary.
|
|
332
|
+
*/
|
|
333
|
+
export const stepEntrypoint =
|
|
334
|
+
/* @__PURE__ */ withHealthCheck(stepHandler);
|
|
335
|
+
//# sourceMappingURL=data:application/json;base64,
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import type { World } from '@workflow/world';
|
|
2
|
+
import type { WorkflowSuspension } from '../global.js';
|
|
3
|
+
import type { Span } from '@opentelemetry/api';
|
|
4
|
+
export interface SuspensionHandlerParams {
|
|
5
|
+
suspension: WorkflowSuspension;
|
|
6
|
+
world: World;
|
|
7
|
+
runId: string;
|
|
8
|
+
workflowName: string;
|
|
9
|
+
workflowStartedAt: number;
|
|
10
|
+
span?: Span;
|
|
11
|
+
}
|
|
12
|
+
export interface SuspensionHandlerResult {
|
|
13
|
+
timeoutSeconds?: number;
|
|
14
|
+
}
|
|
15
|
+
/**
|
|
16
|
+
* Handles a workflow suspension by processing all pending operations (hooks, steps, waits).
|
|
17
|
+
* Hooks are processed first to prevent race conditions, then steps and waits in parallel.
|
|
18
|
+
*/
|
|
19
|
+
export declare function handleSuspension({ suspension, world, runId, workflowName, workflowStartedAt, span, }: SuspensionHandlerParams): Promise<SuspensionHandlerResult>;
|
|
20
|
+
//# sourceMappingURL=suspension-handler.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"suspension-handler.d.ts","sourceRoot":"","sources":["../../src/runtime/suspension-handler.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,KAAK,EAAE,MAAM,iBAAiB,CAAC;AAC7C,OAAO,KAAK,EAIV,kBAAkB,EACnB,MAAM,cAAc,CAAC;AAKtB,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,oBAAoB,CAAC;AAG/C,MAAM,WAAW,uBAAuB;IACtC,UAAU,EAAE,kBAAkB,CAAC;IAC/B,KAAK,EAAE,KAAK,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,YAAY,EAAE,MAAM,CAAC;IACrB,iBAAiB,EAAE,MAAM,CAAC;IAC1B,IAAI,CAAC,EAAE,IAAI,CAAC;CACb;AAED,MAAM,WAAW,uBAAuB;IACtC,cAAc,CAAC,EAAE,MAAM,CAAC;CACzB;AA2KD;;;GAGG;AACH,wBAAsB,gBAAgB,CAAC,EACrC,UAAU,EACV,KAAK,EACL,KAAK,EACL,YAAY,EACZ,iBAAiB,EACjB,IAAI,GACL,EAAE,uBAAuB,GAAG,OAAO,CAAC,uBAAuB,CAAC,CAwE5D"}
|