@donkeylabs/server 2.0.20 → 2.0.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,593 @@
1
+ // Workflow State Machine
2
+ // Pure state machine that runs in any context (inline or subprocess).
3
+ // Communicates through an event callback interface - no knowledge of IPC, SSE, or process management.
4
+
5
+ import type { CoreServices } from "../core";
6
+ import type { Jobs } from "./jobs";
7
+ import type {
8
+ WorkflowAdapter,
9
+ WorkflowDefinition,
10
+ WorkflowInstance,
11
+ WorkflowContext,
12
+ StepDefinition,
13
+ TaskStepDefinition,
14
+ ParallelStepDefinition,
15
+ ChoiceStepDefinition,
16
+ PassStepDefinition,
17
+ StepResult,
18
+ RetryConfig,
19
+ } from "./workflows";
20
+
21
+ // ============================================
22
+ // Event Callback Interface
23
+ // ============================================
24
+
25
/**
 * Callbacks through which the state machine reports lifecycle changes.
 * Every callback is invoked synchronously by the state machine and its
 * return value is ignored.
 */
export interface StateMachineEvents {
  /** Called just before a step is executed; `stepType` is the step definition's `type`. */
  onStepStarted(instanceId: string, stepName: string, stepType: string): void;
  /**
   * Called after a step's completed result has been persisted.
   * `nextStep` is the chosen branch for a choice step, the step's `next` for
   * a non-terminal step, and `undefined` otherwise.
   */
  onStepCompleted(instanceId: string, stepName: string, output: any, nextStep?: string): void;
  /** Called when a step fails and will not be retried; `attempts` is the recorded attempt count. */
  onStepFailed(instanceId: string, stepName: string, error: string, attempts: number): void;
  /**
   * Called when a failed step is about to be retried.
   * `attempt` is the attempt count so far, `max` the retry config's
   * `maxAttempts`, and `delayMs` the wait applied before the next attempt.
   */
  onStepRetry(instanceId: string, stepName: string, attempt: number, max: number, delayMs: number): void;
  /**
   * Called after each completed step. `progress` is the rounded percentage of
   * `completed` over `total`; `currentStep` is the step that just completed and
   * `total` is the number of steps in the workflow definition.
   */
  onProgress(instanceId: string, progress: number, currentStep: string, completed: number, total: number): void;
  /** Called once the instance has been persisted as completed; `output` is the last step's output. */
  onCompleted(instanceId: string, output: any): void;
  /** Called after the instance has been persisted as failed with the given error message. */
  onFailed(instanceId: string, error: string): void;
}
34
+
35
+ // ============================================
36
+ // Configuration
37
+ // ============================================
38
+
39
/** Dependencies and tuning options accepted by the `WorkflowStateMachine` constructor. */
export interface StateMachineConfig {
  /** Persistence layer used to load, create, and update workflow instances. */
  adapter: WorkflowAdapter;
  /** Core services exposed to step handlers as `ctx.core`. */
  core?: CoreServices;
  /** Plugin registry exposed to step handlers as `ctx.plugins`. */
  plugins: Record<string, any>;
  /** Receiver for step and workflow lifecycle notifications. */
  events: StateMachineEvents;
  /** Job service; needed only by task steps that specify a `job` instead of an inline `handler`. */
  jobs?: Jobs;
  /** Poll interval for checking job completion (ms). Defaults to 1000 when omitted. */
  pollInterval?: number;
}
48
+
49
+ // ============================================
50
+ // State Machine Implementation
51
+ // ============================================
52
+
53
/**
 * Executes a workflow definition against a persisted workflow instance.
 *
 * All instance state is read from and written to the configured adapter, and
 * lifecycle notifications are delivered through the configured events object.
 * Steps of one workflow run in an iterative loop; each branch of a parallel
 * step is executed by a nested `run()` call on its own sub-instance.
 */
export class WorkflowStateMachine {
  private adapter: WorkflowAdapter;
  private core?: CoreServices;
  private plugins: Record<string, any>;
  private events: StateMachineEvents;
  private jobs?: Jobs;
  private pollInterval: number;
  /** Instance ids for which `cancel()` was called and not yet observed by `run()`. */
  private cancelledInstances = new Set<string>();

  /**
   * @param config - Adapter, services, and event sink. `pollInterval`
   *   defaults to 1000 ms when omitted.
   */
  constructor(config: StateMachineConfig) {
    this.adapter = config.adapter;
    this.core = config.core;
    this.plugins = config.plugins;
    this.events = config.events;
    this.jobs = config.jobs;
    this.pollInterval = config.pollInterval ?? 1000;
  }

  /**
   * Run a workflow instance to completion.
   *
   * Starts at the instance's `currentStep` (or `definition.startAt` when unset)
   * and follows `next` / choice results until a step ends the workflow.
   * A failed step is retried according to `step.retry ?? definition.defaultRetry`
   * with exponential backoff capped at `maxIntervalMs`.
   *
   * @param instanceId - Id of an instance already stored in the adapter.
   * @param definition - Workflow definition to execute.
   * @returns The output of the last completed step. When the run stops early
   *   (cancellation, instance no longer "running", or instance removed) the
   *   output gathered so far is returned and the instance is not marked completed.
   * @throws Error if the instance does not exist, if a step name cannot be
   *   resolved, or (rethrown) the original error of a step whose retries are exhausted.
   */
  async run(instanceId: string, definition: WorkflowDefinition): Promise<any> {
    const instance = await this.adapter.getInstance(instanceId);
    if (!instance) {
      throw new Error(`Workflow instance ${instanceId} not found`);
    }

    // Mark as running if pending
    if (instance.status === "pending") {
      await this.adapter.updateInstance(instanceId, {
        status: "running",
        startedAt: new Date(),
      });
    }

    let currentStepName: string | undefined = instance.currentStep ?? definition.startAt;
    let lastOutput: any;

    // Iterative step execution loop
    while (currentStepName) {
      // Capture as const so TypeScript narrows to string throughout the block
      const stepName = currentStepName;

      // Cooperative cancellation is honoured at every step boundary
      if (this.consumeCancellation(instanceId)) {
        return lastOutput;
      }

      const step = definition.steps.get(stepName);
      if (!step) {
        const error = `Step "${stepName}" not found in workflow`;
        await this.persistFailure(instanceId, error);
        this.events.onFailed(instanceId, error);
        throw new Error(error);
      }

      // Reload instance for fresh state (step results, metadata)
      const freshInstance = await this.adapter.getInstance(instanceId);
      if (!freshInstance || freshInstance.status !== "running") {
        return lastOutput;
      }

      this.events.onStepStarted(instanceId, stepName, step.type);

      // Record the step as running; attempts accumulate across retries
      const stepResult: StepResult = {
        stepName,
        status: "running",
        startedAt: new Date(),
        attempts: (freshInstance.stepResults[stepName]?.attempts ?? 0) + 1,
      };
      await this.adapter.updateInstance(instanceId, {
        currentStep: stepName,
        stepResults: { ...freshInstance.stepResults, [stepName]: stepResult },
      });

      // Pass the executing step explicitly: `freshInstance` was loaded before
      // `currentStep` was updated, so its own `currentStep` may be one step behind.
      const ctx = this.buildContext(freshInstance, definition, stepName);

      try {
        let output: any;

        switch (step.type) {
          case "task":
            output = await this.executeTaskStep(instanceId, step, ctx, definition);
            break;
          case "parallel":
            output = await this.executeParallelStep(instanceId, step, ctx, definition);
            break;
          case "choice":
            output = await this.executeChoiceStep(step, ctx);
            break;
          case "pass":
            output = await this.executePassStep(step, ctx);
            break;
        }

        // Persist step completion
        await this.completeStep(instanceId, stepName, output, step, definition);
        lastOutput = output;

        currentStepName = this.resolveNextStep(step, output);
      } catch (error) {
        const errorMsg = error instanceof Error ? error.message : String(error);

        const latestInstance = await this.adapter.getInstance(instanceId);
        if (!latestInstance) return lastOutput;

        const currentAttempts = latestInstance.stepResults[stepName]?.attempts ?? 1;
        const retry = step.retry ?? definition.defaultRetry;

        if (retry && currentAttempts < retry.maxAttempts) {
          // Exponential backoff, capped at maxIntervalMs
          const backoffRate = retry.backoffRate ?? 2;
          const intervalMs = retry.intervalMs ?? 1000;
          const maxIntervalMs = retry.maxIntervalMs ?? 30000;
          const delay = Math.min(
            intervalMs * Math.pow(backoffRate, currentAttempts - 1),
            maxIntervalMs,
          );

          // Record the error but keep the step in "running" so it stays retryable
          const retryResult: StepResult = {
            stepName,
            status: "running",
            startedAt: latestInstance.stepResults[stepName]?.startedAt ?? new Date(),
            attempts: currentAttempts,
            error: errorMsg,
          };
          await this.adapter.updateInstance(instanceId, {
            stepResults: { ...latestInstance.stepResults, [stepName]: retryResult },
          });

          this.events.onStepRetry(instanceId, stepName, currentAttempts, retry.maxAttempts, delay);

          // Wait, then re-enter the loop on the same step
          await this.sleep(delay);
          continue;
        }

        // No more retries - fail the step and the workflow
        const failedResult: StepResult = {
          stepName,
          status: "failed",
          startedAt: latestInstance.stepResults[stepName]?.startedAt ?? new Date(),
          completedAt: new Date(),
          attempts: currentAttempts,
          error: errorMsg,
        };
        await this.adapter.updateInstance(instanceId, {
          stepResults: { ...latestInstance.stepResults, [stepName]: failedResult },
        });

        this.events.onStepFailed(instanceId, stepName, errorMsg, currentAttempts);

        const fullError = `Step "${stepName}" failed: ${errorMsg}`;
        await this.persistFailure(instanceId, fullError);
        this.events.onFailed(instanceId, fullError);
        throw error;
      }
    }

    // A cancel requested while the final step was executing must not be
    // overwritten by a "completed" status, and its flag must not linger.
    if (this.consumeCancellation(instanceId)) {
      return lastOutput;
    }

    // Workflow completed
    await this.adapter.updateInstance(instanceId, {
      status: "completed",
      output: lastOutput,
      completedAt: new Date(),
      currentStep: undefined,
    });
    this.events.onCompleted(instanceId, lastOutput);

    return lastOutput;
  }

  /**
   * Cooperative cancellation - the state machine checks this flag at step
   * boundaries (before each step and after the last one). The step that is
   * currently executing is not interrupted.
   */
  cancel(instanceId: string): void {
    this.cancelledInstances.add(instanceId);
  }

  // ============================================
  // Step Executors
  // ============================================

  /**
   * Execute a task step, either through its inline `handler` or, when no
   * handler is set, by enqueueing `step.job` and polling until it finishes.
   *
   * For handlers, `inputSchema` may be a mapper function `(prev, input)` or a
   * schema object with `safeParse`; `outputSchema`, when present, validates
   * the handler result. The resolved input is stored on the step result.
   *
   * @throws Error on input/output validation failure, when neither `handler`
   *   nor `job` is set, when a job is requested without a jobs service, or
   *   when the job fails / times out.
   */
  private async executeTaskStep(
    instanceId: string,
    step: TaskStepDefinition,
    ctx: WorkflowContext,
    definition: WorkflowDefinition,
  ): Promise<any> {
    if (step.handler) {
      let input: any;

      if (step.inputSchema) {
        if (typeof step.inputSchema === "function") {
          input = step.inputSchema(ctx.prev, ctx.input);
        } else {
          const parseResult = step.inputSchema.safeParse(ctx.input);
          if (!parseResult.success) {
            throw new Error(`Input validation failed: ${parseResult.error.message}`);
          }
          input = parseResult.data;
        }
      } else {
        input = ctx.input;
      }

      await this.persistStepInput(instanceId, step.name, input);

      let result = await step.handler(input, ctx);

      if (step.outputSchema) {
        const parseResult = step.outputSchema.safeParse(result);
        if (!parseResult.success) {
          throw new Error(`Output validation failed: ${parseResult.error.message}`);
        }
        result = parseResult.data;
      }

      return result;
    }

    // Legacy job-based execution
    if (!this.jobs) {
      throw new Error("Jobs service not configured");
    }
    if (!step.job) {
      throw new Error("Task step requires either 'handler' or 'job'");
    }

    const jobInput = step.input ? step.input(ctx) : ctx.input;

    await this.persistStepInput(instanceId, step.name, jobInput);

    // Tag the job payload so the job can be traced back to this workflow step
    const jobId = await this.jobs.enqueue(step.job, {
      ...jobInput,
      _workflowInstanceId: instanceId,
      _workflowStepName: step.name,
    });

    const result = await this.waitForJob(jobId, step.timeout);
    return step.output ? step.output(result, ctx) : result;
  }

  /**
   * Evaluate the step's choices in order and return the first whose condition
   * is truthy. A condition that throws is treated as not matching.
   *
   * @returns `{ chosen }` naming the next step.
   * @throws Error when nothing matches and the step has no `default`.
   */
  private async executeChoiceStep(
    step: ChoiceStepDefinition,
    ctx: WorkflowContext,
  ): Promise<{ chosen: string }> {
    for (const choice of step.choices) {
      try {
        if (choice.condition(ctx)) {
          return { chosen: choice.next };
        }
      } catch {
        // Condition threw, try next
      }
    }

    // No condition matched, use default
    if (step.default) {
      return { chosen: step.default };
    }

    throw new Error("No choice condition matched and no default specified");
  }

  /**
   * Run every branch of a parallel step on its own sub-instance and collect
   * the branch outputs keyed by branch name.
   *
   * All sub-instances are created and recorded on the parent (under
   * `branchInstances[step.name]`) before any branch starts, so that the branch
   * promises are handed to `Promise.all` / `Promise.allSettled` immediately
   * after creation and a fast-failing branch can never reject unobserved.
   *
   * @throws Error with the joined branch messages when `onError` is
   *   `"wait-all"` and any branch failed; otherwise the first branch error
   *   (fail-fast).
   */
  private async executeParallelStep(
    instanceId: string,
    step: ParallelStepDefinition,
    ctx: WorkflowContext,
    definition: WorkflowDefinition,
  ): Promise<Record<string, any>> {
    const branches: { branchDef: WorkflowDefinition; branchId: string }[] = [];

    // Create sub-instances for each branch
    for (const branchDef of step.branches) {
      const branchInstance = await this.adapter.createInstance({
        workflowName: branchDef.name,
        status: "pending",
        currentStep: branchDef.startAt,
        input: ctx.input,
        stepResults: {},
        createdAt: new Date(),
        parentId: instanceId,
        branchName: branchDef.name,
      });
      branches.push({ branchDef, branchId: branchInstance.id });
    }

    // Track branch instances on the parent
    const parentInstance = await this.adapter.getInstance(instanceId);
    if (parentInstance) {
      await this.adapter.updateInstance(instanceId, {
        branchInstances: {
          ...(parentInstance.branchInstances ?? {}),
          [step.name]: branches.map((b) => b.branchId),
        },
      });
    }

    // Start all branches; no await may come between this and the combinator below
    const branchRuns = branches.map(async ({ branchDef, branchId }) => {
      const result = await this.run(branchId, branchDef);
      return { name: branchDef.name, result };
    });

    const output: Record<string, any> = {};

    if (step.onError === "wait-all") {
      const settled = await Promise.allSettled(branchRuns);
      const errors: string[] = [];

      for (const outcome of settled) {
        if (outcome.status === "fulfilled") {
          output[outcome.value.name] = outcome.value.result;
        } else {
          errors.push(outcome.reason?.message ?? "Branch failed");
        }
      }

      if (errors.length > 0) {
        throw new Error(`Parallel branches failed: ${errors.join(", ")}`);
      }

      return output;
    }

    // fail-fast (default)
    for (const finished of await Promise.all(branchRuns)) {
      output[finished.name] = finished.result;
    }
    return output;
  }

  /**
   * Execute a pass step: a fixed `result` wins, then `transform(ctx)`,
   * otherwise the workflow input is passed through.
   */
  private async executePassStep(
    step: PassStepDefinition,
    ctx: WorkflowContext,
  ): Promise<any> {
    if (step.result !== undefined) {
      return step.result;
    }
    if (step.transform) {
      return step.transform(ctx);
    }
    return ctx.input;
  }

  // ============================================
  // Context Building
  // ============================================

  /**
   * Build the context handed to step handlers, conditions, and mappers.
   *
   * @param instance - Instance snapshot supplying input, step results, and metadata.
   * @param definition - Workflow definition, used to locate the predecessor step.
   * @param currentStep - Name of the step about to execute. Defaults to
   *   `instance.currentStep`; callers holding a snapshot taken before
   *   `currentStep` was updated should pass the name explicitly.
   * @returns A context whose `steps` holds the outputs of completed steps and
   *   whose `prev` is the output of a completed step pointing at `currentStep`
   *   via `next`, falling back to the most recently completed step.
   */
  private buildContext(
    instance: WorkflowInstance,
    definition: WorkflowDefinition,
    currentStep: string | undefined = instance.currentStep,
  ): WorkflowContext {
    // Outputs of completed steps, keyed by step name
    const steps: Record<string, any> = {};
    for (const [name, result] of Object.entries(instance.stepResults)) {
      if (result.status === "completed" && result.output !== undefined) {
        steps[name] = result.output;
      }
    }

    // Find the previous step's output by tracing the workflow path
    let prev: any = undefined;
    if (currentStep) {
      for (const [stepName, stepDef] of definition.steps) {
        if (stepDef.next === currentStep && steps[stepName] !== undefined) {
          prev = steps[stepName];
          break;
        }
      }
      // If no explicit next found, use most recent completed step output
      if (prev === undefined) {
        const completedSteps = Object.entries(instance.stepResults)
          .filter(([, r]) => r.status === "completed" && r.output !== undefined)
          .sort((a, b) => {
            const aTime = a[1].completedAt?.getTime() ?? 0;
            const bTime = b[1].completedAt?.getTime() ?? 0;
            return bTime - aTime;
          });
        if (completedSteps.length > 0) {
          prev = completedSteps[0][1].output;
        }
      }
    }

    // Metadata snapshot; setMetadata keeps it in sync with the adapter
    const metadata = { ...(instance.metadata ?? {}) };
    const adapter = this.adapter;
    const instanceId = instance.id;

    return {
      input: instance.input,
      steps,
      prev,
      instance,
      getStepResult: <T = any>(stepName: string): T | undefined => {
        return steps[stepName] as T | undefined;
      },
      core: this.core!,
      plugins: this.plugins,
      metadata,
      setMetadata: async (key: string, value: any): Promise<void> => {
        metadata[key] = value;
        await adapter.updateInstance(instanceId, {
          metadata: { ...metadata },
        });
      },
      getMetadata: <T = any>(key: string): T | undefined => {
        return metadata[key] as T | undefined;
      },
    };
  }

  // ============================================
  // Helpers
  // ============================================

  /**
   * Persist a step's completed result and emit `onStepCompleted` and
   * `onProgress`. Does nothing when the instance is missing or no longer
   * "running".
   *
   * Progress is the rounded percentage of completed step results (including
   * this one, counted exactly once) over the number of steps in the definition.
   */
  private async completeStep(
    instanceId: string,
    stepName: string,
    output: any,
    step: StepDefinition,
    definition: WorkflowDefinition,
  ): Promise<void> {
    const instance = await this.adapter.getInstance(instanceId);
    if (!instance || instance.status !== "running") return;

    const previous: StepResult = instance.stepResults[stepName] ?? {
      stepName,
      status: "pending",
      attempts: 0,
    };
    // Build a new result rather than mutating the loaded one, so the
    // completed count below is derived from a single consistent map.
    const completedResult: StepResult = {
      ...previous,
      status: "completed",
      output,
      completedAt: new Date(),
    };
    const stepResults = { ...instance.stepResults, [stepName]: completedResult };

    await this.adapter.updateInstance(instanceId, { stepResults });

    this.events.onStepCompleted(instanceId, stepName, output, this.resolveNextStep(step, output));

    // Calculate progress
    const totalSteps = definition.steps.size;
    const completedSteps = Object.values(stepResults).filter(
      (r) => r.status === "completed",
    ).length;
    const progress = Math.round((completedSteps / totalSteps) * 100);

    this.events.onProgress(instanceId, progress, stepName, completedSteps, totalSteps);
  }

  /** Mark the instance as failed with the given error message and a completion timestamp. */
  private async persistFailure(instanceId: string, error: string): Promise<void> {
    await this.adapter.updateInstance(instanceId, {
      status: "failed",
      error,
      completedAt: new Date(),
    });
  }

  /**
   * Poll the jobs service every `pollInterval` ms until the job completes.
   *
   * @param timeout - Optional limit in ms, measured from the first poll.
   * @returns The job's `result` once its status is "completed".
   * @throws Error if the jobs service is missing, the job cannot be found,
   *   the job failed, or the timeout elapsed.
   */
  private async waitForJob(jobId: string, timeout?: number): Promise<any> {
    if (!this.jobs) {
      throw new Error("Jobs service not configured");
    }

    const startTime = Date.now();

    while (true) {
      const job = await this.jobs.get(jobId);

      if (!job) {
        throw new Error(`Job ${jobId} not found`);
      }

      if (job.status === "completed") {
        return job.result;
      }

      if (job.status === "failed") {
        throw new Error(job.error ?? "Job failed");
      }

      if (timeout && Date.now() - startTime > timeout) {
        throw new Error("Job timed out");
      }

      await this.sleep(this.pollInterval);
    }
  }

  /** Clear a pending cancellation flag; returns true when one was set. */
  private consumeCancellation(instanceId: string): boolean {
    return this.cancelledInstances.delete(instanceId);
  }

  /**
   * Name of the step that follows `step`: the chosen branch for a choice
   * step, otherwise `step.next` unless the step is marked `end`.
   */
  private resolveNextStep(step: StepDefinition, output: any): string | undefined {
    if (step.type === "choice") {
      return output?.chosen;
    }
    if (step.end) {
      return undefined;
    }
    return step.next ? step.next : undefined;
  }

  /** Store the resolved input on the step's existing result, if there is one. */
  private async persistStepInput(instanceId: string, stepName: string, input: any): Promise<void> {
    const instance = await this.adapter.getInstance(instanceId);
    const existing = instance?.stepResults[stepName];
    if (!instance || !existing) return;

    await this.adapter.updateInstance(instanceId, {
      stepResults: { ...instance.stepResults, [stepName]: { ...existing, input } },
    });
  }

  /** Resolve after `ms` milliseconds. */
  private sleep(ms: number): Promise<void> {
    return new Promise((resolve) => setTimeout(resolve, ms));
  }
}