@donkeylabs/server 2.0.26 → 2.0.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/docs/workflows.md CHANGED
@@ -285,6 +285,51 @@ workflow("example")
285
285
  .end("done")
286
286
  ```
287
287
 
288
+ ### Poll
289
+
290
+ Use a poll step for wait → check loops that persist across restarts.
291
+
292
+ ```typescript
293
+ workflow("batch.status")
294
+ .poll("wait-for-result", {
295
+ interval: 5000,
296
+ timeout: 600000,
297
+ maxAttempts: 120,
298
+ check: async (input, ctx) => {
299
+ const status = await fetchStatus(input.operationId);
300
+ if (status.state === "FAILED") throw new Error(status.error);
301
+ if (status.state === "SUCCEEDED") {
302
+ return { done: true, result: status.data };
303
+ }
304
+ return { done: false };
305
+ },
306
+ })
307
+ .build();
308
+ ```
309
+
310
+ Each poll cycle emits a `workflow.step.poll` event and persists progress to the instance.
311
+
312
+ ### Loop
313
+
314
+ Use a loop step to jump back to a previous step until a condition is false.
315
+
316
+ ```typescript
317
+ workflow("loop-example")
318
+ .task("increment", {
319
+ handler: async (input) => ({ count: (input.count ?? 0) + 1 }),
320
+ })
321
+ .loop("repeat", {
322
+ condition: (ctx) => ctx.steps.increment.count < 3,
323
+ target: "increment",
324
+ interval: 1000,
325
+ maxIterations: 10,
326
+ timeout: 30000,
327
+ })
328
+ .build();
329
+ ```
330
+
331
+ Each loop iteration emits a `workflow.step.loop` event and persists loop counters to the instance.
332
+
288
333
  ## Workflow Context
289
334
 
290
335
  Every step receives a `WorkflowContext` with:
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@donkeylabs/server",
3
- "version": "2.0.26",
3
+ "version": "2.0.28",
4
4
  "type": "module",
5
5
  "description": "Type-safe plugin system for building RPC-style APIs with Bun",
6
6
  "main": "./src/index.ts",
@@ -505,6 +505,17 @@ export function createAdminRouter(config: AdminRouteContext) {
505
505
  stepName: z.string(),
506
506
  error: z.string(),
507
507
  }),
508
+ "step.poll": z.object({
509
+ stepName: z.string(),
510
+ pollCount: z.number(),
511
+ done: z.boolean(),
512
+ result: z.any().optional(),
513
+ }),
514
+ "step.loop": z.object({
515
+ stepName: z.string(),
516
+ loopCount: z.number(),
517
+ target: z.string(),
518
+ }),
508
519
  completed: z.object({
509
520
  output: z.any().optional(),
510
521
  }),
@@ -548,6 +559,19 @@ export function createAdminRouter(config: AdminRouteContext) {
548
559
  workflowName: z.string(),
549
560
  error: z.string(),
550
561
  }),
562
+ "workflow.step.poll": z.object({
563
+ instanceId: z.string(),
564
+ stepName: z.string(),
565
+ pollCount: z.number(),
566
+ done: z.boolean(),
567
+ result: z.any().optional(),
568
+ }),
569
+ "workflow.step.loop": z.object({
570
+ instanceId: z.string(),
571
+ stepName: z.string(),
572
+ loopCount: z.number(),
573
+ target: z.string(),
574
+ }),
551
575
  },
552
576
  handle: (input, ctx) => {
553
577
  if (!checkAuth(ctx)) {
package/src/core/index.ts CHANGED
@@ -149,6 +149,9 @@ export {
149
149
  type ChoiceStepDefinition,
150
150
  type ChoiceCondition,
151
151
  type PassStepDefinition,
152
+ type PollStepDefinition,
153
+ type PollStepResult,
154
+ type LoopStepDefinition,
152
155
  type RetryConfig,
153
156
  type GetAllWorkflowsOptions,
154
157
  type PluginMetadata,
@@ -34,6 +34,7 @@ export class SqliteJobAdapter implements JobAdapter {
34
34
  this.ensureDir(dbPath);
35
35
 
36
36
  this.db = new Database(dbPath);
37
+ this.db.run("PRAGMA busy_timeout = 5000");
37
38
  this.init();
38
39
 
39
40
  // Start cleanup timer
@@ -80,6 +80,7 @@ export class KyselyLogsAdapter implements LogsAdapter {
80
80
  // Enable WAL mode for better concurrent read/write performance
81
81
  sqliteDb.exec("PRAGMA journal_mode = WAL");
82
82
  sqliteDb.exec("PRAGMA synchronous = NORMAL");
83
+ sqliteDb.exec("PRAGMA busy_timeout = 5000");
83
84
 
84
85
  this.db = new Kysely<Database>({
85
86
  dialect: new SqliteDialect({
@@ -52,6 +52,7 @@ export class SqliteProcessAdapter implements ProcessAdapter {
52
52
  this.ensureDir(dbPath);
53
53
 
54
54
  this.db = new Database(dbPath);
55
+ this.db.run("PRAGMA busy_timeout = 5000");
55
56
  this.init();
56
57
 
57
58
  // Start cleanup timer
@@ -258,6 +258,11 @@ export class KyselyWorkflowAdapter implements WorkflowAdapter {
258
258
  startedAt: sr.startedAt ? new Date(sr.startedAt) : undefined,
259
259
  completedAt: sr.completedAt ? new Date(sr.completedAt) : undefined,
260
260
  attempts: sr.attempts,
261
+ pollCount: sr.pollCount,
262
+ lastPolledAt: sr.lastPolledAt ? new Date(sr.lastPolledAt) : undefined,
263
+ loopCount: sr.loopCount,
264
+ lastLoopedAt: sr.lastLoopedAt ? new Date(sr.lastLoopedAt) : undefined,
265
+ loopStartedAt: sr.loopStartedAt ? new Date(sr.loopStartedAt) : undefined,
261
266
  };
262
267
  }
263
268
 
@@ -184,6 +184,27 @@ function createIpcEventBridge(socket: Socket, instanceId: string): StateMachineE
184
184
  error,
185
185
  });
186
186
  },
// Forwards one poll cycle of a poll step over the IPC socket as a "step.poll" event.
onStepPoll: (id, stepName, pollCount, done, result) => {
  const pollEvent = {
    type: "step.poll" as const,
    instanceId: id,
    timestamp: Date.now(),
    stepName,
    pollCount,
    done,
    result,
  };
  sendEvent(socket, pollEvent);
},
// Forwards one loop-back of a loop step over the IPC socket as a "step.loop" event.
onStepLoop: (id, stepName, loopCount, target) => {
  const loopEvent = {
    type: "step.loop" as const,
    instanceId: id,
    timestamp: Date.now(),
    stepName,
    loopCount,
    target,
  };
  sendEvent(socket, loopEvent);
},
187
208
  onStepRetry: () => {
188
209
  // Retry is internal to the state machine - no IPC event needed
189
210
  },
@@ -20,6 +20,8 @@ export type WorkflowEventType =
20
20
  | "step.started"
21
21
  | "step.completed"
22
22
  | "step.failed"
23
+ | "step.poll"
24
+ | "step.loop"
23
25
  | "progress"
24
26
  | "completed"
25
27
  | "failed"
@@ -41,6 +43,11 @@ export interface WorkflowEvent {
41
43
  totalSteps?: number;
42
44
  /** Next step to execute (for step.completed events) */
43
45
  nextStep?: string;
46
+ pollCount?: number;
47
+ done?: boolean;
48
+ result?: any;
49
+ loopCount?: number;
50
+ target?: string;
44
51
  /** Custom event name (for event type) */
45
52
  event?: string;
46
53
  /** Custom event payload or log data */
@@ -12,6 +12,8 @@ import type {
12
12
  WorkflowContext,
13
13
  StepDefinition,
14
14
  TaskStepDefinition,
15
+ LoopStepDefinition,
16
+ PollStepDefinition,
15
17
  ParallelStepDefinition,
16
18
  ChoiceStepDefinition,
17
19
  PassStepDefinition,
@@ -28,6 +30,8 @@ export interface StateMachineEvents {
28
30
  onStepCompleted(instanceId: string, stepName: string, output: any, nextStep?: string): void;
29
31
  onStepFailed(instanceId: string, stepName: string, error: string, attempts: number): void;
30
32
  onStepRetry(instanceId: string, stepName: string, attempt: number, max: number, delayMs: number): void;
33
+ onStepPoll(instanceId: string, stepName: string, pollCount: number, done: boolean, result?: any): void;
34
+ onStepLoop(instanceId: string, stepName: string, loopCount: number, target: string): void;
31
35
  onProgress(instanceId: string, progress: number, currentStep: string, completed: number, total: number): void;
32
36
  onCompleted(instanceId: string, output: any): void;
33
37
  onFailed(instanceId: string, error: string): void;
@@ -136,11 +140,17 @@ export class WorkflowStateMachine {
136
140
  this.events.onStepStarted(instanceId, stepName, step.type);
137
141
 
138
142
  // Update step result as running
143
+ const previousStep = freshInstance.stepResults[stepName];
139
144
  const stepResult: StepResult = {
140
145
  stepName,
141
146
  status: "running",
142
- startedAt: new Date(),
143
- attempts: (freshInstance.stepResults[stepName]?.attempts ?? 0) + 1,
147
+ startedAt: previousStep?.startedAt ?? new Date(),
148
+ attempts: (previousStep?.attempts ?? 0) + 1,
149
+ pollCount: previousStep?.pollCount,
150
+ lastPolledAt: previousStep?.lastPolledAt,
151
+ loopCount: previousStep?.loopCount,
152
+ lastLoopedAt: previousStep?.lastLoopedAt,
153
+ loopStartedAt: previousStep?.loopStartedAt,
144
154
  };
145
155
  await this.adapter.updateInstance(instanceId, {
146
156
  currentStep: stepName,
@@ -166,6 +176,12 @@ export class WorkflowStateMachine {
166
176
  case "pass":
167
177
  output = await this.executePassStep(step, ctx);
168
178
  break;
179
+ case "poll":
180
+ output = await this.executePollStep(instanceId, step, ctx, definition);
181
+ break;
182
+ case "loop":
183
+ output = await this.executeLoopStep(instanceId, step, ctx);
184
+ break;
169
185
  }
170
186
 
171
187
  // Persist step completion
@@ -176,6 +192,8 @@ export class WorkflowStateMachine {
176
192
  if (step.type === "choice") {
177
193
  // Choice step returns { chosen: "nextStepName" }
178
194
  currentStepName = output?.chosen;
195
+ } else if (step.type === "loop" && output?.loopTo) {
196
+ currentStepName = output.loopTo;
179
197
  } else if (step.end) {
180
198
  currentStepName = undefined;
181
199
  } else if (step.next) {
@@ -445,6 +463,87 @@ export class WorkflowStateMachine {
445
463
  return output;
446
464
  }
447
465
 
/**
 * Runs a poll step: each cycle waits `step.interval` ms and then calls
 * `step.check`, repeating until the check reports `done: true`.
 *
 * After every cycle the poll counter, the time of the poll and the latest
 * check result are written to the instance's step result, and `onStepPoll`
 * is emitted.
 *
 * @param instanceId - Workflow instance being executed.
 * @param step - Poll step definition.
 * @param ctx - Workflow context, passed through to `step.check`.
 * @param _definition - Not used by this method.
 * @returns The check's `result`, parsed through `step.outputSchema` when one is set.
 * @throws Error when input or output validation fails, when `step.timeout` ms
 *   have elapsed since the step started, or when the persisted poll count has
 *   reached `step.maxAttempts`.
 */
private async executePollStep(
  instanceId: string,
  step: PollStepDefinition,
  ctx: WorkflowContext,
  _definition: WorkflowDefinition,
): Promise<any> {
  // Resolve the value handed to `check`: raw workflow input, a mapped value,
  // or the schema-validated workflow input.
  const inputSource = step.inputSchema;
  let input: any;
  if (!inputSource) {
    input = ctx.input;
  } else if (typeof inputSource === "function") {
    input = inputSource(ctx.prev, ctx.input);
  } else {
    const parsedInput = inputSource.safeParse(ctx.input);
    if (!parsedInput.success) {
      throw new Error(`Input validation failed: ${parsedInput.error.message}`);
    }
    input = parsedInput.data;
  }

  const initialInstance = await this.adapter.getInstance(instanceId);
  const initialResult = initialInstance?.stepResults[step.name];
  // Reuse a previously recorded start time so the timeout is measured from it.
  const startedAt = initialResult?.startedAt ?? new Date();

  if (initialInstance && initialResult) {
    // Seed the persisted record without overwriting values that already exist.
    initialResult.input = initialResult.input ?? input;
    initialResult.pollCount = initialResult.pollCount ?? 0;
    await this.adapter.updateInstance(instanceId, {
      stepResults: { ...initialInstance.stepResults, [step.name]: initialResult },
    });
  }

  for (;;) {
    if (step.timeout && Date.now() - startedAt.getTime() > step.timeout) {
      throw new Error(`Poll step "${step.name}" timed out`);
    }

    // Re-read the instance every cycle; the persisted count is the source of truth.
    const current = await this.adapter.getInstance(instanceId);
    const tracked = current?.stepResults[step.name];
    const completedPolls = tracked?.pollCount ?? 0;

    if (step.maxAttempts && completedPolls >= step.maxAttempts) {
      throw new Error(`Poll step "${step.name}" exceeded maxAttempts`);
    }

    // Wait first, then check.
    if (step.interval > 0) {
      await new Promise((resolve) => setTimeout(resolve, step.interval));
    }

    const outcome = await step.check(input, ctx);
    const pollNumber = completedPolls + 1;

    if (current && tracked) {
      tracked.pollCount = pollNumber;
      tracked.lastPolledAt = new Date();
      tracked.output = outcome;
      await this.adapter.updateInstance(instanceId, {
        stepResults: { ...current.stepResults, [step.name]: tracked },
      });
    }

    this.events.onStepPoll(instanceId, step.name, pollNumber, outcome.done, outcome.result);

    if (!outcome.done) {
      continue;
    }

    if (!step.outputSchema) {
      return outcome.result;
    }

    const parsedOutput = step.outputSchema.safeParse(outcome.result);
    if (!parsedOutput.success) {
      throw new Error(`Output validation failed: ${parsedOutput.error.message}`);
    }
    return parsedOutput.data;
  }
}
546
+
448
547
  private async executePassStep(
449
548
  step: PassStepDefinition,
450
549
  ctx: WorkflowContext,
@@ -458,6 +557,52 @@ export class WorkflowStateMachine {
458
557
  return ctx.input;
459
558
  }
460
559
 
/**
 * Executes one pass through a loop step and decides whether the workflow
 * should jump back to `step.target`.
 *
 * The loop counter, loop start time and last loop time are persisted on the
 * instance's step result. When the condition holds, `onStepLoop` is emitted,
 * the optional `step.interval` delay is awaited, and `{ loopTo }` is returned.
 * Otherwise an empty object is returned.
 *
 * @param instanceId - Workflow instance being executed.
 * @param step - Loop step definition.
 * @param ctx - Workflow context passed to `step.condition`.
 * @returns `{ loopTo: step.target }` to loop again, `{}` to fall through.
 * @throws Error when `step.timeout` ms have elapsed since the loop started, or
 *   when another iteration is requested after `step.maxIterations` loop-backs.
 */
private async executeLoopStep(
  instanceId: string,
  step: LoopStepDefinition,
  ctx: WorkflowContext,
): Promise<{ loopTo?: string }> {
  const instance = await this.adapter.getInstance(instanceId);
  const stepResult = instance?.stepResults[step.name] ?? {
    stepName: step.name,
    status: "running" as const,
    attempts: 0,
    startedAt: new Date(),
  };
  // The loop clock starts at the first pass and is carried across iterations.
  const loopStartedAt = stepResult.loopStartedAt ?? stepResult.startedAt ?? new Date();
  const loopCount = stepResult.loopCount ?? 0;

  // The timeout is a hard deadline for the whole loop, checked up front.
  if (step.timeout && Date.now() - loopStartedAt.getTime() > step.timeout) {
    throw new Error(`Loop step "${step.name}" timed out`);
  }

  const shouldLoop = step.condition(ctx);

  // Only a request for *another* iteration can exceed the cap. A loop whose
  // condition turns false after exactly maxIterations loop-backs has finished
  // within its budget and must exit normally rather than fail.
  if (shouldLoop && step.maxIterations && loopCount >= step.maxIterations) {
    throw new Error(`Loop step "${step.name}" exceeded maxIterations`);
  }

  if (instance) {
    stepResult.loopCount = shouldLoop ? loopCount + 1 : loopCount;
    stepResult.loopStartedAt = loopStartedAt;
    stepResult.lastLoopedAt = shouldLoop ? new Date() : stepResult.lastLoopedAt;
    stepResult.output = { looped: shouldLoop };
    await this.adapter.updateInstance(instanceId, {
      stepResults: { ...instance.stepResults, [step.name]: stepResult },
    });
  }

  if (!shouldLoop) {
    return {};
  }

  this.events.onStepLoop(instanceId, step.name, loopCount + 1, step.target);
  if (step.interval && step.interval > 0) {
    await new Promise((resolve) => setTimeout(resolve, step.interval));
  }
  return { loopTo: step.target };
}
605
+
461
606
  // ============================================
462
607
  // Context Building
463
608
  // ============================================
@@ -59,7 +59,7 @@ type InferZodOutput<T extends ZodSchema> = z.infer<T>;
59
59
  // Step Types
60
60
  // ============================================
61
61
 
62
- export type StepType = "task" | "parallel" | "choice" | "pass";
62
+ export type StepType = "task" | "parallel" | "choice" | "pass" | "poll" | "loop";
63
63
 
64
64
  export interface BaseStepDefinition {
65
65
  name: string;
@@ -146,11 +146,54 @@ export interface PassStepDefinition extends BaseStepDefinition {
146
146
  result?: any;
147
147
  }
148
148
 
/**
 * Value returned by a poll step's `check` handler.
 *
 * @typeParam T - Type of the payload carried in `result`.
 */
export interface PollStepResult<T = any> {
  /** `true` ends polling; `false` schedules another poll cycle. */
  done: boolean;
  /** Payload of the check; used as the step output once `done` is `true`. */
  result?: T;
}
153
+
/**
 * Definition of a "poll" step: a wait-then-check cycle that repeats until the
 * `check` handler reports completion.
 *
 * @typeParam TInput - Zod schema describing the value passed to `check`.
 * @typeParam TOutput - Zod schema describing the final result.
 */
export interface PollStepDefinition<
  TInput extends ZodSchema = ZodSchema,
  TOutput extends ZodSchema = ZodSchema,
> extends BaseStepDefinition {
  type: "poll";
  /** Called once per cycle; return `done: true` to let the workflow proceed. */
  check: (
    input: InferZodOutput<TInput>,
    ctx: WorkflowContext
  ) => Promise<PollStepResult<InferZodOutput<TOutput>>> | PollStepResult<InferZodOutput<TOutput>>;
  /** Delay in ms between checks. */
  interval: number;
  /** Upper bound in ms on the total time spent in this step before it fails. */
  timeout?: number;
  /** Upper bound on the number of check cycles before the step fails. */
  maxAttempts?: number;
  /** Schema for the input, or a function mapping previous output / workflow input to it. */
  inputSchema?: TInput | ((prev: any, workflowInput: any) => InferZodOutput<TInput>);
  /** Schema the final result must satisfy. */
  outputSchema?: TOutput;
}
175
+
/**
 * Definition of a "loop" step: jumps back to an earlier step while a
 * condition holds.
 */
export interface LoopStepDefinition extends BaseStepDefinition {
  type: "loop";
  /** Name of the step to jump back to while `condition` returns `true`. */
  target: string;
  /** Evaluated on each pass; `true` means loop again. */
  condition: (ctx: WorkflowContext) => boolean;
  /** Optional pause in ms before jumping back. */
  interval?: number;
  /** Upper bound in ms on the total time spent looping before the step fails. */
  timeout?: number;
  /** Upper bound on the number of loop iterations before the step fails. */
  maxIterations?: number;
}
189
+
149
190
  export type StepDefinition =
150
191
  | TaskStepDefinition
151
192
  | ParallelStepDefinition
152
193
  | ChoiceStepDefinition
153
- | PassStepDefinition;
194
+ | PassStepDefinition
195
+ | PollStepDefinition
196
+ | LoopStepDefinition;
154
197
 
155
198
  // ============================================
156
199
  // Workflow Definition
@@ -203,6 +246,11 @@ export interface StepResult {
203
246
  startedAt?: Date;
204
247
  completedAt?: Date;
205
248
  attempts: number;
249
+ pollCount?: number;
250
+ lastPolledAt?: Date;
251
+ loopCount?: number;
252
+ lastLoopedAt?: Date;
253
+ loopStartedAt?: Date;
206
254
  }
207
255
 
208
256
  export interface WorkflowInstance {
@@ -569,6 +617,69 @@ export class WorkflowBuilder {
569
617
  return this;
570
618
  }
571
619
 
/**
 * Adds a loop step that jumps back to `config.target` while
 * `config.condition` returns `true`.
 *
 * @param name - Name of the loop step.
 * @param config - Condition, jump target, and optional pacing, limits and routing.
 * @returns The builder, for chaining.
 */
loop(
  name: string,
  config: {
    condition: (ctx: WorkflowContext) => boolean;
    target: string;
    interval?: number;
    timeout?: number;
    maxIterations?: number;
    next?: string;
    end?: boolean;
  }
): this {
  const { condition, target, interval, timeout, maxIterations, next, end } = config;
  const loopStep: LoopStepDefinition = {
    name,
    type: "loop",
    condition,
    target,
    interval,
    timeout,
    maxIterations,
    next,
    end,
  };

  this.addStep(loopStep);
  return this;
}
647
+
/**
 * Adds a poll step that repeatedly calls `config.check` until it reports
 * `done: true`.
 *
 * @typeParam TInput - Zod schema describing the value passed to `check`.
 * @typeParam TOutput - Zod schema describing the final result.
 * @param name - Name of the poll step.
 * @param config - Check handler, interval, and optional limits, schemas, retry and routing.
 * @returns The builder, for chaining.
 */
poll<TInput extends ZodSchema = ZodSchema, TOutput extends ZodSchema = ZodSchema>(
  name: string,
  config: {
    check: (
      input: InferZodOutput<TInput>,
      ctx: WorkflowContext
    ) => Promise<PollStepResult<InferZodOutput<TOutput>>> | PollStepResult<InferZodOutput<TOutput>>;
    interval: number;
    timeout?: number;
    maxAttempts?: number;
    inputSchema?: TInput | ((prev: any, workflowInput: any) => InferZodOutput<TInput>);
    outputSchema?: TOutput;
    retry?: RetryConfig;
    next?: string;
    end?: boolean;
  }
): this {
  const {
    check,
    interval,
    timeout,
    maxAttempts,
    inputSchema,
    outputSchema,
    retry,
    next,
    end,
  } = config;
  const pollStep: PollStepDefinition<TInput, TOutput> = {
    name,
    type: "poll",
    check,
    interval,
    timeout,
    maxAttempts,
    inputSchema,
    outputSchema,
    retry,
    next,
    end,
  };

  this.addStep(pollStep);
  return this;
}
682
+
572
683
  /** Add an end step (shorthand for pass with end: true) */
573
684
  end(name: string = "end"): this {
574
685
  return this.pass(name, { end: true });
@@ -649,8 +760,12 @@ export interface WorkflowsConfig {
649
760
  heartbeatTimeout?: number;
650
761
  /** Timeout waiting for isolated subprocess readiness (ms, default: 10000) */
651
762
  readyTimeout?: number;
763
+ /** Resume strategy for orphaned workflows (default: "blocking") */
764
+ resumeStrategy?: WorkflowResumeStrategy;
652
765
  }
653
766
 
/** How workflow instances that are still marked as running are handled by `resume()`. */
export type WorkflowResumeStrategy =
  | "blocking"
  | "background"
  | "skip";
768
+
654
769
  /** Options for registering a workflow */
655
770
  export interface WorkflowRegisterOptions {
656
771
  /**
@@ -687,7 +802,7 @@ export interface Workflows {
687
802
  /** Get all workflow instances with optional filtering (for admin dashboard) */
688
803
  getAllInstances(options?: GetAllWorkflowsOptions): Promise<WorkflowInstance[]>;
689
804
  /** Resume workflows after server restart */
690
- resume(): Promise<void>;
805
+ resume(options?: { strategy?: WorkflowResumeStrategy }): Promise<void>;
691
806
  /** Stop the workflow service */
692
807
  stop(): Promise<void>;
693
808
  /** Set core services (called after initialization to resolve circular dependency) */
@@ -739,6 +854,7 @@ class WorkflowsImpl implements Workflows {
739
854
  private dbPath?: string;
740
855
  private heartbeatTimeoutMs: number;
741
856
  private readyTimeoutMs: number;
857
+ private resumeStrategy!: WorkflowResumeStrategy;
742
858
  private workflowModulePaths = new Map<string, string>();
743
859
  private isolatedProcesses = new Map<string, IsolatedProcessInfo>();
744
860
  private readyWaiters = new Map<
@@ -772,6 +888,7 @@ class WorkflowsImpl implements Workflows {
772
888
  this.dbPath = config.dbPath;
773
889
  this.heartbeatTimeoutMs = config.heartbeatTimeout ?? 60000;
774
890
  this.readyTimeoutMs = config.readyTimeout ?? 10000;
891
+ this.resumeStrategy = config.resumeStrategy ?? "blocking";
775
892
  }
776
893
 
777
894
  private getSocketServer(): WorkflowSocketServer {
@@ -975,38 +1092,64 @@ class WorkflowsImpl implements Workflows {
975
1092
  return this.adapter.getAllInstances(options);
976
1093
  }
977
1094
 
978
- async resume(): Promise<void> {
1095
+ async resume(options?: { strategy?: WorkflowResumeStrategy }): Promise<void> {
1096
+ const strategy = options?.strategy ?? this.resumeStrategy;
979
1097
  const running = await this.adapter.getRunningInstances();
980
1098
 
981
- for (const instance of running) {
1099
+ if (this.dbPath) {
1100
+ await this.getSocketServer().cleanOrphanedSockets(
1101
+ new Set(running.map((instance) => instance.id))
1102
+ );
1103
+ }
1104
+
1105
+ if (strategy === "skip") {
1106
+ await this.markOrphanedAsFailed(running, "Workflow resume skipped");
1107
+ return;
1108
+ }
1109
+
1110
+ const resumeInstance = async (instance: WorkflowInstance) => {
982
1111
  const definition = this.definitions.get(instance.workflowName);
983
1112
  if (!definition) {
984
- // Workflow no longer registered, mark as failed
985
1113
  await this.adapter.updateInstance(instance.id, {
986
1114
  status: "failed",
987
1115
  error: "Workflow definition not found after restart",
988
1116
  completedAt: new Date(),
989
1117
  });
990
- continue;
1118
+ return;
991
1119
  }
992
1120
 
993
1121
  console.log(`[Workflows] Resuming workflow instance ${instance.id}`);
994
1122
 
995
- // Check isolation mode and call appropriate method
996
1123
  const isIsolated = definition.isolated !== false;
997
1124
  const modulePath = this.workflowModulePaths.get(instance.workflowName);
998
1125
 
999
1126
  if (isIsolated && modulePath && this.dbPath) {
1000
- try {
1001
- await this.executeIsolatedWorkflow(instance.id, definition, instance.input, modulePath);
1002
- } catch (error) {
1127
+ await this.executeIsolatedWorkflow(instance.id, definition, instance.input, modulePath);
1128
+ } else {
1129
+ this.startInlineWorkflow(instance.id, definition);
1130
+ }
1131
+ };
1132
+
1133
+ if (strategy === "background") {
1134
+ for (const instance of running) {
1135
+ resumeInstance(instance).catch((error) => {
1003
1136
  console.error(
1004
- `[Workflows] Failed to resume isolated workflow ${instance.id}:`,
1137
+ `[Workflows] Failed to resume workflow ${instance.id}:`,
1005
1138
  error instanceof Error ? error.message : String(error)
1006
1139
  );
1007
- }
1008
- } else {
1009
- this.startInlineWorkflow(instance.id, definition);
1140
+ });
1141
+ }
1142
+ return;
1143
+ }
1144
+
1145
+ for (const instance of running) {
1146
+ try {
1147
+ await resumeInstance(instance);
1148
+ } catch (error) {
1149
+ console.error(
1150
+ `[Workflows] Failed to resume workflow ${instance.id}:`,
1151
+ error instanceof Error ? error.message : String(error)
1152
+ );
1010
1153
  }
1011
1154
  }
1012
1155
  }
@@ -1166,6 +1309,51 @@ class WorkflowsImpl implements Workflows {
1166
1309
  });
1167
1310
  }
1168
1311
  },
// Publishes a poll cycle as a "workflow.step.poll" event and, when SSE is
// configured, on both the per-instance and the global channel.
onStepPoll: (id, stepName, pollCount, done, result) => {
  const pollInfo = { stepName, pollCount, done, result };

  this.emitEvent("workflow.step.poll", { instanceId: id, ...pollInfo });

  if (!this.sse) {
    return;
  }
  this.sse.broadcast(`workflow:${id}`, "step.poll", { ...pollInfo });
  this.sse.broadcast("workflows:all", "workflow.step.poll", { instanceId: id, ...pollInfo });
},
// Publishes a loop-back as a "workflow.step.loop" event and, when SSE is
// configured, on both the per-instance and the global channel.
onStepLoop: (id, stepName, loopCount, target) => {
  const loopInfo = { stepName, loopCount, target };

  this.emitEvent("workflow.step.loop", { instanceId: id, ...loopInfo });

  if (!this.sse) {
    return;
  }
  this.sse.broadcast(`workflow:${id}`, "step.loop", { ...loopInfo });
  this.sse.broadcast("workflows:all", "workflow.step.loop", { instanceId: id, ...loopInfo });
},
1169
1357
  onStepRetry: (id, stepName, attempt, max, delayMs) => {
1170
1358
  this.emitEvent("workflow.step.retry", {
1171
1359
  instanceId: id,
@@ -1452,6 +1640,55 @@ class WorkflowsImpl implements Workflows {
1452
1640
  break;
1453
1641
  }
1454
1642
 
1643
+ case "step.poll": {
1644
+ await this.emitEvent("workflow.step.poll", {
1645
+ instanceId,
1646
+ stepName: event.stepName,
1647
+ pollCount: event.pollCount,
1648
+ done: event.done,
1649
+ result: event.result,
1650
+ });
1651
+ if (this.sse) {
1652
+ this.sse.broadcast(`workflow:${instanceId}`, "step.poll", {
1653
+ stepName: event.stepName,
1654
+ pollCount: event.pollCount,
1655
+ done: event.done,
1656
+ result: event.result,
1657
+ });
1658
+ this.sse.broadcast("workflows:all", "workflow.step.poll", {
1659
+ instanceId,
1660
+ stepName: event.stepName,
1661
+ pollCount: event.pollCount,
1662
+ done: event.done,
1663
+ result: event.result,
1664
+ });
1665
+ }
1666
+ break;
1667
+ }
1668
+
1669
+ case "step.loop": {
1670
+ await this.emitEvent("workflow.step.loop", {
1671
+ instanceId,
1672
+ stepName: event.stepName,
1673
+ loopCount: event.loopCount,
1674
+ target: event.target,
1675
+ });
1676
+ if (this.sse) {
1677
+ this.sse.broadcast(`workflow:${instanceId}`, "step.loop", {
1678
+ stepName: event.stepName,
1679
+ loopCount: event.loopCount,
1680
+ target: event.target,
1681
+ });
1682
+ this.sse.broadcast("workflows:all", "workflow.step.loop", {
1683
+ instanceId,
1684
+ stepName: event.stepName,
1685
+ loopCount: event.loopCount,
1686
+ target: event.target,
1687
+ });
1688
+ }
1689
+ break;
1690
+ }
1691
+
1455
1692
  case "progress": {
1456
1693
  await this.emitEvent("workflow.progress", {
1457
1694
  instanceId,
@@ -1710,6 +1947,34 @@ class WorkflowsImpl implements Workflows {
1710
1947
  this.rejectIsolatedReady(instanceId, new Error("Isolated workflow cleaned up"));
1711
1948
  }
1712
1949
 
/**
 * Marks every given instance as failed with `reason`, then announces the
 * failure through a "workflow.failed" event and, when SSE is configured, on
 * the per-instance and global channels. Instances are processed one at a time.
 *
 * @param instances - Instances to fail.
 * @param reason - Error message stored on the instance and sent with the events.
 */
private async markOrphanedAsFailed(
  instances: WorkflowInstance[],
  reason: string
): Promise<void> {
  for (const { id, workflowName } of instances) {
    await this.adapter.updateInstance(id, {
      status: "failed",
      error: reason,
      completedAt: new Date(),
    });

    await this.emitEvent("workflow.failed", {
      instanceId: id,
      workflowName,
      error: reason,
    });

    if (!this.sse) {
      continue;
    }
    this.sse.broadcast(`workflow:${id}`, "failed", { error: reason });
    this.sse.broadcast("workflows:all", "workflow.failed", {
      instanceId: id,
      workflowName,
      error: reason,
    });
  }
}
1977
+
1713
1978
  /**
1714
1979
  * Reset heartbeat timeout for an isolated workflow
1715
1980
  */
package/src/server.ts CHANGED
@@ -81,6 +81,11 @@ export interface ServerConfig {
81
81
  rateLimiter?: RateLimiterConfig;
82
82
  errors?: ErrorsConfig;
83
83
  workflows?: WorkflowsConfig;
84
+ /**
85
+ * Resume strategy for workflows on startup.
86
+ * Defaults to "blocking" for server mode and "background" for adapter mode.
87
+ */
88
+ workflowsResumeStrategy?: "blocking" | "background" | "skip";
84
89
  processes?: ProcessesConfig;
85
90
  audit?: AuditConfig;
86
91
  websocket?: WebSocketConfig;
@@ -214,6 +219,8 @@ export class AppServer {
214
219
  private isInitialized = false;
215
220
  private initializationPromise: Promise<void> | null = null;
216
221
  private generateModeSetup = false;
222
+ private initMode: "adapter" | "server" = "server";
223
+ private workflowsResumeStrategy?: "blocking" | "background" | "skip";
217
224
 
218
225
  // Custom services registry
219
226
  private serviceFactories = new Map<string, ServiceFactory<any>>();
@@ -225,6 +232,7 @@ export class AppServer {
225
232
  const envPort = process.env.PORT ? parseInt(process.env.PORT, 10) : undefined;
226
233
  this.port = options.port ?? envPort ?? 3000;
227
234
  this.maxPortAttempts = options.maxPortAttempts ?? 5;
235
+ this.workflowsResumeStrategy = options.workflowsResumeStrategy ?? options.workflows?.resumeStrategy;
228
236
 
229
237
  // Determine if we should use legacy databases
230
238
  const useLegacy = options.useLegacyCoreDatabases ?? false;
@@ -986,6 +994,7 @@ ${factoryFunction}
986
994
  * Used by adapters (e.g., SvelteKit) that manage their own HTTP server.
987
995
  */
988
996
  async initialize(): Promise<void> {
997
+ this.initMode = "adapter";
989
998
  // Handle CLI type generation mode - exit early before any initialization
990
999
  if (process.env.DONKEYLABS_GENERATE === "1") {
991
1000
  this.outputRoutesForGeneration();
@@ -1038,7 +1047,13 @@ ${factoryFunction}
1038
1047
  this.coreServices.cron.start();
1039
1048
  this.coreServices.jobs.start();
1040
1049
  await this.coreServices.workflows.resolveDbPath();
1041
- await this.coreServices.workflows.resume();
1050
+ const defaultStrategy = this.initMode === "adapter" ? "background" : undefined;
1051
+ const strategy = this.workflowsResumeStrategy ?? defaultStrategy;
1052
+ if (strategy) {
1053
+ await this.coreServices.workflows.resume({ strategy });
1054
+ } else {
1055
+ await this.coreServices.workflows.resume();
1056
+ }
1042
1057
  this.coreServices.processes.start();
1043
1058
  logger.info("Background services started (cron, jobs, workflows, processes)");
1044
1059
 
@@ -1252,6 +1267,7 @@ ${factoryFunction}
1252
1267
  * 5. Start the HTTP server
1253
1268
  */
1254
1269
  async start() {
1270
+ this.initMode = "server";
1255
1271
  // Handle CLI type generation mode - exit early before any initialization
1256
1272
  if (process.env.DONKEYLABS_GENERATE === "1") {
1257
1273
  this.outputRoutesForGeneration();