@donkeylabs/server 2.0.18 → 2.0.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,12 +6,22 @@
6
6
  // - parallel: Run multiple branches concurrently
7
7
  // - choice: Conditional branching
8
8
  // - pass: Transform data / no-op
9
+ // - isolated: Execute in subprocess to prevent event loop blocking (default)
9
10
 
10
11
  import type { Events } from "./events";
11
12
  import type { Jobs } from "./jobs";
12
13
  import type { SSE } from "./sse";
13
14
  import type { z } from "zod";
14
15
  import type { CoreServices } from "../core";
16
+ import { dirname, join } from "node:path";
17
+ import { fileURLToPath } from "node:url";
18
+ import {
19
+ createWorkflowSocketServer,
20
+ type WorkflowSocketServer,
21
+ type WorkflowEvent,
22
+ type ProxyRequest,
23
+ } from "./workflow-socket";
24
+ import { isProcessAlive } from "./external-jobs";
15
25
 
16
26
  // Type helper for Zod schema inference
17
27
  type ZodSchema = z.ZodTypeAny;
@@ -126,6 +136,13 @@ export interface WorkflowDefinition {
126
136
  timeout?: number;
127
137
  /** Default retry config for all steps */
128
138
  defaultRetry?: RetryConfig;
139
+ /**
140
+ * Whether to execute this workflow in an isolated subprocess.
141
+ * Default: true (isolated by default to prevent blocking the event loop)
142
+ *
143
+ * Set to false for lightweight workflows that benefit from inline execution.
144
+ */
145
+ isolated?: boolean;
129
146
  }
130
147
 
131
148
  // ============================================
@@ -176,6 +193,8 @@ export interface WorkflowInstance {
176
193
  parentId?: string;
177
194
  /** Branch name if this is a branch instance */
178
195
  branchName?: string;
196
+ /** Custom metadata that persists across steps (JSON-serializable) */
197
+ metadata?: Record<string, any>;
179
198
  }
180
199
 
181
200
  // ============================================
@@ -195,6 +214,33 @@ export interface WorkflowContext {
195
214
  getStepResult<T = any>(stepName: string): T | undefined;
196
215
  /** Core services (logger, events, cache, etc.) */
197
216
  core: CoreServices;
217
+ /** Plugin services - available for business logic in workflow handlers */
218
+ plugins: Record<string, any>;
219
+ /**
220
+ * Custom metadata that persists across steps (read-only snapshot).
221
+ * Use setMetadata() to update values.
222
+ */
223
+ metadata: Record<string, any>;
224
+ /**
225
+ * Set a metadata value that persists across workflow steps.
226
+ * Accepts any JSON-serializable value (objects, arrays, primitives).
227
+ *
228
+ * @example
229
+ * await ctx.setMetadata('orderContext', {
230
+ * correlationId: 'abc-123',
231
+ * customer: { id: 'cust_1', tier: 'premium' },
232
+ * flags: { expedited: true }
233
+ * });
234
+ */
235
+ setMetadata(key: string, value: any): Promise<void>;
236
+ /**
237
+ * Get a metadata value with type safety.
238
+ *
239
+ * @example
240
+ * interface OrderContext { correlationId: string; customer: { id: string } }
241
+ * const orderContext = ctx.getMetadata<OrderContext>('orderContext');
242
+ */
243
+ getMetadata<T = any>(key: string): T | undefined;
198
244
  }
199
245
 
200
246
  // ============================================
@@ -305,11 +351,29 @@ export class WorkflowBuilder {
305
351
  private _timeout?: number;
306
352
  private _defaultRetry?: RetryConfig;
307
353
  private _lastStep?: string;
354
+ private _isolated = true; // Default to isolated execution
308
355
 
309
356
  constructor(name: string) {
310
357
  this._name = name;
311
358
  }
312
359
 
360
+ /**
361
+ * Set whether to execute this workflow in an isolated subprocess.
362
+ * Default: true (isolated by default to prevent blocking the event loop)
363
+ *
364
+ * @param enabled - Set to false for lightweight workflows that benefit from inline execution
365
+ * @example
366
+ * // Heavy workflow - uses default isolation (no call needed)
367
+ * workflow("data-ingestion").task("process", { ... }).build();
368
+ *
369
+ * // Lightweight workflow - opt out of isolation
370
+ * workflow("quick-validation").isolated(false).task("validate", { ... }).build();
371
+ */
372
+ isolated(enabled: boolean = true): this {
373
+ this._isolated = enabled;
374
+ return this;
375
+ }
376
+
313
377
  /** Set the starting step explicitly */
314
378
  startAt(stepName: string): this {
315
379
  this._startAt = stepName;
@@ -510,6 +574,7 @@ export class WorkflowBuilder {
510
574
  startAt: this._startAt,
511
575
  timeout: this._timeout,
512
576
  defaultRetry: this._defaultRetry,
577
+ isolated: this._isolated,
513
578
  };
514
579
  }
515
580
  }
@@ -537,11 +602,36 @@ export interface WorkflowsConfig {
537
602
  pollInterval?: number;
538
603
  /** Core services to pass to step handlers */
539
604
  core?: CoreServices;
605
+ /** Directory for Unix sockets (default: /tmp/donkeylabs-workflows) */
606
+ socketDir?: string;
607
+ /** TCP port range for Windows fallback (default: [49152, 65535]) */
608
+ tcpPortRange?: [number, number];
609
+ /** Database file path (required for isolated workflows) */
610
+ dbPath?: string;
611
+ /** Heartbeat timeout in ms (default: 60000) */
612
+ heartbeatTimeout?: number;
613
+ }
614
+
615
+ /** Options for registering a workflow */
616
+ export interface WorkflowRegisterOptions {
617
+ /**
618
+ * Module path for isolated workflows.
619
+ * Needed for isolated execution (workflow.isolated !== false); without it the workflow falls back to inline execution.
620
+ * Use `import.meta.url` to get the current module's path.
621
+ *
622
+ * @example
623
+ * workflows.register(myWorkflow, { modulePath: import.meta.url });
624
+ */
625
+ modulePath?: string;
540
626
  }
541
627
 
542
628
  export interface Workflows {
543
- /** Register a workflow definition */
544
- register(definition: WorkflowDefinition): void;
629
+ /**
630
+ * Register a workflow definition.
631
+ * @param definition - The workflow definition to register
632
+ * @param options - Registration options (modulePath required for isolated workflows)
633
+ */
634
+ register(definition: WorkflowDefinition, options?: WorkflowRegisterOptions): void;
545
635
  /** Start a new workflow instance */
546
636
  start<T = any>(workflowName: string, input: T): Promise<string>;
547
637
  /** Get a workflow instance by ID */
@@ -558,22 +648,43 @@ export interface Workflows {
558
648
  stop(): Promise<void>;
559
649
  /** Set core services (called after initialization to resolve circular dependency) */
560
650
  setCore(core: CoreServices): void;
651
+ /** Set plugin services (called after plugins are initialized) */
652
+ setPlugins(plugins: Record<string, any>): void;
653
+ /** Update metadata for a workflow instance (used by isolated workflows) */
654
+ updateMetadata(instanceId: string, key: string, value: any): Promise<void>;
561
655
  }
562
656
 
563
657
  // ============================================
564
658
  // Workflow Service Implementation
565
659
  // ============================================
566
660
 
661
+ interface IsolatedProcessInfo {
662
+ pid: number;
663
+ timeout?: ReturnType<typeof setTimeout>;
664
+ heartbeatTimeout?: ReturnType<typeof setTimeout>;
665
+ lastHeartbeat: number;
666
+ }
667
+
567
668
  class WorkflowsImpl implements Workflows {
568
669
  private adapter: WorkflowAdapter;
569
670
  private events?: Events;
570
671
  private jobs?: Jobs;
571
672
  private sse?: SSE;
572
673
  private core?: CoreServices;
674
+ private plugins: Record<string, any> = {};
573
675
  private definitions = new Map<string, WorkflowDefinition>();
574
676
  private running = new Map<string, { timeout?: ReturnType<typeof setTimeout> }>();
575
677
  private pollInterval: number;
576
678
 
679
+ // Isolated execution state
680
+ private socketServer?: WorkflowSocketServer;
681
+ private socketDir: string;
682
+ private tcpPortRange: [number, number];
683
+ private dbPath?: string;
684
+ private heartbeatTimeoutMs: number;
685
+ private workflowModulePaths = new Map<string, string>();
686
+ private isolatedProcesses = new Map<string, IsolatedProcessInfo>();
687
+
577
688
  constructor(config: WorkflowsConfig = {}) {
578
689
  this.adapter = config.adapter ?? new MemoryWorkflowAdapter();
579
690
  this.events = config.events;
@@ -581,16 +692,97 @@ class WorkflowsImpl implements Workflows {
581
692
  this.sse = config.sse;
582
693
  this.core = config.core;
583
694
  this.pollInterval = config.pollInterval ?? 1000;
695
+
696
+ // Isolated execution config
697
+ this.socketDir = config.socketDir ?? "/tmp/donkeylabs-workflows";
698
+ this.tcpPortRange = config.tcpPortRange ?? [49152, 65535];
699
+ this.dbPath = config.dbPath;
700
+ this.heartbeatTimeoutMs = config.heartbeatTimeout ?? 60000;
701
+ }
702
+
703
+ private getSocketServer(): WorkflowSocketServer {
704
+ if (!this.socketServer) {
705
+ this.socketServer = createWorkflowSocketServer(
706
+ {
707
+ socketDir: this.socketDir,
708
+ tcpPortRange: this.tcpPortRange,
709
+ },
710
+ {
711
+ onEvent: (event) => this.handleIsolatedEvent(event),
712
+ onProxyCall: (request) => this.handleProxyCall(request),
713
+ onConnect: (instanceId) => {
714
+ console.log(`[Workflows] Isolated workflow ${instanceId} connected`);
715
+ },
716
+ onDisconnect: (instanceId) => {
717
+ console.log(`[Workflows] Isolated workflow ${instanceId} disconnected`);
718
+ },
719
+ onError: (error, instanceId) => {
720
+ console.error(`[Workflows] Socket error for ${instanceId}:`, error);
721
+ },
722
+ }
723
+ );
724
+ }
725
+ return this.socketServer;
584
726
  }
585
727
 
586
728
  setCore(core: CoreServices): void {
587
729
  this.core = core;
730
+ // Extract DB path if using Kysely adapter (for isolated workflows)
731
+ if (!this.dbPath && (core.db as any)?.getExecutor) {
732
+ // Try to get the database path from the Kysely instance
733
+ // This is a bit hacky but necessary for isolated workflows
734
+ try {
735
+ const executor = (core.db as any).getExecutor();
736
+ const adapter = executor?.adapter;
737
+ if (adapter?.db?.filename) {
738
+ this.dbPath = adapter.db.filename;
739
+ }
740
+ } catch {
741
+ // Ignore - dbPath might be set manually
742
+ }
743
+ }
744
+ }
745
+
746
+ setPlugins(plugins: Record<string, any>): void {
747
+ this.plugins = plugins;
748
+ }
749
+
750
+ async updateMetadata(instanceId: string, key: string, value: any): Promise<void> {
751
+ const instance = await this.adapter.getInstance(instanceId);
752
+ if (!instance) return;
753
+
754
+ const metadata = { ...(instance.metadata || {}), [key]: value };
755
+ await this.adapter.updateInstance(instanceId, { metadata });
588
756
  }
589
757
 
590
- register(definition: WorkflowDefinition): void {
758
+ register(definition: WorkflowDefinition, options?: WorkflowRegisterOptions): void {
591
759
  if (this.definitions.has(definition.name)) {
592
760
  throw new Error(`Workflow "${definition.name}" is already registered`);
593
761
  }
762
+
763
+ // Validate isolated workflows don't use unsupported step types
764
+ if (definition.isolated !== false) {
765
+ for (const [stepName, step] of definition.steps) {
766
+ if (step.type === "choice" || step.type === "parallel") {
767
+ throw new Error(
768
+ `Workflow "${definition.name}" uses ${step.type} step "${stepName}" ` +
769
+ `which is not supported in isolated mode. Use .isolated(false) to run inline.`
770
+ );
771
+ }
772
+ }
773
+ }
774
+
775
+ // Store module path for isolated workflows
776
+ if (options?.modulePath) {
777
+ this.workflowModulePaths.set(definition.name, options.modulePath);
778
+ } else if (definition.isolated !== false) {
779
+ // Warn if isolated workflow has no module path
780
+ console.warn(
781
+ `[Workflows] Workflow "${definition.name}" is isolated but no modulePath provided. ` +
782
+ `Use: workflows.register(myWorkflow, { modulePath: import.meta.url })`
783
+ );
784
+ }
785
+
594
786
  this.definitions.set(definition.name, definition);
595
787
  }
596
788
 
@@ -616,8 +808,35 @@ class WorkflowsImpl implements Workflows {
616
808
  input,
617
809
  });
618
810
 
619
- // Start execution
620
- this.executeWorkflow(instance.id, definition);
811
+ // SSE broadcast for real-time monitoring
812
+ if (this.sse) {
813
+ this.sse.broadcast(`workflow:${instance.id}`, "started", {
814
+ workflowName,
815
+ input,
816
+ });
817
+ this.sse.broadcast("workflows:all", "workflow.started", {
818
+ instanceId: instance.id,
819
+ workflowName,
820
+ input,
821
+ });
822
+ }
823
+
824
+ // Start execution (isolated or inline based on definition.isolated)
825
+ const isIsolated = definition.isolated !== false;
826
+ const modulePath = this.workflowModulePaths.get(workflowName);
827
+
828
+ if (isIsolated && modulePath && this.dbPath) {
829
+ // Execute in isolated subprocess
830
+ this.executeIsolatedWorkflow(instance.id, definition, input, modulePath);
831
+ } else {
832
+ // Execute inline (existing behavior)
833
+ if (isIsolated && !modulePath) {
834
+ console.warn(
835
+ `[Workflows] Workflow "${workflowName}" falling back to inline execution (no modulePath)`
836
+ );
837
+ }
838
+ this.executeWorkflow(instance.id, definition);
839
+ }
621
840
 
622
841
  return instance.id;
623
842
  }
@@ -632,7 +851,21 @@ class WorkflowsImpl implements Workflows {
632
851
  return false;
633
852
  }
634
853
 
635
- // Clear timeout
854
+ // Kill isolated process if running
855
+ const isolatedInfo = this.isolatedProcesses.get(instanceId);
856
+ if (isolatedInfo) {
857
+ try {
858
+ process.kill(isolatedInfo.pid, "SIGTERM");
859
+ } catch {
860
+ // Process might already be dead
861
+ }
862
+ if (isolatedInfo.timeout) clearTimeout(isolatedInfo.timeout);
863
+ if (isolatedInfo.heartbeatTimeout) clearTimeout(isolatedInfo.heartbeatTimeout);
864
+ this.isolatedProcesses.delete(instanceId);
865
+ await this.getSocketServer().closeSocket(instanceId);
866
+ }
867
+
868
+ // Clear inline timeout
636
869
  const runInfo = this.running.get(instanceId);
637
870
  if (runInfo?.timeout) {
638
871
  clearTimeout(runInfo.timeout);
@@ -677,18 +910,50 @@ class WorkflowsImpl implements Workflows {
677
910
  }
678
911
 
679
912
  console.log(`[Workflows] Resuming workflow instance ${instance.id}`);
680
- this.executeWorkflow(instance.id, definition);
913
+
914
+ // Check isolation mode and call appropriate method
915
+ const isIsolated = definition.isolated !== false;
916
+ const modulePath = this.workflowModulePaths.get(instance.workflowName);
917
+
918
+ if (isIsolated && modulePath && this.dbPath) {
919
+ this.executeIsolatedWorkflow(instance.id, definition, instance.input, modulePath);
920
+ } else {
921
+ this.executeWorkflow(instance.id, definition);
922
+ }
681
923
  }
682
924
  }
683
925
 
684
926
  async stop(): Promise<void> {
685
- // Clear all timeouts
927
+ // Kill all isolated processes
928
+ for (const [instanceId, info] of this.isolatedProcesses) {
929
+ try {
930
+ process.kill(info.pid, "SIGTERM");
931
+ } catch {
932
+ // Process might already be dead
933
+ }
934
+ if (info.timeout) clearTimeout(info.timeout);
935
+ if (info.heartbeatTimeout) clearTimeout(info.heartbeatTimeout);
936
+ }
937
+ this.isolatedProcesses.clear();
938
+
939
+ // Shutdown socket server
940
+ if (this.socketServer) {
941
+ await this.socketServer.shutdown();
942
+ this.socketServer = undefined;
943
+ }
944
+
945
+ // Clear all inline timeouts
686
946
  for (const [instanceId, runInfo] of this.running) {
687
947
  if (runInfo.timeout) {
688
948
  clearTimeout(runInfo.timeout);
689
949
  }
690
950
  }
691
951
  this.running.clear();
952
+
953
+ // Stop adapter (clean up timers and prevent further DB access)
954
+ if (this.adapter && typeof (this.adapter as any).stop === "function") {
955
+ (this.adapter as any).stop();
956
+ }
692
957
  }
693
958
 
694
959
  // ============================================
@@ -754,6 +1019,16 @@ class WorkflowsImpl implements Workflows {
754
1019
  stepType: step.type,
755
1020
  });
756
1021
 
1022
+ // Broadcast via SSE
1023
+ if (this.sse) {
1024
+ this.sse.broadcast(`workflow:${instanceId}`, "step.started", { stepName });
1025
+ this.sse.broadcast("workflows:all", "workflow.step.started", {
1026
+ instanceId,
1027
+ workflowName: instance.workflowName,
1028
+ stepName,
1029
+ });
1030
+ }
1031
+
757
1032
  // Update step result as running
758
1033
  const stepResult: StepResult = {
759
1034
  stepName,
@@ -1141,6 +1416,9 @@ class WorkflowsImpl implements Workflows {
1141
1416
  }
1142
1417
  }
1143
1418
 
1419
+ // Metadata snapshot (mutable reference for setMetadata updates)
1420
+ const metadata = { ...(instance.metadata ?? {}) };
1421
+
1144
1422
  return {
1145
1423
  input: instance.input,
1146
1424
  steps,
@@ -1150,6 +1428,19 @@ class WorkflowsImpl implements Workflows {
1150
1428
  return steps[stepName] as T | undefined;
1151
1429
  },
1152
1430
  core: this.core!,
1431
+ plugins: this.plugins,
1432
+ metadata,
1433
+ setMetadata: async (key: string, value: any): Promise<void> => {
1434
+ // Update local snapshot
1435
+ metadata[key] = value;
1436
+ // Persist to database
1437
+ await this.adapter.updateInstance(instance.id, {
1438
+ metadata: { ...metadata },
1439
+ });
1440
+ },
1441
+ getMetadata: <T = any>(key: string): T | undefined => {
1442
+ return metadata[key] as T | undefined;
1443
+ },
1153
1444
  };
1154
1445
  }
1155
1446
 
@@ -1163,6 +1454,12 @@ class WorkflowsImpl implements Workflows {
1163
1454
  const instance = await this.adapter.getInstance(instanceId);
1164
1455
  if (!instance) return;
1165
1456
 
1457
+ // Check if workflow is still running (not cancelled/failed/timed out)
1458
+ if (instance.status !== "running") {
1459
+ console.log(`[Workflows] Ignoring step completion for ${instanceId}, status is ${instance.status}`);
1460
+ return;
1461
+ }
1462
+
1166
1463
  // Update step result
1167
1464
  const stepResult = instance.stepResults[stepName] ?? {
1168
1465
  stepName,
@@ -1185,6 +1482,19 @@ class WorkflowsImpl implements Workflows {
1185
1482
  output,
1186
1483
  });
1187
1484
 
1485
+ // Broadcast step completed via SSE
1486
+ if (this.sse) {
1487
+ this.sse.broadcast(`workflow:${instanceId}`, "step.completed", {
1488
+ stepName,
1489
+ output,
1490
+ });
1491
+ this.sse.broadcast("workflows:all", "workflow.step.completed", {
1492
+ instanceId,
1493
+ workflowName: instance.workflowName,
1494
+ stepName,
1495
+ });
1496
+ }
1497
+
1188
1498
  // Calculate and emit progress
1189
1499
  const totalSteps = definition.steps.size;
1190
1500
  const completedSteps = Object.values(instance.stepResults).filter(
@@ -1201,7 +1511,7 @@ class WorkflowsImpl implements Workflows {
1201
1511
  totalSteps,
1202
1512
  });
1203
1513
 
1204
- // Broadcast via SSE
1514
+ // Broadcast progress via SSE
1205
1515
  if (this.sse) {
1206
1516
  this.sse.broadcast(`workflow:${instanceId}`, "progress", {
1207
1517
  progress,
@@ -1209,6 +1519,12 @@ class WorkflowsImpl implements Workflows {
1209
1519
  completedSteps,
1210
1520
  totalSteps,
1211
1521
  });
1522
+ this.sse.broadcast("workflows:all", "workflow.progress", {
1523
+ instanceId,
1524
+ workflowName: instance.workflowName,
1525
+ progress,
1526
+ currentStep: stepName,
1527
+ });
1212
1528
  }
1213
1529
 
1214
1530
  // Move to next step or complete
@@ -1298,6 +1614,20 @@ class WorkflowsImpl implements Workflows {
1298
1614
  attempts: stepResult.attempts,
1299
1615
  });
1300
1616
 
1617
+ // Broadcast step failed via SSE
1618
+ if (this.sse) {
1619
+ this.sse.broadcast(`workflow:${instanceId}`, "step.failed", {
1620
+ stepName,
1621
+ error,
1622
+ });
1623
+ this.sse.broadcast("workflows:all", "workflow.step.failed", {
1624
+ instanceId,
1625
+ workflowName: instance.workflowName,
1626
+ stepName,
1627
+ error,
1628
+ });
1629
+ }
1630
+
1301
1631
  // Fail the workflow
1302
1632
  await this.failWorkflow(instanceId, `Step "${stepName}" failed: ${error}`);
1303
1633
  }
@@ -1306,6 +1636,12 @@ class WorkflowsImpl implements Workflows {
1306
1636
  const instance = await this.adapter.getInstance(instanceId);
1307
1637
  if (!instance) return;
1308
1638
 
1639
+ // Check if workflow is still running (not cancelled/failed/timed out)
1640
+ if (instance.status !== "running") {
1641
+ console.log(`[Workflows] Ignoring workflow completion for ${instanceId}, status is ${instance.status}`);
1642
+ return;
1643
+ }
1644
+
1309
1645
  // Clear timeout
1310
1646
  const runInfo = this.running.get(instanceId);
1311
1647
  if (runInfo?.timeout) {
@@ -1329,6 +1665,10 @@ class WorkflowsImpl implements Workflows {
1329
1665
  // Broadcast via SSE
1330
1666
  if (this.sse) {
1331
1667
  this.sse.broadcast(`workflow:${instanceId}`, "completed", { output });
1668
+ this.sse.broadcast("workflows:all", "workflow.completed", {
1669
+ instanceId,
1670
+ workflowName: instance.workflowName,
1671
+ });
1332
1672
  }
1333
1673
  }
1334
1674
 
@@ -1358,6 +1698,11 @@ class WorkflowsImpl implements Workflows {
1358
1698
  // Broadcast via SSE
1359
1699
  if (this.sse) {
1360
1700
  this.sse.broadcast(`workflow:${instanceId}`, "failed", { error });
1701
+ this.sse.broadcast("workflows:all", "workflow.failed", {
1702
+ instanceId,
1703
+ workflowName: instance.workflowName,
1704
+ error,
1705
+ });
1361
1706
  }
1362
1707
  }
1363
1708
 
@@ -1366,6 +1711,434 @@ class WorkflowsImpl implements Workflows {
1366
1711
  await this.events.emit(event, data);
1367
1712
  }
1368
1713
  }
1714
+
1715
+ // ============================================
1716
+ // Isolated Execution Engine
1717
+ // ============================================
1718
+
1719
+ /**
1720
+ * Execute a workflow in an isolated subprocess
1721
+ */
1722
+ private async executeIsolatedWorkflow(
1723
+ instanceId: string,
1724
+ definition: WorkflowDefinition,
1725
+ input: any,
1726
+ modulePath: string
1727
+ ): Promise<void> {
1728
+ const socketServer = this.getSocketServer();
1729
+
1730
+ // Create socket for this workflow instance
1731
+ const { socketPath, tcpPort } = await socketServer.createSocket(instanceId);
1732
+
1733
+ // Mark workflow as running
1734
+ await this.adapter.updateInstance(instanceId, {
1735
+ status: "running",
1736
+ startedAt: new Date(),
1737
+ });
1738
+
1739
+ // Get the executor path
1740
+ const currentDir = dirname(fileURLToPath(import.meta.url));
1741
+ const executorPath = join(currentDir, "workflow-executor.ts");
1742
+
1743
+ // Prepare config for the executor
1744
+ const config = {
1745
+ instanceId,
1746
+ workflowName: definition.name,
1747
+ input,
1748
+ socketPath,
1749
+ tcpPort,
1750
+ modulePath,
1751
+ dbPath: this.dbPath,
1752
+ };
1753
+
1754
+ // Spawn the subprocess
1755
+ const proc = Bun.spawn(["bun", "run", executorPath], {
1756
+ stdin: "pipe",
1757
+ stdout: "inherit",
1758
+ stderr: "inherit",
1759
+ env: {
1760
+ ...process.env,
1761
+ // Ensure the subprocess can import from the same paths
1762
+ NODE_OPTIONS: process.env.NODE_OPTIONS ?? "",
1763
+ },
1764
+ });
1765
+
1766
+ // Send config via stdin
1767
+ proc.stdin.write(JSON.stringify(config));
1768
+ proc.stdin.end();
1769
+
1770
+ // Track the process
1771
+ this.isolatedProcesses.set(instanceId, {
1772
+ pid: proc.pid,
1773
+ lastHeartbeat: Date.now(),
1774
+ });
1775
+
1776
+ // Set up workflow timeout
1777
+ if (definition.timeout) {
1778
+ const timeoutHandle = setTimeout(async () => {
1779
+ await this.handleIsolatedTimeout(instanceId, proc.pid);
1780
+ }, definition.timeout);
1781
+ const info = this.isolatedProcesses.get(instanceId);
1782
+ if (info) info.timeout = timeoutHandle;
1783
+ }
1784
+
1785
+ // Set up heartbeat timeout
1786
+ this.resetHeartbeatTimeout(instanceId, proc.pid);
1787
+
1788
+ // Handle process exit
1789
+ proc.exited.then(async (exitCode) => {
1790
+ const info = this.isolatedProcesses.get(instanceId);
1791
+ if (info) {
1792
+ if (info.timeout) clearTimeout(info.timeout);
1793
+ if (info.heartbeatTimeout) clearTimeout(info.heartbeatTimeout);
1794
+ this.isolatedProcesses.delete(instanceId);
1795
+ }
1796
+ await socketServer.closeSocket(instanceId);
1797
+
1798
+ // Check if workflow is still running (crashed before completion)
1799
+ const instance = await this.adapter.getInstance(instanceId);
1800
+ if (instance && instance.status === "running") {
1801
+ console.error(`[Workflows] Isolated workflow ${instanceId} crashed with exit code ${exitCode}`);
1802
+ await this.failWorkflow(instanceId, `Subprocess crashed with exit code ${exitCode}`);
1803
+ }
1804
+ });
1805
+ }
1806
+
1807
+ /**
1808
+ * Handle events from isolated workflow subprocess
1809
+ */
1810
+ private async handleIsolatedEvent(event: WorkflowEvent): Promise<void> {
1811
+ const { instanceId, type } = event;
1812
+
1813
+ // Reset heartbeat timeout on any event
1814
+ const info = this.isolatedProcesses.get(instanceId);
1815
+ if (info) {
1816
+ info.lastHeartbeat = Date.now();
1817
+ this.resetHeartbeatTimeout(instanceId, info.pid);
1818
+ }
1819
+
1820
+ switch (type) {
1821
+ case "started":
1822
+ // Already marked as running in executeIsolatedWorkflow
1823
+ break;
1824
+
1825
+ case "heartbeat":
1826
+ // Heartbeat handled above
1827
+ break;
1828
+
1829
+ case "step.started": {
1830
+ const instance = await this.adapter.getInstance(instanceId);
1831
+ if (!instance) break;
1832
+
1833
+ // Update current step and step results in DB
1834
+ const stepResult = {
1835
+ stepName: event.stepName!,
1836
+ status: "running" as const,
1837
+ startedAt: new Date(),
1838
+ attempts: (instance.stepResults[event.stepName!]?.attempts ?? 0) + 1,
1839
+ };
1840
+ await this.adapter.updateInstance(instanceId, {
1841
+ currentStep: event.stepName,
1842
+ stepResults: { ...instance.stepResults, [event.stepName!]: stepResult },
1843
+ });
1844
+
1845
+ await this.emitEvent("workflow.step.started", {
1846
+ instanceId,
1847
+ workflowName: instance?.workflowName,
1848
+ stepName: event.stepName,
1849
+ });
1850
+ // Broadcast via SSE
1851
+ if (this.sse) {
1852
+ this.sse.broadcast(`workflow:${instanceId}`, "step.started", {
1853
+ stepName: event.stepName,
1854
+ });
1855
+ this.sse.broadcast("workflows:all", "workflow.step.started", {
1856
+ instanceId,
1857
+ workflowName: instance?.workflowName,
1858
+ stepName: event.stepName,
1859
+ });
1860
+ }
1861
+ break;
1862
+ }
1863
+
1864
+ case "step.completed": {
1865
+ const instance = await this.adapter.getInstance(instanceId);
1866
+ if (!instance) break;
1867
+
1868
+ // Update step results in DB
1869
+ const stepResult = instance.stepResults[event.stepName!] ?? {
1870
+ stepName: event.stepName!,
1871
+ status: "pending" as const,
1872
+ startedAt: new Date(),
1873
+ attempts: 0,
1874
+ };
1875
+ stepResult.status = "completed";
1876
+ stepResult.output = event.output;
1877
+ stepResult.completedAt = new Date();
1878
+
1879
+ await this.adapter.updateInstance(instanceId, {
1880
+ stepResults: { ...instance.stepResults, [event.stepName!]: stepResult },
1881
+ currentStep: event.nextStep,
1882
+ });
1883
+
1884
+ await this.emitEvent("workflow.step.completed", {
1885
+ instanceId,
1886
+ workflowName: instance?.workflowName,
1887
+ stepName: event.stepName,
1888
+ output: event.output,
1889
+ });
1890
+ // Broadcast via SSE
1891
+ if (this.sse) {
1892
+ this.sse.broadcast(`workflow:${instanceId}`, "step.completed", {
1893
+ stepName: event.stepName,
1894
+ output: event.output,
1895
+ });
1896
+ this.sse.broadcast("workflows:all", "workflow.step.completed", {
1897
+ instanceId,
1898
+ workflowName: instance?.workflowName,
1899
+ stepName: event.stepName,
1900
+ output: event.output,
1901
+ });
1902
+ }
1903
+ break;
1904
+ }
1905
+
1906
+ case "step.failed": {
1907
+ const instance = await this.adapter.getInstance(instanceId);
1908
+ if (!instance) break;
1909
+
1910
+ // Update step results in DB
1911
+ const stepResult = instance.stepResults[event.stepName!] ?? {
1912
+ stepName: event.stepName!,
1913
+ status: "pending" as const,
1914
+ startedAt: new Date(),
1915
+ attempts: 0,
1916
+ };
1917
+ stepResult.status = "failed";
1918
+ stepResult.error = event.error;
1919
+ stepResult.completedAt = new Date();
1920
+
1921
+ await this.adapter.updateInstance(instanceId, {
1922
+ stepResults: { ...instance.stepResults, [event.stepName!]: stepResult },
1923
+ });
1924
+
1925
+ await this.emitEvent("workflow.step.failed", {
1926
+ instanceId,
1927
+ workflowName: instance?.workflowName,
1928
+ stepName: event.stepName,
1929
+ error: event.error,
1930
+ });
1931
+ // Broadcast via SSE
1932
+ if (this.sse) {
1933
+ this.sse.broadcast(`workflow:${instanceId}`, "step.failed", {
1934
+ stepName: event.stepName,
1935
+ error: event.error,
1936
+ });
1937
+ this.sse.broadcast("workflows:all", "workflow.step.failed", {
1938
+ instanceId,
1939
+ workflowName: instance?.workflowName,
1940
+ stepName: event.stepName,
1941
+ error: event.error,
1942
+ });
1943
+ }
1944
+ break;
1945
+ }
1946
+
1947
+ case "progress": {
1948
+ const instance = await this.adapter.getInstance(instanceId);
1949
+ await this.emitEvent("workflow.progress", {
1950
+ instanceId,
1951
+ workflowName: instance?.workflowName,
1952
+ progress: event.progress,
1953
+ completedSteps: event.completedSteps,
1954
+ totalSteps: event.totalSteps,
1955
+ });
1956
+ // Broadcast via SSE
1957
+ if (this.sse) {
1958
+ this.sse.broadcast(`workflow:${instanceId}`, "progress", {
1959
+ progress: event.progress,
1960
+ completedSteps: event.completedSteps,
1961
+ totalSteps: event.totalSteps,
1962
+ });
1963
+ this.sse.broadcast("workflows:all", "workflow.progress", {
1964
+ instanceId,
1965
+ workflowName: instance?.workflowName,
1966
+ progress: event.progress,
1967
+ completedSteps: event.completedSteps,
1968
+ totalSteps: event.totalSteps,
1969
+ });
1970
+ }
1971
+ break;
1972
+ }
1973
+
1974
+ case "completed":
1975
+ await this.completeWorkflowIsolated(instanceId, event.output);
1976
+ break;
1977
+
1978
+ case "failed":
1979
+ await this.failWorkflowIsolated(instanceId, event.error ?? "Unknown error");
1980
+ break;
1981
+ }
1982
+ }
1983
+
1984
+ /**
1985
+ * Handle proxy calls from isolated subprocess
1986
+ */
1987
+ private async handleProxyCall(request: ProxyRequest): Promise<any> {
1988
+ const { target, service, method, args } = request;
1989
+
1990
+ if (target === "plugin") {
1991
+ const plugin = this.plugins[service];
1992
+ if (!plugin) {
1993
+ throw new Error(`Plugin "${service}" not found`);
1994
+ }
1995
+ const fn = plugin[method];
1996
+ if (typeof fn !== "function") {
1997
+ throw new Error(`Method "${method}" not found on plugin "${service}"`);
1998
+ }
1999
+ return fn.apply(plugin, args);
2000
+ } else if (target === "core") {
2001
+ if (!this.core) {
2002
+ throw new Error("Core services not available");
2003
+ }
2004
+ const coreService = (this.core as any)[service];
2005
+ if (!coreService) {
2006
+ throw new Error(`Core service "${service}" not found`);
2007
+ }
2008
+ const fn = coreService[method];
2009
+ if (typeof fn !== "function") {
2010
+ throw new Error(`Method "${method}" not found on core service "${service}"`);
2011
+ }
2012
+ return fn.apply(coreService, args);
2013
+ } else {
2014
+ throw new Error(`Unknown proxy target: ${target}`);
2015
+ }
2016
+ }
2017
+
2018
+ /**
2019
+ * Reset heartbeat timeout for an isolated workflow
2020
+ */
2021
+ private resetHeartbeatTimeout(instanceId: string, pid: number): void {
2022
+ const info = this.isolatedProcesses.get(instanceId);
2023
+ if (!info) return;
2024
+
2025
+ // Clear existing timeout
2026
+ if (info.heartbeatTimeout) {
2027
+ clearTimeout(info.heartbeatTimeout);
2028
+ }
2029
+
2030
+ // Set new timeout
2031
+ info.heartbeatTimeout = setTimeout(async () => {
2032
+ // Check if process is still alive
2033
+ if (!isProcessAlive(pid)) {
2034
+ return; // Process already dead, exit handler will handle it
2035
+ }
2036
+
2037
+ console.error(`[Workflows] No heartbeat from isolated workflow ${instanceId} for ${this.heartbeatTimeoutMs}ms`);
2038
+ await this.handleIsolatedTimeout(instanceId, pid);
2039
+ }, this.heartbeatTimeoutMs);
2040
+ }
2041
+
2042
+ /**
2043
+ * Handle timeout for isolated workflow (workflow timeout or heartbeat timeout)
2044
+ */
2045
+ private async handleIsolatedTimeout(instanceId: string, pid: number): Promise<void> {
2046
+ const info = this.isolatedProcesses.get(instanceId);
2047
+ if (!info) return;
2048
+
2049
+ // Kill the process
2050
+ try {
2051
+ process.kill(pid, "SIGKILL");
2052
+ } catch {
2053
+ // Process might already be dead
2054
+ }
2055
+
2056
+ // Clean up
2057
+ if (info.timeout) clearTimeout(info.timeout);
2058
+ if (info.heartbeatTimeout) clearTimeout(info.heartbeatTimeout);
2059
+ this.isolatedProcesses.delete(instanceId);
2060
+ await this.getSocketServer().closeSocket(instanceId);
2061
+
2062
+ // Fail the workflow
2063
+ await this.failWorkflow(instanceId, "Workflow timed out");
2064
+ }
2065
+
2066
+ /**
2067
+ * Complete an isolated workflow (called from event handler)
2068
+ */
2069
+ private async completeWorkflowIsolated(instanceId: string, output?: any): Promise<void> {
2070
+ const instance = await this.adapter.getInstance(instanceId);
2071
+ if (!instance) return;
2072
+
2073
+ // Clean up isolated process tracking (process should have exited)
2074
+ const info = this.isolatedProcesses.get(instanceId);
2075
+ if (info) {
2076
+ if (info.timeout) clearTimeout(info.timeout);
2077
+ if (info.heartbeatTimeout) clearTimeout(info.heartbeatTimeout);
2078
+ this.isolatedProcesses.delete(instanceId);
2079
+ }
2080
+
2081
+ await this.adapter.updateInstance(instanceId, {
2082
+ status: "completed",
2083
+ output,
2084
+ completedAt: new Date(),
2085
+ currentStep: undefined,
2086
+ });
2087
+
2088
+ await this.emitEvent("workflow.completed", {
2089
+ instanceId,
2090
+ workflowName: instance.workflowName,
2091
+ output,
2092
+ });
2093
+
2094
+ // Broadcast via SSE
2095
+ if (this.sse) {
2096
+ this.sse.broadcast(`workflow:${instanceId}`, "completed", { output });
2097
+ this.sse.broadcast("workflows:all", "workflow.completed", {
2098
+ instanceId,
2099
+ workflowName: instance.workflowName,
2100
+ output,
2101
+ });
2102
+ }
2103
+ }
2104
+
2105
+ /**
2106
+ * Fail an isolated workflow (called from event handler)
2107
+ */
2108
+ private async failWorkflowIsolated(instanceId: string, error: string): Promise<void> {
2109
+ const instance = await this.adapter.getInstance(instanceId);
2110
+ if (!instance) return;
2111
+
2112
+ // Clean up isolated process tracking
2113
+ const info = this.isolatedProcesses.get(instanceId);
2114
+ if (info) {
2115
+ if (info.timeout) clearTimeout(info.timeout);
2116
+ if (info.heartbeatTimeout) clearTimeout(info.heartbeatTimeout);
2117
+ this.isolatedProcesses.delete(instanceId);
2118
+ }
2119
+
2120
+ await this.adapter.updateInstance(instanceId, {
2121
+ status: "failed",
2122
+ error,
2123
+ completedAt: new Date(),
2124
+ });
2125
+
2126
+ await this.emitEvent("workflow.failed", {
2127
+ instanceId,
2128
+ workflowName: instance.workflowName,
2129
+ error,
2130
+ });
2131
+
2132
+ // Broadcast via SSE
2133
+ if (this.sse) {
2134
+ this.sse.broadcast(`workflow:${instanceId}`, "failed", { error });
2135
+ this.sse.broadcast("workflows:all", "workflow.failed", {
2136
+ instanceId,
2137
+ workflowName: instance.workflowName,
2138
+ error,
2139
+ });
2140
+ }
2141
+ }
1369
2142
  }
1370
2143
 
1371
2144
  // ============================================