@donkeylabs/server 2.0.19 → 2.0.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,12 +6,22 @@
6
6
  // - parallel: Run multiple branches concurrently
7
7
  // - choice: Conditional branching
8
8
  // - pass: Transform data / no-op
9
+ // - isolated: Execute in subprocess to prevent event loop blocking (default)
9
10
 
10
11
  import type { Events } from "./events";
11
12
  import type { Jobs } from "./jobs";
12
13
  import type { SSE } from "./sse";
13
14
  import type { z } from "zod";
14
15
  import type { CoreServices } from "../core";
16
+ import { dirname, join } from "node:path";
17
+ import { fileURLToPath } from "node:url";
18
+ import {
19
+ createWorkflowSocketServer,
20
+ type WorkflowSocketServer,
21
+ type WorkflowEvent,
22
+ type ProxyRequest,
23
+ } from "./workflow-socket";
24
+ import { isProcessAlive } from "./external-jobs";
15
25
 
16
26
  // Type helper for Zod schema inference
17
27
  type ZodSchema = z.ZodTypeAny;
@@ -126,6 +136,13 @@ export interface WorkflowDefinition {
126
136
  timeout?: number;
127
137
  /** Default retry config for all steps */
128
138
  defaultRetry?: RetryConfig;
139
+ /**
140
+ * Whether to execute this workflow in an isolated subprocess.
141
+ * Default: true (isolated by default to prevent blocking the event loop)
142
+ *
143
+ * Set to false for lightweight workflows that benefit from inline execution.
144
+ */
145
+ isolated?: boolean;
129
146
  }
130
147
 
131
148
  // ============================================
@@ -176,6 +193,8 @@ export interface WorkflowInstance {
176
193
  parentId?: string;
177
194
  /** Branch name if this is a branch instance */
178
195
  branchName?: string;
196
+ /** Custom metadata that persists across steps (JSON-serializable) */
197
+ metadata?: Record<string, any>;
179
198
  }
180
199
 
181
200
  // ============================================
@@ -197,6 +216,31 @@ export interface WorkflowContext {
197
216
  core: CoreServices;
198
217
  /** Plugin services - available for business logic in workflow handlers */
199
218
  plugins: Record<string, any>;
219
+ /**
220
+ * Custom metadata that persists across steps (read-only snapshot).
221
+ * Use setMetadata() to update values.
222
+ */
223
+ metadata: Record<string, any>;
224
+ /**
225
+ * Set a metadata value that persists across workflow steps.
226
+ * Accepts any JSON-serializable value (objects, arrays, primitives).
227
+ *
228
+ * @example
229
+ * await ctx.setMetadata('orderContext', {
230
+ * correlationId: 'abc-123',
231
+ * customer: { id: 'cust_1', tier: 'premium' },
232
+ * flags: { expedited: true }
233
+ * });
234
+ */
235
+ setMetadata(key: string, value: any): Promise<void>;
236
+ /**
237
+ * Get a metadata value with type safety.
238
+ *
239
+ * @example
240
+ * interface OrderContext { correlationId: string; customer: { id: string } }
241
+ * const orderContext = ctx.getMetadata<OrderContext>('orderContext');
242
+ */
243
+ getMetadata<T = any>(key: string): T | undefined;
200
244
  }
201
245
 
202
246
  // ============================================
@@ -307,11 +351,29 @@ export class WorkflowBuilder {
307
351
  private _timeout?: number;
308
352
  private _defaultRetry?: RetryConfig;
309
353
  private _lastStep?: string;
354
+ private _isolated = true; // Default to isolated execution
310
355
 
311
356
  constructor(name: string) {
312
357
  this._name = name;
313
358
  }
314
359
 
360
/**
 * Choose between subprocess (isolated) and inline execution for this workflow.
 *
 * A builder starts out isolated, so this only needs to be called to opt out.
 *
 * @param enabled - `true` keeps isolated execution; pass `false` for lightweight
 *   workflows that benefit from running inline. Omitting the argument means `true`.
 * @returns This builder, so calls can be chained.
 * @example
 * // Heavy workflow - keeps the default isolation (no call needed)
 * workflow("data-ingestion").task("process", { ... }).build();
 *
 * // Lightweight workflow - opt out of isolation
 * workflow("quick-validation").isolated(false).task("validate", { ... }).build();
 */
isolated(enabled: boolean = true): this {
  const runInSubprocess = enabled;
  this._isolated = runInSubprocess;
  return this;
}
376
+
315
377
  /** Set the starting step explicitly */
316
378
  startAt(stepName: string): this {
317
379
  this._startAt = stepName;
@@ -512,6 +574,7 @@ export class WorkflowBuilder {
512
574
  startAt: this._startAt,
513
575
  timeout: this._timeout,
514
576
  defaultRetry: this._defaultRetry,
577
+ isolated: this._isolated,
515
578
  };
516
579
  }
517
580
  }
@@ -539,11 +602,36 @@ export interface WorkflowsConfig {
539
602
  pollInterval?: number;
540
603
  /** Core services to pass to step handlers */
541
604
  core?: CoreServices;
605
+ /** Directory for Unix sockets (default: /tmp/donkeylabs-workflows) */
606
+ socketDir?: string;
607
+ /** TCP port range for Windows fallback (default: [49152, 65535]) */
608
+ tcpPortRange?: [number, number];
609
+ /** Database file path (required for isolated workflows) */
610
+ dbPath?: string;
611
+ /** Heartbeat timeout in ms (default: 60000) */
612
+ heartbeatTimeout?: number;
613
+ }
614
+
615
/** Options accepted as the second argument of `Workflows.register()`. */
export interface WorkflowRegisterOptions {
  /**
   * Path (or URL) of the module that defines the workflow.
   *
   * Needed for a workflow to actually run in a subprocess: when a workflow is
   * isolated (`isolated !== false`) and no module path was registered,
   * registration logs a warning and the workflow is started inline instead.
   * Pass `import.meta.url` from the module that declares the workflow.
   *
   * @example
   * workflows.register(myWorkflow, { modulePath: import.meta.url });
   */
  modulePath?: string;
}
543
627
 
544
628
  export interface Workflows {
545
- /** Register a workflow definition */
546
- register(definition: WorkflowDefinition): void;
629
+ /**
630
+ * Register a workflow definition.
631
+ * @param definition - The workflow definition to register
632
+ * @param options - Registration options (modulePath required for isolated workflows)
633
+ */
634
+ register(definition: WorkflowDefinition, options?: WorkflowRegisterOptions): void;
547
635
  /** Start a new workflow instance */
548
636
  start<T = any>(workflowName: string, input: T): Promise<string>;
549
637
  /** Get a workflow instance by ID */
@@ -562,12 +650,21 @@ export interface Workflows {
562
650
  setCore(core: CoreServices): void;
563
651
  /** Set plugin services (called after plugins are initialized) */
564
652
  setPlugins(plugins: Record<string, any>): void;
653
+ /** Update metadata for a workflow instance (used by isolated workflows) */
654
+ updateMetadata(instanceId: string, key: string, value: any): Promise<void>;
565
655
  }
566
656
 
567
657
  // ============================================
568
658
  // Workflow Service Implementation
569
659
  // ============================================
570
660
 
661
/** Bookkeeping kept for each running isolated workflow subprocess. */
interface IsolatedProcessInfo {
  /** Process id of the spawned executor subprocess. */
  pid: number;
  /** Timer enforcing the workflow-level `timeout`, when the definition sets one. */
  timeout?: ReturnType<typeof setTimeout>;
  /** Timer that fires when no event arrives within the heartbeat window. */
  heartbeatTimeout?: ReturnType<typeof setTimeout>;
  /** `Date.now()` value recorded when the last event was received. */
  lastHeartbeat: number;
}
667
+
571
668
  class WorkflowsImpl implements Workflows {
572
669
  private adapter: WorkflowAdapter;
573
670
  private events?: Events;
@@ -579,6 +676,15 @@ class WorkflowsImpl implements Workflows {
579
676
  private running = new Map<string, { timeout?: ReturnType<typeof setTimeout> }>();
580
677
  private pollInterval: number;
581
678
 
679
+ // Isolated execution state
680
+ private socketServer?: WorkflowSocketServer;
681
+ private socketDir: string;
682
+ private tcpPortRange: [number, number];
683
+ private dbPath?: string;
684
+ private heartbeatTimeoutMs: number;
685
+ private workflowModulePaths = new Map<string, string>();
686
+ private isolatedProcesses = new Map<string, IsolatedProcessInfo>();
687
+
582
688
  constructor(config: WorkflowsConfig = {}) {
583
689
  this.adapter = config.adapter ?? new MemoryWorkflowAdapter();
584
690
  this.events = config.events;
@@ -586,20 +692,97 @@ class WorkflowsImpl implements Workflows {
586
692
  this.sse = config.sse;
587
693
  this.core = config.core;
588
694
  this.pollInterval = config.pollInterval ?? 1000;
695
+
696
+ // Isolated execution config
697
+ this.socketDir = config.socketDir ?? "/tmp/donkeylabs-workflows";
698
+ this.tcpPortRange = config.tcpPortRange ?? [49152, 65535];
699
+ this.dbPath = config.dbPath;
700
+ this.heartbeatTimeoutMs = config.heartbeatTimeout ?? 60000;
701
+ }
702
+
703
/**
 * Return the socket server used to talk to isolated subprocesses, creating
 * it on first use and caching it on the instance afterwards.
 */
private getSocketServer(): WorkflowSocketServer {
  if (this.socketServer) {
    return this.socketServer;
  }

  const { socketDir, tcpPortRange } = this;
  const server = createWorkflowSocketServer(
    { socketDir, tcpPortRange },
    {
      // Workflow events and proxied service calls are routed back into this class.
      onEvent: (event) => this.handleIsolatedEvent(event),
      onProxyCall: (request) => this.handleProxyCall(request),
      // Connection lifecycle is only logged.
      onConnect: (instanceId) => {
        console.log(`[Workflows] Isolated workflow ${instanceId} connected`);
      },
      onDisconnect: (instanceId) => {
        console.log(`[Workflows] Isolated workflow ${instanceId} disconnected`);
      },
      onError: (error, instanceId) => {
        console.error(`[Workflows] Socket error for ${instanceId}:`, error);
      },
    }
  );

  this.socketServer = server;
  return server;
}
590
727
 
591
728
  setCore(core: CoreServices): void {
592
729
  this.core = core;
730
+ // Extract DB path if using Kysely adapter (for isolated workflows)
731
+ if (!this.dbPath && (core.db as any)?.getExecutor) {
732
+ // Try to get the database path from the Kysely instance
733
+ // This is a bit hacky but necessary for isolated workflows
734
+ try {
735
+ const executor = (core.db as any).getExecutor();
736
+ const adapter = executor?.adapter;
737
+ if (adapter?.db?.filename) {
738
+ this.dbPath = adapter.db.filename;
739
+ }
740
+ } catch {
741
+ // Ignore - dbPath might be set manually
742
+ }
743
+ }
593
744
  }
594
745
 
595
746
  setPlugins(plugins: Record<string, any>): void {
596
747
  this.plugins = plugins;
597
748
  }
598
749
 
599
- register(definition: WorkflowDefinition): void {
750
/**
 * Set one metadata key on a stored workflow instance.
 *
 * Reads the instance, merges `key`/`value` into a copy of its metadata and
 * writes the merged object back. Does nothing when the instance is unknown.
 *
 * @param instanceId - Id of the workflow instance to update
 * @param key - Metadata key to set
 * @param value - Value to store under `key`
 */
async updateMetadata(instanceId: string, key: string, value: any): Promise<void> {
  const existing = await this.adapter.getInstance(instanceId);
  if (!existing) {
    return;
  }

  const merged = Object.assign({}, existing.metadata || {}, { [key]: value });
  await this.adapter.updateInstance(instanceId, { metadata: merged });
}
757
+
758
/**
 * Register a workflow definition under its name.
 *
 * @param definition - The workflow definition to register
 * @param options - Registration options; `modulePath` is remembered for isolated execution
 * @throws Error when the name is already registered, or when an isolated
 *   workflow contains a `choice` or `parallel` step
 */
register(definition: WorkflowDefinition, options?: WorkflowRegisterOptions): void {
  if (this.definitions.has(definition.name)) {
    throw new Error(`Workflow "${definition.name}" is already registered`);
  }

  // Anything not explicitly opted out counts as isolated.
  const runsIsolated = definition.isolated !== false;

  if (runsIsolated) {
    // choice/parallel steps are rejected up front for isolated workflows.
    for (const [stepName, step] of definition.steps) {
      const unsupported = step.type === "choice" || step.type === "parallel";
      if (unsupported) {
        throw new Error(
          `Workflow "${definition.name}" uses ${step.type} step "${stepName}" ` +
            `which is not supported in isolated mode. Use .isolated(false) to run inline.`
        );
      }
    }
  }

  const modulePath = options?.modulePath;
  if (modulePath) {
    // Remember where the subprocess can import this workflow from.
    this.workflowModulePaths.set(definition.name, modulePath);
  } else if (runsIsolated) {
    console.warn(
      `[Workflows] Workflow "${definition.name}" is isolated but no modulePath provided. ` +
        `Use: workflows.register(myWorkflow, { modulePath: import.meta.url })`
    );
  }

  this.definitions.set(definition.name, definition);
}
605
788
 
@@ -625,8 +808,35 @@ class WorkflowsImpl implements Workflows {
625
808
  input,
626
809
  });
627
810
 
628
- // Start execution
629
- this.executeWorkflow(instance.id, definition);
811
+ // SSE broadcast for real-time monitoring
812
+ if (this.sse) {
813
+ this.sse.broadcast(`workflow:${instance.id}`, "started", {
814
+ workflowName,
815
+ input,
816
+ });
817
+ this.sse.broadcast("workflows:all", "workflow.started", {
818
+ instanceId: instance.id,
819
+ workflowName,
820
+ input,
821
+ });
822
+ }
823
+
824
+ // Start execution (isolated or inline based on definition.isolated)
825
+ const isIsolated = definition.isolated !== false;
826
+ const modulePath = this.workflowModulePaths.get(workflowName);
827
+
828
+ if (isIsolated && modulePath && this.dbPath) {
829
+ // Execute in isolated subprocess
830
+ this.executeIsolatedWorkflow(instance.id, definition, input, modulePath);
831
+ } else {
832
+ // Execute inline (existing behavior)
833
+ if (isIsolated && !modulePath) {
834
+ console.warn(
835
+ `[Workflows] Workflow "${workflowName}" falling back to inline execution (no modulePath)`
836
+ );
837
+ }
838
+ this.executeWorkflow(instance.id, definition);
839
+ }
630
840
 
631
841
  return instance.id;
632
842
  }
@@ -641,7 +851,21 @@ class WorkflowsImpl implements Workflows {
641
851
  return false;
642
852
  }
643
853
 
644
- // Clear timeout
854
+ // Kill isolated process if running
855
+ const isolatedInfo = this.isolatedProcesses.get(instanceId);
856
+ if (isolatedInfo) {
857
+ try {
858
+ process.kill(isolatedInfo.pid, "SIGTERM");
859
+ } catch {
860
+ // Process might already be dead
861
+ }
862
+ if (isolatedInfo.timeout) clearTimeout(isolatedInfo.timeout);
863
+ if (isolatedInfo.heartbeatTimeout) clearTimeout(isolatedInfo.heartbeatTimeout);
864
+ this.isolatedProcesses.delete(instanceId);
865
+ await this.getSocketServer().closeSocket(instanceId);
866
+ }
867
+
868
+ // Clear inline timeout
645
869
  const runInfo = this.running.get(instanceId);
646
870
  if (runInfo?.timeout) {
647
871
  clearTimeout(runInfo.timeout);
@@ -686,18 +910,50 @@ class WorkflowsImpl implements Workflows {
686
910
  }
687
911
 
688
912
  console.log(`[Workflows] Resuming workflow instance ${instance.id}`);
689
- this.executeWorkflow(instance.id, definition);
913
+
914
+ // Check isolation mode and call appropriate method
915
+ const isIsolated = definition.isolated !== false;
916
+ const modulePath = this.workflowModulePaths.get(instance.workflowName);
917
+
918
+ if (isIsolated && modulePath && this.dbPath) {
919
+ this.executeIsolatedWorkflow(instance.id, definition, instance.input, modulePath);
920
+ } else {
921
+ this.executeWorkflow(instance.id, definition);
922
+ }
690
923
  }
691
924
  }
692
925
 
693
926
  async stop(): Promise<void> {
694
- // Clear all timeouts
927
+ // Kill all isolated processes
928
+ for (const [instanceId, info] of this.isolatedProcesses) {
929
+ try {
930
+ process.kill(info.pid, "SIGTERM");
931
+ } catch {
932
+ // Process might already be dead
933
+ }
934
+ if (info.timeout) clearTimeout(info.timeout);
935
+ if (info.heartbeatTimeout) clearTimeout(info.heartbeatTimeout);
936
+ }
937
+ this.isolatedProcesses.clear();
938
+
939
+ // Shutdown socket server
940
+ if (this.socketServer) {
941
+ await this.socketServer.shutdown();
942
+ this.socketServer = undefined;
943
+ }
944
+
945
+ // Clear all inline timeouts
695
946
  for (const [instanceId, runInfo] of this.running) {
696
947
  if (runInfo.timeout) {
697
948
  clearTimeout(runInfo.timeout);
698
949
  }
699
950
  }
700
951
  this.running.clear();
952
+
953
+ // Stop adapter (cleanup timers and prevent further DB access)
954
+ if (this.adapter && typeof (this.adapter as any).stop === "function") {
955
+ (this.adapter as any).stop();
956
+ }
701
957
  }
702
958
 
703
959
  // ============================================
@@ -763,6 +1019,16 @@ class WorkflowsImpl implements Workflows {
763
1019
  stepType: step.type,
764
1020
  });
765
1021
 
1022
+ // Broadcast via SSE
1023
+ if (this.sse) {
1024
+ this.sse.broadcast(`workflow:${instanceId}`, "step.started", { stepName });
1025
+ this.sse.broadcast("workflows:all", "workflow.step.started", {
1026
+ instanceId,
1027
+ workflowName: instance.workflowName,
1028
+ stepName,
1029
+ });
1030
+ }
1031
+
766
1032
  // Update step result as running
767
1033
  const stepResult: StepResult = {
768
1034
  stepName,
@@ -1150,6 +1416,9 @@ class WorkflowsImpl implements Workflows {
1150
1416
  }
1151
1417
  }
1152
1418
 
1419
+ // Metadata snapshot (mutable reference for setMetadata updates)
1420
+ const metadata = { ...(instance.metadata ?? {}) };
1421
+
1153
1422
  return {
1154
1423
  input: instance.input,
1155
1424
  steps,
@@ -1160,6 +1429,18 @@ class WorkflowsImpl implements Workflows {
1160
1429
  },
1161
1430
  core: this.core!,
1162
1431
  plugins: this.plugins,
1432
+ metadata,
1433
+ setMetadata: async (key: string, value: any): Promise<void> => {
1434
+ // Update local snapshot
1435
+ metadata[key] = value;
1436
+ // Persist to database
1437
+ await this.adapter.updateInstance(instance.id, {
1438
+ metadata: { ...metadata },
1439
+ });
1440
+ },
1441
+ getMetadata: <T = any>(key: string): T | undefined => {
1442
+ return metadata[key] as T | undefined;
1443
+ },
1163
1444
  };
1164
1445
  }
1165
1446
 
@@ -1173,6 +1454,12 @@ class WorkflowsImpl implements Workflows {
1173
1454
  const instance = await this.adapter.getInstance(instanceId);
1174
1455
  if (!instance) return;
1175
1456
 
1457
+ // Check if workflow is still running (not cancelled/failed/timed out)
1458
+ if (instance.status !== "running") {
1459
+ console.log(`[Workflows] Ignoring step completion for ${instanceId}, status is ${instance.status}`);
1460
+ return;
1461
+ }
1462
+
1176
1463
  // Update step result
1177
1464
  const stepResult = instance.stepResults[stepName] ?? {
1178
1465
  stepName,
@@ -1195,6 +1482,19 @@ class WorkflowsImpl implements Workflows {
1195
1482
  output,
1196
1483
  });
1197
1484
 
1485
+ // Broadcast step completed via SSE
1486
+ if (this.sse) {
1487
+ this.sse.broadcast(`workflow:${instanceId}`, "step.completed", {
1488
+ stepName,
1489
+ output,
1490
+ });
1491
+ this.sse.broadcast("workflows:all", "workflow.step.completed", {
1492
+ instanceId,
1493
+ workflowName: instance.workflowName,
1494
+ stepName,
1495
+ });
1496
+ }
1497
+
1198
1498
  // Calculate and emit progress
1199
1499
  const totalSteps = definition.steps.size;
1200
1500
  const completedSteps = Object.values(instance.stepResults).filter(
@@ -1211,7 +1511,7 @@ class WorkflowsImpl implements Workflows {
1211
1511
  totalSteps,
1212
1512
  });
1213
1513
 
1214
- // Broadcast via SSE
1514
+ // Broadcast progress via SSE
1215
1515
  if (this.sse) {
1216
1516
  this.sse.broadcast(`workflow:${instanceId}`, "progress", {
1217
1517
  progress,
@@ -1219,6 +1519,12 @@ class WorkflowsImpl implements Workflows {
1219
1519
  completedSteps,
1220
1520
  totalSteps,
1221
1521
  });
1522
+ this.sse.broadcast("workflows:all", "workflow.progress", {
1523
+ instanceId,
1524
+ workflowName: instance.workflowName,
1525
+ progress,
1526
+ currentStep: stepName,
1527
+ });
1222
1528
  }
1223
1529
 
1224
1530
  // Move to next step or complete
@@ -1308,6 +1614,20 @@ class WorkflowsImpl implements Workflows {
1308
1614
  attempts: stepResult.attempts,
1309
1615
  });
1310
1616
 
1617
+ // Broadcast step failed via SSE
1618
+ if (this.sse) {
1619
+ this.sse.broadcast(`workflow:${instanceId}`, "step.failed", {
1620
+ stepName,
1621
+ error,
1622
+ });
1623
+ this.sse.broadcast("workflows:all", "workflow.step.failed", {
1624
+ instanceId,
1625
+ workflowName: instance.workflowName,
1626
+ stepName,
1627
+ error,
1628
+ });
1629
+ }
1630
+
1311
1631
  // Fail the workflow
1312
1632
  await this.failWorkflow(instanceId, `Step "${stepName}" failed: ${error}`);
1313
1633
  }
@@ -1316,6 +1636,12 @@ class WorkflowsImpl implements Workflows {
1316
1636
  const instance = await this.adapter.getInstance(instanceId);
1317
1637
  if (!instance) return;
1318
1638
 
1639
+ // Check if workflow is still running (not cancelled/failed/timed out)
1640
+ if (instance.status !== "running") {
1641
+ console.log(`[Workflows] Ignoring workflow completion for ${instanceId}, status is ${instance.status}`);
1642
+ return;
1643
+ }
1644
+
1319
1645
  // Clear timeout
1320
1646
  const runInfo = this.running.get(instanceId);
1321
1647
  if (runInfo?.timeout) {
@@ -1339,6 +1665,10 @@ class WorkflowsImpl implements Workflows {
1339
1665
  // Broadcast via SSE
1340
1666
  if (this.sse) {
1341
1667
  this.sse.broadcast(`workflow:${instanceId}`, "completed", { output });
1668
+ this.sse.broadcast("workflows:all", "workflow.completed", {
1669
+ instanceId,
1670
+ workflowName: instance.workflowName,
1671
+ });
1342
1672
  }
1343
1673
  }
1344
1674
 
@@ -1368,6 +1698,11 @@ class WorkflowsImpl implements Workflows {
1368
1698
  // Broadcast via SSE
1369
1699
  if (this.sse) {
1370
1700
  this.sse.broadcast(`workflow:${instanceId}`, "failed", { error });
1701
+ this.sse.broadcast("workflows:all", "workflow.failed", {
1702
+ instanceId,
1703
+ workflowName: instance.workflowName,
1704
+ error,
1705
+ });
1371
1706
  }
1372
1707
  }
1373
1708
 
@@ -1376,6 +1711,434 @@ class WorkflowsImpl implements Workflows {
1376
1711
  await this.events.emit(event, data);
1377
1712
  }
1378
1713
  }
1714
+
1715
+ // ============================================
1716
+ // Isolated Execution Engine
1717
+ // ============================================
1718
+
1719
/**
 * Execute a workflow in an isolated subprocess.
 *
 * Creates a per-instance socket, marks the instance as running, spawns the
 * executor with `bun run`, and hands it its configuration as JSON on stdin.
 * The subprocess is then tracked with an optional workflow timeout, a
 * heartbeat timeout and an exit handler that fails the workflow if the
 * process ends while the instance is still "running".
 *
 * Callers start this without awaiting it, so startup errors are handled here
 * instead of being rejected: the subprocess (if any) is killed, the socket is
 * closed and the workflow is failed.
 *
 * @param instanceId - Id of the workflow instance to run
 * @param definition - Definition of the workflow (name and timeout are used)
 * @param input - Workflow input forwarded to the subprocess
 * @param modulePath - Module the subprocess loads the workflow from
 */
private async executeIsolatedWorkflow(
  instanceId: string,
  definition: WorkflowDefinition,
  input: any,
  modulePath: string
): Promise<void> {
  const socketServer = this.getSocketServer();

  try {
    // Create socket for this workflow instance
    const { socketPath, tcpPort } = await socketServer.createSocket(instanceId);

    // Mark workflow as running
    await this.adapter.updateInstance(instanceId, {
      status: "running",
      startedAt: new Date(),
    });

    // The executor script is resolved relative to this module.
    const currentDir = dirname(fileURLToPath(import.meta.url));
    const executorPath = join(currentDir, "workflow-executor.ts");

    // Prepare config for the executor
    const config = {
      instanceId,
      workflowName: definition.name,
      input,
      socketPath,
      tcpPort,
      modulePath,
      dbPath: this.dbPath,
    };

    // Spawn the subprocess
    const proc = Bun.spawn(["bun", "run", executorPath], {
      stdin: "pipe",
      stdout: "inherit",
      stderr: "inherit",
      env: {
        ...process.env,
        // Ensure the subprocess can import from the same paths
        NODE_OPTIONS: process.env.NODE_OPTIONS ?? "",
      },
    });

    // Track the process before touching stdin so the catch block below can
    // reap it if writing the config fails.
    this.isolatedProcesses.set(instanceId, {
      pid: proc.pid,
      lastHeartbeat: Date.now(),
    });

    // Send config via stdin
    proc.stdin.write(JSON.stringify(config));
    proc.stdin.end();

    // Set up workflow timeout
    if (definition.timeout) {
      const timeoutHandle = setTimeout(() => {
        this.handleIsolatedTimeout(instanceId, proc.pid).catch((error) => {
          console.error(`[Workflows] Failed to time out isolated workflow ${instanceId}:`, error);
        });
      }, definition.timeout);
      const info = this.isolatedProcesses.get(instanceId);
      if (info) info.timeout = timeoutHandle;
    }

    // Set up heartbeat timeout
    this.resetHeartbeatTimeout(instanceId, proc.pid);

    // Handle process exit
    proc.exited
      .then(async (exitCode) => {
        const info = this.isolatedProcesses.get(instanceId);
        if (info) {
          if (info.timeout) clearTimeout(info.timeout);
          if (info.heartbeatTimeout) clearTimeout(info.heartbeatTimeout);
          this.isolatedProcesses.delete(instanceId);
        }
        await socketServer.closeSocket(instanceId);

        // Check if workflow is still running (crashed before completion)
        const instance = await this.adapter.getInstance(instanceId);
        if (instance && instance.status === "running") {
          console.error(`[Workflows] Isolated workflow ${instanceId} crashed with exit code ${exitCode}`);
          await this.failWorkflow(instanceId, `Subprocess crashed with exit code ${exitCode}`);
        }
      })
      .catch((error) => {
        // Nothing awaits this chain, so log instead of leaving the rejection unhandled.
        console.error(`[Workflows] Exit handling failed for isolated workflow ${instanceId}:`, error);
      });
  } catch (error) {
    console.error(`[Workflows] Failed to start isolated workflow ${instanceId}:`, error);

    // Reap a subprocess that was spawned before the failure.
    const tracked = this.isolatedProcesses.get(instanceId);
    if (tracked) {
      try {
        process.kill(tracked.pid, "SIGKILL");
      } catch {
        // Process might already be dead
      }
      if (tracked.timeout) clearTimeout(tracked.timeout);
      if (tracked.heartbeatTimeout) clearTimeout(tracked.heartbeatTimeout);
      this.isolatedProcesses.delete(instanceId);
    }

    try {
      await socketServer.closeSocket(instanceId);
    } catch (closeError) {
      console.error(`[Workflows] Failed to close socket for ${instanceId}:`, closeError);
    }

    try {
      const reason = error instanceof Error ? error.message : String(error);
      await this.failWorkflow(instanceId, `Failed to start isolated subprocess: ${reason}`);
    } catch (failError) {
      console.error(`[Workflows] Failed to mark workflow ${instanceId} as failed:`, failError);
    }
  }
}
1806
+
1807
/**
 * Apply one event received from an isolated workflow subprocess.
 *
 * Every event, whatever its type, counts as a heartbeat for a tracked
 * process. Step events are persisted to the instance record and then
 * re-published through the event bus and SSE; `completed` / `failed`
 * finish the workflow.
 */
private async handleIsolatedEvent(event: WorkflowEvent): Promise<void> {
  const { instanceId, type } = event;

  // Any traffic from the subprocess proves it is alive.
  const tracked = this.isolatedProcesses.get(instanceId);
  if (tracked) {
    tracked.lastHeartbeat = Date.now();
    this.resetHeartbeatTimeout(instanceId, tracked.pid);
  }

  switch (type) {
    case "started":
    case "heartbeat":
      // "started": the instance was already marked running when it was spawned.
      // "heartbeat": fully handled by the refresh above.
      return;

    case "step.started": {
      const instance = await this.adapter.getInstance(instanceId);
      if (!instance) return;

      // Record the step as running and bump its attempt counter.
      const stepName = event.stepName!;
      const stepResult = {
        stepName,
        status: "running" as const,
        startedAt: new Date(),
        attempts: (instance.stepResults[stepName]?.attempts ?? 0) + 1,
      };
      await this.adapter.updateInstance(instanceId, {
        currentStep: event.stepName,
        stepResults: { ...instance.stepResults, [stepName]: stepResult },
      });

      const { workflowName } = instance;
      await this.emitEvent("workflow.step.started", {
        instanceId,
        workflowName,
        stepName: event.stepName,
      });
      if (this.sse) {
        this.sse.broadcast(`workflow:${instanceId}`, "step.started", {
          stepName: event.stepName,
        });
        this.sse.broadcast("workflows:all", "workflow.step.started", {
          instanceId,
          workflowName,
          stepName: event.stepName,
        });
      }
      return;
    }

    case "step.completed": {
      const instance = await this.adapter.getInstance(instanceId);
      if (!instance) return;

      // Reuse the stored result when there is one, otherwise start from a placeholder.
      const record = instance.stepResults[event.stepName!] ?? {
        stepName: event.stepName!,
        status: "pending" as const,
        startedAt: new Date(),
        attempts: 0,
      };
      record.status = "completed";
      record.output = event.output;
      record.completedAt = new Date();

      await this.adapter.updateInstance(instanceId, {
        stepResults: { ...instance.stepResults, [event.stepName!]: record },
        currentStep: event.nextStep,
      });

      const { workflowName } = instance;
      await this.emitEvent("workflow.step.completed", {
        instanceId,
        workflowName,
        stepName: event.stepName,
        output: event.output,
      });
      if (this.sse) {
        this.sse.broadcast(`workflow:${instanceId}`, "step.completed", {
          stepName: event.stepName,
          output: event.output,
        });
        this.sse.broadcast("workflows:all", "workflow.step.completed", {
          instanceId,
          workflowName,
          stepName: event.stepName,
          output: event.output,
        });
      }
      return;
    }

    case "step.failed": {
      const instance = await this.adapter.getInstance(instanceId);
      if (!instance) return;

      // Reuse the stored result when there is one, otherwise start from a placeholder.
      const record = instance.stepResults[event.stepName!] ?? {
        stepName: event.stepName!,
        status: "pending" as const,
        startedAt: new Date(),
        attempts: 0,
      };
      record.status = "failed";
      record.error = event.error;
      record.completedAt = new Date();

      await this.adapter.updateInstance(instanceId, {
        stepResults: { ...instance.stepResults, [event.stepName!]: record },
      });

      const { workflowName } = instance;
      await this.emitEvent("workflow.step.failed", {
        instanceId,
        workflowName,
        stepName: event.stepName,
        error: event.error,
      });
      if (this.sse) {
        this.sse.broadcast(`workflow:${instanceId}`, "step.failed", {
          stepName: event.stepName,
          error: event.error,
        });
        this.sse.broadcast("workflows:all", "workflow.step.failed", {
          instanceId,
          workflowName,
          stepName: event.stepName,
          error: event.error,
        });
      }
      return;
    }

    case "progress": {
      // Progress is forwarded even when the instance cannot be loaded;
      // workflowName is then undefined.
      const instance = await this.adapter.getInstance(instanceId);
      const workflowName = instance?.workflowName;
      const { progress, completedSteps, totalSteps } = event;

      await this.emitEvent("workflow.progress", {
        instanceId,
        workflowName,
        progress,
        completedSteps,
        totalSteps,
      });
      if (this.sse) {
        this.sse.broadcast(`workflow:${instanceId}`, "progress", {
          progress,
          completedSteps,
          totalSteps,
        });
        this.sse.broadcast("workflows:all", "workflow.progress", {
          instanceId,
          workflowName,
          progress,
          completedSteps,
          totalSteps,
        });
      }
      return;
    }

    case "completed":
      await this.completeWorkflowIsolated(instanceId, event.output);
      return;

    case "failed":
      await this.failWorkflowIsolated(instanceId, event.error ?? "Unknown error");
      return;
  }
}
1983
+
1984
/**
 * Execute a service call requested by an isolated subprocess.
 *
 * `target` selects where the service is looked up ("plugin" uses the plugin
 * services, "core" uses the core services); `method` is then invoked on that
 * service with `args`, and its return value is handed back.
 *
 * @throws Error when the target is unknown, core services are missing, or
 *   the service / method cannot be found
 */
private async handleProxyCall(request: ProxyRequest): Promise<any> {
  const { target, service, method, args } = request;

  if (target !== "plugin" && target !== "core") {
    throw new Error(`Unknown proxy target: ${target}`);
  }

  if (target === "plugin") {
    const plugin = this.plugins[service];
    if (!plugin) {
      throw new Error(`Plugin "${service}" not found`);
    }
    const handler = plugin[method];
    if (typeof handler !== "function") {
      throw new Error(`Method "${method}" not found on plugin "${service}"`);
    }
    // Invoke with the plugin as receiver so `this` inside the method works.
    return handler.apply(plugin, args);
  }

  // target === "core"
  if (!this.core) {
    throw new Error("Core services not available");
  }
  const coreService = (this.core as any)[service];
  if (!coreService) {
    throw new Error(`Core service "${service}" not found`);
  }
  const handler = coreService[method];
  if (typeof handler !== "function") {
    throw new Error(`Method "${method}" not found on core service "${service}"`);
  }
  return handler.apply(coreService, args);
}
2017
+
2018
/**
 * (Re)arm the heartbeat timer of a tracked isolated workflow.
 *
 * When the timer fires and the process is still alive, the workflow is
 * treated as timed out. A process that is already dead is left alone here.
 * Does nothing for instances that are not tracked.
 */
private resetHeartbeatTimeout(instanceId: string, pid: number): void {
  const tracked = this.isolatedProcesses.get(instanceId);
  if (!tracked) {
    return;
  }

  // Drop the previous timer before arming a new one.
  if (tracked.heartbeatTimeout) {
    clearTimeout(tracked.heartbeatTimeout);
  }

  const onSilence = async (): Promise<void> => {
    if (isProcessAlive(pid)) {
      console.error(`[Workflows] No heartbeat from isolated workflow ${instanceId} for ${this.heartbeatTimeoutMs}ms`);
      await this.handleIsolatedTimeout(instanceId, pid);
    }
    // Otherwise the process already died; the exit handler will handle it.
  };

  tracked.heartbeatTimeout = setTimeout(onSilence, this.heartbeatTimeoutMs);
}
2041
+
2042
/**
 * Terminate a tracked isolated workflow whose workflow timeout or heartbeat
 * timeout expired, then mark the workflow as failed.
 *
 * Does nothing when the instance is no longer tracked.
 */
private async handleIsolatedTimeout(instanceId: string, pid: number): Promise<void> {
  const tracked = this.isolatedProcesses.get(instanceId);
  if (!tracked) {
    return;
  }

  // Hard-kill the subprocess.
  try {
    process.kill(pid, "SIGKILL");
  } catch {
    // Process might already be dead
  }

  // Release both timers, the tracking entry and the socket.
  const { timeout, heartbeatTimeout } = tracked;
  if (timeout) {
    clearTimeout(timeout);
  }
  if (heartbeatTimeout) {
    clearTimeout(heartbeatTimeout);
  }
  this.isolatedProcesses.delete(instanceId);
  await this.getSocketServer().closeSocket(instanceId);

  await this.failWorkflow(instanceId, "Workflow timed out");
}
2065
+
2066
/**
 * Finish an isolated workflow successfully (invoked for the subprocess's
 * "completed" event).
 *
 * Stops tracking the subprocess, persists the completed state with its
 * output, and publishes the completion through the event bus and SSE.
 * Does nothing when the instance cannot be loaded.
 *
 * NOTE(review): unlike the inline completion path, this does not check that
 * the instance is still "running" before overwriting its status - confirm
 * whether a late event after cancel/timeout should be ignored here.
 */
private async completeWorkflowIsolated(instanceId: string, output?: any): Promise<void> {
  const instance = await this.adapter.getInstance(instanceId);
  if (!instance) {
    return;
  }

  // Stop tracking the subprocess and release its timers.
  const tracked = this.isolatedProcesses.get(instanceId);
  if (tracked) {
    const { timeout, heartbeatTimeout } = tracked;
    if (timeout) clearTimeout(timeout);
    if (heartbeatTimeout) clearTimeout(heartbeatTimeout);
    this.isolatedProcesses.delete(instanceId);
  }

  await this.adapter.updateInstance(instanceId, {
    status: "completed",
    output,
    completedAt: new Date(),
    currentStep: undefined,
  });

  const { workflowName } = instance;
  await this.emitEvent("workflow.completed", {
    instanceId,
    workflowName,
    output,
  });

  if (!this.sse) {
    return;
  }
  this.sse.broadcast(`workflow:${instanceId}`, "completed", { output });
  this.sse.broadcast("workflows:all", "workflow.completed", {
    instanceId,
    workflowName,
    output,
  });
}
2104
+
2105
/**
 * Finish an isolated workflow as failed (invoked for the subprocess's
 * "failed" event).
 *
 * Stops tracking the subprocess, persists the failed state with its error
 * message, and publishes the failure through the event bus and SSE.
 * Does nothing when the instance cannot be loaded.
 *
 * NOTE(review): unlike the inline completion path, this does not check that
 * the instance is still "running" before overwriting its status - confirm
 * whether a late event after cancel/timeout should be ignored here.
 */
private async failWorkflowIsolated(instanceId: string, error: string): Promise<void> {
  const instance = await this.adapter.getInstance(instanceId);
  if (!instance) {
    return;
  }

  // Stop tracking the subprocess and release its timers.
  const tracked = this.isolatedProcesses.get(instanceId);
  if (tracked) {
    const { timeout, heartbeatTimeout } = tracked;
    if (timeout) clearTimeout(timeout);
    if (heartbeatTimeout) clearTimeout(heartbeatTimeout);
    this.isolatedProcesses.delete(instanceId);
  }

  await this.adapter.updateInstance(instanceId, {
    status: "failed",
    error,
    completedAt: new Date(),
  });

  const { workflowName } = instance;
  await this.emitEvent("workflow.failed", {
    instanceId,
    workflowName,
    error,
  });

  if (!this.sse) {
    return;
  }
  this.sse.broadcast(`workflow:${instanceId}`, "failed", { error });
  this.sse.broadcast("workflows:all", "workflow.failed", {
    instanceId,
    workflowName,
    error,
  });
}
1379
2142
  }
1380
2143
 
1381
2144
  // ============================================