@donkeylabs/server 2.0.19 → 2.0.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,12 +6,23 @@
6
6
  // - parallel: Run multiple branches concurrently
7
7
  // - choice: Conditional branching
8
8
  // - pass: Transform data / no-op
9
+ // - isolated: Execute in subprocess to prevent event loop blocking (default)
9
10
 
10
11
  import type { Events } from "./events";
11
12
  import type { Jobs } from "./jobs";
12
13
  import type { SSE } from "./sse";
13
14
  import type { z } from "zod";
14
15
  import type { CoreServices } from "../core";
16
+ import { dirname, join } from "node:path";
17
+ import { fileURLToPath } from "node:url";
18
+ import {
19
+ createWorkflowSocketServer,
20
+ type WorkflowSocketServer,
21
+ type WorkflowEvent,
22
+ type ProxyRequest,
23
+ } from "./workflow-socket";
24
+ import { isProcessAlive } from "./external-jobs";
25
+ import { WorkflowStateMachine, type StateMachineEvents } from "./workflow-state-machine";
15
26
 
16
27
  // Type helper for Zod schema inference
17
28
  type ZodSchema = z.ZodTypeAny;
@@ -126,6 +137,13 @@ export interface WorkflowDefinition {
126
137
  timeout?: number;
127
138
  /** Default retry config for all steps */
128
139
  defaultRetry?: RetryConfig;
140
+ /**
141
+ * Whether to execute this workflow in an isolated subprocess.
142
+ * Default: true (isolated by default to prevent blocking the event loop)
143
+ *
144
+ * Set to false for lightweight workflows that benefit from inline execution.
145
+ */
146
+ isolated?: boolean;
129
147
  }
130
148
 
131
149
  // ============================================
@@ -176,6 +194,8 @@ export interface WorkflowInstance {
176
194
  parentId?: string;
177
195
  /** Branch name if this is a branch instance */
178
196
  branchName?: string;
197
+ /** Custom metadata that persists across steps (JSON-serializable) */
198
+ metadata?: Record<string, any>;
179
199
  }
180
200
 
181
201
  // ============================================
@@ -197,6 +217,31 @@ export interface WorkflowContext {
197
217
  core: CoreServices;
198
218
  /** Plugin services - available for business logic in workflow handlers */
199
219
  plugins: Record<string, any>;
220
+ /**
221
+ * Custom metadata that persists across steps (read-only snapshot).
222
+ * Use setMetadata() to update values.
223
+ */
224
+ metadata: Record<string, any>;
225
+ /**
226
+ * Set a metadata value that persists across workflow steps.
227
+ * Accepts any JSON-serializable value (objects, arrays, primitives).
228
+ *
229
+ * @example
230
+ * await ctx.setMetadata('orderContext', {
231
+ * correlationId: 'abc-123',
232
+ * customer: { id: 'cust_1', tier: 'premium' },
233
+ * flags: { expedited: true }
234
+ * });
235
+ */
236
+ setMetadata(key: string, value: any): Promise<void>;
237
+ /**
238
+ * Get a metadata value by key; the caller-supplied type parameter types the result.
239
+ *
240
+ * @example
241
+ * interface OrderContext { correlationId: string; customer: { id: string } }
242
+ * const orderContext = ctx.getMetadata<OrderContext>('orderContext');
243
+ */
244
+ getMetadata<T = any>(key: string): T | undefined;
200
245
  }
201
246
 
202
247
  // ============================================
@@ -307,11 +352,29 @@ export class WorkflowBuilder {
307
352
  private _timeout?: number;
308
353
  private _defaultRetry?: RetryConfig;
309
354
  private _lastStep?: string;
355
+ private _isolated = true; // Default to isolated execution
310
356
 
311
357
  constructor(name: string) {
312
358
  this._name = name;
313
359
  }
314
360
 
361
+ /**
362
+ * Set whether to execute this workflow in an isolated subprocess.
363
+ * Default: true (isolated by default to prevent blocking the event loop)
364
+ *
365
+ * @param enabled - Set to false for lightweight workflows that benefit from inline execution
366
+ * @example
367
+ * // Heavy workflow - uses default isolation (no call needed)
368
+ * workflow("data-ingestion").task("process", { ... }).build();
369
+ *
370
+ * // Lightweight workflow - opt out of isolation
371
+ * workflow("quick-validation").isolated(false).task("validate", { ... }).build();
372
+ */
373
+ isolated(enabled: boolean = true): this {
374
+ this._isolated = enabled;
375
+ return this;
376
+ }
377
+
315
378
  /** Set the starting step explicitly */
316
379
  startAt(stepName: string): this {
317
380
  this._startAt = stepName;
@@ -512,6 +575,7 @@ export class WorkflowBuilder {
512
575
  startAt: this._startAt,
513
576
  timeout: this._timeout,
514
577
  defaultRetry: this._defaultRetry,
578
+ isolated: this._isolated,
515
579
  };
516
580
  }
517
581
  }
@@ -539,11 +603,36 @@ export interface WorkflowsConfig {
539
603
  pollInterval?: number;
540
604
  /** Core services to pass to step handlers */
541
605
  core?: CoreServices;
606
+ /** Directory for Unix sockets (default: /tmp/donkeylabs-workflows) */
607
+ socketDir?: string;
608
+ /** TCP port range for Windows fallback (default: [49152, 65535]) */
609
+ tcpPortRange?: [number, number];
610
+ /** Database file path (required for isolated workflows; if unset and not detected from core.db, workflows run inline) */
611
+ dbPath?: string;
612
+ /** Heartbeat timeout in ms (default: 60000) */
613
+ heartbeatTimeout?: number;
614
+ }
615
+
616
+ /** Options for registering a workflow */
617
+ export interface WorkflowRegisterOptions {
618
+ /**
619
+ * Module path for isolated workflows.
620
+ * Needed for isolated execution: if workflow.isolated !== false and no modulePath is given, register() logs a warning and start() falls back to inline execution.
621
+ * Use `import.meta.url` to get the current module's path.
622
+ *
623
+ * @example
624
+ * workflows.register(myWorkflow, { modulePath: import.meta.url });
625
+ */
626
+ modulePath?: string;
542
627
  }
543
628
 
544
629
  export interface Workflows {
545
- /** Register a workflow definition */
546
- register(definition: WorkflowDefinition): void;
630
+ /**
631
+ * Register a workflow definition.
632
+ * @param definition - The workflow definition to register
633
+ * @param options - Registration options (modulePath required for isolated workflows)
634
+ */
635
+ register(definition: WorkflowDefinition, options?: WorkflowRegisterOptions): void;
547
636
  /** Start a new workflow instance */
548
637
  start<T = any>(workflowName: string, input: T): Promise<string>;
549
638
  /** Get a workflow instance by ID */
@@ -562,44 +651,127 @@ export interface Workflows {
562
651
  setCore(core: CoreServices): void;
563
652
  /** Set plugin services (called after plugins are initialized) */
564
653
  setPlugins(plugins: Record<string, any>): void;
654
+ /** Update metadata for a workflow instance (used by isolated workflows) */
655
+ updateMetadata(instanceId: string, key: string, value: any): Promise<void>;
565
656
  }
566
657
 
567
658
  // ============================================
568
- // Workflow Service Implementation
659
+ // Workflow Service Implementation (Supervisor)
569
660
  // ============================================
570
661
 
662
+ interface IsolatedProcessInfo {
663
+ pid: number;
664
+ timeout?: ReturnType<typeof setTimeout>;
665
+ heartbeatTimeout?: ReturnType<typeof setTimeout>;
666
+ lastHeartbeat: number;
667
+ }
668
+
571
669
  class WorkflowsImpl implements Workflows {
572
670
  private adapter: WorkflowAdapter;
573
- private events?: Events;
671
+ private eventsService?: Events;
574
672
  private jobs?: Jobs;
575
673
  private sse?: SSE;
576
674
  private core?: CoreServices;
577
675
  private plugins: Record<string, any> = {};
578
676
  private definitions = new Map<string, WorkflowDefinition>();
579
- private running = new Map<string, { timeout?: ReturnType<typeof setTimeout> }>();
677
+ private running = new Map<string, { timeout?: ReturnType<typeof setTimeout>; sm?: WorkflowStateMachine }>();
580
678
  private pollInterval: number;
581
679
 
680
+ // Isolated execution state
681
+ private socketServer?: WorkflowSocketServer;
682
+ private socketDir: string;
683
+ private tcpPortRange: [number, number];
684
+ private dbPath?: string;
685
+ private heartbeatTimeoutMs: number;
686
+ private workflowModulePaths = new Map<string, string>();
687
+ private isolatedProcesses = new Map<string, IsolatedProcessInfo>();
688
+
582
689
  constructor(config: WorkflowsConfig = {}) {
583
690
  this.adapter = config.adapter ?? new MemoryWorkflowAdapter();
584
- this.events = config.events;
691
+ this.eventsService = config.events;
585
692
  this.jobs = config.jobs;
586
693
  this.sse = config.sse;
587
694
  this.core = config.core;
588
695
  this.pollInterval = config.pollInterval ?? 1000;
696
+
697
+ // Isolated execution config
698
+ this.socketDir = config.socketDir ?? "/tmp/donkeylabs-workflows";
699
+ this.tcpPortRange = config.tcpPortRange ?? [49152, 65535];
700
+ this.dbPath = config.dbPath;
701
+ this.heartbeatTimeoutMs = config.heartbeatTimeout ?? 60000;
702
+ }
703
+
704
+ private getSocketServer(): WorkflowSocketServer {
705
+ if (!this.socketServer) {
706
+ this.socketServer = createWorkflowSocketServer(
707
+ {
708
+ socketDir: this.socketDir,
709
+ tcpPortRange: this.tcpPortRange,
710
+ },
711
+ {
712
+ onEvent: (event) => this.handleIsolatedEvent(event),
713
+ onProxyCall: (request) => this.handleProxyCall(request),
714
+ onConnect: (instanceId) => {
715
+ console.log(`[Workflows] Isolated workflow ${instanceId} connected`);
716
+ },
717
+ onDisconnect: (instanceId) => {
718
+ console.log(`[Workflows] Isolated workflow ${instanceId} disconnected`);
719
+ },
720
+ onError: (error, instanceId) => {
721
+ console.error(`[Workflows] Socket error for ${instanceId}:`, error);
722
+ },
723
+ }
724
+ );
725
+ }
726
+ return this.socketServer;
589
727
  }
590
728
 
591
729
  setCore(core: CoreServices): void {
592
730
  this.core = core;
731
+ // Extract DB path if using Kysely adapter (for isolated workflows)
732
+ if (!this.dbPath && (core.db as any)?.getExecutor) {
733
+ // Try to get the database path from the Kysely instance
734
+ // This is a bit hacky but necessary for isolated workflows
735
+ try {
736
+ const executor = (core.db as any).getExecutor();
737
+ const adapter = executor?.adapter;
738
+ if (adapter?.db?.filename) {
739
+ this.dbPath = adapter.db.filename;
740
+ }
741
+ } catch {
742
+ // Ignore - dbPath might be set manually
743
+ }
744
+ }
593
745
  }
594
746
 
595
747
  setPlugins(plugins: Record<string, any>): void {
596
748
  this.plugins = plugins;
597
749
  }
598
750
 
599
- register(definition: WorkflowDefinition): void {
751
+ async updateMetadata(instanceId: string, key: string, value: any): Promise<void> {
752
+ const instance = await this.adapter.getInstance(instanceId);
753
+ if (!instance) return;
754
+
755
+ const metadata = { ...(instance.metadata || {}), [key]: value };
756
+ await this.adapter.updateInstance(instanceId, { metadata });
757
+ }
758
+
759
+ register(definition: WorkflowDefinition, options?: WorkflowRegisterOptions): void {
600
760
  if (this.definitions.has(definition.name)) {
601
761
  throw new Error(`Workflow "${definition.name}" is already registered`);
602
762
  }
763
+
764
+ // Store module path for isolated workflows
765
+ if (options?.modulePath) {
766
+ this.workflowModulePaths.set(definition.name, options.modulePath);
767
+ } else if (definition.isolated !== false) {
768
+ // Warn if isolated workflow has no module path
769
+ console.warn(
770
+ `[Workflows] Workflow "${definition.name}" is isolated but no modulePath provided. ` +
771
+ `Use: workflows.register(myWorkflow, { modulePath: import.meta.url })`
772
+ );
773
+ }
774
+
603
775
  this.definitions.set(definition.name, definition);
604
776
  }
605
777
 
@@ -625,8 +797,35 @@ class WorkflowsImpl implements Workflows {
625
797
  input,
626
798
  });
627
799
 
628
- // Start execution
629
- this.executeWorkflow(instance.id, definition);
800
+ // SSE broadcast for real-time monitoring
801
+ if (this.sse) {
802
+ this.sse.broadcast(`workflow:${instance.id}`, "started", {
803
+ workflowName,
804
+ input,
805
+ });
806
+ this.sse.broadcast("workflows:all", "workflow.started", {
807
+ instanceId: instance.id,
808
+ workflowName,
809
+ input,
810
+ });
811
+ }
812
+
813
+ // Start execution: isolated only when definition.isolated !== false AND both a modulePath and dbPath are available; otherwise inline
814
+ const isIsolated = definition.isolated !== false;
815
+ const modulePath = this.workflowModulePaths.get(workflowName);
816
+
817
+ if (isIsolated && modulePath && this.dbPath) {
818
+ // Execute in isolated subprocess
819
+ this.executeIsolatedWorkflow(instance.id, definition, input, modulePath);
820
+ } else {
821
+ // Execute inline using state machine
822
+ if (isIsolated && !modulePath) {
823
+ console.warn(
824
+ `[Workflows] Workflow "${workflowName}" falling back to inline execution (no modulePath)`
825
+ );
826
+ }
827
+ this.startInlineWorkflow(instance.id, definition);
828
+ }
630
829
 
631
830
  return instance.id;
632
831
  }
@@ -641,8 +840,25 @@ class WorkflowsImpl implements Workflows {
641
840
  return false;
642
841
  }
643
842
 
644
- // Clear timeout
843
+ // Kill isolated process if running
844
+ const isolatedInfo = this.isolatedProcesses.get(instanceId);
845
+ if (isolatedInfo) {
846
+ try {
847
+ process.kill(isolatedInfo.pid, "SIGTERM");
848
+ } catch {
849
+ // Process might already be dead
850
+ }
851
+ if (isolatedInfo.timeout) clearTimeout(isolatedInfo.timeout);
852
+ if (isolatedInfo.heartbeatTimeout) clearTimeout(isolatedInfo.heartbeatTimeout);
853
+ this.isolatedProcesses.delete(instanceId);
854
+ await this.getSocketServer().closeSocket(instanceId);
855
+ }
856
+
857
+ // Cancel inline state machine if running
645
858
  const runInfo = this.running.get(instanceId);
859
+ if (runInfo?.sm) {
860
+ runInfo.sm.cancel(instanceId);
861
+ }
646
862
  if (runInfo?.timeout) {
647
863
  clearTimeout(runInfo.timeout);
648
864
  }
@@ -686,694 +902,588 @@ class WorkflowsImpl implements Workflows {
686
902
  }
687
903
 
688
904
  console.log(`[Workflows] Resuming workflow instance ${instance.id}`);
689
- this.executeWorkflow(instance.id, definition);
905
+
906
+ // Check isolation mode and call appropriate method
907
+ const isIsolated = definition.isolated !== false;
908
+ const modulePath = this.workflowModulePaths.get(instance.workflowName);
909
+
910
+ if (isIsolated && modulePath && this.dbPath) {
911
+ this.executeIsolatedWorkflow(instance.id, definition, instance.input, modulePath);
912
+ } else {
913
+ this.startInlineWorkflow(instance.id, definition);
914
+ }
690
915
  }
691
916
  }
692
917
 
693
918
  async stop(): Promise<void> {
694
- // Clear all timeouts
919
+ // Kill all isolated processes
920
+ for (const [instanceId, info] of this.isolatedProcesses) {
921
+ try {
922
+ process.kill(info.pid, "SIGTERM");
923
+ } catch {
924
+ // Process might already be dead
925
+ }
926
+ if (info.timeout) clearTimeout(info.timeout);
927
+ if (info.heartbeatTimeout) clearTimeout(info.heartbeatTimeout);
928
+ }
929
+ this.isolatedProcesses.clear();
930
+
931
+ // Shutdown socket server
932
+ if (this.socketServer) {
933
+ await this.socketServer.shutdown();
934
+ this.socketServer = undefined;
935
+ }
936
+
937
+ // Clear all inline timeouts and cancel state machines
695
938
  for (const [instanceId, runInfo] of this.running) {
939
+ if (runInfo.sm) {
940
+ runInfo.sm.cancel(instanceId);
941
+ }
696
942
  if (runInfo.timeout) {
697
943
  clearTimeout(runInfo.timeout);
698
944
  }
699
945
  }
700
946
  this.running.clear();
947
+
948
+ // Stop adapter (cleanup timers and prevent further DB access)
949
+ if (this.adapter && typeof (this.adapter as any).stop === "function") {
950
+ (this.adapter as any).stop();
951
+ }
701
952
  }
702
953
 
703
954
  // ============================================
704
- // Execution Engine
955
+ // Inline Execution via State Machine
705
956
  // ============================================
706
957
 
707
- private async executeWorkflow(
958
+ private startInlineWorkflow(
708
959
  instanceId: string,
709
- definition: WorkflowDefinition
710
- ): Promise<void> {
711
- const instance = await this.adapter.getInstance(instanceId);
712
- if (!instance) return;
713
-
714
- // Mark as running
715
- if (instance.status === "pending") {
716
- await this.adapter.updateInstance(instanceId, {
717
- status: "running",
718
- startedAt: new Date(),
719
- });
720
- }
960
+ definition: WorkflowDefinition,
961
+ ): void {
962
+ const sm = new WorkflowStateMachine({
963
+ adapter: this.adapter,
964
+ core: this.core,
965
+ plugins: this.plugins,
966
+ events: this.createInlineEventHandler(instanceId),
967
+ jobs: this.jobs,
968
+ pollInterval: this.pollInterval,
969
+ });
721
970
 
722
971
  // Set up workflow timeout
972
+ let timeout: ReturnType<typeof setTimeout> | undefined;
723
973
  if (definition.timeout) {
724
- const timeout = setTimeout(async () => {
725
- await this.failWorkflow(instanceId, "Workflow timed out");
974
+ timeout = setTimeout(async () => {
975
+ sm.cancel(instanceId);
976
+ await this.adapter.updateInstance(instanceId, {
977
+ status: "failed",
978
+ error: "Workflow timed out",
979
+ completedAt: new Date(),
980
+ });
981
+ await this.emitEvent("workflow.failed", {
982
+ instanceId,
983
+ workflowName: definition.name,
984
+ error: "Workflow timed out",
985
+ });
986
+ if (this.sse) {
987
+ this.sse.broadcast(`workflow:${instanceId}`, "failed", { error: "Workflow timed out" });
988
+ this.sse.broadcast("workflows:all", "workflow.failed", {
989
+ instanceId,
990
+ workflowName: definition.name,
991
+ error: "Workflow timed out",
992
+ });
993
+ }
994
+ this.running.delete(instanceId);
726
995
  }, definition.timeout);
727
- this.running.set(instanceId, { timeout });
728
- } else {
729
- this.running.set(instanceId, {});
730
- }
731
-
732
- // Execute current step
733
- await this.executeStep(instanceId, definition);
734
- }
735
-
736
- private async executeStep(
737
- instanceId: string,
738
- definition: WorkflowDefinition
739
- ): Promise<void> {
740
- const instance = await this.adapter.getInstance(instanceId);
741
- if (!instance || instance.status !== "running") return;
742
-
743
- const stepName = instance.currentStep;
744
- if (!stepName) {
745
- await this.completeWorkflow(instanceId);
746
- return;
747
996
  }
748
997
 
749
- const step = definition.steps.get(stepName);
750
- if (!step) {
751
- await this.failWorkflow(instanceId, `Step "${stepName}" not found`);
752
- return;
753
- }
754
-
755
- // Build context
756
- const ctx = this.buildContext(instance, definition);
757
-
758
- // Emit step started event
759
- await this.emitEvent("workflow.step.started", {
760
- instanceId,
761
- workflowName: instance.workflowName,
762
- stepName,
763
- stepType: step.type,
764
- });
765
-
766
- // Update step result as running
767
- const stepResult: StepResult = {
768
- stepName,
769
- status: "running",
770
- startedAt: new Date(),
771
- attempts: (instance.stepResults[stepName]?.attempts ?? 0) + 1,
772
- };
773
- await this.adapter.updateInstance(instanceId, {
774
- stepResults: { ...instance.stepResults, [stepName]: stepResult },
775
- });
998
+ this.running.set(instanceId, { timeout, sm });
776
999
 
777
- try {
778
- let output: any;
779
-
780
- switch (step.type) {
781
- case "task":
782
- output = await this.executeTaskStep(instanceId, step, ctx, definition);
783
- break;
784
- case "parallel":
785
- output = await this.executeParallelStep(instanceId, step, ctx, definition);
786
- break;
787
- case "choice":
788
- output = await this.executeChoiceStep(instanceId, step, ctx, definition);
789
- break;
790
- case "pass":
791
- output = await this.executePassStep(instanceId, step, ctx);
792
- break;
1000
+ // Run the state machine (fire and forget - events handle communication)
1001
+ sm.run(instanceId, definition).then(() => {
1002
+ // Clean up timeout on completion
1003
+ const runInfo = this.running.get(instanceId);
1004
+ if (runInfo?.timeout) {
1005
+ clearTimeout(runInfo.timeout);
793
1006
  }
794
-
795
- // Step completed successfully
796
- await this.completeStep(instanceId, stepName, output, step, definition);
797
- } catch (error) {
798
- const errorMsg = error instanceof Error ? error.message : String(error);
799
- await this.handleStepError(instanceId, stepName, errorMsg, step, definition);
800
- }
1007
+ this.running.delete(instanceId);
1008
+ }).catch(() => {
1009
+ // State machine already persisted the failure - just clean up
1010
+ const runInfo = this.running.get(instanceId);
1011
+ if (runInfo?.timeout) {
1012
+ clearTimeout(runInfo.timeout);
1013
+ }
1014
+ this.running.delete(instanceId);
1015
+ });
801
1016
  }
802
1017
 
803
- private async executeTaskStep(
804
- instanceId: string,
805
- step: TaskStepDefinition,
806
- ctx: WorkflowContext,
807
- definition: WorkflowDefinition
808
- ): Promise<any> {
809
- // Determine which API is being used
810
- const useInlineHandler = !!step.handler;
811
-
812
- if (useInlineHandler) {
813
- // === NEW API: Inline handler with Zod schemas ===
814
- let input: any;
815
-
816
- if (step.inputSchema) {
817
- if (typeof step.inputSchema === "function") {
818
- // inputSchema is a mapper function: (prev, workflowInput) => input
819
- input = step.inputSchema(ctx.prev, ctx.input);
820
- } else {
821
- // inputSchema is a Zod schema - validate workflow input
822
- const parseResult = step.inputSchema.safeParse(ctx.input);
823
- if (!parseResult.success) {
824
- throw new Error(`Input validation failed: ${parseResult.error.message}`);
825
- }
826
- input = parseResult.data;
1018
+ /**
1019
+ * Create an event handler that bridges state machine events to Events service + SSE
1020
+ */
1021
+ private createInlineEventHandler(instanceId: string): StateMachineEvents {
1022
+ return {
1023
+ onStepStarted: (id, stepName, stepType) => {
1024
+ this.emitEvent("workflow.step.started", {
1025
+ instanceId: id,
1026
+ stepName,
1027
+ stepType,
1028
+ });
1029
+ if (this.sse) {
1030
+ this.sse.broadcast(`workflow:${id}`, "step.started", { stepName });
1031
+ this.sse.broadcast("workflows:all", "workflow.step.started", {
1032
+ instanceId: id,
1033
+ stepName,
1034
+ });
827
1035
  }
828
- } else {
829
- // No input schema, use workflow input directly
830
- input = ctx.input;
831
- }
832
-
833
- // Update step with input
834
- const instance = await this.adapter.getInstance(instanceId);
835
- if (instance) {
836
- const stepResult = instance.stepResults[step.name];
837
- if (stepResult) {
838
- stepResult.input = input;
839
- await this.adapter.updateInstance(instanceId, {
840
- stepResults: { ...instance.stepResults, [step.name]: stepResult },
1036
+ },
1037
+ onStepCompleted: (id, stepName, output, nextStep) => {
1038
+ this.emitEvent("workflow.step.completed", {
1039
+ instanceId: id,
1040
+ stepName,
1041
+ output,
1042
+ });
1043
+ if (this.sse) {
1044
+ this.sse.broadcast(`workflow:${id}`, "step.completed", { stepName, output });
1045
+ this.sse.broadcast("workflows:all", "workflow.step.completed", {
1046
+ instanceId: id,
1047
+ stepName,
841
1048
  });
842
1049
  }
843
- }
844
-
845
- // Execute the inline handler
846
- let result = await step.handler!(input, ctx);
847
-
848
- // Validate output if schema provided
849
- if (step.outputSchema) {
850
- const parseResult = step.outputSchema.safeParse(result);
851
- if (!parseResult.success) {
852
- throw new Error(`Output validation failed: ${parseResult.error.message}`);
1050
+ },
1051
+ onStepFailed: (id, stepName, error, attempts) => {
1052
+ this.emitEvent("workflow.step.failed", {
1053
+ instanceId: id,
1054
+ stepName,
1055
+ error,
1056
+ attempts,
1057
+ });
1058
+ if (this.sse) {
1059
+ this.sse.broadcast(`workflow:${id}`, "step.failed", { stepName, error });
1060
+ this.sse.broadcast("workflows:all", "workflow.step.failed", {
1061
+ instanceId: id,
1062
+ stepName,
1063
+ error,
1064
+ });
853
1065
  }
854
- result = parseResult.data;
855
- }
856
-
857
- return result;
858
- } else {
859
- // === LEGACY API: Job-based execution ===
860
- if (!this.jobs) {
861
- throw new Error("Jobs service not configured");
862
- }
863
-
864
- if (!step.job) {
865
- throw new Error("Task step requires either 'handler' or 'job'");
866
- }
867
-
868
- // Prepare job input
869
- const jobInput = step.input ? step.input(ctx) : ctx.input;
870
-
871
- // Update step with input
872
- const instance = await this.adapter.getInstance(instanceId);
873
- if (instance) {
874
- const stepResult = instance.stepResults[step.name];
875
- if (stepResult) {
876
- stepResult.input = jobInput;
877
- await this.adapter.updateInstance(instanceId, {
878
- stepResults: { ...instance.stepResults, [step.name]: stepResult },
1066
+ },
1067
+ onStepRetry: (id, stepName, attempt, max, delayMs) => {
1068
+ this.emitEvent("workflow.step.retry", {
1069
+ instanceId: id,
1070
+ stepName,
1071
+ attempt,
1072
+ maxAttempts: max,
1073
+ delay: delayMs,
1074
+ });
1075
+ },
1076
+ onProgress: (id, progress, currentStep, completed, total) => {
1077
+ this.emitEvent("workflow.progress", {
1078
+ instanceId: id,
1079
+ progress,
1080
+ currentStep,
1081
+ completedSteps: completed,
1082
+ totalSteps: total,
1083
+ });
1084
+ if (this.sse) {
1085
+ this.sse.broadcast(`workflow:${id}`, "progress", {
1086
+ progress,
1087
+ currentStep,
1088
+ completedSteps: completed,
1089
+ totalSteps: total,
1090
+ });
1091
+ this.sse.broadcast("workflows:all", "workflow.progress", {
1092
+ instanceId: id,
1093
+ progress,
1094
+ currentStep,
879
1095
  });
880
1096
  }
881
- }
882
-
883
- // Enqueue the job
884
- const jobId = await this.jobs.enqueue(step.job, {
885
- ...jobInput,
886
- _workflowInstanceId: instanceId,
887
- _workflowStepName: step.name,
888
- });
889
-
890
- // Wait for job completion
891
- const result = await this.waitForJob(jobId, step.timeout);
892
-
893
- // Transform output if needed
894
- return step.output ? step.output(result, ctx) : result;
895
- }
1097
+ },
1098
+ onCompleted: (id, output) => {
1099
+ this.emitEvent("workflow.completed", {
1100
+ instanceId: id,
1101
+ output,
1102
+ });
1103
+ if (this.sse) {
1104
+ this.sse.broadcast(`workflow:${id}`, "completed", { output });
1105
+ this.sse.broadcast("workflows:all", "workflow.completed", {
1106
+ instanceId: id,
1107
+ });
1108
+ }
1109
+ },
1110
+ onFailed: (id, error) => {
1111
+ this.emitEvent("workflow.failed", {
1112
+ instanceId: id,
1113
+ error,
1114
+ });
1115
+ if (this.sse) {
1116
+ this.sse.broadcast(`workflow:${id}`, "failed", { error });
1117
+ this.sse.broadcast("workflows:all", "workflow.failed", {
1118
+ instanceId: id,
1119
+ error,
1120
+ });
1121
+ }
1122
+ },
1123
+ };
896
1124
  }
897
1125
 
898
- private async waitForJob(jobId: string, timeout?: number): Promise<any> {
899
- if (!this.jobs) {
900
- throw new Error("Jobs service not configured");
901
- }
902
-
903
- const startTime = Date.now();
904
-
905
- while (true) {
906
- const job = await this.jobs.get(jobId);
907
-
908
- if (!job) {
909
- throw new Error(`Job ${jobId} not found`);
910
- }
911
-
912
- if (job.status === "completed") {
913
- return job.result;
914
- }
915
-
916
- if (job.status === "failed") {
917
- throw new Error(job.error ?? "Job failed");
918
- }
919
-
920
- // Check timeout
921
- if (timeout && Date.now() - startTime > timeout) {
922
- throw new Error("Job timed out");
923
- }
924
-
925
- // Wait before polling again
926
- await new Promise((resolve) => setTimeout(resolve, this.pollInterval));
927
- }
928
- }
1126
+ // ============================================
1127
+ // Isolated Execution Engine
1128
+ // ============================================
929
1129
 
930
- private async executeParallelStep(
1130
+ /**
1131
+ * Execute a workflow in an isolated subprocess
1132
+ */
1133
+ private async executeIsolatedWorkflow(
931
1134
  instanceId: string,
932
- step: ParallelStepDefinition,
933
- ctx: WorkflowContext,
934
- definition: WorkflowDefinition
935
- ): Promise<any> {
936
- const branchPromises: Promise<{ name: string; result: any }>[] = [];
937
- const branchInstanceIds: string[] = [];
938
-
939
- for (const branchDef of step.branches) {
940
- // Register branch workflow if not already
941
- if (!this.definitions.has(branchDef.name)) {
942
- this.definitions.set(branchDef.name, branchDef);
943
- }
1135
+ definition: WorkflowDefinition,
1136
+ input: any,
1137
+ modulePath: string
1138
+ ): Promise<void> {
1139
+ const socketServer = this.getSocketServer();
944
1140
 
945
- // Start branch as sub-workflow
946
- const branchInstanceId = await this.adapter.createInstance({
947
- workflowName: branchDef.name,
948
- status: "pending",
949
- currentStep: branchDef.startAt,
950
- input: ctx.input,
951
- stepResults: {},
952
- createdAt: new Date(),
953
- parentId: instanceId,
954
- branchName: branchDef.name,
955
- });
1141
+ // Create socket for this workflow instance
1142
+ const { socketPath, tcpPort } = await socketServer.createSocket(instanceId);
956
1143
 
957
- branchInstanceIds.push(branchInstanceId.id);
1144
+ // Mark workflow as running
1145
+ await this.adapter.updateInstance(instanceId, {
1146
+ status: "running",
1147
+ startedAt: new Date(),
1148
+ });
958
1149
 
959
- // Execute branch
960
- const branchPromise = (async () => {
961
- await this.executeWorkflow(branchInstanceId.id, branchDef);
1150
+ // Get the executor path
1151
+ const currentDir = dirname(fileURLToPath(import.meta.url));
1152
+ const executorPath = join(currentDir, "workflow-executor.ts");
962
1153
 
963
- // Wait for branch completion
964
- while (true) {
965
- const branchInstance = await this.adapter.getInstance(branchInstanceId.id);
966
- if (!branchInstance) {
967
- throw new Error(`Branch instance ${branchInstanceId.id} not found`);
968
- }
1154
+ // Prepare config for the executor
1155
+ const config = {
1156
+ instanceId,
1157
+ workflowName: definition.name,
1158
+ input,
1159
+ socketPath,
1160
+ tcpPort,
1161
+ modulePath,
1162
+ dbPath: this.dbPath,
1163
+ };
969
1164
 
970
- if (branchInstance.status === "completed") {
971
- return { name: branchDef.name, result: branchInstance.output };
972
- }
1165
+ // Spawn the subprocess
1166
+ const proc = Bun.spawn(["bun", "run", executorPath], {
1167
+ stdin: "pipe",
1168
+ stdout: "inherit",
1169
+ stderr: "inherit",
1170
+ env: {
1171
+ ...process.env,
1172
+ // Ensure the subprocess can import from the same paths
1173
+ NODE_OPTIONS: process.env.NODE_OPTIONS ?? "",
1174
+ },
1175
+ });
973
1176
 
974
- if (branchInstance.status === "failed") {
975
- throw new Error(branchInstance.error ?? `Branch ${branchDef.name} failed`);
976
- }
1177
+ // Send config via stdin
1178
+ proc.stdin.write(JSON.stringify(config));
1179
+ proc.stdin.end();
977
1180
 
978
- await new Promise((resolve) => setTimeout(resolve, this.pollInterval));
979
- }
980
- })();
1181
+ // Track the process
1182
+ this.isolatedProcesses.set(instanceId, {
1183
+ pid: proc.pid,
1184
+ lastHeartbeat: Date.now(),
1185
+ });
981
1186
 
982
- branchPromises.push(branchPromise);
1187
+ // Set up workflow timeout
1188
+ if (definition.timeout) {
1189
+ const timeoutHandle = setTimeout(async () => {
1190
+ await this.handleIsolatedTimeout(instanceId, proc.pid);
1191
+ }, definition.timeout);
1192
+ const info = this.isolatedProcesses.get(instanceId);
1193
+ if (info) info.timeout = timeoutHandle;
983
1194
  }
984
1195
 
985
- // Track branch instances
986
- await this.adapter.updateInstance(instanceId, {
987
- branchInstances: {
988
- ...((await this.adapter.getInstance(instanceId))?.branchInstances ?? {}),
989
- [step.name]: branchInstanceIds,
990
- },
991
- });
1196
+ // Set up heartbeat timeout
1197
+ this.resetHeartbeatTimeout(instanceId, proc.pid);
992
1198
 
993
- // Wait for all branches
994
- if (step.onError === "wait-all") {
995
- const results = await Promise.allSettled(branchPromises);
996
- const output: Record<string, any> = {};
997
- const errors: string[] = [];
998
-
999
- for (const result of results) {
1000
- if (result.status === "fulfilled") {
1001
- output[result.value.name] = result.value.result;
1002
- } else {
1003
- errors.push(result.reason?.message ?? "Branch failed");
1004
- }
1199
+ // Handle process exit
1200
+ proc.exited.then(async (exitCode) => {
1201
+ const info = this.isolatedProcesses.get(instanceId);
1202
+ if (info) {
1203
+ if (info.timeout) clearTimeout(info.timeout);
1204
+ if (info.heartbeatTimeout) clearTimeout(info.heartbeatTimeout);
1205
+ this.isolatedProcesses.delete(instanceId);
1005
1206
  }
1207
+ await socketServer.closeSocket(instanceId);
1006
1208
 
1007
- if (errors.length > 0) {
1008
- throw new Error(`Parallel branches failed: ${errors.join(", ")}`);
1209
+ // Check if workflow is still running (crashed before completion)
1210
+ const instance = await this.adapter.getInstance(instanceId);
1211
+ if (instance && instance.status === "running") {
1212
+ console.error(`[Workflows] Isolated workflow ${instanceId} crashed with exit code ${exitCode}`);
1213
+ await this.adapter.updateInstance(instanceId, {
1214
+ status: "failed",
1215
+ error: `Subprocess crashed with exit code ${exitCode}`,
1216
+ completedAt: new Date(),
1217
+ });
1218
+ await this.emitEvent("workflow.failed", {
1219
+ instanceId,
1220
+ workflowName: instance.workflowName,
1221
+ error: `Subprocess crashed with exit code ${exitCode}`,
1222
+ });
1223
+ if (this.sse) {
1224
+ this.sse.broadcast(`workflow:${instanceId}`, "failed", {
1225
+ error: `Subprocess crashed with exit code ${exitCode}`,
1226
+ });
1227
+ this.sse.broadcast("workflows:all", "workflow.failed", {
1228
+ instanceId,
1229
+ workflowName: instance.workflowName,
1230
+ error: `Subprocess crashed with exit code ${exitCode}`,
1231
+ });
1232
+ }
1009
1233
  }
1234
+ });
1235
+ }
1010
1236
 
1011
- return output;
1012
- } else {
1013
- // fail-fast (default)
1014
- const results = await Promise.all(branchPromises);
1015
- const output: Record<string, any> = {};
1016
- for (const result of results) {
1017
- output[result.name] = result.result;
1018
- }
1019
- return output;
1237
+ /**
1238
+ * Handle events from isolated workflow subprocess.
1239
+ * The subprocess owns persistence via its own adapter - we only forward events to SSE/Events.
1240
+ */
1241
+ private async handleIsolatedEvent(event: WorkflowEvent): Promise<void> {
1242
+ const { instanceId, type } = event;
1243
+
1244
+ // Reset heartbeat timeout on any event
1245
+ const info = this.isolatedProcesses.get(instanceId);
1246
+ if (info) {
1247
+ info.lastHeartbeat = Date.now();
1248
+ this.resetHeartbeatTimeout(instanceId, info.pid);
1020
1249
  }
1021
- }
1022
1250
 
1023
- private async executeChoiceStep(
1024
- instanceId: string,
1025
- step: ChoiceStepDefinition,
1026
- ctx: WorkflowContext,
1027
- definition: WorkflowDefinition
1028
- ): Promise<string> {
1029
- // Evaluate conditions in order
1030
- for (const choice of step.choices) {
1031
- try {
1032
- if (choice.condition(ctx)) {
1033
- // Update current step and continue
1034
- await this.adapter.updateInstance(instanceId, {
1035
- currentStep: choice.next,
1251
+ switch (type) {
1252
+ case "started":
1253
+ case "heartbeat":
1254
+ // No-op: heartbeat handled above, started is handled by executeIsolatedWorkflow
1255
+ break;
1256
+
1257
+ case "step.started": {
1258
+ await this.emitEvent("workflow.step.started", {
1259
+ instanceId,
1260
+ stepName: event.stepName,
1261
+ stepType: event.stepType,
1262
+ });
1263
+ if (this.sse) {
1264
+ this.sse.broadcast(`workflow:${instanceId}`, "step.started", {
1265
+ stepName: event.stepName,
1036
1266
  });
1037
-
1038
- // Mark choice step as complete
1039
- const instance = await this.adapter.getInstance(instanceId);
1040
- if (instance) {
1041
- const stepResult = instance.stepResults[step.name];
1042
- if (stepResult) {
1043
- stepResult.status = "completed";
1044
- stepResult.output = { chosen: choice.next };
1045
- stepResult.completedAt = new Date();
1046
- await this.adapter.updateInstance(instanceId, {
1047
- stepResults: { ...instance.stepResults, [step.name]: stepResult },
1048
- });
1049
- }
1050
- }
1051
-
1052
- // Emit progress
1053
- await this.emitEvent("workflow.step.completed", {
1267
+ this.sse.broadcast("workflows:all", "workflow.step.started", {
1054
1268
  instanceId,
1055
- workflowName: (await this.adapter.getInstance(instanceId))?.workflowName,
1056
- stepName: step.name,
1057
- output: { chosen: choice.next },
1269
+ stepName: event.stepName,
1058
1270
  });
1059
-
1060
- // Execute next step
1061
- await this.executeStep(instanceId, definition);
1062
- return choice.next;
1063
1271
  }
1064
- } catch {
1065
- // Condition threw, try next
1272
+ break;
1066
1273
  }
1067
- }
1068
-
1069
- // No condition matched, use default
1070
- if (step.default) {
1071
- await this.adapter.updateInstance(instanceId, {
1072
- currentStep: step.default,
1073
- });
1074
1274
 
1075
- // Mark choice step as complete
1076
- const instance = await this.adapter.getInstance(instanceId);
1077
- if (instance) {
1078
- const stepResult = instance.stepResults[step.name];
1079
- if (stepResult) {
1080
- stepResult.status = "completed";
1081
- stepResult.output = { chosen: step.default };
1082
- stepResult.completedAt = new Date();
1083
- await this.adapter.updateInstance(instanceId, {
1084
- stepResults: { ...instance.stepResults, [step.name]: stepResult },
1275
+ case "step.completed": {
1276
+ await this.emitEvent("workflow.step.completed", {
1277
+ instanceId,
1278
+ stepName: event.stepName,
1279
+ output: event.output,
1280
+ });
1281
+ if (this.sse) {
1282
+ this.sse.broadcast(`workflow:${instanceId}`, "step.completed", {
1283
+ stepName: event.stepName,
1284
+ output: event.output,
1285
+ });
1286
+ this.sse.broadcast("workflows:all", "workflow.step.completed", {
1287
+ instanceId,
1288
+ stepName: event.stepName,
1289
+ output: event.output,
1085
1290
  });
1086
1291
  }
1292
+ break;
1087
1293
  }
1088
1294
 
1089
- await this.emitEvent("workflow.step.completed", {
1090
- instanceId,
1091
- workflowName: instance?.workflowName,
1092
- stepName: step.name,
1093
- output: { chosen: step.default },
1094
- });
1095
-
1096
- await this.executeStep(instanceId, definition);
1097
- return step.default;
1098
- }
1099
-
1100
- throw new Error("No choice condition matched and no default specified");
1101
- }
1102
-
1103
- private async executePassStep(
1104
- instanceId: string,
1105
- step: PassStepDefinition,
1106
- ctx: WorkflowContext
1107
- ): Promise<any> {
1108
- if (step.result !== undefined) {
1109
- return step.result;
1110
- }
1111
-
1112
- if (step.transform) {
1113
- return step.transform(ctx);
1114
- }
1115
-
1116
- return ctx.input;
1117
- }
1295
+ case "step.failed": {
1296
+ await this.emitEvent("workflow.step.failed", {
1297
+ instanceId,
1298
+ stepName: event.stepName,
1299
+ error: event.error,
1300
+ });
1301
+ if (this.sse) {
1302
+ this.sse.broadcast(`workflow:${instanceId}`, "step.failed", {
1303
+ stepName: event.stepName,
1304
+ error: event.error,
1305
+ });
1306
+ this.sse.broadcast("workflows:all", "workflow.step.failed", {
1307
+ instanceId,
1308
+ stepName: event.stepName,
1309
+ error: event.error,
1310
+ });
1311
+ }
1312
+ break;
1313
+ }
1118
1314
 
1119
- private buildContext(instance: WorkflowInstance, definition: WorkflowDefinition): WorkflowContext {
1120
- // Build steps object with outputs
1121
- const steps: Record<string, any> = {};
1122
- for (const [name, result] of Object.entries(instance.stepResults)) {
1123
- if (result.status === "completed" && result.output !== undefined) {
1124
- steps[name] = result.output;
1315
+ case "progress": {
1316
+ await this.emitEvent("workflow.progress", {
1317
+ instanceId,
1318
+ progress: event.progress,
1319
+ completedSteps: event.completedSteps,
1320
+ totalSteps: event.totalSteps,
1321
+ });
1322
+ if (this.sse) {
1323
+ this.sse.broadcast(`workflow:${instanceId}`, "progress", {
1324
+ progress: event.progress,
1325
+ completedSteps: event.completedSteps,
1326
+ totalSteps: event.totalSteps,
1327
+ });
1328
+ this.sse.broadcast("workflows:all", "workflow.progress", {
1329
+ instanceId,
1330
+ progress: event.progress,
1331
+ completedSteps: event.completedSteps,
1332
+ totalSteps: event.totalSteps,
1333
+ });
1334
+ }
1335
+ break;
1125
1336
  }
1126
- }
1127
1337
 
1128
- // Find the previous step's output by tracing the workflow path
1129
- let prev: any = undefined;
1130
- if (instance.currentStep) {
1131
- // Find which step comes before current step
1132
- for (const [stepName, stepDef] of definition.steps) {
1133
- if (stepDef.next === instance.currentStep && steps[stepName] !== undefined) {
1134
- prev = steps[stepName];
1135
- break;
1338
+ case "completed": {
1339
+ // Clean up isolated process tracking
1340
+ this.cleanupIsolatedProcess(instanceId);
1341
+
1342
+ // Subprocess already persisted state - just emit events
1343
+ await this.emitEvent("workflow.completed", {
1344
+ instanceId,
1345
+ output: event.output,
1346
+ });
1347
+ if (this.sse) {
1348
+ this.sse.broadcast(`workflow:${instanceId}`, "completed", { output: event.output });
1349
+ this.sse.broadcast("workflows:all", "workflow.completed", { instanceId });
1136
1350
  }
1351
+ break;
1137
1352
  }
1138
- // If no explicit next found, use most recent completed step output
1139
- if (prev === undefined) {
1140
- const completedSteps = Object.entries(instance.stepResults)
1141
- .filter(([, r]) => r.status === "completed" && r.output !== undefined)
1142
- .sort((a, b) => {
1143
- const aTime = a[1].completedAt?.getTime() ?? 0;
1144
- const bTime = b[1].completedAt?.getTime() ?? 0;
1145
- return bTime - aTime;
1353
+
1354
+ case "failed": {
1355
+ // Clean up isolated process tracking
1356
+ this.cleanupIsolatedProcess(instanceId);
1357
+
1358
+ // Subprocess already persisted state - just emit events
1359
+ await this.emitEvent("workflow.failed", {
1360
+ instanceId,
1361
+ error: event.error,
1362
+ });
1363
+ if (this.sse) {
1364
+ this.sse.broadcast(`workflow:${instanceId}`, "failed", { error: event.error });
1365
+ this.sse.broadcast("workflows:all", "workflow.failed", {
1366
+ instanceId,
1367
+ error: event.error,
1146
1368
  });
1147
- if (completedSteps.length > 0) {
1148
- prev = completedSteps[0][1].output;
1149
1369
  }
1370
+ break;
1150
1371
  }
1151
1372
  }
1152
-
1153
- return {
1154
- input: instance.input,
1155
- steps,
1156
- prev,
1157
- instance,
1158
- getStepResult: <T = any>(stepName: string): T | undefined => {
1159
- return steps[stepName] as T | undefined;
1160
- },
1161
- core: this.core!,
1162
- plugins: this.plugins,
1163
- };
1164
1373
  }
1165
1374
 
1166
- private async completeStep(
1167
- instanceId: string,
1168
- stepName: string,
1169
- output: any,
1170
- step: StepDefinition,
1171
- definition: WorkflowDefinition
1172
- ): Promise<void> {
1173
- const instance = await this.adapter.getInstance(instanceId);
1174
- if (!instance) return;
1175
-
1176
- // Update step result
1177
- const stepResult = instance.stepResults[stepName] ?? {
1178
- stepName,
1179
- status: "pending",
1180
- attempts: 0,
1181
- };
1182
- stepResult.status = "completed";
1183
- stepResult.output = output;
1184
- stepResult.completedAt = new Date();
1185
-
1186
- await this.adapter.updateInstance(instanceId, {
1187
- stepResults: { ...instance.stepResults, [stepName]: stepResult },
1188
- });
1189
-
1190
- // Emit step completed event
1191
- await this.emitEvent("workflow.step.completed", {
1192
- instanceId,
1193
- workflowName: instance.workflowName,
1194
- stepName,
1195
- output,
1196
- });
1197
-
1198
- // Calculate and emit progress
1199
- const totalSteps = definition.steps.size;
1200
- const completedSteps = Object.values(instance.stepResults).filter(
1201
- (r) => r.status === "completed"
1202
- ).length + 1; // +1 for current step
1203
- const progress = Math.round((completedSteps / totalSteps) * 100);
1204
-
1205
- await this.emitEvent("workflow.progress", {
1206
- instanceId,
1207
- workflowName: instance.workflowName,
1208
- progress,
1209
- currentStep: stepName,
1210
- completedSteps,
1211
- totalSteps,
1212
- });
1213
-
1214
- // Broadcast via SSE
1215
- if (this.sse) {
1216
- this.sse.broadcast(`workflow:${instanceId}`, "progress", {
1217
- progress,
1218
- currentStep: stepName,
1219
- completedSteps,
1220
- totalSteps,
1221
- });
1222
- }
1375
+ /**
1376
+ * Handle proxy calls from isolated subprocess
1377
+ */
1378
+ private async handleProxyCall(request: ProxyRequest): Promise<any> {
1379
+ const { target, service, method, args } = request;
1223
1380
 
1224
- // Move to next step or complete
1225
- if (step.end) {
1226
- await this.completeWorkflow(instanceId, output);
1227
- } else if (step.next) {
1228
- await this.adapter.updateInstance(instanceId, {
1229
- currentStep: step.next,
1230
- });
1231
- await this.executeStep(instanceId, definition);
1381
+ if (target === "plugin") {
1382
+ const plugin = this.plugins[service];
1383
+ if (!plugin) {
1384
+ throw new Error(`Plugin "${service}" not found`);
1385
+ }
1386
+ const fn = plugin[method];
1387
+ if (typeof fn !== "function") {
1388
+ throw new Error(`Method "${method}" not found on plugin "${service}"`);
1389
+ }
1390
+ return fn.apply(plugin, args);
1391
+ } else if (target === "core") {
1392
+ if (!this.core) {
1393
+ throw new Error("Core services not available");
1394
+ }
1395
+ const coreService = (this.core as any)[service];
1396
+ if (!coreService) {
1397
+ throw new Error(`Core service "${service}" not found`);
1398
+ }
1399
+ const fn = coreService[method];
1400
+ if (typeof fn !== "function") {
1401
+ throw new Error(`Method "${method}" not found on core service "${service}"`);
1402
+ }
1403
+ return fn.apply(coreService, args);
1232
1404
  } else {
1233
- // No next step, complete
1234
- await this.completeWorkflow(instanceId, output);
1405
+ throw new Error(`Unknown proxy target: ${target}`);
1235
1406
  }
1236
1407
  }
1237
1408
 
1238
- private async handleStepError(
1239
- instanceId: string,
1240
- stepName: string,
1241
- error: string,
1242
- step: StepDefinition,
1243
- definition: WorkflowDefinition
1244
- ): Promise<void> {
1245
- const instance = await this.adapter.getInstance(instanceId);
1246
- if (!instance) return;
1247
-
1248
- const stepResult = instance.stepResults[stepName] ?? {
1249
- stepName,
1250
- status: "pending",
1251
- attempts: 0,
1252
- };
1253
-
1254
- // Check retry config
1255
- const retry = step.retry ?? definition.defaultRetry;
1256
- if (retry && stepResult.attempts < retry.maxAttempts) {
1257
- // Retry with backoff
1258
- const backoffRate = retry.backoffRate ?? 2;
1259
- const intervalMs = retry.intervalMs ?? 1000;
1260
- const maxIntervalMs = retry.maxIntervalMs ?? 30000;
1261
- const delay = Math.min(
1262
- intervalMs * Math.pow(backoffRate, stepResult.attempts - 1),
1263
- maxIntervalMs
1264
- );
1265
-
1266
- console.log(
1267
- `[Workflows] Retrying step ${stepName} in ${delay}ms (attempt ${stepResult.attempts}/${retry.maxAttempts})`
1268
- );
1269
-
1270
- await this.emitEvent("workflow.step.retry", {
1271
- instanceId,
1272
- workflowName: instance.workflowName,
1273
- stepName,
1274
- attempt: stepResult.attempts,
1275
- maxAttempts: retry.maxAttempts,
1276
- delay,
1277
- error,
1278
- });
1279
-
1280
- // Update step result
1281
- stepResult.error = error;
1282
- await this.adapter.updateInstance(instanceId, {
1283
- stepResults: { ...instance.stepResults, [stepName]: stepResult },
1284
- });
1285
-
1286
- // Retry after delay
1287
- setTimeout(() => {
1288
- this.executeStep(instanceId, definition);
1289
- }, delay);
1290
-
1291
- return;
1409
+ /**
1410
+ * Clean up isolated process tracking
1411
+ */
1412
+ private cleanupIsolatedProcess(instanceId: string): void {
1413
+ const info = this.isolatedProcesses.get(instanceId);
1414
+ if (info) {
1415
+ if (info.timeout) clearTimeout(info.timeout);
1416
+ if (info.heartbeatTimeout) clearTimeout(info.heartbeatTimeout);
1417
+ this.isolatedProcesses.delete(instanceId);
1292
1418
  }
1293
-
1294
- // No more retries, fail the step
1295
- stepResult.status = "failed";
1296
- stepResult.error = error;
1297
- stepResult.completedAt = new Date();
1298
-
1299
- await this.adapter.updateInstance(instanceId, {
1300
- stepResults: { ...instance.stepResults, [stepName]: stepResult },
1301
- });
1302
-
1303
- await this.emitEvent("workflow.step.failed", {
1304
- instanceId,
1305
- workflowName: instance.workflowName,
1306
- stepName,
1307
- error,
1308
- attempts: stepResult.attempts,
1309
- });
1310
-
1311
- // Fail the workflow
1312
- await this.failWorkflow(instanceId, `Step "${stepName}" failed: ${error}`);
1313
1419
  }
1314
1420
 
1315
- private async completeWorkflow(instanceId: string, output?: any): Promise<void> {
1316
- const instance = await this.adapter.getInstance(instanceId);
1317
- if (!instance) return;
1421
+ /**
1422
+ * Reset heartbeat timeout for an isolated workflow
1423
+ */
1424
+ private resetHeartbeatTimeout(instanceId: string, pid: number): void {
1425
+ const info = this.isolatedProcesses.get(instanceId);
1426
+ if (!info) return;
1318
1427
 
1319
- // Clear timeout
1320
- const runInfo = this.running.get(instanceId);
1321
- if (runInfo?.timeout) {
1322
- clearTimeout(runInfo.timeout);
1428
+ // Clear existing timeout
1429
+ if (info.heartbeatTimeout) {
1430
+ clearTimeout(info.heartbeatTimeout);
1323
1431
  }
1324
- this.running.delete(instanceId);
1325
-
1326
- await this.adapter.updateInstance(instanceId, {
1327
- status: "completed",
1328
- output,
1329
- completedAt: new Date(),
1330
- currentStep: undefined,
1331
- });
1332
1432
 
1333
- await this.emitEvent("workflow.completed", {
1334
- instanceId,
1335
- workflowName: instance.workflowName,
1336
- output,
1337
- });
1433
+ // Set new timeout
1434
+ info.heartbeatTimeout = setTimeout(async () => {
1435
+ // Check if process is still alive
1436
+ if (!isProcessAlive(pid)) {
1437
+ return; // Process already dead, exit handler will handle it
1438
+ }
1338
1439
 
1339
- // Broadcast via SSE
1340
- if (this.sse) {
1341
- this.sse.broadcast(`workflow:${instanceId}`, "completed", { output });
1342
- }
1440
+ console.error(`[Workflows] No heartbeat from isolated workflow ${instanceId} for ${this.heartbeatTimeoutMs}ms`);
1441
+ await this.handleIsolatedTimeout(instanceId, pid);
1442
+ }, this.heartbeatTimeoutMs);
1343
1443
  }
1344
1444
 
1345
- private async failWorkflow(instanceId: string, error: string): Promise<void> {
1346
- const instance = await this.adapter.getInstance(instanceId);
1347
- if (!instance) return;
1445
+ /**
1446
+ * Handle timeout for isolated workflow (workflow timeout or heartbeat timeout)
1447
+ */
1448
+ private async handleIsolatedTimeout(instanceId: string, pid: number): Promise<void> {
1449
+ const info = this.isolatedProcesses.get(instanceId);
1450
+ if (!info) return;
1348
1451
 
1349
- // Clear timeout
1350
- const runInfo = this.running.get(instanceId);
1351
- if (runInfo?.timeout) {
1352
- clearTimeout(runInfo.timeout);
1452
+ // Kill the process
1453
+ try {
1454
+ process.kill(pid, "SIGKILL");
1455
+ } catch {
1456
+ // Process might already be dead
1353
1457
  }
1354
- this.running.delete(instanceId);
1355
1458
 
1459
+ // Clean up
1460
+ if (info.timeout) clearTimeout(info.timeout);
1461
+ if (info.heartbeatTimeout) clearTimeout(info.heartbeatTimeout);
1462
+ this.isolatedProcesses.delete(instanceId);
1463
+ await this.getSocketServer().closeSocket(instanceId);
1464
+
1465
+ // Fail the workflow
1356
1466
  await this.adapter.updateInstance(instanceId, {
1357
1467
  status: "failed",
1358
- error,
1468
+ error: "Workflow timed out",
1359
1469
  completedAt: new Date(),
1360
1470
  });
1361
-
1362
1471
  await this.emitEvent("workflow.failed", {
1363
1472
  instanceId,
1364
- workflowName: instance.workflowName,
1365
- error,
1473
+ error: "Workflow timed out",
1366
1474
  });
1367
-
1368
- // Broadcast via SSE
1369
1475
  if (this.sse) {
1370
- this.sse.broadcast(`workflow:${instanceId}`, "failed", { error });
1476
+ this.sse.broadcast(`workflow:${instanceId}`, "failed", { error: "Workflow timed out" });
1477
+ this.sse.broadcast("workflows:all", "workflow.failed", {
1478
+ instanceId,
1479
+ error: "Workflow timed out",
1480
+ });
1371
1481
  }
1372
1482
  }
1373
1483
 
1374
1484
  private async emitEvent(event: string, data: any): Promise<void> {
1375
- if (this.events) {
1376
- await this.events.emit(event, data);
1485
+ if (this.eventsService) {
1486
+ await this.eventsService.emit(event, data);
1377
1487
  }
1378
1488
  }
1379
1489
  }