@donkeylabs/server 2.0.18 → 2.0.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/docs/caching-strategies.md +677 -0
- package/docs/dev-experience.md +656 -0
- package/docs/hot-reload-limitations.md +166 -0
- package/docs/load-testing.md +974 -0
- package/docs/plugin-registry-design.md +1064 -0
- package/docs/production.md +1229 -0
- package/docs/workflows.md +90 -3
- package/package.json +18 -2
- package/src/admin/routes.ts +153 -0
- package/src/core/cron.ts +184 -15
- package/src/core/index.ts +25 -0
- package/src/core/job-adapter-kysely.ts +176 -73
- package/src/core/job-adapter-sqlite.ts +10 -0
- package/src/core/jobs.ts +112 -17
- package/src/core/migrations/workflows/002_add_metadata_column.ts +28 -0
- package/src/core/process-adapter-kysely.ts +62 -21
- package/src/core/storage-adapter-local.test.ts +199 -0
- package/src/core/storage.test.ts +197 -0
- package/src/core/workflow-adapter-kysely.ts +66 -19
- package/src/core/workflow-executor.ts +469 -0
- package/src/core/workflow-proxy.ts +238 -0
- package/src/core/workflow-socket.ts +447 -0
- package/src/core/workflows.test.ts +415 -0
- package/src/core/workflows.ts +782 -9
- package/src/core.ts +17 -6
- package/src/index.ts +14 -0
- package/src/server.ts +40 -26
- package/src/testing/database.test.ts +263 -0
- package/src/testing/database.ts +173 -0
- package/src/testing/e2e.test.ts +189 -0
- package/src/testing/e2e.ts +272 -0
- package/src/testing/index.ts +18 -0
package/src/core/workflows.ts
CHANGED
|
@@ -6,12 +6,22 @@
|
|
|
6
6
|
// - parallel: Run multiple branches concurrently
|
|
7
7
|
// - choice: Conditional branching
|
|
8
8
|
// - pass: Transform data / no-op
|
|
9
|
+
// - isolated: Execute in subprocess to prevent event loop blocking (default)
|
|
9
10
|
|
|
10
11
|
import type { Events } from "./events";
|
|
11
12
|
import type { Jobs } from "./jobs";
|
|
12
13
|
import type { SSE } from "./sse";
|
|
13
14
|
import type { z } from "zod";
|
|
14
15
|
import type { CoreServices } from "../core";
|
|
16
|
+
import { dirname, join } from "node:path";
|
|
17
|
+
import { fileURLToPath } from "node:url";
|
|
18
|
+
import {
|
|
19
|
+
createWorkflowSocketServer,
|
|
20
|
+
type WorkflowSocketServer,
|
|
21
|
+
type WorkflowEvent,
|
|
22
|
+
type ProxyRequest,
|
|
23
|
+
} from "./workflow-socket";
|
|
24
|
+
import { isProcessAlive } from "./external-jobs";
|
|
15
25
|
|
|
16
26
|
// Type helper for Zod schema inference
|
|
17
27
|
type ZodSchema = z.ZodTypeAny;
|
|
@@ -126,6 +136,13 @@ export interface WorkflowDefinition {
|
|
|
126
136
|
timeout?: number;
|
|
127
137
|
/** Default retry config for all steps */
|
|
128
138
|
defaultRetry?: RetryConfig;
|
|
139
|
+
/**
|
|
140
|
+
* Whether to execute this workflow in an isolated subprocess.
|
|
141
|
+
* Default: true (isolated by default to prevent blocking the event loop)
|
|
142
|
+
*
|
|
143
|
+
* Set to false for lightweight workflows that benefit from inline execution.
|
|
144
|
+
*/
|
|
145
|
+
isolated?: boolean;
|
|
129
146
|
}
|
|
130
147
|
|
|
131
148
|
// ============================================
|
|
@@ -176,6 +193,8 @@ export interface WorkflowInstance {
|
|
|
176
193
|
parentId?: string;
|
|
177
194
|
/** Branch name if this is a branch instance */
|
|
178
195
|
branchName?: string;
|
|
196
|
+
/** Custom metadata that persists across steps (JSON-serializable) */
|
|
197
|
+
metadata?: Record<string, any>;
|
|
179
198
|
}
|
|
180
199
|
|
|
181
200
|
// ============================================
|
|
@@ -195,6 +214,33 @@ export interface WorkflowContext {
|
|
|
195
214
|
getStepResult<T = any>(stepName: string): T | undefined;
|
|
196
215
|
/** Core services (logger, events, cache, etc.) */
|
|
197
216
|
core: CoreServices;
|
|
217
|
+
/** Plugin services - available for business logic in workflow handlers */
|
|
218
|
+
plugins: Record<string, any>;
|
|
219
|
+
/**
|
|
220
|
+
* Custom metadata that persists across steps (read-only snapshot).
|
|
221
|
+
* Use setMetadata() to update values.
|
|
222
|
+
*/
|
|
223
|
+
metadata: Record<string, any>;
|
|
224
|
+
/**
|
|
225
|
+
* Set a metadata value that persists across workflow steps.
|
|
226
|
+
* Accepts any JSON-serializable value (objects, arrays, primitives).
|
|
227
|
+
*
|
|
228
|
+
* @example
|
|
229
|
+
* await ctx.setMetadata('orderContext', {
|
|
230
|
+
* correlationId: 'abc-123',
|
|
231
|
+
* customer: { id: 'cust_1', tier: 'premium' },
|
|
232
|
+
* flags: { expedited: true }
|
|
233
|
+
* });
|
|
234
|
+
*/
|
|
235
|
+
setMetadata(key: string, value: any): Promise<void>;
|
|
236
|
+
/**
|
|
237
|
+
* Get a metadata value with type safety.
|
|
238
|
+
*
|
|
239
|
+
* @example
|
|
240
|
+
* interface OrderContext { correlationId: string; customer: { id: string } }
|
|
241
|
+
* const orderCtx = ctx.getMetadata<OrderContext>('orderContext');
|
|
242
|
+
*/
|
|
243
|
+
getMetadata<T = any>(key: string): T | undefined;
|
|
198
244
|
}
|
|
199
245
|
|
|
200
246
|
// ============================================
|
|
@@ -305,11 +351,29 @@ export class WorkflowBuilder {
|
|
|
305
351
|
private _timeout?: number;
|
|
306
352
|
private _defaultRetry?: RetryConfig;
|
|
307
353
|
private _lastStep?: string;
|
|
354
|
+
private _isolated = true; // Default to isolated execution
|
|
308
355
|
|
|
309
356
|
constructor(name: string) {
|
|
310
357
|
this._name = name;
|
|
311
358
|
}
|
|
312
359
|
|
|
360
|
+
/**
|
|
361
|
+
* Set whether to execute this workflow in an isolated subprocess.
|
|
362
|
+
* Default: true (isolated by default to prevent blocking the event loop)
|
|
363
|
+
*
|
|
364
|
+
* @param enabled - Set to false for lightweight workflows that benefit from inline execution
|
|
365
|
+
* @example
|
|
366
|
+
* // Heavy workflow - uses default isolation (no call needed)
|
|
367
|
+
* workflow("data-ingestion").task("process", { ... }).build();
|
|
368
|
+
*
|
|
369
|
+
* // Lightweight workflow - opt out of isolation
|
|
370
|
+
* workflow("quick-validation").isolated(false).task("validate", { ... }).build();
|
|
371
|
+
*/
|
|
372
|
+
isolated(enabled: boolean = true): this {
|
|
373
|
+
this._isolated = enabled;
|
|
374
|
+
return this;
|
|
375
|
+
}
|
|
376
|
+
|
|
313
377
|
/** Set the starting step explicitly */
|
|
314
378
|
startAt(stepName: string): this {
|
|
315
379
|
this._startAt = stepName;
|
|
@@ -510,6 +574,7 @@ export class WorkflowBuilder {
|
|
|
510
574
|
startAt: this._startAt,
|
|
511
575
|
timeout: this._timeout,
|
|
512
576
|
defaultRetry: this._defaultRetry,
|
|
577
|
+
isolated: this._isolated,
|
|
513
578
|
};
|
|
514
579
|
}
|
|
515
580
|
}
|
|
@@ -537,11 +602,36 @@ export interface WorkflowsConfig {
|
|
|
537
602
|
pollInterval?: number;
|
|
538
603
|
/** Core services to pass to step handlers */
|
|
539
604
|
core?: CoreServices;
|
|
605
|
+
/** Directory for Unix sockets (default: /tmp/donkeylabs-workflows) */
|
|
606
|
+
socketDir?: string;
|
|
607
|
+
/** TCP port range for Windows fallback (default: [49152, 65535]) */
|
|
608
|
+
tcpPortRange?: [number, number];
|
|
609
|
+
/** Database file path for isolated workflows; if unset, setCore() tries to read it from core.db, and without it execution falls back to inline */
|
|
610
|
+
dbPath?: string;
|
|
611
|
+
/** Heartbeat timeout in ms (default: 60000) */
|
|
612
|
+
heartbeatTimeout?: number;
|
|
613
|
+
}
|
|
614
|
+
|
|
615
|
+
/** Options for registering a workflow */
|
|
616
|
+
export interface WorkflowRegisterOptions {
|
|
617
|
+
/**
|
|
618
|
+
* Module path for isolated workflows.
|
|
619
|
+
* Required for subprocess execution when workflow.isolated !== false; without it, registration logs a warning and the workflow runs inline.
|
|
620
|
+
* Use `import.meta.url` to get the current module's path.
|
|
621
|
+
*
|
|
622
|
+
* @example
|
|
623
|
+
* workflows.register(myWorkflow, { modulePath: import.meta.url });
|
|
624
|
+
*/
|
|
625
|
+
modulePath?: string;
|
|
540
626
|
}
|
|
541
627
|
|
|
542
628
|
export interface Workflows {
|
|
543
|
-
/**
|
|
544
|
-
|
|
629
|
+
/**
|
|
630
|
+
* Register a workflow definition.
|
|
631
|
+
* @param definition - The workflow definition to register
|
|
632
|
+
* @param options - Registration options (modulePath required for isolated workflows)
|
|
633
|
+
*/
|
|
634
|
+
register(definition: WorkflowDefinition, options?: WorkflowRegisterOptions): void;
|
|
545
635
|
/** Start a new workflow instance */
|
|
546
636
|
start<T = any>(workflowName: string, input: T): Promise<string>;
|
|
547
637
|
/** Get a workflow instance by ID */
|
|
@@ -558,22 +648,43 @@ export interface Workflows {
|
|
|
558
648
|
stop(): Promise<void>;
|
|
559
649
|
/** Set core services (called after initialization to resolve circular dependency) */
|
|
560
650
|
setCore(core: CoreServices): void;
|
|
651
|
+
/** Set plugin services (called after plugins are initialized) */
|
|
652
|
+
setPlugins(plugins: Record<string, any>): void;
|
|
653
|
+
/** Update metadata for a workflow instance (used by isolated workflows) */
|
|
654
|
+
updateMetadata(instanceId: string, key: string, value: any): Promise<void>;
|
|
561
655
|
}
|
|
562
656
|
|
|
563
657
|
// ============================================
|
|
564
658
|
// Workflow Service Implementation
|
|
565
659
|
// ============================================
|
|
566
660
|
|
|
661
|
+
interface IsolatedProcessInfo {
|
|
662
|
+
pid: number;
|
|
663
|
+
timeout?: ReturnType<typeof setTimeout>;
|
|
664
|
+
heartbeatTimeout?: ReturnType<typeof setTimeout>;
|
|
665
|
+
lastHeartbeat: number;
|
|
666
|
+
}
|
|
667
|
+
|
|
567
668
|
class WorkflowsImpl implements Workflows {
|
|
568
669
|
private adapter: WorkflowAdapter;
|
|
569
670
|
private events?: Events;
|
|
570
671
|
private jobs?: Jobs;
|
|
571
672
|
private sse?: SSE;
|
|
572
673
|
private core?: CoreServices;
|
|
674
|
+
private plugins: Record<string, any> = {};
|
|
573
675
|
private definitions = new Map<string, WorkflowDefinition>();
|
|
574
676
|
private running = new Map<string, { timeout?: ReturnType<typeof setTimeout> }>();
|
|
575
677
|
private pollInterval: number;
|
|
576
678
|
|
|
679
|
+
// Isolated execution state
|
|
680
|
+
private socketServer?: WorkflowSocketServer;
|
|
681
|
+
private socketDir: string;
|
|
682
|
+
private tcpPortRange: [number, number];
|
|
683
|
+
private dbPath?: string;
|
|
684
|
+
private heartbeatTimeoutMs: number;
|
|
685
|
+
private workflowModulePaths = new Map<string, string>();
|
|
686
|
+
private isolatedProcesses = new Map<string, IsolatedProcessInfo>();
|
|
687
|
+
|
|
577
688
|
constructor(config: WorkflowsConfig = {}) {
|
|
578
689
|
this.adapter = config.adapter ?? new MemoryWorkflowAdapter();
|
|
579
690
|
this.events = config.events;
|
|
@@ -581,16 +692,97 @@ class WorkflowsImpl implements Workflows {
|
|
|
581
692
|
this.sse = config.sse;
|
|
582
693
|
this.core = config.core;
|
|
583
694
|
this.pollInterval = config.pollInterval ?? 1000;
|
|
695
|
+
|
|
696
|
+
// Isolated execution config
|
|
697
|
+
this.socketDir = config.socketDir ?? "/tmp/donkeylabs-workflows";
|
|
698
|
+
this.tcpPortRange = config.tcpPortRange ?? [49152, 65535];
|
|
699
|
+
this.dbPath = config.dbPath;
|
|
700
|
+
this.heartbeatTimeoutMs = config.heartbeatTimeout ?? 60000;
|
|
701
|
+
}
|
|
702
|
+
|
|
703
|
+
private getSocketServer(): WorkflowSocketServer {
|
|
704
|
+
if (!this.socketServer) {
|
|
705
|
+
this.socketServer = createWorkflowSocketServer(
|
|
706
|
+
{
|
|
707
|
+
socketDir: this.socketDir,
|
|
708
|
+
tcpPortRange: this.tcpPortRange,
|
|
709
|
+
},
|
|
710
|
+
{
|
|
711
|
+
onEvent: (event) => this.handleIsolatedEvent(event),
|
|
712
|
+
onProxyCall: (request) => this.handleProxyCall(request),
|
|
713
|
+
onConnect: (instanceId) => {
|
|
714
|
+
console.log(`[Workflows] Isolated workflow ${instanceId} connected`);
|
|
715
|
+
},
|
|
716
|
+
onDisconnect: (instanceId) => {
|
|
717
|
+
console.log(`[Workflows] Isolated workflow ${instanceId} disconnected`);
|
|
718
|
+
},
|
|
719
|
+
onError: (error, instanceId) => {
|
|
720
|
+
console.error(`[Workflows] Socket error for ${instanceId}:`, error);
|
|
721
|
+
},
|
|
722
|
+
}
|
|
723
|
+
);
|
|
724
|
+
}
|
|
725
|
+
return this.socketServer;
|
|
584
726
|
}
|
|
585
727
|
|
|
586
728
|
setCore(core: CoreServices): void {
|
|
587
729
|
this.core = core;
|
|
730
|
+
// Extract DB path if using Kysely adapter (for isolated workflows)
|
|
731
|
+
if (!this.dbPath && (core.db as any)?.getExecutor) {
|
|
732
|
+
// Try to get the database path from the Kysely instance
|
|
733
|
+
// This is a bit hacky but necessary for isolated workflows
|
|
734
|
+
try {
|
|
735
|
+
const executor = (core.db as any).getExecutor();
|
|
736
|
+
const adapter = executor?.adapter;
|
|
737
|
+
if (adapter?.db?.filename) {
|
|
738
|
+
this.dbPath = adapter.db.filename;
|
|
739
|
+
}
|
|
740
|
+
} catch {
|
|
741
|
+
// Ignore - dbPath might be set manually
|
|
742
|
+
}
|
|
743
|
+
}
|
|
744
|
+
}
|
|
745
|
+
|
|
746
|
+
setPlugins(plugins: Record<string, any>): void {
|
|
747
|
+
this.plugins = plugins;
|
|
748
|
+
}
|
|
749
|
+
|
|
750
|
+
async updateMetadata(instanceId: string, key: string, value: any): Promise<void> {
|
|
751
|
+
const instance = await this.adapter.getInstance(instanceId);
|
|
752
|
+
if (!instance) return;
|
|
753
|
+
|
|
754
|
+
const metadata = { ...(instance.metadata || {}), [key]: value };
|
|
755
|
+
await this.adapter.updateInstance(instanceId, { metadata });
|
|
588
756
|
}
|
|
589
757
|
|
|
590
|
-
register(definition: WorkflowDefinition): void {
|
|
758
|
+
register(definition: WorkflowDefinition, options?: WorkflowRegisterOptions): void {
|
|
591
759
|
if (this.definitions.has(definition.name)) {
|
|
592
760
|
throw new Error(`Workflow "${definition.name}" is already registered`);
|
|
593
761
|
}
|
|
762
|
+
|
|
763
|
+
// Validate isolated workflows don't use unsupported step types
|
|
764
|
+
if (definition.isolated !== false) {
|
|
765
|
+
for (const [stepName, step] of definition.steps) {
|
|
766
|
+
if (step.type === "choice" || step.type === "parallel") {
|
|
767
|
+
throw new Error(
|
|
768
|
+
`Workflow "${definition.name}" uses ${step.type} step "${stepName}" ` +
|
|
769
|
+
`which is not supported in isolated mode. Use .isolated(false) to run inline.`
|
|
770
|
+
);
|
|
771
|
+
}
|
|
772
|
+
}
|
|
773
|
+
}
|
|
774
|
+
|
|
775
|
+
// Store module path for isolated workflows
|
|
776
|
+
if (options?.modulePath) {
|
|
777
|
+
this.workflowModulePaths.set(definition.name, options.modulePath);
|
|
778
|
+
} else if (definition.isolated !== false) {
|
|
779
|
+
// Warn if isolated workflow has no module path
|
|
780
|
+
console.warn(
|
|
781
|
+
`[Workflows] Workflow "${definition.name}" is isolated but no modulePath provided. ` +
|
|
782
|
+
`Use: workflows.register(myWorkflow, { modulePath: import.meta.url })`
|
|
783
|
+
);
|
|
784
|
+
}
|
|
785
|
+
|
|
594
786
|
this.definitions.set(definition.name, definition);
|
|
595
787
|
}
|
|
596
788
|
|
|
@@ -616,8 +808,35 @@ class WorkflowsImpl implements Workflows {
|
|
|
616
808
|
input,
|
|
617
809
|
});
|
|
618
810
|
|
|
619
|
-
//
|
|
620
|
-
this.
|
|
811
|
+
// SSE broadcast for real-time monitoring
|
|
812
|
+
if (this.sse) {
|
|
813
|
+
this.sse.broadcast(`workflow:${instance.id}`, "started", {
|
|
814
|
+
workflowName,
|
|
815
|
+
input,
|
|
816
|
+
});
|
|
817
|
+
this.sse.broadcast("workflows:all", "workflow.started", {
|
|
818
|
+
instanceId: instance.id,
|
|
819
|
+
workflowName,
|
|
820
|
+
input,
|
|
821
|
+
});
|
|
822
|
+
}
|
|
823
|
+
|
|
824
|
+
// Start execution: isolated only when definition.isolated !== false and both a modulePath and dbPath are available; otherwise inline
|
|
825
|
+
const isIsolated = definition.isolated !== false;
|
|
826
|
+
const modulePath = this.workflowModulePaths.get(workflowName);
|
|
827
|
+
|
|
828
|
+
if (isIsolated && modulePath && this.dbPath) {
|
|
829
|
+
// Execute in isolated subprocess
|
|
830
|
+
this.executeIsolatedWorkflow(instance.id, definition, input, modulePath);
|
|
831
|
+
} else {
|
|
832
|
+
// Execute inline (existing behavior)
|
|
833
|
+
if (isIsolated && !modulePath) {
|
|
834
|
+
console.warn(
|
|
835
|
+
`[Workflows] Workflow "${workflowName}" falling back to inline execution (no modulePath)`
|
|
836
|
+
);
|
|
837
|
+
}
|
|
838
|
+
this.executeWorkflow(instance.id, definition);
|
|
839
|
+
}
|
|
621
840
|
|
|
622
841
|
return instance.id;
|
|
623
842
|
}
|
|
@@ -632,7 +851,21 @@ class WorkflowsImpl implements Workflows {
|
|
|
632
851
|
return false;
|
|
633
852
|
}
|
|
634
853
|
|
|
635
|
-
//
|
|
854
|
+
// Kill isolated process if running
|
|
855
|
+
const isolatedInfo = this.isolatedProcesses.get(instanceId);
|
|
856
|
+
if (isolatedInfo) {
|
|
857
|
+
try {
|
|
858
|
+
process.kill(isolatedInfo.pid, "SIGTERM");
|
|
859
|
+
} catch {
|
|
860
|
+
// Process might already be dead
|
|
861
|
+
}
|
|
862
|
+
if (isolatedInfo.timeout) clearTimeout(isolatedInfo.timeout);
|
|
863
|
+
if (isolatedInfo.heartbeatTimeout) clearTimeout(isolatedInfo.heartbeatTimeout);
|
|
864
|
+
this.isolatedProcesses.delete(instanceId);
|
|
865
|
+
await this.getSocketServer().closeSocket(instanceId);
|
|
866
|
+
}
|
|
867
|
+
|
|
868
|
+
// Clear inline timeout
|
|
636
869
|
const runInfo = this.running.get(instanceId);
|
|
637
870
|
if (runInfo?.timeout) {
|
|
638
871
|
clearTimeout(runInfo.timeout);
|
|
@@ -677,18 +910,50 @@ class WorkflowsImpl implements Workflows {
|
|
|
677
910
|
}
|
|
678
911
|
|
|
679
912
|
console.log(`[Workflows] Resuming workflow instance ${instance.id}`);
|
|
680
|
-
|
|
913
|
+
|
|
914
|
+
// Check isolation mode and call appropriate method
|
|
915
|
+
const isIsolated = definition.isolated !== false;
|
|
916
|
+
const modulePath = this.workflowModulePaths.get(instance.workflowName);
|
|
917
|
+
|
|
918
|
+
if (isIsolated && modulePath && this.dbPath) {
|
|
919
|
+
this.executeIsolatedWorkflow(instance.id, definition, instance.input, modulePath);
|
|
920
|
+
} else {
|
|
921
|
+
this.executeWorkflow(instance.id, definition);
|
|
922
|
+
}
|
|
681
923
|
}
|
|
682
924
|
}
|
|
683
925
|
|
|
684
926
|
async stop(): Promise<void> {
|
|
685
|
-
//
|
|
927
|
+
// Kill all isolated processes
|
|
928
|
+
for (const [instanceId, info] of this.isolatedProcesses) {
|
|
929
|
+
try {
|
|
930
|
+
process.kill(info.pid, "SIGTERM");
|
|
931
|
+
} catch {
|
|
932
|
+
// Process might already be dead
|
|
933
|
+
}
|
|
934
|
+
if (info.timeout) clearTimeout(info.timeout);
|
|
935
|
+
if (info.heartbeatTimeout) clearTimeout(info.heartbeatTimeout);
|
|
936
|
+
}
|
|
937
|
+
this.isolatedProcesses.clear();
|
|
938
|
+
|
|
939
|
+
// Shutdown socket server
|
|
940
|
+
if (this.socketServer) {
|
|
941
|
+
await this.socketServer.shutdown();
|
|
942
|
+
this.socketServer = undefined;
|
|
943
|
+
}
|
|
944
|
+
|
|
945
|
+
// Clear all inline timeouts
|
|
686
946
|
for (const [instanceId, runInfo] of this.running) {
|
|
687
947
|
if (runInfo.timeout) {
|
|
688
948
|
clearTimeout(runInfo.timeout);
|
|
689
949
|
}
|
|
690
950
|
}
|
|
691
951
|
this.running.clear();
|
|
952
|
+
|
|
953
|
+
// Stop adapter (cleanup timers and prevent further DB access)
|
|
954
|
+
if (this.adapter && typeof (this.adapter as any).stop === "function") {
|
|
955
|
+
(this.adapter as any).stop();
|
|
956
|
+
}
|
|
692
957
|
}
|
|
693
958
|
|
|
694
959
|
// ============================================
|
|
@@ -754,6 +1019,16 @@ class WorkflowsImpl implements Workflows {
|
|
|
754
1019
|
stepType: step.type,
|
|
755
1020
|
});
|
|
756
1021
|
|
|
1022
|
+
// Broadcast via SSE
|
|
1023
|
+
if (this.sse) {
|
|
1024
|
+
this.sse.broadcast(`workflow:${instanceId}`, "step.started", { stepName });
|
|
1025
|
+
this.sse.broadcast("workflows:all", "workflow.step.started", {
|
|
1026
|
+
instanceId,
|
|
1027
|
+
workflowName: instance.workflowName,
|
|
1028
|
+
stepName,
|
|
1029
|
+
});
|
|
1030
|
+
}
|
|
1031
|
+
|
|
757
1032
|
// Update step result as running
|
|
758
1033
|
const stepResult: StepResult = {
|
|
759
1034
|
stepName,
|
|
@@ -1141,6 +1416,9 @@ class WorkflowsImpl implements Workflows {
|
|
|
1141
1416
|
}
|
|
1142
1417
|
}
|
|
1143
1418
|
|
|
1419
|
+
// Working copy of instance metadata; setMetadata mutates this same object, so ctx.metadata reflects updates made through it
|
|
1420
|
+
const metadata = { ...(instance.metadata ?? {}) };
|
|
1421
|
+
|
|
1144
1422
|
return {
|
|
1145
1423
|
input: instance.input,
|
|
1146
1424
|
steps,
|
|
@@ -1150,6 +1428,19 @@ class WorkflowsImpl implements Workflows {
|
|
|
1150
1428
|
return steps[stepName] as T | undefined;
|
|
1151
1429
|
},
|
|
1152
1430
|
core: this.core!,
|
|
1431
|
+
plugins: this.plugins,
|
|
1432
|
+
metadata,
|
|
1433
|
+
setMetadata: async (key: string, value: any): Promise<void> => {
|
|
1434
|
+
// Update local snapshot
|
|
1435
|
+
metadata[key] = value;
|
|
1436
|
+
// Persist to database
|
|
1437
|
+
await this.adapter.updateInstance(instance.id, {
|
|
1438
|
+
metadata: { ...metadata },
|
|
1439
|
+
});
|
|
1440
|
+
},
|
|
1441
|
+
getMetadata: <T = any>(key: string): T | undefined => {
|
|
1442
|
+
return metadata[key] as T | undefined;
|
|
1443
|
+
},
|
|
1153
1444
|
};
|
|
1154
1445
|
}
|
|
1155
1446
|
|
|
@@ -1163,6 +1454,12 @@ class WorkflowsImpl implements Workflows {
|
|
|
1163
1454
|
const instance = await this.adapter.getInstance(instanceId);
|
|
1164
1455
|
if (!instance) return;
|
|
1165
1456
|
|
|
1457
|
+
// Check if workflow is still running (not cancelled/failed/timed out)
|
|
1458
|
+
if (instance.status !== "running") {
|
|
1459
|
+
console.log(`[Workflows] Ignoring step completion for ${instanceId}, status is ${instance.status}`);
|
|
1460
|
+
return;
|
|
1461
|
+
}
|
|
1462
|
+
|
|
1166
1463
|
// Update step result
|
|
1167
1464
|
const stepResult = instance.stepResults[stepName] ?? {
|
|
1168
1465
|
stepName,
|
|
@@ -1185,6 +1482,19 @@ class WorkflowsImpl implements Workflows {
|
|
|
1185
1482
|
output,
|
|
1186
1483
|
});
|
|
1187
1484
|
|
|
1485
|
+
// Broadcast step completed via SSE
|
|
1486
|
+
if (this.sse) {
|
|
1487
|
+
this.sse.broadcast(`workflow:${instanceId}`, "step.completed", {
|
|
1488
|
+
stepName,
|
|
1489
|
+
output,
|
|
1490
|
+
});
|
|
1491
|
+
this.sse.broadcast("workflows:all", "workflow.step.completed", {
|
|
1492
|
+
instanceId,
|
|
1493
|
+
workflowName: instance.workflowName,
|
|
1494
|
+
stepName,
|
|
1495
|
+
});
|
|
1496
|
+
}
|
|
1497
|
+
|
|
1188
1498
|
// Calculate and emit progress
|
|
1189
1499
|
const totalSteps = definition.steps.size;
|
|
1190
1500
|
const completedSteps = Object.values(instance.stepResults).filter(
|
|
@@ -1201,7 +1511,7 @@ class WorkflowsImpl implements Workflows {
|
|
|
1201
1511
|
totalSteps,
|
|
1202
1512
|
});
|
|
1203
1513
|
|
|
1204
|
-
// Broadcast via SSE
|
|
1514
|
+
// Broadcast progress via SSE
|
|
1205
1515
|
if (this.sse) {
|
|
1206
1516
|
this.sse.broadcast(`workflow:${instanceId}`, "progress", {
|
|
1207
1517
|
progress,
|
|
@@ -1209,6 +1519,12 @@ class WorkflowsImpl implements Workflows {
|
|
|
1209
1519
|
completedSteps,
|
|
1210
1520
|
totalSteps,
|
|
1211
1521
|
});
|
|
1522
|
+
this.sse.broadcast("workflows:all", "workflow.progress", {
|
|
1523
|
+
instanceId,
|
|
1524
|
+
workflowName: instance.workflowName,
|
|
1525
|
+
progress,
|
|
1526
|
+
currentStep: stepName,
|
|
1527
|
+
});
|
|
1212
1528
|
}
|
|
1213
1529
|
|
|
1214
1530
|
// Move to next step or complete
|
|
@@ -1298,6 +1614,20 @@ class WorkflowsImpl implements Workflows {
|
|
|
1298
1614
|
attempts: stepResult.attempts,
|
|
1299
1615
|
});
|
|
1300
1616
|
|
|
1617
|
+
// Broadcast step failed via SSE
|
|
1618
|
+
if (this.sse) {
|
|
1619
|
+
this.sse.broadcast(`workflow:${instanceId}`, "step.failed", {
|
|
1620
|
+
stepName,
|
|
1621
|
+
error,
|
|
1622
|
+
});
|
|
1623
|
+
this.sse.broadcast("workflows:all", "workflow.step.failed", {
|
|
1624
|
+
instanceId,
|
|
1625
|
+
workflowName: instance.workflowName,
|
|
1626
|
+
stepName,
|
|
1627
|
+
error,
|
|
1628
|
+
});
|
|
1629
|
+
}
|
|
1630
|
+
|
|
1301
1631
|
// Fail the workflow
|
|
1302
1632
|
await this.failWorkflow(instanceId, `Step "${stepName}" failed: ${error}`);
|
|
1303
1633
|
}
|
|
@@ -1306,6 +1636,12 @@ class WorkflowsImpl implements Workflows {
|
|
|
1306
1636
|
const instance = await this.adapter.getInstance(instanceId);
|
|
1307
1637
|
if (!instance) return;
|
|
1308
1638
|
|
|
1639
|
+
// Check if workflow is still running (not cancelled/failed/timed out)
|
|
1640
|
+
if (instance.status !== "running") {
|
|
1641
|
+
console.log(`[Workflows] Ignoring workflow completion for ${instanceId}, status is ${instance.status}`);
|
|
1642
|
+
return;
|
|
1643
|
+
}
|
|
1644
|
+
|
|
1309
1645
|
// Clear timeout
|
|
1310
1646
|
const runInfo = this.running.get(instanceId);
|
|
1311
1647
|
if (runInfo?.timeout) {
|
|
@@ -1329,6 +1665,10 @@ class WorkflowsImpl implements Workflows {
|
|
|
1329
1665
|
// Broadcast via SSE
|
|
1330
1666
|
if (this.sse) {
|
|
1331
1667
|
this.sse.broadcast(`workflow:${instanceId}`, "completed", { output });
|
|
1668
|
+
this.sse.broadcast("workflows:all", "workflow.completed", {
|
|
1669
|
+
instanceId,
|
|
1670
|
+
workflowName: instance.workflowName,
|
|
1671
|
+
});
|
|
1332
1672
|
}
|
|
1333
1673
|
}
|
|
1334
1674
|
|
|
@@ -1358,6 +1698,11 @@ class WorkflowsImpl implements Workflows {
|
|
|
1358
1698
|
// Broadcast via SSE
|
|
1359
1699
|
if (this.sse) {
|
|
1360
1700
|
this.sse.broadcast(`workflow:${instanceId}`, "failed", { error });
|
|
1701
|
+
this.sse.broadcast("workflows:all", "workflow.failed", {
|
|
1702
|
+
instanceId,
|
|
1703
|
+
workflowName: instance.workflowName,
|
|
1704
|
+
error,
|
|
1705
|
+
});
|
|
1361
1706
|
}
|
|
1362
1707
|
}
|
|
1363
1708
|
|
|
@@ -1366,6 +1711,434 @@ class WorkflowsImpl implements Workflows {
|
|
|
1366
1711
|
await this.events.emit(event, data);
|
|
1367
1712
|
}
|
|
1368
1713
|
}
|
|
1714
|
+
|
|
1715
|
+
// ============================================
|
|
1716
|
+
// Isolated Execution Engine
|
|
1717
|
+
// ============================================
|
|
1718
|
+
|
|
1719
|
+
/**
|
|
1720
|
+
* Execute a workflow in an isolated subprocess
|
|
1721
|
+
*/
|
|
1722
|
+
private async executeIsolatedWorkflow(
|
|
1723
|
+
instanceId: string,
|
|
1724
|
+
definition: WorkflowDefinition,
|
|
1725
|
+
input: any,
|
|
1726
|
+
modulePath: string
|
|
1727
|
+
): Promise<void> {
|
|
1728
|
+
const socketServer = this.getSocketServer();
|
|
1729
|
+
|
|
1730
|
+
// Create socket for this workflow instance
|
|
1731
|
+
const { socketPath, tcpPort } = await socketServer.createSocket(instanceId);
|
|
1732
|
+
|
|
1733
|
+
// Mark workflow as running
|
|
1734
|
+
await this.adapter.updateInstance(instanceId, {
|
|
1735
|
+
status: "running",
|
|
1736
|
+
startedAt: new Date(),
|
|
1737
|
+
});
|
|
1738
|
+
|
|
1739
|
+
// Get the executor path
|
|
1740
|
+
const currentDir = dirname(fileURLToPath(import.meta.url));
|
|
1741
|
+
const executorPath = join(currentDir, "workflow-executor.ts");
|
|
1742
|
+
|
|
1743
|
+
// Prepare config for the executor
|
|
1744
|
+
const config = {
|
|
1745
|
+
instanceId,
|
|
1746
|
+
workflowName: definition.name,
|
|
1747
|
+
input,
|
|
1748
|
+
socketPath,
|
|
1749
|
+
tcpPort,
|
|
1750
|
+
modulePath,
|
|
1751
|
+
dbPath: this.dbPath,
|
|
1752
|
+
};
|
|
1753
|
+
|
|
1754
|
+
// Spawn the subprocess
|
|
1755
|
+
const proc = Bun.spawn(["bun", "run", executorPath], {
|
|
1756
|
+
stdin: "pipe",
|
|
1757
|
+
stdout: "inherit",
|
|
1758
|
+
stderr: "inherit",
|
|
1759
|
+
env: {
|
|
1760
|
+
...process.env,
|
|
1761
|
+
// Ensure the subprocess can import from the same paths
|
|
1762
|
+
NODE_OPTIONS: process.env.NODE_OPTIONS ?? "",
|
|
1763
|
+
},
|
|
1764
|
+
});
|
|
1765
|
+
|
|
1766
|
+
// Send config via stdin
|
|
1767
|
+
proc.stdin.write(JSON.stringify(config));
|
|
1768
|
+
proc.stdin.end();
|
|
1769
|
+
|
|
1770
|
+
// Track the process
|
|
1771
|
+
this.isolatedProcesses.set(instanceId, {
|
|
1772
|
+
pid: proc.pid,
|
|
1773
|
+
lastHeartbeat: Date.now(),
|
|
1774
|
+
});
|
|
1775
|
+
|
|
1776
|
+
// Set up workflow timeout
|
|
1777
|
+
if (definition.timeout) {
|
|
1778
|
+
const timeoutHandle = setTimeout(async () => {
|
|
1779
|
+
await this.handleIsolatedTimeout(instanceId, proc.pid);
|
|
1780
|
+
}, definition.timeout);
|
|
1781
|
+
const info = this.isolatedProcesses.get(instanceId);
|
|
1782
|
+
if (info) info.timeout = timeoutHandle;
|
|
1783
|
+
}
|
|
1784
|
+
|
|
1785
|
+
// Set up heartbeat timeout
|
|
1786
|
+
this.resetHeartbeatTimeout(instanceId, proc.pid);
|
|
1787
|
+
|
|
1788
|
+
// Handle process exit
|
|
1789
|
+
proc.exited.then(async (exitCode) => {
|
|
1790
|
+
const info = this.isolatedProcesses.get(instanceId);
|
|
1791
|
+
if (info) {
|
|
1792
|
+
if (info.timeout) clearTimeout(info.timeout);
|
|
1793
|
+
if (info.heartbeatTimeout) clearTimeout(info.heartbeatTimeout);
|
|
1794
|
+
this.isolatedProcesses.delete(instanceId);
|
|
1795
|
+
}
|
|
1796
|
+
await socketServer.closeSocket(instanceId);
|
|
1797
|
+
|
|
1798
|
+
// Check if workflow is still running (crashed before completion)
|
|
1799
|
+
const instance = await this.adapter.getInstance(instanceId);
|
|
1800
|
+
if (instance && instance.status === "running") {
|
|
1801
|
+
console.error(`[Workflows] Isolated workflow ${instanceId} crashed with exit code ${exitCode}`);
|
|
1802
|
+
await this.failWorkflow(instanceId, `Subprocess crashed with exit code ${exitCode}`);
|
|
1803
|
+
}
|
|
1804
|
+
});
|
|
1805
|
+
}
|
|
1806
|
+
|
|
1807
|
+
/**
|
|
1808
|
+
* Handle events from isolated workflow subprocess
|
|
1809
|
+
*/
|
|
1810
|
+
private async handleIsolatedEvent(event: WorkflowEvent): Promise<void> {
|
|
1811
|
+
const { instanceId, type } = event;
|
|
1812
|
+
|
|
1813
|
+
// Reset heartbeat timeout on any event
|
|
1814
|
+
const info = this.isolatedProcesses.get(instanceId);
|
|
1815
|
+
if (info) {
|
|
1816
|
+
info.lastHeartbeat = Date.now();
|
|
1817
|
+
this.resetHeartbeatTimeout(instanceId, info.pid);
|
|
1818
|
+
}
|
|
1819
|
+
|
|
1820
|
+
switch (type) {
|
|
1821
|
+
case "started":
|
|
1822
|
+
// Already marked as running in executeIsolatedWorkflow
|
|
1823
|
+
break;
|
|
1824
|
+
|
|
1825
|
+
case "heartbeat":
|
|
1826
|
+
// Heartbeat handled above
|
|
1827
|
+
break;
|
|
1828
|
+
|
|
1829
|
+
case "step.started": {
|
|
1830
|
+
const instance = await this.adapter.getInstance(instanceId);
|
|
1831
|
+
if (!instance) break;
|
|
1832
|
+
|
|
1833
|
+
// Update current step and step results in DB
|
|
1834
|
+
const stepResult = {
|
|
1835
|
+
stepName: event.stepName!,
|
|
1836
|
+
status: "running" as const,
|
|
1837
|
+
startedAt: new Date(),
|
|
1838
|
+
attempts: (instance.stepResults[event.stepName!]?.attempts ?? 0) + 1,
|
|
1839
|
+
};
|
|
1840
|
+
await this.adapter.updateInstance(instanceId, {
|
|
1841
|
+
currentStep: event.stepName,
|
|
1842
|
+
stepResults: { ...instance.stepResults, [event.stepName!]: stepResult },
|
|
1843
|
+
});
|
|
1844
|
+
|
|
1845
|
+
await this.emitEvent("workflow.step.started", {
|
|
1846
|
+
instanceId,
|
|
1847
|
+
workflowName: instance?.workflowName,
|
|
1848
|
+
stepName: event.stepName,
|
|
1849
|
+
});
|
|
1850
|
+
// Broadcast via SSE
|
|
1851
|
+
if (this.sse) {
|
|
1852
|
+
this.sse.broadcast(`workflow:${instanceId}`, "step.started", {
|
|
1853
|
+
stepName: event.stepName,
|
|
1854
|
+
});
|
|
1855
|
+
this.sse.broadcast("workflows:all", "workflow.step.started", {
|
|
1856
|
+
instanceId,
|
|
1857
|
+
workflowName: instance?.workflowName,
|
|
1858
|
+
stepName: event.stepName,
|
|
1859
|
+
});
|
|
1860
|
+
}
|
|
1861
|
+
break;
|
|
1862
|
+
}
|
|
1863
|
+
|
|
1864
|
+
case "step.completed": {
|
|
1865
|
+
const instance = await this.adapter.getInstance(instanceId);
|
|
1866
|
+
if (!instance) break;
|
|
1867
|
+
|
|
1868
|
+
// Update step results in DB
|
|
1869
|
+
const stepResult = instance.stepResults[event.stepName!] ?? {
|
|
1870
|
+
stepName: event.stepName!,
|
|
1871
|
+
status: "pending" as const,
|
|
1872
|
+
startedAt: new Date(),
|
|
1873
|
+
attempts: 0,
|
|
1874
|
+
};
|
|
1875
|
+
stepResult.status = "completed";
|
|
1876
|
+
stepResult.output = event.output;
|
|
1877
|
+
stepResult.completedAt = new Date();
|
|
1878
|
+
|
|
1879
|
+
await this.adapter.updateInstance(instanceId, {
|
|
1880
|
+
stepResults: { ...instance.stepResults, [event.stepName!]: stepResult },
|
|
1881
|
+
currentStep: event.nextStep,
|
|
1882
|
+
});
|
|
1883
|
+
|
|
1884
|
+
await this.emitEvent("workflow.step.completed", {
|
|
1885
|
+
instanceId,
|
|
1886
|
+
workflowName: instance?.workflowName,
|
|
1887
|
+
stepName: event.stepName,
|
|
1888
|
+
output: event.output,
|
|
1889
|
+
});
|
|
1890
|
+
// Broadcast via SSE
|
|
1891
|
+
if (this.sse) {
|
|
1892
|
+
this.sse.broadcast(`workflow:${instanceId}`, "step.completed", {
|
|
1893
|
+
stepName: event.stepName,
|
|
1894
|
+
output: event.output,
|
|
1895
|
+
});
|
|
1896
|
+
this.sse.broadcast("workflows:all", "workflow.step.completed", {
|
|
1897
|
+
instanceId,
|
|
1898
|
+
workflowName: instance?.workflowName,
|
|
1899
|
+
stepName: event.stepName,
|
|
1900
|
+
output: event.output,
|
|
1901
|
+
});
|
|
1902
|
+
}
|
|
1903
|
+
break;
|
|
1904
|
+
}
|
|
1905
|
+
|
|
1906
|
+
case "step.failed": {
|
|
1907
|
+
const instance = await this.adapter.getInstance(instanceId);
|
|
1908
|
+
if (!instance) break;
|
|
1909
|
+
|
|
1910
|
+
// Update step results in DB
|
|
1911
|
+
const stepResult = instance.stepResults[event.stepName!] ?? {
|
|
1912
|
+
stepName: event.stepName!,
|
|
1913
|
+
status: "pending" as const,
|
|
1914
|
+
startedAt: new Date(),
|
|
1915
|
+
attempts: 0,
|
|
1916
|
+
};
|
|
1917
|
+
stepResult.status = "failed";
|
|
1918
|
+
stepResult.error = event.error;
|
|
1919
|
+
stepResult.completedAt = new Date();
|
|
1920
|
+
|
|
1921
|
+
await this.adapter.updateInstance(instanceId, {
|
|
1922
|
+
stepResults: { ...instance.stepResults, [event.stepName!]: stepResult },
|
|
1923
|
+
});
|
|
1924
|
+
|
|
1925
|
+
await this.emitEvent("workflow.step.failed", {
|
|
1926
|
+
instanceId,
|
|
1927
|
+
workflowName: instance?.workflowName,
|
|
1928
|
+
stepName: event.stepName,
|
|
1929
|
+
error: event.error,
|
|
1930
|
+
});
|
|
1931
|
+
// Broadcast via SSE
|
|
1932
|
+
if (this.sse) {
|
|
1933
|
+
this.sse.broadcast(`workflow:${instanceId}`, "step.failed", {
|
|
1934
|
+
stepName: event.stepName,
|
|
1935
|
+
error: event.error,
|
|
1936
|
+
});
|
|
1937
|
+
this.sse.broadcast("workflows:all", "workflow.step.failed", {
|
|
1938
|
+
instanceId,
|
|
1939
|
+
workflowName: instance?.workflowName,
|
|
1940
|
+
stepName: event.stepName,
|
|
1941
|
+
error: event.error,
|
|
1942
|
+
});
|
|
1943
|
+
}
|
|
1944
|
+
break;
|
|
1945
|
+
}
|
|
1946
|
+
|
|
1947
|
+
case "progress": {
|
|
1948
|
+
const instance = await this.adapter.getInstance(instanceId);
|
|
1949
|
+
await this.emitEvent("workflow.progress", {
|
|
1950
|
+
instanceId,
|
|
1951
|
+
workflowName: instance?.workflowName,
|
|
1952
|
+
progress: event.progress,
|
|
1953
|
+
completedSteps: event.completedSteps,
|
|
1954
|
+
totalSteps: event.totalSteps,
|
|
1955
|
+
});
|
|
1956
|
+
// Broadcast via SSE
|
|
1957
|
+
if (this.sse) {
|
|
1958
|
+
this.sse.broadcast(`workflow:${instanceId}`, "progress", {
|
|
1959
|
+
progress: event.progress,
|
|
1960
|
+
completedSteps: event.completedSteps,
|
|
1961
|
+
totalSteps: event.totalSteps,
|
|
1962
|
+
});
|
|
1963
|
+
this.sse.broadcast("workflows:all", "workflow.progress", {
|
|
1964
|
+
instanceId,
|
|
1965
|
+
workflowName: instance?.workflowName,
|
|
1966
|
+
progress: event.progress,
|
|
1967
|
+
completedSteps: event.completedSteps,
|
|
1968
|
+
totalSteps: event.totalSteps,
|
|
1969
|
+
});
|
|
1970
|
+
}
|
|
1971
|
+
break;
|
|
1972
|
+
}
|
|
1973
|
+
|
|
1974
|
+
case "completed":
|
|
1975
|
+
await this.completeWorkflowIsolated(instanceId, event.output);
|
|
1976
|
+
break;
|
|
1977
|
+
|
|
1978
|
+
case "failed":
|
|
1979
|
+
await this.failWorkflowIsolated(instanceId, event.error ?? "Unknown error");
|
|
1980
|
+
break;
|
|
1981
|
+
}
|
|
1982
|
+
}
|
|
1983
|
+
|
|
1984
|
+
/**
 * Dispatch a proxied method call issued by an isolated workflow subprocess.
 *
 * The request names a `target` ("plugin" or "core"), a `service` on that
 * target, a `method` on that service and the `args` to pass. The method is
 * invoked with the resolved service as its `this` value.
 *
 * @param request - Proxy request carrying `target`, `service`, `method` and `args`.
 * @returns The value returned by the invoked method.
 * @throws Error when the target is unknown, core services are not available,
 *   the service does not exist, or the named method is not a function.
 */
private async handleProxyCall(request: ProxyRequest): Promise<any> {
  const { target, service, method, args } = request;

  switch (target) {
    case "plugin": {
      const pluginInstance = this.plugins[service];
      if (!pluginInstance) {
        throw new Error(`Plugin "${service}" not found`);
      }

      const pluginMethod = pluginInstance[method];
      if (typeof pluginMethod !== "function") {
        throw new Error(`Method "${method}" not found on plugin "${service}"`);
      }

      return pluginMethod.apply(pluginInstance, args);
    }

    case "core": {
      if (!this.core) {
        throw new Error("Core services not available");
      }

      const resolvedService = (this.core as any)[service];
      if (!resolvedService) {
        throw new Error(`Core service "${service}" not found`);
      }

      const serviceMethod = resolvedService[method];
      if (typeof serviceMethod !== "function") {
        throw new Error(`Method "${method}" not found on core service "${service}"`);
      }

      return serviceMethod.apply(resolvedService, args);
    }

    default:
      throw new Error(`Unknown proxy target: ${target}`);
  }
}
|
|
2017
|
+
|
|
2018
|
+
/**
 * Re-arm the heartbeat watchdog for an isolated workflow.
 *
 * Clears any previously scheduled heartbeat timer for the instance and
 * schedules a new one that fires after `this.heartbeatTimeoutMs`. When it
 * fires and the subprocess is still alive, the workflow is treated as hung
 * and handed to `handleIsolatedTimeout`. Does nothing if the instance is not
 * tracked in `isolatedProcesses`.
 *
 * @param instanceId - Workflow instance whose watchdog is reset.
 * @param pid - Process id of the subprocess running that instance.
 */
private resetHeartbeatTimeout(instanceId: string, pid: number): void {
  const info = this.isolatedProcesses.get(instanceId);
  if (!info) return;

  // Clear existing timeout
  if (info.heartbeatTimeout) {
    clearTimeout(info.heartbeatTimeout);
  }

  // Set new timeout
  info.heartbeatTimeout = setTimeout(async () => {
    // Nothing awaits a timer callback, so any failure in here must be handled
    // locally; otherwise it would surface as an unhandled promise rejection.
    try {
      // Check if process is still alive
      if (!isProcessAlive(pid)) {
        return; // Process already dead, exit handler will handle it
      }

      console.error(`[Workflows] No heartbeat from isolated workflow ${instanceId} for ${this.heartbeatTimeoutMs}ms`);
      await this.handleIsolatedTimeout(instanceId, pid);
    } catch (err) {
      console.error(`[Workflows] Failed to handle heartbeat timeout for isolated workflow ${instanceId}:`, err);
    }
  }, this.heartbeatTimeoutMs);
}
|
|
2041
|
+
|
|
2042
|
+
/**
 * Handle timeout for isolated workflow (workflow timeout or heartbeat timeout).
 *
 * Sends SIGKILL to the subprocess, clears both timers, removes the instance
 * from `isolatedProcesses`, closes its socket and finally marks the workflow
 * as failed with "Workflow timed out". Does nothing if the instance is no
 * longer tracked.
 *
 * @param instanceId - Workflow instance that timed out.
 * @param pid - Process id of the subprocess to kill.
 */
private async handleIsolatedTimeout(instanceId: string, pid: number): Promise<void> {
  const info = this.isolatedProcesses.get(instanceId);
  if (!info) return;

  // Kill the process
  try {
    process.kill(pid, "SIGKILL");
  } catch {
    // Process might already be dead
  }

  // Clean up
  if (info.timeout) clearTimeout(info.timeout);
  if (info.heartbeatTimeout) clearTimeout(info.heartbeatTimeout);
  this.isolatedProcesses.delete(instanceId);

  // The tracking entry is already gone at this point, so a failure while
  // closing the socket must not prevent the workflow from being failed below;
  // nothing else would pick this instance up again.
  try {
    await this.getSocketServer().closeSocket(instanceId);
  } catch (err) {
    console.error(`[Workflows] Failed to close socket for timed-out workflow ${instanceId}:`, err);
  }

  // Fail the workflow
  await this.failWorkflow(instanceId, "Workflow timed out");
}
|
|
2065
|
+
|
|
2066
|
+
/**
 * Complete an isolated workflow (called from event handler).
 *
 * Clears the instance's timers and removes it from `isolatedProcesses`, then,
 * if the instance record still exists, persists the "completed" status with
 * its output, emits `workflow.completed` and broadcasts the completion over
 * SSE when an SSE service is configured.
 *
 * @param instanceId - Workflow instance that finished.
 * @param output - Final output reported by the subprocess, if any.
 */
private async completeWorkflowIsolated(instanceId: string, output?: any): Promise<void> {
  const instance = await this.adapter.getInstance(instanceId);

  // Clean up isolated process tracking (process should have exited).
  // This runs even when the instance record is missing, so that no timer
  // stays armed for a workflow that has already reported completion.
  const info = this.isolatedProcesses.get(instanceId);
  if (info) {
    if (info.timeout) clearTimeout(info.timeout);
    if (info.heartbeatTimeout) clearTimeout(info.heartbeatTimeout);
    this.isolatedProcesses.delete(instanceId);
  }

  if (!instance) return;

  await this.adapter.updateInstance(instanceId, {
    status: "completed",
    output,
    completedAt: new Date(),
    currentStep: undefined,
  });

  await this.emitEvent("workflow.completed", {
    instanceId,
    workflowName: instance.workflowName,
    output,
  });

  // Broadcast via SSE
  if (this.sse) {
    this.sse.broadcast(`workflow:${instanceId}`, "completed", { output });
    this.sse.broadcast("workflows:all", "workflow.completed", {
      instanceId,
      workflowName: instance.workflowName,
      output,
    });
  }
}
|
|
2104
|
+
|
|
2105
|
+
/**
 * Fail an isolated workflow (called from event handler).
 *
 * Clears the instance's timers and removes it from `isolatedProcesses`, then,
 * if the instance record still exists, persists the "failed" status with the
 * error, emits `workflow.failed` and broadcasts the failure over SSE when an
 * SSE service is configured.
 *
 * @param instanceId - Workflow instance that failed.
 * @param error - Error message reported for the failure.
 */
private async failWorkflowIsolated(instanceId: string, error: string): Promise<void> {
  const instance = await this.adapter.getInstance(instanceId);

  // Clean up isolated process tracking.
  // This runs even when the instance record is missing, so that no timer
  // stays armed for a workflow that has already reported failure.
  const info = this.isolatedProcesses.get(instanceId);
  if (info) {
    if (info.timeout) clearTimeout(info.timeout);
    if (info.heartbeatTimeout) clearTimeout(info.heartbeatTimeout);
    this.isolatedProcesses.delete(instanceId);
  }

  if (!instance) return;

  await this.adapter.updateInstance(instanceId, {
    status: "failed",
    error,
    completedAt: new Date(),
  });

  await this.emitEvent("workflow.failed", {
    instanceId,
    workflowName: instance.workflowName,
    error,
  });

  // Broadcast via SSE
  if (this.sse) {
    this.sse.broadcast(`workflow:${instanceId}`, "failed", { error });
    this.sse.broadcast("workflows:all", "workflow.failed", {
      instanceId,
      workflowName: instance.workflowName,
      error,
    });
  }
}
|
|
1369
2142
|
}
|
|
1370
2143
|
|
|
1371
2144
|
// ============================================
|