nvent 0.4.5 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/module.d.mts +1 -1
- package/dist/module.mjs +433 -175
- package/dist/runtime/adapters/base/index.d.ts +6 -0
- package/dist/runtime/adapters/base/index.js +1 -0
- package/dist/runtime/adapters/base/store-validator.d.ts +48 -0
- package/dist/runtime/adapters/base/store-validator.js +147 -0
- package/dist/runtime/adapters/builtin/file-queue.d.ts +15 -1
- package/dist/runtime/adapters/builtin/file-queue.js +70 -6
- package/dist/runtime/adapters/builtin/file-store.d.ts +4 -18
- package/dist/runtime/adapters/builtin/file-store.js +90 -109
- package/dist/runtime/adapters/builtin/memory-queue.js +4 -0
- package/dist/runtime/adapters/builtin/memory-store.d.ts +42 -31
- package/dist/runtime/adapters/builtin/memory-store.js +253 -183
- package/dist/runtime/adapters/factory.d.ts +2 -2
- package/dist/runtime/adapters/factory.js +54 -20
- package/dist/runtime/adapters/interfaces/store.d.ts +177 -113
- package/dist/runtime/config/index.d.ts +2 -2
- package/dist/runtime/config/index.js +14 -6
- package/dist/runtime/config/types.d.ts +32 -2
- package/dist/runtime/events/eventBus.d.ts +1 -1
- package/dist/runtime/events/types.d.ts +31 -2
- package/dist/runtime/events/utils/scheduleTrigger.d.ts +8 -0
- package/dist/runtime/events/utils/scheduleTrigger.js +69 -0
- package/dist/runtime/events/utils/stallDetector.d.ts +44 -3
- package/dist/runtime/events/utils/stallDetector.js +288 -89
- package/dist/runtime/events/utils/triggerRuntime.d.ts +58 -0
- package/dist/runtime/events/utils/triggerRuntime.js +212 -0
- package/dist/runtime/events/wiring/flowWiring.d.ts +11 -5
- package/dist/runtime/events/wiring/flowWiring.js +620 -92
- package/dist/runtime/events/wiring/registry.d.ts +2 -2
- package/dist/runtime/events/wiring/registry.js +8 -6
- package/dist/runtime/events/wiring/streamWiring.d.ts +15 -11
- package/dist/runtime/events/wiring/streamWiring.js +88 -11
- package/dist/runtime/events/wiring/triggerWiring.d.ts +21 -0
- package/dist/runtime/events/wiring/triggerWiring.js +412 -0
- package/dist/runtime/{server → nitro}/plugins/00.adapters.js +8 -4
- package/dist/runtime/{server → nitro}/plugins/02.workers.js +21 -3
- package/dist/runtime/nitro/plugins/03.triggers.d.ts +12 -0
- package/dist/runtime/nitro/plugins/03.triggers.js +55 -0
- package/dist/runtime/nitro/routes/webhook.await.d.ts +23 -0
- package/dist/runtime/nitro/routes/webhook.await.js +90 -0
- package/dist/runtime/nitro/routes/webhook.trigger.d.ts +69 -0
- package/dist/runtime/nitro/routes/webhook.trigger.js +64 -0
- package/dist/runtime/{utils → nitro/utils}/adapters.d.ts +6 -6
- package/dist/runtime/nitro/utils/awaitPatterns/event.d.ts +15 -0
- package/dist/runtime/nitro/utils/awaitPatterns/event.js +120 -0
- package/dist/runtime/nitro/utils/awaitPatterns/index.d.ts +28 -0
- package/dist/runtime/nitro/utils/awaitPatterns/index.js +55 -0
- package/dist/runtime/nitro/utils/awaitPatterns/schedule.d.ts +16 -0
- package/dist/runtime/nitro/utils/awaitPatterns/schedule.js +78 -0
- package/dist/runtime/nitro/utils/awaitPatterns/time.d.ts +15 -0
- package/dist/runtime/nitro/utils/awaitPatterns/time.js +67 -0
- package/dist/runtime/nitro/utils/awaitPatterns/webhook.d.ts +15 -0
- package/dist/runtime/nitro/utils/awaitPatterns/webhook.js +120 -0
- package/dist/runtime/{utils → nitro/utils}/defineFunction.d.ts +2 -2
- package/dist/runtime/{utils → nitro/utils}/defineFunction.js +3 -3
- package/dist/runtime/{utils → nitro/utils}/defineFunctionConfig.d.ts +156 -0
- package/dist/runtime/{utils → nitro/utils}/defineFunctionConfig.js +1 -0
- package/dist/runtime/nitro/utils/defineHooks.d.ts +41 -0
- package/dist/runtime/nitro/utils/defineHooks.js +6 -0
- package/dist/runtime/{utils → nitro/utils}/registerAdapter.d.ts +3 -3
- package/dist/runtime/{utils → nitro/utils}/registerAdapter.js +1 -1
- package/dist/runtime/nitro/utils/useAwait.d.ts +71 -0
- package/dist/runtime/nitro/utils/useAwait.js +139 -0
- package/dist/runtime/{utils → nitro/utils}/useEventManager.d.ts +2 -2
- package/dist/runtime/{utils → nitro/utils}/useEventManager.js +1 -1
- package/dist/runtime/nitro/utils/useFlow.d.ts +68 -0
- package/dist/runtime/nitro/utils/useFlow.js +226 -0
- package/dist/runtime/nitro/utils/useHookRegistry.d.ts +34 -0
- package/dist/runtime/nitro/utils/useHookRegistry.js +25 -0
- package/dist/runtime/nitro/utils/useRunContext.d.ts +6 -0
- package/dist/runtime/nitro/utils/useRunContext.js +102 -0
- package/dist/runtime/nitro/utils/useStreamTopics.d.ts +83 -0
- package/dist/runtime/nitro/utils/useStreamTopics.js +94 -0
- package/dist/runtime/nitro/utils/useTrigger.d.ts +150 -0
- package/dist/runtime/nitro/utils/useTrigger.js +320 -0
- package/dist/runtime/scheduler/index.d.ts +33 -0
- package/dist/runtime/scheduler/index.js +38 -0
- package/dist/runtime/scheduler/scheduler.d.ts +113 -0
- package/dist/runtime/scheduler/scheduler.js +623 -0
- package/dist/runtime/scheduler/types.d.ts +116 -0
- package/dist/runtime/scheduler/types.js +0 -0
- package/dist/runtime/worker/node/runner.d.ts +12 -2
- package/dist/runtime/worker/node/runner.js +141 -37
- package/package.json +6 -6
- package/dist/runtime/server/api/_flows/[name]/clear-history.delete.d.ts +0 -10
- package/dist/runtime/server/api/_flows/[name]/clear-history.delete.js +0 -55
- package/dist/runtime/server/api/_flows/[name]/runs/[runId]/cancel.post.d.ts +0 -2
- package/dist/runtime/server/api/_flows/[name]/runs/[runId]/cancel.post.js +0 -21
- package/dist/runtime/server/api/_flows/[name]/runs.get.d.ts +0 -17
- package/dist/runtime/server/api/_flows/[name]/runs.get.js +0 -64
- package/dist/runtime/server/api/_flows/[name]/schedule.post.d.ts +0 -2
- package/dist/runtime/server/api/_flows/[name]/schedule.post.js +0 -66
- package/dist/runtime/server/api/_flows/[name]/schedules/[id].delete.d.ts +0 -2
- package/dist/runtime/server/api/_flows/[name]/schedules/[id].delete.js +0 -47
- package/dist/runtime/server/api/_flows/[name]/schedules.get.d.ts +0 -2
- package/dist/runtime/server/api/_flows/[name]/schedules.get.js +0 -50
- package/dist/runtime/server/api/_flows/[name]/start.post.d.ts +0 -2
- package/dist/runtime/server/api/_flows/[name]/start.post.js +0 -9
- package/dist/runtime/server/api/_flows/index.get.d.ts +0 -6
- package/dist/runtime/server/api/_flows/index.get.js +0 -5
- package/dist/runtime/server/api/_flows/ws.d.ts +0 -60
- package/dist/runtime/server/api/_flows/ws.js +0 -209
- package/dist/runtime/server/api/_queues/[name]/job/[id].get.d.ts +0 -2
- package/dist/runtime/server/api/_queues/[name]/job/[id].get.js +0 -14
- package/dist/runtime/server/api/_queues/[name]/job/index.get.d.ts +0 -2
- package/dist/runtime/server/api/_queues/[name]/job/index.get.js +0 -27
- package/dist/runtime/server/api/_queues/index.get.d.ts +0 -2
- package/dist/runtime/server/api/_queues/index.get.js +0 -106
- package/dist/runtime/server/api/_queues/ws.d.ts +0 -48
- package/dist/runtime/server/api/_queues/ws.js +0 -215
- package/dist/runtime/utils/useFlowEngine.d.ts +0 -19
- package/dist/runtime/utils/useFlowEngine.js +0 -108
- package/dist/runtime/utils/useStreamTopics.d.ts +0 -72
- package/dist/runtime/utils/useStreamTopics.js +0 -47
- /package/dist/runtime/{server → nitro}/plugins/00.adapters.d.ts +0 -0
- /package/dist/runtime/{server → nitro}/plugins/01.ws-lifecycle.d.ts +0 -0
- /package/dist/runtime/{server → nitro}/plugins/01.ws-lifecycle.js +0 -0
- /package/dist/runtime/{server → nitro}/plugins/02.workers.d.ts +0 -0
- /package/dist/runtime/{utils → nitro/utils}/adapters.js +0 -0
- /package/dist/runtime/{utils → nitro/utils}/useNventLogger.d.ts +0 -0
- /package/dist/runtime/{utils → nitro/utils}/useNventLogger.js +0 -0
- /package/dist/runtime/{utils → nitro/utils}/wsPeerManager.d.ts +0 -0
- /package/dist/runtime/{utils → nitro/utils}/wsPeerManager.js +0 -0
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
import { useNventLogger, useScheduler } from "#imports";
|
|
2
|
+
import { getEventBus } from "../eventBus.js";
|
|
3
|
+
export async function scheduleTrigger(triggerName, scheduleConfig, triggerStatus) {
|
|
4
|
+
const logger = useNventLogger("trigger-wiring");
|
|
5
|
+
const scheduler = useScheduler();
|
|
6
|
+
const eventBus = getEventBus();
|
|
7
|
+
try {
|
|
8
|
+
const jobId = `trigger:${triggerName}`;
|
|
9
|
+
const handler = async () => {
|
|
10
|
+
logger.debug("Schedule trigger fired", { trigger: triggerName });
|
|
11
|
+
await eventBus.publish({
|
|
12
|
+
type: "trigger.fired",
|
|
13
|
+
triggerName,
|
|
14
|
+
data: {
|
|
15
|
+
scheduledAt: Date.now(),
|
|
16
|
+
timezone: scheduleConfig.timezone || "UTC"
|
|
17
|
+
}
|
|
18
|
+
});
|
|
19
|
+
};
|
|
20
|
+
await scheduler.unschedule(jobId);
|
|
21
|
+
const jobConfig = {
|
|
22
|
+
id: jobId,
|
|
23
|
+
type: scheduleConfig.cron ? "cron" : "interval",
|
|
24
|
+
name: `Schedule Trigger: ${triggerName}`,
|
|
25
|
+
handler,
|
|
26
|
+
metadata: {
|
|
27
|
+
triggerName,
|
|
28
|
+
type: "schedule-trigger",
|
|
29
|
+
scheduleConfig
|
|
30
|
+
},
|
|
31
|
+
enabled: triggerStatus === "active"
|
|
32
|
+
};
|
|
33
|
+
if (scheduleConfig.cron) {
|
|
34
|
+
jobConfig.cron = scheduleConfig.cron;
|
|
35
|
+
jobConfig.timezone = scheduleConfig.timezone || "UTC";
|
|
36
|
+
} else if (scheduleConfig.interval) {
|
|
37
|
+
jobConfig.interval = scheduleConfig.interval * 1e3;
|
|
38
|
+
}
|
|
39
|
+
await scheduler.schedule(jobConfig);
|
|
40
|
+
logger.info("Scheduled trigger", {
|
|
41
|
+
trigger: triggerName,
|
|
42
|
+
cron: scheduleConfig.cron,
|
|
43
|
+
interval: scheduleConfig.interval,
|
|
44
|
+
timezone: scheduleConfig.timezone
|
|
45
|
+
});
|
|
46
|
+
} catch (error) {
|
|
47
|
+
logger.error("Failed to schedule trigger", {
|
|
48
|
+
trigger: triggerName,
|
|
49
|
+
error: error instanceof Error ? error.message : String(error)
|
|
50
|
+
});
|
|
51
|
+
throw error;
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
export async function unscheduleTrigger(triggerName) {
|
|
55
|
+
const logger = useNventLogger("trigger-wiring");
|
|
56
|
+
const scheduler = useScheduler();
|
|
57
|
+
try {
|
|
58
|
+
const jobId = `trigger:${triggerName}`;
|
|
59
|
+
const removed = await scheduler.unschedule(jobId);
|
|
60
|
+
if (removed) {
|
|
61
|
+
logger.info("Unscheduled trigger", { trigger: triggerName });
|
|
62
|
+
}
|
|
63
|
+
} catch (error) {
|
|
64
|
+
logger.error("Failed to unschedule trigger", {
|
|
65
|
+
trigger: triggerName,
|
|
66
|
+
error: error instanceof Error ? error.message : String(error)
|
|
67
|
+
});
|
|
68
|
+
}
|
|
69
|
+
}
|
|
@@ -43,18 +43,37 @@ export declare class FlowStallDetector {
|
|
|
43
43
|
private store;
|
|
44
44
|
private config;
|
|
45
45
|
private logger;
|
|
46
|
-
private
|
|
46
|
+
private schedulerJobId?;
|
|
47
47
|
private started;
|
|
48
48
|
constructor(store: StoreAdapter, config?: StallDetectorConfig);
|
|
49
49
|
/**
|
|
50
50
|
* Start the periodic stall detector
|
|
51
51
|
* Should be called once per instance after adapters are initialized
|
|
52
|
+
* Runs startup recovery to clean up flows from previous server instances
|
|
52
53
|
*/
|
|
53
|
-
start(): void
|
|
54
|
+
start(): Promise<void>;
|
|
55
|
+
/**
|
|
56
|
+
* Get the configuration for scheduling
|
|
57
|
+
* Returns config needed by flowWiring to register the scheduler job
|
|
58
|
+
*/
|
|
59
|
+
getScheduleConfig(): {
|
|
60
|
+
enabled: boolean;
|
|
61
|
+
interval: number;
|
|
62
|
+
stallTimeout: number;
|
|
63
|
+
};
|
|
64
|
+
/**
|
|
65
|
+
* Set the scheduler job ID (called from flowWiring after scheduling)
|
|
66
|
+
*/
|
|
67
|
+
setSchedulerJobId(jobId: string): void;
|
|
54
68
|
/**
|
|
55
69
|
* Stop the periodic stall detector
|
|
56
70
|
*/
|
|
57
|
-
stop(): void
|
|
71
|
+
stop(): Promise<void>;
|
|
72
|
+
/**
|
|
73
|
+
* Get stall timeout for a specific flow
|
|
74
|
+
* Uses flow-specific timeout from analyzed metadata, falls back to global config
|
|
75
|
+
*/
|
|
76
|
+
private getFlowStallTimeout;
|
|
58
77
|
/**
|
|
59
78
|
* Update activity timestamp for a flow
|
|
60
79
|
* Should be called on every step event (started, completed, failed, retry)
|
|
@@ -63,6 +82,7 @@ export declare class FlowStallDetector {
|
|
|
63
82
|
/**
|
|
64
83
|
* Check if a specific flow is stalled (lazy detection)
|
|
65
84
|
* Returns true if the flow should be marked as stalled
|
|
85
|
+
* v0.5: Await-aware - uses flow-specific timeout and skips awaiting flows
|
|
66
86
|
*/
|
|
67
87
|
isStalled(flowName: string, runId: string): Promise<boolean>;
|
|
68
88
|
/**
|
|
@@ -78,6 +98,27 @@ export declare class FlowStallDetector {
|
|
|
78
98
|
* For now, we'll need to pass flow names to check, or iterate known flows from registry.
|
|
79
99
|
*/
|
|
80
100
|
checkFlowsForStalls(flowNames: string[]): Promise<void>;
|
|
101
|
+
/**
|
|
102
|
+
* Run startup recovery to clean up flows left in running state from previous server instance
|
|
103
|
+
* This marks all running flows as stalled since their in-memory state is lost
|
|
104
|
+
* Also validates and cleans up flow stats index
|
|
105
|
+
*/
|
|
106
|
+
private runStartupRecovery;
|
|
107
|
+
/**
|
|
108
|
+
* Validate flow stats index and remove entries for non-existent flows
|
|
109
|
+
* Also corrects running/awaiting counts based on actual scanned data
|
|
110
|
+
*
|
|
111
|
+
* NOTE: We only validate running/awaiting counts because:
|
|
112
|
+
* - They are small snapshot values (usually < 100)
|
|
113
|
+
* - We already scanned all flows during startup recovery
|
|
114
|
+
* - Discrepancies indicate actual bugs (flows stuck in wrong state)
|
|
115
|
+
*
|
|
116
|
+
* We do NOT validate total/success/failure/cancel because:
|
|
117
|
+
* - These are cumulative counters that can be millions in production
|
|
118
|
+
* - Validation would require full table scan (prohibitively expensive)
|
|
119
|
+
* - Minor discrepancies don't affect runtime behavior
|
|
120
|
+
*/
|
|
121
|
+
private validateFlowStats;
|
|
81
122
|
/**
|
|
82
123
|
* Internal method for periodic checks
|
|
83
124
|
* Gets flow names from registry and checks them
|
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
import { useNventLogger, useStreamTopics } from "#imports";
|
|
1
|
+
import { useNventLogger, useStreamTopics, $useAnalyzedFlows, useScheduler } from "#imports";
|
|
2
2
|
const DEFAULT_STALL_TIMEOUT = 30 * 60 * 1e3;
|
|
3
3
|
const DEFAULT_CHECK_INTERVAL = 15 * 60 * 1e3;
|
|
4
4
|
export class FlowStallDetector {
|
|
5
5
|
store;
|
|
6
6
|
config;
|
|
7
7
|
logger = useNventLogger("stall-detector");
|
|
8
|
-
|
|
8
|
+
schedulerJobId;
|
|
9
9
|
started = false;
|
|
10
10
|
constructor(store, config = {}) {
|
|
11
11
|
this.store = store;
|
|
@@ -18,92 +18,115 @@ export class FlowStallDetector {
|
|
|
18
18
|
/**
|
|
19
19
|
* Start the periodic stall detector
|
|
20
20
|
* Should be called once per instance after adapters are initialized
|
|
21
|
+
* Runs startup recovery to clean up flows from previous server instances
|
|
21
22
|
*/
|
|
22
|
-
start() {
|
|
23
|
+
async start() {
|
|
23
24
|
if (this.started) {
|
|
24
25
|
this.logger.warn("Stall detector already started");
|
|
25
26
|
return;
|
|
26
27
|
}
|
|
27
28
|
this.started = true;
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
29
|
+
await this.runStartupRecovery();
|
|
30
|
+
this.logger.info(`Stall detector started - periodicCheck: ${this.config.enablePeriodicCheck}, stallTimeout: ${this.config.stallTimeout / 1e3}s, checkInterval: ${this.config.checkInterval / 1e3}s`);
|
|
31
|
+
}
|
|
32
|
+
/**
|
|
33
|
+
* Get the configuration for scheduling
|
|
34
|
+
* Returns config needed by flowWiring to register the scheduler job
|
|
35
|
+
*/
|
|
36
|
+
getScheduleConfig() {
|
|
37
|
+
return {
|
|
38
|
+
enabled: this.config.enablePeriodicCheck,
|
|
39
|
+
interval: this.config.checkInterval,
|
|
40
|
+
stallTimeout: this.config.stallTimeout
|
|
41
|
+
};
|
|
42
|
+
}
|
|
43
|
+
/**
|
|
44
|
+
* Set the scheduler job ID (called from flowWiring after scheduling)
|
|
45
|
+
*/
|
|
46
|
+
setSchedulerJobId(jobId) {
|
|
47
|
+
this.schedulerJobId = jobId;
|
|
42
48
|
}
|
|
43
49
|
/**
|
|
44
50
|
* Stop the periodic stall detector
|
|
45
51
|
*/
|
|
46
|
-
stop() {
|
|
47
|
-
if (this.
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
52
|
+
async stop() {
|
|
53
|
+
if (this.schedulerJobId) {
|
|
54
|
+
try {
|
|
55
|
+
const scheduler = useScheduler();
|
|
56
|
+
await scheduler.unschedule(this.schedulerJobId);
|
|
57
|
+
this.schedulerJobId = void 0;
|
|
58
|
+
this.logger.info("Stopped periodic stall detector");
|
|
59
|
+
} catch (error) {
|
|
60
|
+
this.logger.error(`Failed to stop stall detector: ${error.message}`);
|
|
61
|
+
}
|
|
51
62
|
}
|
|
52
63
|
this.started = false;
|
|
53
64
|
}
|
|
65
|
+
/**
|
|
66
|
+
* Get stall timeout for a specific flow
|
|
67
|
+
* Uses flow-specific timeout from analyzed metadata, falls back to global config
|
|
68
|
+
*/
|
|
69
|
+
async getFlowStallTimeout(flowName) {
|
|
70
|
+
try {
|
|
71
|
+
const analyzedFlows = $useAnalyzedFlows();
|
|
72
|
+
const flowMeta = analyzedFlows.find((f) => f.id === flowName);
|
|
73
|
+
if (flowMeta?.stallTimeout) {
|
|
74
|
+
this.logger.debug(`Using flow-specific stall timeout for '${flowName}': ${flowMeta.stallTimeout / 1e3}s`);
|
|
75
|
+
return flowMeta.stallTimeout;
|
|
76
|
+
}
|
|
77
|
+
} catch (error) {
|
|
78
|
+
this.logger.warn(`Failed to get flow-specific stall timeout for '${flowName}': ${error.message}`);
|
|
79
|
+
}
|
|
80
|
+
return this.config.stallTimeout;
|
|
81
|
+
}
|
|
54
82
|
/**
|
|
55
83
|
* Update activity timestamp for a flow
|
|
56
84
|
* Should be called on every step event (started, completed, failed, retry)
|
|
57
85
|
*/
|
|
58
86
|
async updateActivity(flowName, runId) {
|
|
59
|
-
const {
|
|
60
|
-
const indexKey =
|
|
87
|
+
const { StoreSubjects } = useStreamTopics();
|
|
88
|
+
const indexKey = StoreSubjects.flowRunIndex(flowName);
|
|
61
89
|
try {
|
|
62
|
-
if (!this.store.
|
|
90
|
+
if (!this.store.index.update) {
|
|
63
91
|
this.logger.warn("Store does not support indexUpdate, cannot update activity");
|
|
64
92
|
return;
|
|
65
93
|
}
|
|
66
|
-
await this.store.
|
|
94
|
+
await this.store.index.update(indexKey, runId, {
|
|
67
95
|
lastActivityAt: Date.now()
|
|
68
96
|
});
|
|
69
97
|
} catch (error) {
|
|
70
|
-
this.logger.warn(
|
|
71
|
-
flowName,
|
|
72
|
-
runId,
|
|
73
|
-
error: error.message
|
|
74
|
-
});
|
|
98
|
+
this.logger.warn(`Failed to update flow activity for '${flowName}' runId '${runId}': ${error.message}`);
|
|
75
99
|
}
|
|
76
100
|
}
|
|
77
101
|
/**
|
|
78
102
|
* Check if a specific flow is stalled (lazy detection)
|
|
79
103
|
* Returns true if the flow should be marked as stalled
|
|
104
|
+
* v0.5: Await-aware - uses flow-specific timeout and skips awaiting flows
|
|
80
105
|
*/
|
|
81
106
|
async isStalled(flowName, runId) {
|
|
82
|
-
const {
|
|
83
|
-
const indexKey =
|
|
107
|
+
const { StoreSubjects } = useStreamTopics();
|
|
108
|
+
const indexKey = StoreSubjects.flowRunIndex(flowName);
|
|
84
109
|
try {
|
|
85
|
-
if (!this.store.
|
|
86
|
-
const flowEntry = await this.store.
|
|
110
|
+
if (!this.store.index.get) return false;
|
|
111
|
+
const flowEntry = await this.store.index.get(indexKey, runId);
|
|
87
112
|
if (!flowEntry?.metadata) return false;
|
|
88
113
|
if (flowEntry.metadata.status !== "running") return false;
|
|
114
|
+
const awaitingSteps = flowEntry.metadata.awaitingSteps || {};
|
|
115
|
+
const hasActiveAwaits = Object.keys(awaitingSteps).length > 0;
|
|
116
|
+
if (hasActiveAwaits) {
|
|
117
|
+
this.logger.debug(`Flow '${flowName}' runId '${runId}' has active awaits [${Object.keys(awaitingSteps).join(", ")}], skipping stall check`);
|
|
118
|
+
return false;
|
|
119
|
+
}
|
|
120
|
+
const stallTimeout = await this.getFlowStallTimeout(flowName);
|
|
89
121
|
const lastActivity = flowEntry.metadata.lastActivityAt || flowEntry.metadata.startedAt || 0;
|
|
90
122
|
const timeSinceActivity = Date.now() - lastActivity;
|
|
91
|
-
if (timeSinceActivity >
|
|
92
|
-
this.logger.info(
|
|
93
|
-
flowName,
|
|
94
|
-
runId,
|
|
95
|
-
timeSinceActivity: `${Math.round(timeSinceActivity / 1e3)}s`,
|
|
96
|
-
stallTimeout: `${this.config.stallTimeout / 1e3}s`
|
|
97
|
-
});
|
|
123
|
+
if (timeSinceActivity > stallTimeout) {
|
|
124
|
+
this.logger.info(`Flow detected as stalled (lazy check) - '${flowName}' runId '${runId}': ${Math.round(timeSinceActivity / 1e3)}s since activity (timeout: ${stallTimeout / 1e3}s)`);
|
|
98
125
|
return true;
|
|
99
126
|
}
|
|
100
127
|
return false;
|
|
101
128
|
} catch (error) {
|
|
102
|
-
this.logger.warn(
|
|
103
|
-
flowName,
|
|
104
|
-
runId,
|
|
105
|
-
error: error.message
|
|
106
|
-
});
|
|
129
|
+
this.logger.warn(`Failed to check if flow is stalled for '${flowName}' runId '${runId}': ${error.message}`);
|
|
107
130
|
return false;
|
|
108
131
|
}
|
|
109
132
|
}
|
|
@@ -112,43 +135,40 @@ export class FlowStallDetector {
|
|
|
112
135
|
* Emits a flow.stalled event and updates the flow status
|
|
113
136
|
*/
|
|
114
137
|
async markAsStalled(flowName, runId, reason = "No activity timeout") {
|
|
115
|
-
const {
|
|
116
|
-
const indexKey =
|
|
138
|
+
const { StoreSubjects } = useStreamTopics();
|
|
139
|
+
const indexKey = StoreSubjects.flowRunIndex(flowName);
|
|
117
140
|
try {
|
|
118
|
-
if (!this.store.
|
|
141
|
+
if (!this.store.index.get) {
|
|
119
142
|
this.logger.warn("Store does not support indexGet, cannot mark as stalled");
|
|
120
143
|
return;
|
|
121
144
|
}
|
|
122
|
-
const flowEntry = await this.store.
|
|
145
|
+
const flowEntry = await this.store.index.get(indexKey, runId);
|
|
123
146
|
if (!flowEntry?.metadata) return;
|
|
124
|
-
|
|
125
|
-
if (
|
|
126
|
-
|
|
147
|
+
const previousStatus = flowEntry.metadata.status;
|
|
148
|
+
if (previousStatus !== "running" && previousStatus !== "awaiting") return;
|
|
149
|
+
if (this.store.index.update) {
|
|
150
|
+
await this.store.index.update(indexKey, runId, {
|
|
127
151
|
status: "stalled",
|
|
152
|
+
previousStatus,
|
|
153
|
+
// Track what state it was in before stalling
|
|
128
154
|
stalledAt: Date.now(),
|
|
129
155
|
stallReason: reason
|
|
130
156
|
});
|
|
131
157
|
}
|
|
132
|
-
const streamName =
|
|
133
|
-
await this.store.append(streamName, {
|
|
158
|
+
const streamName = StoreSubjects.flowRun(runId);
|
|
159
|
+
await this.store.stream.append(streamName, {
|
|
134
160
|
type: "flow.stalled",
|
|
135
161
|
runId,
|
|
136
162
|
flowName,
|
|
137
163
|
data: {
|
|
138
|
-
reason
|
|
164
|
+
reason,
|
|
165
|
+
previousStatus
|
|
166
|
+
// Include previous status so stats handler knows which counter to decrement
|
|
139
167
|
}
|
|
140
168
|
});
|
|
141
|
-
this.logger.info(
|
|
142
|
-
flowName,
|
|
143
|
-
runId,
|
|
144
|
-
reason
|
|
145
|
-
});
|
|
169
|
+
this.logger.info(`Marked flow as stalled - '${flowName}' runId '${runId}': ${reason}`);
|
|
146
170
|
} catch (error) {
|
|
147
|
-
this.logger.error(
|
|
148
|
-
flowName,
|
|
149
|
-
runId,
|
|
150
|
-
error: error.message
|
|
151
|
-
});
|
|
171
|
+
this.logger.error(`Failed to mark flow as stalled for '${flowName}' runId '${runId}': ${error.message}`);
|
|
152
172
|
}
|
|
153
173
|
}
|
|
154
174
|
/**
|
|
@@ -159,54 +179,235 @@ export class FlowStallDetector {
|
|
|
159
179
|
* For now, we'll need to pass flow names to check, or iterate known flows from registry.
|
|
160
180
|
*/
|
|
161
181
|
async checkFlowsForStalls(flowNames) {
|
|
162
|
-
this.logger.
|
|
182
|
+
this.logger.info(`Running periodic stall check for ${flowNames.length} flows`);
|
|
163
183
|
try {
|
|
164
|
-
if (!this.store.
|
|
184
|
+
if (!this.store.index.get || !this.store.index.read) {
|
|
165
185
|
this.logger.warn("Store does not support required index operations");
|
|
166
186
|
return;
|
|
167
187
|
}
|
|
168
|
-
const {
|
|
188
|
+
const { StoreSubjects } = useStreamTopics();
|
|
169
189
|
let checkedCount = 0;
|
|
170
190
|
let stalledCount = 0;
|
|
171
191
|
for (const flowName of flowNames) {
|
|
172
|
-
const indexKey =
|
|
173
|
-
const
|
|
192
|
+
const indexKey = StoreSubjects.flowRunIndex(flowName);
|
|
193
|
+
const flowStallTimeout = await this.getFlowStallTimeout(flowName);
|
|
194
|
+
const entries = await this.store.index.read(indexKey, { limit: 1e3 });
|
|
174
195
|
for (const entry of entries) {
|
|
175
196
|
if (!entry.metadata) continue;
|
|
176
197
|
checkedCount++;
|
|
177
|
-
if (entry.metadata.status !== "running") continue;
|
|
198
|
+
if (entry.metadata.status !== "running" && entry.metadata.status !== "awaiting") continue;
|
|
199
|
+
const awaitingSteps = entry.metadata.awaitingSteps || {};
|
|
200
|
+
const awaitingStepNames = Object.keys(awaitingSteps);
|
|
201
|
+
if (awaitingStepNames.length > 0) {
|
|
202
|
+
let hasOverdueAwaits = false;
|
|
203
|
+
let hasLegacyAwait = false;
|
|
204
|
+
for (const stepName of awaitingStepNames) {
|
|
205
|
+
const awaitState = awaitingSteps[stepName];
|
|
206
|
+
if (awaitState?.status === "awaiting") {
|
|
207
|
+
if (!awaitState.timeoutAt) {
|
|
208
|
+
hasLegacyAwait = true;
|
|
209
|
+
} else if (Date.now() > awaitState.timeoutAt) {
|
|
210
|
+
hasOverdueAwaits = true;
|
|
211
|
+
break;
|
|
212
|
+
}
|
|
213
|
+
}
|
|
214
|
+
}
|
|
215
|
+
if (hasOverdueAwaits) {
|
|
216
|
+
await this.markAsStalled(flowName, entry.id, "Await pattern timed out");
|
|
217
|
+
stalledCount++;
|
|
218
|
+
continue;
|
|
219
|
+
}
|
|
220
|
+
if (hasLegacyAwait) {
|
|
221
|
+
this.logger.debug(`Skipping flow with legacy await (no timeout) - '${flowName}' runId '${entry.id}'`);
|
|
222
|
+
}
|
|
223
|
+
continue;
|
|
224
|
+
}
|
|
178
225
|
const lastActivity = entry.metadata.lastActivityAt || entry.metadata.startedAt || 0;
|
|
179
226
|
const timeSinceActivity = Date.now() - lastActivity;
|
|
180
|
-
if (timeSinceActivity >
|
|
227
|
+
if (timeSinceActivity > flowStallTimeout) {
|
|
181
228
|
await this.markAsStalled(flowName, entry.id, "Periodic check detected no activity");
|
|
182
229
|
stalledCount++;
|
|
183
230
|
}
|
|
184
231
|
}
|
|
185
232
|
}
|
|
186
|
-
|
|
187
|
-
this.logger.info("Periodic stall check completed", {
|
|
188
|
-
checked: checkedCount,
|
|
189
|
-
stalled: stalledCount
|
|
190
|
-
});
|
|
191
|
-
} else {
|
|
192
|
-
this.logger.debug("Periodic stall check completed", {
|
|
193
|
-
checked: checkedCount,
|
|
194
|
-
stalled: 0
|
|
195
|
-
});
|
|
196
|
-
}
|
|
233
|
+
this.logger.info(`Periodic stall check completed - checked: ${checkedCount}, stalled: ${stalledCount}`);
|
|
197
234
|
} catch (error) {
|
|
198
235
|
this.logger.error("Failed to run periodic stall check", {
|
|
199
236
|
error: error.message
|
|
200
237
|
});
|
|
201
238
|
}
|
|
202
239
|
}
|
|
240
|
+
/**
|
|
241
|
+
* Run startup recovery to clean up flows left in running state from previous server instance
|
|
242
|
+
* This marks all running flows as stalled since their in-memory state is lost
|
|
243
|
+
* Also validates and cleans up flow stats index
|
|
244
|
+
*/
|
|
245
|
+
async runStartupRecovery() {
|
|
246
|
+
this.logger.info("Running startup recovery to check for orphaned flows and validate stats");
|
|
247
|
+
try {
|
|
248
|
+
if (!this.store.index.get || !this.store.index.read) {
|
|
249
|
+
this.logger.warn("Store does not support required index operations");
|
|
250
|
+
return;
|
|
251
|
+
}
|
|
252
|
+
const analyzedFlows = $useAnalyzedFlows();
|
|
253
|
+
const flowNames = analyzedFlows.map((f) => f.id).filter(Boolean);
|
|
254
|
+
this.logger.info(`Starting flow recovery check for ${flowNames.length} registered flows: [${flowNames.join(", ")}]`);
|
|
255
|
+
if (flowNames.length === 0) {
|
|
256
|
+
this.logger.debug("No flows registered, skipping startup recovery");
|
|
257
|
+
return;
|
|
258
|
+
}
|
|
259
|
+
const { StoreSubjects } = useStreamTopics();
|
|
260
|
+
let recoveredCount = 0;
|
|
261
|
+
const actualCounts = {};
|
|
262
|
+
for (const flowName of flowNames) {
|
|
263
|
+
actualCounts[flowName] = { running: 0, awaiting: 0 };
|
|
264
|
+
const indexKey = StoreSubjects.flowRunIndex(flowName);
|
|
265
|
+
this.logger.debug(`Reading flow run index for '${flowName}': ${indexKey}`);
|
|
266
|
+
const entries = await this.store.index.read(indexKey, { limit: 1e3 });
|
|
267
|
+
const statusCounts = {};
|
|
268
|
+
for (const entry of entries) {
|
|
269
|
+
if (!entry.metadata) {
|
|
270
|
+
this.logger.debug(`Skipping entry without metadata - '${flowName}' runId '${entry.id}'`);
|
|
271
|
+
continue;
|
|
272
|
+
}
|
|
273
|
+
const status = entry.metadata.status || "unknown";
|
|
274
|
+
statusCounts[status] = (statusCounts[status] || 0) + 1;
|
|
275
|
+
if (status === "running") {
|
|
276
|
+
actualCounts[flowName].running++;
|
|
277
|
+
} else if (status === "awaiting") {
|
|
278
|
+
actualCounts[flowName].awaiting++;
|
|
279
|
+
}
|
|
280
|
+
if (entry.metadata.status === "running" || entry.metadata.status === "awaiting") {
|
|
281
|
+
const awaitingSteps = entry.metadata.awaitingSteps || {};
|
|
282
|
+
const awaitingStepNames = Object.keys(awaitingSteps);
|
|
283
|
+
this.logger.info(`Found flow in ${entry.metadata.status} state - '${flowName}' runId '${entry.id}' with ${awaitingStepNames.length} awaiting steps`);
|
|
284
|
+
this.logger.debug(`Flow '${flowName}' runId '${entry.id}' status: ${entry.metadata.status}, awaitingSteps: ${awaitingStepNames.length}`);
|
|
285
|
+
if (awaitingStepNames.length > 0) {
|
|
286
|
+
let hasActiveValidAwaits = false;
|
|
287
|
+
let hasOverdueAwaits = false;
|
|
288
|
+
for (const stepName of awaitingStepNames) {
|
|
289
|
+
const awaitState = awaitingSteps[stepName];
|
|
290
|
+
this.logger.info(`Checking await state for '${flowName}' runId '${entry.id}' step '${stepName}': status=${awaitState?.status}, timeoutAt=${awaitState?.timeoutAt}, resolveAt=${awaitState?.resolveAt}`);
|
|
291
|
+
if (awaitState?.status === "awaiting") {
|
|
292
|
+
const timeoutAt = awaitState.timeoutAt || awaitState.resolveAt;
|
|
293
|
+
if (!timeoutAt) {
|
|
294
|
+
hasActiveValidAwaits = true;
|
|
295
|
+
this.logger.warn(`Found await without timeout (legacy data) - '${flowName}' runId '${entry.id}' step '${stepName}' - treating as valid (timeout tracking was added later)`);
|
|
296
|
+
} else if (Date.now() > timeoutAt) {
|
|
297
|
+
hasOverdueAwaits = true;
|
|
298
|
+
this.logger.warn(`Found overdue await pattern - '${flowName}' runId '${entry.id}' step '${stepName}': timeoutAt=${new Date(timeoutAt).toISOString()}, overdueBy=${Math.round((Date.now() - timeoutAt) / 1e3)}s`);
|
|
299
|
+
} else {
|
|
300
|
+
hasActiveValidAwaits = true;
|
|
301
|
+
this.logger.debug(`Found active valid await - '${flowName}' runId '${entry.id}' step '${stepName}': remaining=${Math.round((timeoutAt - Date.now()) / 1e3)}s`);
|
|
302
|
+
}
|
|
303
|
+
}
|
|
304
|
+
}
|
|
305
|
+
if (hasOverdueAwaits) {
|
|
306
|
+
this.logger.info(`Marking flow as stalled (overdue awaits) - '${flowName}' runId '${entry.id}'`);
|
|
307
|
+
await this.markAsStalled(flowName, entry.id, "Await pattern resolution failed or expired");
|
|
308
|
+
recoveredCount++;
|
|
309
|
+
continue;
|
|
310
|
+
}
|
|
311
|
+
if (hasActiveValidAwaits) {
|
|
312
|
+
if (entry.metadata.status === "running") {
|
|
313
|
+
if (this.store.index.update) {
|
|
314
|
+
await this.store.index.update(indexKey, entry.id, {
|
|
315
|
+
status: "awaiting"
|
|
316
|
+
});
|
|
317
|
+
this.logger.info(`Updated flow status to awaiting (has active awaits) - '${flowName}' runId '${entry.id}' steps: [${awaitingStepNames.join(", ")}]`);
|
|
318
|
+
}
|
|
319
|
+
} else {
|
|
320
|
+
this.logger.debug(`Flow already has awaiting status - '${flowName}' runId '${entry.id}'`);
|
|
321
|
+
}
|
|
322
|
+
continue;
|
|
323
|
+
}
|
|
324
|
+
}
|
|
325
|
+
this.logger.info(`Marking flow as stalled (no active awaits) - '${flowName}' runId '${entry.id}'`);
|
|
326
|
+
await this.markAsStalled(flowName, entry.id, "Server restart - flow state lost");
|
|
327
|
+
recoveredCount++;
|
|
328
|
+
}
|
|
329
|
+
}
|
|
330
|
+
const statusSummary = Object.entries(statusCounts).map(([status, count]) => `${status}:${count}`).join(", ");
|
|
331
|
+
this.logger.info(`Flow recovery summary for '${flowName}' - total: ${entries.length}, statuses: {${statusSummary}}`);
|
|
332
|
+
}
|
|
333
|
+
await this.validateFlowStats(flowNames, actualCounts);
|
|
334
|
+
if (recoveredCount > 0) {
|
|
335
|
+
this.logger.info(`Startup recovery completed - marked ${recoveredCount} orphaned flow(s) as stalled`);
|
|
336
|
+
} else {
|
|
337
|
+
this.logger.debug("Startup recovery completed - no orphaned flows found");
|
|
338
|
+
}
|
|
339
|
+
} catch (error) {
|
|
340
|
+
this.logger.error(`Failed to run startup recovery: ${error.message}`);
|
|
341
|
+
}
|
|
342
|
+
}
|
|
343
|
+
/**
|
|
344
|
+
* Validate flow stats index and remove entries for non-existent flows
|
|
345
|
+
* Also corrects running/awaiting counts based on actual scanned data
|
|
346
|
+
*
|
|
347
|
+
* NOTE: We only validate running/awaiting counts because:
|
|
348
|
+
* - They are small snapshot values (usually < 100)
|
|
349
|
+
* - We already scanned all flows during startup recovery
|
|
350
|
+
* - Discrepancies indicate actual bugs (flows stuck in wrong state)
|
|
351
|
+
*
|
|
352
|
+
* We do NOT validate total/success/failure/cancel because:
|
|
353
|
+
* - These are cumulative counters that can be millions in production
|
|
354
|
+
* - Validation would require full table scan (prohibitively expensive)
|
|
355
|
+
* - Minor discrepancies don't affect runtime behavior
|
|
356
|
+
*/
|
|
357
|
+
async validateFlowStats(validFlowNames, actualCounts) {
|
|
358
|
+
this.logger.debug("Validating flow stats index");
|
|
359
|
+
try {
|
|
360
|
+
if (!this.store.index.read || !this.store.index.delete || !this.store.index.update) {
|
|
361
|
+
this.logger.debug("Store does not support stats validation operations");
|
|
362
|
+
return;
|
|
363
|
+
}
|
|
364
|
+
const { StoreSubjects } = useStreamTopics();
|
|
365
|
+
const statsIndexKey = StoreSubjects.flowIndex();
|
|
366
|
+
const statsEntries = await this.store.index.read(statsIndexKey, { limit: 1e4 });
|
|
367
|
+
let removedCount = 0;
|
|
368
|
+
let correctedCount = 0;
|
|
369
|
+
for (const entry of statsEntries) {
|
|
370
|
+
const flowName = entry.id;
|
|
371
|
+
if (!validFlowNames.includes(flowName)) {
|
|
372
|
+
this.logger.info(`Removing stats for non-existent flow '${flowName}'`);
|
|
373
|
+
await this.store.index.delete(statsIndexKey, flowName);
|
|
374
|
+
removedCount++;
|
|
375
|
+
continue;
|
|
376
|
+
}
|
|
377
|
+
if (!entry.metadata?.stats) {
|
|
378
|
+
this.logger.warn(`Flow stats entry missing stats object for '${flowName}'`);
|
|
379
|
+
continue;
|
|
380
|
+
}
|
|
381
|
+
const stats = entry.metadata.stats;
|
|
382
|
+
const actual = actualCounts[flowName] || { running: 0, awaiting: 0 };
|
|
383
|
+
const runningMismatch = stats.running !== actual.running;
|
|
384
|
+
const awaitingMismatch = stats.awaiting !== actual.awaiting;
|
|
385
|
+
if (runningMismatch || awaitingMismatch) {
|
|
386
|
+
this.logger.warn(`Flow stats mismatch detected for '${flowName}' - stored: running=${stats.running} awaiting=${stats.awaiting}, actual: running=${actual.running} awaiting=${actual.awaiting} - correcting`);
|
|
387
|
+
await this.store.index.update(statsIndexKey, flowName, {
|
|
388
|
+
stats: {
|
|
389
|
+
running: actual.running,
|
|
390
|
+
awaiting: actual.awaiting
|
|
391
|
+
}
|
|
392
|
+
});
|
|
393
|
+
correctedCount++;
|
|
394
|
+
}
|
|
395
|
+
}
|
|
396
|
+
if (removedCount > 0 || correctedCount > 0) {
|
|
397
|
+
this.logger.info(`Flow stats validation completed - removed ${removedCount} orphaned stats, corrected ${correctedCount} running/awaiting counts`);
|
|
398
|
+
} else {
|
|
399
|
+
this.logger.debug("Flow stats validation completed - all stats accurate");
|
|
400
|
+
}
|
|
401
|
+
} catch (error) {
|
|
402
|
+
this.logger.error(`Failed to validate flow stats: ${error.message}`);
|
|
403
|
+
}
|
|
404
|
+
}
|
|
203
405
|
/**
|
|
204
406
|
* Internal method for periodic checks
|
|
205
407
|
* Gets flow names from registry and checks them
|
|
206
408
|
*/
|
|
207
409
|
async checkAllRunningFlows() {
|
|
208
410
|
try {
|
|
209
|
-
const { $useAnalyzedFlows } = await import("#imports");
|
|
210
411
|
const analyzedFlows = $useAnalyzedFlows();
|
|
211
412
|
const flowNames = analyzedFlows.map((f) => f.id).filter(Boolean);
|
|
212
413
|
if (flowNames.length === 0) {
|
|
@@ -215,9 +416,7 @@ export class FlowStallDetector {
|
|
|
215
416
|
}
|
|
216
417
|
await this.checkFlowsForStalls(flowNames);
|
|
217
418
|
} catch (error) {
|
|
218
|
-
this.logger.error(
|
|
219
|
-
error: error.message
|
|
220
|
-
});
|
|
419
|
+
this.logger.error(`Failed to run periodic stall check: ${error.message}`);
|
|
221
420
|
}
|
|
222
421
|
}
|
|
223
422
|
/**
|