comfyui-node 1.4.4 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +21 -16
- package/dist/.tsbuildinfo +1 -1
- package/dist/call-wrapper.d.ts +141 -124
- package/dist/call-wrapper.d.ts.map +1 -1
- package/dist/call-wrapper.js +353 -64
- package/dist/call-wrapper.js.map +1 -1
- package/dist/client.d.ts +290 -290
- package/dist/client.d.ts.map +1 -1
- package/dist/client.js +78 -19
- package/dist/client.js.map +1 -1
- package/dist/index.d.ts +3 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +1 -1
- package/dist/index.js.map +1 -1
- package/dist/pool/SmartPool.d.ts +144 -0
- package/dist/pool/SmartPool.d.ts.map +1 -0
- package/dist/pool/SmartPool.js +677 -0
- package/dist/pool/SmartPool.js.map +1 -0
- package/dist/pool/SmartPoolV2.d.ts +120 -0
- package/dist/pool/SmartPoolV2.d.ts.map +1 -0
- package/dist/pool/SmartPoolV2.js +587 -0
- package/dist/pool/SmartPoolV2.js.map +1 -0
- package/dist/pool/WorkflowPool.d.ts +32 -2
- package/dist/pool/WorkflowPool.d.ts.map +1 -1
- package/dist/pool/WorkflowPool.js +298 -66
- package/dist/pool/WorkflowPool.js.map +1 -1
- package/dist/pool/client/ClientManager.d.ts +4 -2
- package/dist/pool/client/ClientManager.d.ts.map +1 -1
- package/dist/pool/client/ClientManager.js +29 -9
- package/dist/pool/client/ClientManager.js.map +1 -1
- package/dist/pool/index.d.ts +2 -0
- package/dist/pool/index.d.ts.map +1 -1
- package/dist/pool/index.js +2 -0
- package/dist/pool/index.js.map +1 -1
- package/dist/pool/queue/QueueAdapter.d.ts +32 -30
- package/dist/pool/queue/QueueAdapter.d.ts.map +1 -1
- package/dist/pool/queue/adapters/memory.d.ts +22 -20
- package/dist/pool/queue/adapters/memory.d.ts.map +1 -1
- package/dist/pool/queue/adapters/memory.js +14 -2
- package/dist/pool/queue/adapters/memory.js.map +1 -1
- package/dist/pool/types/affinity.d.ts +6 -0
- package/dist/pool/types/affinity.d.ts.map +1 -0
- package/dist/pool/types/affinity.js +2 -0
- package/dist/pool/types/affinity.js.map +1 -0
- package/dist/pool/types/job.d.ts.map +1 -1
- package/dist/pool/utils/failure-analysis.d.ts +14 -0
- package/dist/pool/utils/failure-analysis.d.ts.map +1 -0
- package/dist/pool/utils/failure-analysis.js +224 -0
- package/dist/pool/utils/failure-analysis.js.map +1 -0
- package/dist/pool.d.ts +180 -180
- package/dist/types/error.d.ts +31 -1
- package/dist/types/error.d.ts.map +1 -1
- package/dist/types/error.js +30 -0
- package/dist/types/error.js.map +1 -1
- package/dist/workflow.d.ts.map +1 -1
- package/dist/workflow.js +4 -1
- package/dist/workflow.js.map +1 -1
- package/package.json +4 -3
|
@@ -9,6 +9,8 @@ import { ClientManager } from "./client/ClientManager.js";
|
|
|
9
9
|
import { hashWorkflow } from "./utils/hash.js";
|
|
10
10
|
import { cloneDeep } from "./utils/clone.js";
|
|
11
11
|
import { JobProfiler } from "./profiling/JobProfiler.js";
|
|
12
|
+
import { analyzeWorkflowFailure } from "./utils/failure-analysis.js";
|
|
13
|
+
import { WorkflowNotSupportedError } from "../types/error.js";
|
|
12
14
|
const DEFAULT_MAX_ATTEMPTS = 3;
|
|
13
15
|
const DEFAULT_RETRY_DELAY = 1000;
|
|
14
16
|
export class WorkflowPool extends TypedEventTarget {
|
|
@@ -17,9 +19,18 @@ export class WorkflowPool extends TypedEventTarget {
|
|
|
17
19
|
clientManager;
|
|
18
20
|
opts;
|
|
19
21
|
jobStore = new Map();
|
|
22
|
+
jobFailureAnalysis = new Map();
|
|
23
|
+
affinities = new Map();
|
|
20
24
|
initPromise;
|
|
21
25
|
processing = false;
|
|
26
|
+
processQueued = false;
|
|
22
27
|
activeJobs = new Map();
|
|
28
|
+
queueDebug = process.env.WORKFLOW_POOL_DEBUG === "1";
|
|
29
|
+
debugLog(...args) {
|
|
30
|
+
if (this.queueDebug) {
|
|
31
|
+
console.log(...args);
|
|
32
|
+
}
|
|
33
|
+
}
|
|
23
34
|
constructor(clients, opts) {
|
|
24
35
|
super();
|
|
25
36
|
this.strategy = opts?.failoverStrategy ?? new SmartFailoverStrategy();
|
|
@@ -28,6 +39,11 @@ export class WorkflowPool extends TypedEventTarget {
|
|
|
28
39
|
healthCheckIntervalMs: opts?.healthCheckIntervalMs ?? 30000
|
|
29
40
|
});
|
|
30
41
|
this.opts = opts ?? {};
|
|
42
|
+
if (opts?.workflowAffinities) {
|
|
43
|
+
for (const affinity of opts.workflowAffinities) {
|
|
44
|
+
this.affinities.set(affinity.workflowHash, affinity);
|
|
45
|
+
}
|
|
46
|
+
}
|
|
31
47
|
this.clientManager.on("client:state", (ev) => {
|
|
32
48
|
this.dispatchEvent(new CustomEvent("client:state", { detail: ev.detail }));
|
|
33
49
|
});
|
|
@@ -51,6 +67,15 @@ export class WorkflowPool extends TypedEventTarget {
|
|
|
51
67
|
async ready() {
|
|
52
68
|
await this.initPromise;
|
|
53
69
|
}
|
|
70
|
+
setAffinity(affinity) {
|
|
71
|
+
this.affinities.set(affinity.workflowHash, affinity);
|
|
72
|
+
}
|
|
73
|
+
removeAffinity(workflowHash) {
|
|
74
|
+
return this.affinities.delete(workflowHash);
|
|
75
|
+
}
|
|
76
|
+
getAffinities() {
|
|
77
|
+
return Array.from(this.affinities.values());
|
|
78
|
+
}
|
|
54
79
|
async enqueue(workflowInput, options) {
|
|
55
80
|
await this.ready();
|
|
56
81
|
const workflowJson = this.normalizeWorkflow(workflowInput);
|
|
@@ -72,6 +97,13 @@ export class WorkflowPool extends TypedEventTarget {
|
|
|
72
97
|
outputAliases: workflowInput.outputAliases ?? {}
|
|
73
98
|
};
|
|
74
99
|
}
|
|
100
|
+
const affinity = this.affinities.get(workflowHash);
|
|
101
|
+
const preferredClientIds = options?.preferredClientIds
|
|
102
|
+
? [...options.preferredClientIds]
|
|
103
|
+
: (affinity?.preferredClientIds ? [...affinity.preferredClientIds] : []);
|
|
104
|
+
const excludeClientIds = options?.excludeClientIds
|
|
105
|
+
? [...options.excludeClientIds]
|
|
106
|
+
: (affinity?.excludeClientIds ? [...affinity.excludeClientIds] : []);
|
|
75
107
|
const payload = {
|
|
76
108
|
jobId,
|
|
77
109
|
workflow: workflowJson,
|
|
@@ -83,14 +115,20 @@ export class WorkflowPool extends TypedEventTarget {
|
|
|
83
115
|
maxAttempts: options?.maxAttempts ?? DEFAULT_MAX_ATTEMPTS,
|
|
84
116
|
retryDelayMs: options?.retryDelayMs ?? DEFAULT_RETRY_DELAY,
|
|
85
117
|
priority: options?.priority ?? 0,
|
|
86
|
-
preferredClientIds:
|
|
87
|
-
excludeClientIds:
|
|
118
|
+
preferredClientIds: preferredClientIds,
|
|
119
|
+
excludeClientIds: excludeClientIds,
|
|
88
120
|
metadata: options?.metadata ?? {},
|
|
89
121
|
includeOutputs: options?.includeOutputs ?? []
|
|
90
122
|
}
|
|
91
123
|
};
|
|
92
124
|
const record = {
|
|
93
125
|
...payload,
|
|
126
|
+
options: {
|
|
127
|
+
...payload.options,
|
|
128
|
+
preferredClientIds: payload.options.preferredClientIds ? [...payload.options.preferredClientIds] : [],
|
|
129
|
+
excludeClientIds: payload.options.excludeClientIds ? [...payload.options.excludeClientIds] : [],
|
|
130
|
+
includeOutputs: payload.options.includeOutputs ? [...payload.options.includeOutputs] : []
|
|
131
|
+
},
|
|
94
132
|
attachments: options?.attachments,
|
|
95
133
|
status: "queued"
|
|
96
134
|
};
|
|
@@ -113,6 +151,7 @@ export class WorkflowPool extends TypedEventTarget {
|
|
|
113
151
|
if (removed) {
|
|
114
152
|
record.status = "cancelled";
|
|
115
153
|
record.completedAt = Date.now();
|
|
154
|
+
this.clearJobFailures(jobId);
|
|
116
155
|
this.dispatchEvent(new CustomEvent("job:cancelled", { detail: { job: record } }));
|
|
117
156
|
return true;
|
|
118
157
|
}
|
|
@@ -122,6 +161,7 @@ export class WorkflowPool extends TypedEventTarget {
|
|
|
122
161
|
await active.cancel();
|
|
123
162
|
record.status = "cancelled";
|
|
124
163
|
record.completedAt = Date.now();
|
|
164
|
+
this.clearJobFailures(jobId);
|
|
125
165
|
this.dispatchEvent(new CustomEvent("job:cancelled", { detail: { job: record } }));
|
|
126
166
|
return true;
|
|
127
167
|
}
|
|
@@ -159,7 +199,7 @@ export class WorkflowPool extends TypedEventTarget {
|
|
|
159
199
|
}
|
|
160
200
|
}
|
|
161
201
|
static fallbackId() {
|
|
162
|
-
return
|
|
202
|
+
return globalThis.crypto && "randomUUID" in globalThis.crypto
|
|
163
203
|
? globalThis.crypto.randomUUID()
|
|
164
204
|
: `job_${Math.random().toString(36).slice(2, 10)}`;
|
|
165
205
|
}
|
|
@@ -185,44 +225,215 @@ export class WorkflowPool extends TypedEventTarget {
|
|
|
185
225
|
}
|
|
186
226
|
return autoSeeds;
|
|
187
227
|
}
|
|
228
|
+
rememberJobFailure(job, clientId, analysis) {
|
|
229
|
+
let map = this.jobFailureAnalysis.get(job.jobId);
|
|
230
|
+
if (!map) {
|
|
231
|
+
map = new Map();
|
|
232
|
+
this.jobFailureAnalysis.set(job.jobId, map);
|
|
233
|
+
}
|
|
234
|
+
map.set(clientId, analysis);
|
|
235
|
+
}
|
|
236
|
+
clearJobFailures(jobId) {
|
|
237
|
+
this.jobFailureAnalysis.delete(jobId);
|
|
238
|
+
}
|
|
239
|
+
collectFailureReasons(jobId) {
|
|
240
|
+
const map = this.jobFailureAnalysis.get(jobId);
|
|
241
|
+
if (!map) {
|
|
242
|
+
return {};
|
|
243
|
+
}
|
|
244
|
+
const reasons = {};
|
|
245
|
+
for (const [clientId, analysis] of map.entries()) {
|
|
246
|
+
reasons[clientId] = analysis.reason;
|
|
247
|
+
}
|
|
248
|
+
return reasons;
|
|
249
|
+
}
|
|
250
|
+
addPermanentExclusion(job, clientId) {
|
|
251
|
+
if (!job.options.excludeClientIds) {
|
|
252
|
+
job.options.excludeClientIds = [];
|
|
253
|
+
}
|
|
254
|
+
if (!job.options.excludeClientIds.includes(clientId)) {
|
|
255
|
+
job.options.excludeClientIds.push(clientId);
|
|
256
|
+
}
|
|
257
|
+
}
|
|
258
|
+
hasRetryPath(job) {
|
|
259
|
+
const map = this.jobFailureAnalysis.get(job.jobId);
|
|
260
|
+
const exclude = new Set(job.options.excludeClientIds ?? []);
|
|
261
|
+
const preferred = job.options.preferredClientIds?.length ? new Set(job.options.preferredClientIds) : null;
|
|
262
|
+
for (const client of this.clientManager.list()) {
|
|
263
|
+
if (preferred && !preferred.has(client.id)) {
|
|
264
|
+
continue;
|
|
265
|
+
}
|
|
266
|
+
if (exclude.has(client.id)) {
|
|
267
|
+
continue;
|
|
268
|
+
}
|
|
269
|
+
const analysis = map?.get(client.id);
|
|
270
|
+
if (analysis?.blockClient === "permanent") {
|
|
271
|
+
continue;
|
|
272
|
+
}
|
|
273
|
+
return true;
|
|
274
|
+
}
|
|
275
|
+
return false;
|
|
276
|
+
}
|
|
277
|
+
createWorkflowNotSupportedError(job, cause) {
|
|
278
|
+
const reasons = this.collectFailureReasons(job.jobId);
|
|
279
|
+
const message = `Workflow ${job.workflowHash} is not supported by any connected clients`;
|
|
280
|
+
return new WorkflowNotSupportedError(message, {
|
|
281
|
+
workflowHash: job.workflowHash,
|
|
282
|
+
reasons,
|
|
283
|
+
cause
|
|
284
|
+
});
|
|
285
|
+
}
|
|
188
286
|
async processQueue() {
|
|
287
|
+
this.debugLog("[processQueue] Called");
|
|
189
288
|
if (this.processing) {
|
|
289
|
+
this.debugLog("[processQueue] Already processing, returning early");
|
|
290
|
+
this.processQueued = true;
|
|
190
291
|
return;
|
|
191
292
|
}
|
|
192
293
|
this.processing = true;
|
|
193
294
|
try {
|
|
295
|
+
// Continue processing until no more jobs can be assigned
|
|
296
|
+
let iteration = 0;
|
|
194
297
|
while (true) {
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
298
|
+
iteration++;
|
|
299
|
+
this.debugLog(`[processQueue] Iteration ${iteration}`);
|
|
300
|
+
const idleClients = this.clientManager.list().filter(c => this.clientManager.isClientStable(c));
|
|
301
|
+
this.debugLog(`[processQueue] Idle clients: [${idleClients.map(c => c.id).join(", ")}] (${idleClients.length})`);
|
|
302
|
+
if (!idleClients.length) {
|
|
303
|
+
this.debugLog("[processQueue] No idle clients, breaking");
|
|
304
|
+
break; // No idle clients available
|
|
198
305
|
}
|
|
199
|
-
const
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
306
|
+
const waitingJobs = await this.queue.peek(100); // Peek at top 100 jobs
|
|
307
|
+
this.debugLog(`[processQueue] Waiting jobs in queue: ${waitingJobs.length}`);
|
|
308
|
+
if (!waitingJobs.length) {
|
|
309
|
+
this.debugLog("[processQueue] No waiting jobs, breaking");
|
|
310
|
+
break; // No jobs in queue
|
|
203
311
|
}
|
|
204
|
-
const
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
312
|
+
const leasedClientIds = new Set();
|
|
313
|
+
const reservedJobIds = new Set();
|
|
314
|
+
const jobMatchInfos = [];
|
|
315
|
+
for (const jobPayload of waitingJobs) {
|
|
316
|
+
const job = this.jobStore.get(jobPayload.jobId);
|
|
317
|
+
if (!job) {
|
|
318
|
+
this.debugLog(`[processQueue] Job ${jobPayload.jobId} not in jobStore, skipping`);
|
|
319
|
+
continue;
|
|
320
|
+
}
|
|
321
|
+
const compatibleClients = idleClients
|
|
322
|
+
.filter(client => {
|
|
323
|
+
const canRun = this.clientManager.canClientRunJob(client, job);
|
|
324
|
+
if (!canRun) {
|
|
325
|
+
this.debugLog(`[processQueue] Job ${job.jobId.substring(0, 8)}... NOT compatible with ${client.id}. Checking why...`);
|
|
326
|
+
this.debugLog(`[processQueue] - preferredClientIds: ${JSON.stringify(job.options.preferredClientIds)}`);
|
|
327
|
+
this.debugLog(`[processQueue] - excludeClientIds: ${JSON.stringify(job.options.excludeClientIds)}`);
|
|
328
|
+
this.debugLog(`[processQueue] - client.id: ${client.id}`);
|
|
329
|
+
}
|
|
330
|
+
return canRun;
|
|
331
|
+
})
|
|
332
|
+
.map(client => client.id);
|
|
333
|
+
this.debugLog(`[processQueue] Job ${job.jobId.substring(0, 8)}... compatible with: [${compatibleClients.join(", ")}] (selectivity=${compatibleClients.length})`);
|
|
334
|
+
if (compatibleClients.length > 0) {
|
|
335
|
+
jobMatchInfos.push({
|
|
336
|
+
jobPayload,
|
|
337
|
+
job,
|
|
338
|
+
compatibleClients,
|
|
339
|
+
selectivity: compatibleClients.length
|
|
340
|
+
});
|
|
341
|
+
}
|
|
209
342
|
}
|
|
210
|
-
this.
|
|
211
|
-
|
|
343
|
+
this.debugLog(`[processQueue] Found ${jobMatchInfos.length} compatible job matches`);
|
|
344
|
+
if (jobMatchInfos.length === 0) {
|
|
345
|
+
this.debugLog("[processQueue] No compatible jobs for idle clients, breaking");
|
|
346
|
+
break; // No compatible jobs for idle clients
|
|
347
|
+
}
|
|
348
|
+
// Sort jobs by priority first, then selectivity, to maximize throughput
|
|
349
|
+
// 1. Higher priority jobs execute first (explicit user priority)
|
|
350
|
+
// 2. More selective jobs (fewer compatible clients) assigned first within same priority
|
|
351
|
+
// 3. Earlier queue position as final tiebreaker
|
|
352
|
+
jobMatchInfos.sort((a, b) => {
|
|
353
|
+
// Primary: priority (higher priority = higher precedence)
|
|
354
|
+
const aPriority = a.job.options.priority ?? 0;
|
|
355
|
+
const bPriority = b.job.options.priority ?? 0;
|
|
356
|
+
if (aPriority !== bPriority) {
|
|
357
|
+
return bPriority - aPriority; // Higher priority first
|
|
358
|
+
}
|
|
359
|
+
// Secondary: selectivity (fewer compatible clients = higher precedence)
|
|
360
|
+
if (a.selectivity !== b.selectivity) {
|
|
361
|
+
return a.selectivity - b.selectivity;
|
|
362
|
+
}
|
|
363
|
+
// Tertiary: maintain queue order (earlier jobs first)
|
|
364
|
+
const aIndex = waitingJobs.indexOf(a.jobPayload);
|
|
365
|
+
const bIndex = waitingJobs.indexOf(b.jobPayload);
|
|
366
|
+
return aIndex - bIndex;
|
|
212
367
|
});
|
|
368
|
+
// Assign jobs to clients using the selectivity-based ordering
|
|
369
|
+
let assignedAnyJob = false;
|
|
370
|
+
for (const matchInfo of jobMatchInfos) {
|
|
371
|
+
if (reservedJobIds.has(matchInfo.job.jobId))
|
|
372
|
+
continue;
|
|
373
|
+
// Find first available compatible client
|
|
374
|
+
const availableClient = matchInfo.compatibleClients.find(clientId => !leasedClientIds.has(clientId));
|
|
375
|
+
if (!availableClient) {
|
|
376
|
+
this.debugLog(`[processQueue] No available client for job ${matchInfo.job.jobId.substring(0, 8)}...`);
|
|
377
|
+
continue; // No available clients for this job
|
|
378
|
+
}
|
|
379
|
+
this.debugLog(`[processQueue] Reserving job ${matchInfo.job.jobId.substring(0, 8)}... for client ${availableClient}`);
|
|
380
|
+
const reservation = await this.queue.reserveById(matchInfo.job.jobId);
|
|
381
|
+
if (reservation) {
|
|
382
|
+
// Mark as leased/reserved for this cycle
|
|
383
|
+
leasedClientIds.add(availableClient);
|
|
384
|
+
reservedJobIds.add(matchInfo.job.jobId);
|
|
385
|
+
assignedAnyJob = true;
|
|
386
|
+
// Get the lease (which marks the client as busy)
|
|
387
|
+
const lease = this.clientManager.claim(matchInfo.job, availableClient);
|
|
388
|
+
if (lease) {
|
|
389
|
+
this.debugLog(`[processQueue] Starting job ${matchInfo.job.jobId.substring(0, 8)}... on client ${availableClient}`);
|
|
390
|
+
this.runJob({ reservation, job: matchInfo.job, clientId: lease.clientId, release: lease.release }).catch((error) => {
|
|
391
|
+
console.error("[WorkflowPool] Unhandled job error", error);
|
|
392
|
+
});
|
|
393
|
+
}
|
|
394
|
+
else {
|
|
395
|
+
// This should not happen since we checked canClientRunJob, but handle defensively
|
|
396
|
+
console.error(`[processQueue.processQueue] CRITICAL: Failed to claim client ${availableClient} for job ${matchInfo.job.jobId} after successful check.`);
|
|
397
|
+
await this.queue.retry(reservation.reservationId, { delayMs: matchInfo.job.options.retryDelayMs });
|
|
398
|
+
}
|
|
399
|
+
}
|
|
400
|
+
else {
|
|
401
|
+
this.debugLog(`[processQueue] Failed to reserve job ${matchInfo.job.jobId.substring(0, 8)}...`);
|
|
402
|
+
}
|
|
403
|
+
}
|
|
404
|
+
this.debugLog(`[processQueue] Assigned any job in this iteration: ${assignedAnyJob}`);
|
|
405
|
+
// If we didn't assign any jobs this iteration, no point continuing
|
|
406
|
+
if (!assignedAnyJob) {
|
|
407
|
+
this.debugLog("[processQueue] No jobs assigned, breaking");
|
|
408
|
+
break;
|
|
409
|
+
}
|
|
213
410
|
}
|
|
214
411
|
}
|
|
215
412
|
finally {
|
|
413
|
+
this.debugLog("[processQueue] Exiting, setting processing = false");
|
|
216
414
|
this.processing = false;
|
|
415
|
+
if (this.processQueued) {
|
|
416
|
+
this.debugLog("[processQueue] Pending rerun detected, draining queue again");
|
|
417
|
+
this.processQueued = false;
|
|
418
|
+
void this.processQueue();
|
|
419
|
+
}
|
|
217
420
|
}
|
|
218
421
|
}
|
|
219
422
|
async runJob(ctx) {
|
|
220
423
|
const { reservation, job, clientId, release } = ctx;
|
|
424
|
+
let released = false;
|
|
425
|
+
const safeRelease = (opts) => {
|
|
426
|
+
if (released) {
|
|
427
|
+
return;
|
|
428
|
+
}
|
|
429
|
+
released = true;
|
|
430
|
+
release(opts);
|
|
431
|
+
};
|
|
221
432
|
const managed = this.clientManager.getClient(clientId);
|
|
222
433
|
const client = managed?.client;
|
|
223
434
|
if (!client) {
|
|
224
435
|
await this.queue.retry(reservation.reservationId, { delayMs: job.options.retryDelayMs });
|
|
225
|
-
|
|
436
|
+
safeRelease({ success: false });
|
|
226
437
|
return;
|
|
227
438
|
}
|
|
228
439
|
job.status = "running";
|
|
@@ -257,9 +468,9 @@ export class WorkflowPool extends TypedEventTarget {
|
|
|
257
468
|
// Use stored metadata if available (from Workflow instance), otherwise extract from recreated instance
|
|
258
469
|
const outputNodeIds = reservation.payload.workflowMeta?.outputNodeIds ??
|
|
259
470
|
wfInstance.outputNodeIds ??
|
|
260
|
-
job.options.includeOutputs ??
|
|
261
|
-
|
|
262
|
-
|
|
471
|
+
job.options.includeOutputs ??
|
|
472
|
+
[];
|
|
473
|
+
const outputAliases = reservation.payload.workflowMeta?.outputAliases ?? wfInstance.outputAliases ?? {};
|
|
263
474
|
let promptBuilder = new PromptBuilder(wfInstance.json, wfInstance.inputPaths ?? [], outputNodeIds);
|
|
264
475
|
for (const nodeId of outputNodeIds) {
|
|
265
476
|
const alias = outputAliases[nodeId] ?? nodeId;
|
|
@@ -267,9 +478,7 @@ export class WorkflowPool extends TypedEventTarget {
|
|
|
267
478
|
}
|
|
268
479
|
const wrapper = new CallWrapper(client, promptBuilder);
|
|
269
480
|
// Setup profiling if enabled
|
|
270
|
-
const profiler = this.opts.enableProfiling
|
|
271
|
-
? new JobProfiler(job.enqueuedAt, workflowPayload)
|
|
272
|
-
: undefined;
|
|
481
|
+
const profiler = this.opts.enableProfiling ? new JobProfiler(job.enqueuedAt, workflowPayload) : undefined;
|
|
273
482
|
// Setup node execution timeout tracking
|
|
274
483
|
const nodeExecutionTimeout = this.opts.nodeExecutionTimeoutMs ?? 300000; // 5 minutes default
|
|
275
484
|
let nodeTimeoutId;
|
|
@@ -285,9 +494,10 @@ export class WorkflowPool extends TypedEventTarget {
|
|
|
285
494
|
currentExecutingNode = nodeName || null;
|
|
286
495
|
nodeTimeoutId = setTimeout(() => {
|
|
287
496
|
const elapsed = Date.now() - (lastNodeStartTime || 0);
|
|
288
|
-
const nodeInfo = currentExecutingNode ? ` (node: ${currentExecutingNode})` :
|
|
289
|
-
|
|
290
|
-
`Actual time: ${elapsed}ms. Server may be stuck or node is too slow for configured timeout.`)
|
|
497
|
+
const nodeInfo = currentExecutingNode ? ` (node: ${currentExecutingNode})` : "";
|
|
498
|
+
completionError = new Error(`Node execution timeout: took longer than ${nodeExecutionTimeout}ms${nodeInfo}. ` +
|
|
499
|
+
`Actual time: ${elapsed}ms. Server may be stuck or node is too slow for configured timeout.`);
|
|
500
|
+
resolveCompletion?.();
|
|
291
501
|
}, nodeExecutionTimeout);
|
|
292
502
|
}
|
|
293
503
|
};
|
|
@@ -326,20 +536,20 @@ export class WorkflowPool extends TypedEventTarget {
|
|
|
326
536
|
const onExecutionError = (event) => {
|
|
327
537
|
const detail = event.detail || {};
|
|
328
538
|
if (detail.node !== undefined) {
|
|
329
|
-
profiler.onNodeError(String(detail.node), detail.exception_message ||
|
|
539
|
+
profiler.onNodeError(String(detail.node), detail.exception_message || "Execution error");
|
|
330
540
|
}
|
|
331
541
|
};
|
|
332
542
|
// Attach listeners to client
|
|
333
|
-
client.addEventListener(
|
|
334
|
-
client.addEventListener(
|
|
335
|
-
client.addEventListener(
|
|
336
|
-
client.addEventListener(
|
|
543
|
+
client.addEventListener("execution_start", onExecutionStart);
|
|
544
|
+
client.addEventListener("execution_cached", onExecutionCached);
|
|
545
|
+
client.addEventListener("executing", onExecuting);
|
|
546
|
+
client.addEventListener("execution_error", onExecutionError);
|
|
337
547
|
// Cleanup function to remove listeners
|
|
338
548
|
const cleanupProfiler = () => {
|
|
339
|
-
client.removeEventListener(
|
|
340
|
-
client.removeEventListener(
|
|
341
|
-
client.removeEventListener(
|
|
342
|
-
client.removeEventListener(
|
|
549
|
+
client.removeEventListener("execution_start", onExecutionStart);
|
|
550
|
+
client.removeEventListener("execution_cached", onExecutionCached);
|
|
551
|
+
client.removeEventListener("executing", onExecuting);
|
|
552
|
+
client.removeEventListener("execution_error", onExecutionError);
|
|
343
553
|
};
|
|
344
554
|
// Ensure cleanup happens when job finishes
|
|
345
555
|
wrapper.onFinished(() => cleanupProfiler());
|
|
@@ -365,19 +575,19 @@ export class WorkflowPool extends TypedEventTarget {
|
|
|
365
575
|
};
|
|
366
576
|
const onExecutionStarted = (event) => {
|
|
367
577
|
// Execution started - reset timeout for first node
|
|
368
|
-
resetNodeTimeout(
|
|
578
|
+
resetNodeTimeout("execution_start");
|
|
369
579
|
};
|
|
370
580
|
if (nodeExecutionTimeout > 0) {
|
|
371
|
-
client.addEventListener(
|
|
372
|
-
client.addEventListener(
|
|
373
|
-
client.addEventListener(
|
|
581
|
+
client.addEventListener("execution_start", onExecutionStarted);
|
|
582
|
+
client.addEventListener("executing", onNodeExecuting);
|
|
583
|
+
client.addEventListener("progress", onNodeProgress);
|
|
374
584
|
}
|
|
375
585
|
const cleanupNodeTimeout = () => {
|
|
376
586
|
clearNodeTimeout();
|
|
377
587
|
if (nodeExecutionTimeout > 0) {
|
|
378
|
-
client.removeEventListener(
|
|
379
|
-
client.removeEventListener(
|
|
380
|
-
client.removeEventListener(
|
|
588
|
+
client.removeEventListener("execution_start", onExecutionStarted);
|
|
589
|
+
client.removeEventListener("executing", onNodeExecuting);
|
|
590
|
+
client.removeEventListener("progress", onNodeProgress);
|
|
381
591
|
}
|
|
382
592
|
};
|
|
383
593
|
let pendingSettled = false;
|
|
@@ -398,10 +608,11 @@ export class WorkflowPool extends TypedEventTarget {
|
|
|
398
608
|
};
|
|
399
609
|
});
|
|
400
610
|
let resolveCompletion;
|
|
401
|
-
let
|
|
402
|
-
|
|
611
|
+
let completionError;
|
|
612
|
+
// completionPromise is used to track when the wrapper completes (success or failure)
|
|
613
|
+
// It's resolved in onFinished and onFailed handlers
|
|
614
|
+
const completionPromise = new Promise((resolve) => {
|
|
403
615
|
resolveCompletion = resolve;
|
|
404
|
-
rejectCompletion = reject;
|
|
405
616
|
});
|
|
406
617
|
let jobStartedDispatched = false;
|
|
407
618
|
wrapper.onProgress((progress, promptId) => {
|
|
@@ -493,16 +704,20 @@ export class WorkflowPool extends TypedEventTarget {
|
|
|
493
704
|
}
|
|
494
705
|
job.result = resultPayload;
|
|
495
706
|
job.completedAt = Date.now();
|
|
707
|
+
this.clearJobFailures(job.jobId);
|
|
496
708
|
// Cleanup timeouts
|
|
497
709
|
cleanupNodeTimeout();
|
|
498
710
|
// Attach profiling stats if profiling was enabled
|
|
499
711
|
if (profiler) {
|
|
500
712
|
job.profileStats = profiler.getStats();
|
|
501
713
|
}
|
|
714
|
+
completionError = undefined;
|
|
502
715
|
this.dispatchEvent(new CustomEvent("job:completed", { detail: { job } }));
|
|
716
|
+
safeRelease({ success: true });
|
|
503
717
|
resolveCompletion?.();
|
|
504
718
|
});
|
|
505
719
|
wrapper.onFailed((error, promptId) => {
|
|
720
|
+
this.debugLog("[debug] wrapper.onFailed", job.jobId, error.name);
|
|
506
721
|
if (!job.promptId && promptId) {
|
|
507
722
|
job.promptId = promptId;
|
|
508
723
|
}
|
|
@@ -510,9 +725,13 @@ export class WorkflowPool extends TypedEventTarget {
|
|
|
510
725
|
// Cleanup timeouts
|
|
511
726
|
cleanupNodeTimeout();
|
|
512
727
|
rejectPending?.(error);
|
|
513
|
-
|
|
728
|
+
completionError = error;
|
|
729
|
+
this.debugLog("[debug] resolveCompletion available", Boolean(resolveCompletion));
|
|
730
|
+
safeRelease({ success: false });
|
|
731
|
+
resolveCompletion?.();
|
|
514
732
|
});
|
|
515
733
|
try {
|
|
734
|
+
// Start the workflow execution
|
|
516
735
|
const exec = wrapper.run();
|
|
517
736
|
// Add timeout for execution start to prevent jobs getting stuck
|
|
518
737
|
const executionStartTimeout = this.opts.executionStartTimeoutMs ?? 5000;
|
|
@@ -528,18 +747,21 @@ export class WorkflowPool extends TypedEventTarget {
|
|
|
528
747
|
})
|
|
529
748
|
]);
|
|
530
749
|
await pendingWithTimeout;
|
|
531
|
-
clearTimeout(pendingTimeoutId);
|
|
532
750
|
}
|
|
533
751
|
else {
|
|
534
752
|
await pendingPromise;
|
|
535
753
|
}
|
|
754
|
+
if (executionStartTimeout > 0) {
|
|
755
|
+
clearTimeout(pendingTimeoutId);
|
|
756
|
+
}
|
|
536
757
|
this.activeJobs.set(job.jobId, {
|
|
537
758
|
reservation,
|
|
538
759
|
job,
|
|
539
760
|
clientId,
|
|
540
|
-
release,
|
|
761
|
+
release: (opts) => safeRelease(opts),
|
|
541
762
|
cancel: async () => {
|
|
542
763
|
try {
|
|
764
|
+
wrapper.cancel("workflow pool cancel");
|
|
543
765
|
if (job.promptId) {
|
|
544
766
|
await client.ext.queue.interrupt(job.promptId);
|
|
545
767
|
}
|
|
@@ -547,36 +769,41 @@ export class WorkflowPool extends TypedEventTarget {
|
|
|
547
769
|
finally {
|
|
548
770
|
this.activeJobs.delete(job.jobId);
|
|
549
771
|
await this.queue.discard(reservation.reservationId, new Error("cancelled"));
|
|
550
|
-
|
|
772
|
+
safeRelease({ success: false });
|
|
551
773
|
}
|
|
552
774
|
}
|
|
553
775
|
});
|
|
554
776
|
const result = await exec;
|
|
777
|
+
// Wait for the wrapper to complete (onFinished or onFailed callback)
|
|
778
|
+
await completionPromise;
|
|
555
779
|
if (result === false) {
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
catch (err) {
|
|
561
|
-
throw err;
|
|
562
|
-
}
|
|
563
|
-
throw job.lastError ?? new Error("Execution failed");
|
|
780
|
+
const errorToThrow = (completionError instanceof Error ? completionError : undefined) ??
|
|
781
|
+
(job.lastError instanceof Error ? job.lastError : undefined) ??
|
|
782
|
+
new Error("Execution failed");
|
|
783
|
+
throw errorToThrow;
|
|
564
784
|
}
|
|
565
|
-
await completionPromise;
|
|
566
785
|
await this.queue.commit(reservation.reservationId);
|
|
567
|
-
|
|
786
|
+
safeRelease({ success: true });
|
|
568
787
|
}
|
|
569
788
|
catch (error) {
|
|
789
|
+
// Immediately release the client on any failure
|
|
790
|
+
safeRelease({ success: false });
|
|
570
791
|
const latestStatus = this.jobStore.get(job.jobId)?.status;
|
|
571
792
|
if (latestStatus === "cancelled") {
|
|
572
|
-
release({ success: false });
|
|
573
793
|
return;
|
|
574
794
|
}
|
|
575
795
|
job.lastError = error;
|
|
576
796
|
job.status = "failed";
|
|
577
|
-
this.clientManager.recordFailure(clientId, job, error);
|
|
578
797
|
const remainingAttempts = job.options.maxAttempts - job.attempts;
|
|
579
|
-
const
|
|
798
|
+
const failureAnalysis = analyzeWorkflowFailure(error);
|
|
799
|
+
this.rememberJobFailure(job, clientId, failureAnalysis);
|
|
800
|
+
if (failureAnalysis.blockClient === "permanent") {
|
|
801
|
+
this.addPermanentExclusion(job, clientId);
|
|
802
|
+
reservation.payload.options.excludeClientIds = [...(job.options.excludeClientIds ?? [])];
|
|
803
|
+
}
|
|
804
|
+
this.clientManager.recordFailure(clientId, job, error);
|
|
805
|
+
const hasRetryPath = this.hasRetryPath(job);
|
|
806
|
+
const willRetry = failureAnalysis.retryable && remainingAttempts > 0 && hasRetryPath;
|
|
580
807
|
this.dispatchEvent(new CustomEvent("job:failed", {
|
|
581
808
|
detail: { job, willRetry }
|
|
582
809
|
}));
|
|
@@ -589,19 +816,24 @@ export class WorkflowPool extends TypedEventTarget {
|
|
|
589
816
|
job.startedAt = undefined;
|
|
590
817
|
job.completedAt = undefined;
|
|
591
818
|
job.result = undefined;
|
|
819
|
+
reservation.payload.options.excludeClientIds = [...(job.options.excludeClientIds ?? [])];
|
|
592
820
|
await this.queue.retry(reservation.reservationId, { delayMs: delay });
|
|
593
821
|
this.dispatchEvent(new CustomEvent("job:queued", { detail: { job } }));
|
|
594
822
|
this.scheduleProcess(delay);
|
|
595
|
-
release({ success: false });
|
|
596
823
|
}
|
|
597
824
|
else {
|
|
598
825
|
job.completedAt = Date.now();
|
|
599
|
-
|
|
600
|
-
|
|
826
|
+
const finalError = !hasRetryPath && failureAnalysis.type === "client_incompatible" && this.jobFailureAnalysis.has(job.jobId)
|
|
827
|
+
? this.createWorkflowNotSupportedError(job, error)
|
|
828
|
+
: error;
|
|
829
|
+
job.lastError = finalError;
|
|
830
|
+
await this.queue.discard(reservation.reservationId, finalError);
|
|
831
|
+
this.clearJobFailures(job.jobId);
|
|
601
832
|
}
|
|
602
833
|
}
|
|
603
834
|
finally {
|
|
604
835
|
this.activeJobs.delete(job.jobId);
|
|
836
|
+
this.debugLog(`[runJob.finally] Job ${job.jobId.substring(0, 8)}... completed, calling processQueue()`);
|
|
605
837
|
void this.processQueue();
|
|
606
838
|
}
|
|
607
839
|
}
|