comfyui-node 1.6.4 → 1.6.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,846 +1,856 @@
1
- import { randomUUID } from "node:crypto";
2
- import { TypedEventTarget } from "../typed-event-target.js";
3
- import { Workflow } from "../workflow.js";
4
- import { PromptBuilder } from "../prompt-builder.js";
5
- import { CallWrapper } from "../call-wrapper.js";
6
- import { MemoryQueueAdapter } from "./queue/adapters/memory.js";
7
- import { SmartFailoverStrategy } from "./failover/SmartFailoverStrategy.js";
8
- import { ClientManager } from "./client/ClientManager.js";
9
- import { hashWorkflow } from "./utils/hash.js";
10
- import { cloneDeep } from "./utils/clone.js";
11
- import { JobProfiler } from "./profiling/JobProfiler.js";
12
- import { analyzeWorkflowFailure } from "./utils/failure-analysis.js";
13
- import { WorkflowNotSupportedError } from "../types/error.js";
14
- const DEFAULT_MAX_ATTEMPTS = 3;
15
- const DEFAULT_RETRY_DELAY = 1000;
16
- export class WorkflowPool extends TypedEventTarget {
17
- queue;
18
- strategy;
19
- clientManager;
20
- opts;
21
- jobStore = new Map();
22
- jobFailureAnalysis = new Map();
23
- affinities = new Map();
24
- initPromise;
25
- processing = false;
26
- processQueued = false;
27
- activeJobs = new Map();
28
- queueDebug = process.env.WORKFLOW_POOL_DEBUG === "1";
29
- debugLog(...args) {
30
- if (this.queueDebug) {
31
- console.log(...args);
32
- }
33
- }
34
- constructor(clients, opts) {
35
- super();
36
- this.strategy = opts?.failoverStrategy ?? new SmartFailoverStrategy();
37
- this.queue = opts?.queueAdapter ?? new MemoryQueueAdapter();
38
- this.clientManager = new ClientManager(this.strategy, {
39
- healthCheckIntervalMs: opts?.healthCheckIntervalMs ?? 30000
40
- });
41
- this.opts = opts ?? {};
42
- if (opts?.workflowAffinities) {
43
- for (const affinity of opts.workflowAffinities) {
44
- this.affinities.set(affinity.workflowHash, affinity);
45
- }
46
- }
47
- this.clientManager.on("client:state", (ev) => {
48
- this.dispatchEvent(new CustomEvent("client:state", { detail: ev.detail }));
49
- });
50
- this.clientManager.on("client:blocked_workflow", (ev) => {
51
- this.dispatchEvent(new CustomEvent("client:blocked_workflow", { detail: ev.detail }));
52
- });
53
- this.clientManager.on("client:unblocked_workflow", (ev) => {
54
- this.dispatchEvent(new CustomEvent("client:unblocked_workflow", { detail: ev.detail }));
55
- });
56
- this.initPromise = this.clientManager
57
- .initialize(clients)
58
- .then(() => {
59
- this.dispatchEvent(new CustomEvent("pool:ready", {
60
- detail: { clientIds: this.clientManager.list().map((c) => c.id) }
61
- }));
62
- })
63
- .catch((error) => {
64
- this.dispatchEvent(new CustomEvent("pool:error", { detail: { error } }));
65
- });
66
- }
67
- async ready() {
68
- await this.initPromise;
69
- }
70
- setAffinity(affinity) {
71
- this.affinities.set(affinity.workflowHash, affinity);
72
- }
73
- removeAffinity(workflowHash) {
74
- return this.affinities.delete(workflowHash);
75
- }
76
- getAffinities() {
77
- return Array.from(this.affinities.values());
78
- }
79
- async enqueue(workflowInput, options) {
80
- await this.ready();
81
- const workflowJson = this.normalizeWorkflow(workflowInput);
82
- // Use the workflow's pre-computed structureHash if available (from Workflow instance)
83
- // Otherwise compute it from the JSON
84
- let workflowHash;
85
- if (workflowInput instanceof Workflow) {
86
- workflowHash = workflowInput.structureHash ?? hashWorkflow(workflowJson);
87
- }
88
- else {
89
- workflowHash = hashWorkflow(workflowJson);
90
- }
91
- const jobId = options?.jobId ?? this.generateJobId();
92
- // Extract workflow metadata (outputAliases, outputNodeIds, etc.) if input is a Workflow instance
93
- let workflowMeta;
94
- if (workflowInput instanceof Workflow) {
95
- workflowMeta = {
96
- outputNodeIds: workflowInput.outputNodeIds ?? [],
97
- outputAliases: workflowInput.outputAliases ?? {}
98
- };
99
- }
100
- const affinity = this.affinities.get(workflowHash);
101
- const preferredClientIds = options?.preferredClientIds
102
- ? [...options.preferredClientIds]
103
- : (affinity?.preferredClientIds ? [...affinity.preferredClientIds] : []);
104
- const excludeClientIds = options?.excludeClientIds
105
- ? [...options.excludeClientIds]
106
- : (affinity?.excludeClientIds ? [...affinity.excludeClientIds] : []);
107
- const payload = {
108
- jobId,
109
- workflow: workflowJson,
110
- workflowHash,
111
- attempts: 0,
112
- enqueuedAt: Date.now(),
113
- workflowMeta,
114
- options: {
115
- maxAttempts: options?.maxAttempts ?? DEFAULT_MAX_ATTEMPTS,
116
- retryDelayMs: options?.retryDelayMs ?? DEFAULT_RETRY_DELAY,
117
- priority: options?.priority ?? 0,
118
- preferredClientIds: preferredClientIds,
119
- excludeClientIds: excludeClientIds,
120
- metadata: options?.metadata ?? {},
121
- includeOutputs: options?.includeOutputs ?? []
122
- }
123
- };
124
- const record = {
125
- ...payload,
126
- options: {
127
- ...payload.options,
128
- preferredClientIds: payload.options.preferredClientIds ? [...payload.options.preferredClientIds] : [],
129
- excludeClientIds: payload.options.excludeClientIds ? [...payload.options.excludeClientIds] : [],
130
- includeOutputs: payload.options.includeOutputs ? [...payload.options.includeOutputs] : []
131
- },
132
- attachments: options?.attachments,
133
- status: "queued"
134
- };
135
- this.jobStore.set(jobId, record);
136
- await this.queue.enqueue(payload, { priority: payload.options.priority });
137
- this.dispatchEvent(new CustomEvent("job:queued", { detail: { job: record } }));
138
- void this.processQueue();
139
- return jobId;
140
- }
141
- getJob(jobId) {
142
- return this.jobStore.get(jobId);
143
- }
144
- async cancel(jobId) {
145
- const record = this.jobStore.get(jobId);
146
- if (!record) {
147
- return false;
148
- }
149
- if (record.status === "queued") {
150
- const removed = await this.queue.remove(jobId);
151
- if (removed) {
152
- record.status = "cancelled";
153
- record.completedAt = Date.now();
154
- this.clearJobFailures(jobId);
155
- this.dispatchEvent(new CustomEvent("job:cancelled", { detail: { job: record } }));
156
- return true;
157
- }
158
- }
159
- const active = this.activeJobs.get(jobId);
160
- if (active?.cancel) {
161
- await active.cancel();
162
- record.status = "cancelled";
163
- record.completedAt = Date.now();
164
- this.clearJobFailures(jobId);
165
- this.dispatchEvent(new CustomEvent("job:cancelled", { detail: { job: record } }));
166
- return true;
167
- }
168
- return false;
169
- }
170
- async shutdown() {
171
- this.clientManager.destroy();
172
- await this.queue.shutdown();
173
- for (const [, ctx] of Array.from(this.activeJobs)) {
174
- ctx.release({ success: false });
175
- }
176
- this.activeJobs.clear();
177
- }
178
- async getQueueStats() {
179
- return this.queue.stats();
180
- }
181
- normalizeWorkflow(input) {
182
- if (typeof input === "string") {
183
- return JSON.parse(input);
184
- }
185
- if (input instanceof Workflow) {
186
- return cloneDeep(input.json ?? {});
187
- }
188
- if (typeof input?.toJSON === "function") {
189
- return cloneDeep(input.toJSON());
190
- }
191
- return cloneDeep(input);
192
- }
193
- generateJobId() {
194
- try {
195
- return randomUUID();
196
- }
197
- catch {
198
- return WorkflowPool.fallbackId();
199
- }
200
- }
201
- static fallbackId() {
202
- return globalThis.crypto && "randomUUID" in globalThis.crypto
203
- ? globalThis.crypto.randomUUID()
204
- : `job_${Math.random().toString(36).slice(2, 10)}`;
205
- }
206
- scheduleProcess(delayMs) {
207
- const wait = Math.max(delayMs, 10);
208
- setTimeout(() => {
209
- void this.processQueue();
210
- }, wait);
211
- }
212
- applyAutoSeed(workflow) {
213
- const autoSeeds = {};
214
- for (const [nodeId, nodeValue] of Object.entries(workflow)) {
215
- if (!nodeValue || typeof nodeValue !== "object")
216
- continue;
217
- const inputs = nodeValue.inputs;
218
- if (!inputs || typeof inputs !== "object")
219
- continue;
220
- if (typeof inputs.seed === "number" && inputs.seed === -1) {
221
- const val = Math.floor(Math.random() * 2_147_483_647);
222
- inputs.seed = val;
223
- autoSeeds[nodeId] = val;
224
- }
225
- }
226
- return autoSeeds;
227
- }
228
- rememberJobFailure(job, clientId, analysis) {
229
- let map = this.jobFailureAnalysis.get(job.jobId);
230
- if (!map) {
231
- map = new Map();
232
- this.jobFailureAnalysis.set(job.jobId, map);
233
- }
234
- map.set(clientId, analysis);
235
- }
236
- clearJobFailures(jobId) {
237
- this.jobFailureAnalysis.delete(jobId);
238
- }
239
- collectFailureReasons(jobId) {
240
- const map = this.jobFailureAnalysis.get(jobId);
241
- if (!map) {
242
- return {};
243
- }
244
- const reasons = {};
245
- for (const [clientId, analysis] of map.entries()) {
246
- reasons[clientId] = analysis.reason;
247
- }
248
- return reasons;
249
- }
250
- addPermanentExclusion(job, clientId) {
251
- if (!job.options.excludeClientIds) {
252
- job.options.excludeClientIds = [];
253
- }
254
- if (!job.options.excludeClientIds.includes(clientId)) {
255
- job.options.excludeClientIds.push(clientId);
256
- }
257
- }
258
- hasRetryPath(job) {
259
- const map = this.jobFailureAnalysis.get(job.jobId);
260
- const exclude = new Set(job.options.excludeClientIds ?? []);
261
- const preferred = job.options.preferredClientIds?.length ? new Set(job.options.preferredClientIds) : null;
262
- for (const client of this.clientManager.list()) {
263
- if (preferred && !preferred.has(client.id)) {
264
- continue;
265
- }
266
- if (exclude.has(client.id)) {
267
- continue;
268
- }
269
- const analysis = map?.get(client.id);
270
- if (analysis?.blockClient === "permanent") {
271
- continue;
272
- }
273
- return true;
274
- }
275
- return false;
276
- }
277
- createWorkflowNotSupportedError(job, cause) {
278
- const reasons = this.collectFailureReasons(job.jobId);
279
- const message = `Workflow ${job.workflowHash} is not supported by any connected clients`;
280
- return new WorkflowNotSupportedError(message, {
281
- workflowHash: job.workflowHash,
282
- reasons,
283
- cause
284
- });
285
- }
286
- async processQueue() {
287
- this.debugLog("[processQueue] Called");
288
- if (this.processing) {
289
- this.debugLog("[processQueue] Already processing, returning early");
290
- this.processQueued = true;
291
- return;
292
- }
293
- this.processing = true;
294
- try {
295
- // Continue processing until no more jobs can be assigned
296
- let iteration = 0;
297
- while (true) {
298
- iteration++;
299
- this.debugLog(`[processQueue] Iteration ${iteration}`);
300
- const idleClients = this.clientManager.list().filter(c => this.clientManager.isClientStable(c));
301
- this.debugLog(`[processQueue] Idle clients: [${idleClients.map(c => c.id).join(", ")}] (${idleClients.length})`);
302
- if (!idleClients.length) {
303
- this.debugLog("[processQueue] No idle clients, breaking");
304
- break; // No idle clients available
305
- }
306
- const waitingJobs = await this.queue.peek(100); // Peek at top 100 jobs
307
- this.debugLog(`[processQueue] Waiting jobs in queue: ${waitingJobs.length}`);
308
- if (!waitingJobs.length) {
309
- this.debugLog("[processQueue] No waiting jobs, breaking");
310
- break; // No jobs in queue
311
- }
312
- const leasedClientIds = new Set();
313
- const reservedJobIds = new Set();
314
- const jobMatchInfos = [];
315
- for (const jobPayload of waitingJobs) {
316
- const job = this.jobStore.get(jobPayload.jobId);
317
- if (!job) {
318
- this.debugLog(`[processQueue] Job ${jobPayload.jobId} not in jobStore, skipping`);
319
- continue;
320
- }
321
- const compatibleClients = idleClients
322
- .filter(client => {
323
- const canRun = this.clientManager.canClientRunJob(client, job);
324
- if (!canRun) {
325
- this.debugLog(`[processQueue] Job ${job.jobId.substring(0, 8)}... NOT compatible with ${client.id}. Checking why...`);
326
- this.debugLog(`[processQueue] - preferredClientIds: ${JSON.stringify(job.options.preferredClientIds)}`);
327
- this.debugLog(`[processQueue] - excludeClientIds: ${JSON.stringify(job.options.excludeClientIds)}`);
328
- this.debugLog(`[processQueue] - client.id: ${client.id}`);
329
- }
330
- return canRun;
331
- })
332
- .map(client => client.id);
333
- this.debugLog(`[processQueue] Job ${job.jobId.substring(0, 8)}... compatible with: [${compatibleClients.join(", ")}] (selectivity=${compatibleClients.length})`);
334
- if (compatibleClients.length > 0) {
335
- jobMatchInfos.push({
336
- jobPayload,
337
- job,
338
- compatibleClients,
339
- selectivity: compatibleClients.length
340
- });
341
- }
342
- }
343
- this.debugLog(`[processQueue] Found ${jobMatchInfos.length} compatible job matches`);
344
- if (jobMatchInfos.length === 0) {
345
- this.debugLog("[processQueue] No compatible jobs for idle clients, breaking");
346
- break; // No compatible jobs for idle clients
347
- }
348
- // Sort jobs by priority first, then selectivity, to maximize throughput
349
- // 1. Higher priority jobs execute first (explicit user priority)
350
- // 2. More selective jobs (fewer compatible clients) assigned first within same priority
351
- // 3. Earlier queue position as final tiebreaker
352
- jobMatchInfos.sort((a, b) => {
353
- // Primary: priority (higher priority = higher precedence)
354
- const aPriority = a.job.options.priority ?? 0;
355
- const bPriority = b.job.options.priority ?? 0;
356
- if (aPriority !== bPriority) {
357
- return bPriority - aPriority; // Higher priority first
358
- }
359
- // Secondary: selectivity (fewer compatible clients = higher precedence)
360
- if (a.selectivity !== b.selectivity) {
361
- return a.selectivity - b.selectivity;
362
- }
363
- // Tertiary: maintain queue order (earlier jobs first)
364
- const aIndex = waitingJobs.indexOf(a.jobPayload);
365
- const bIndex = waitingJobs.indexOf(b.jobPayload);
366
- return aIndex - bIndex;
367
- });
368
- // Assign jobs to clients using the selectivity-based ordering
369
- let assignedAnyJob = false;
370
- for (const matchInfo of jobMatchInfos) {
371
- if (reservedJobIds.has(matchInfo.job.jobId))
372
- continue;
373
- // Find first available compatible client
374
- const availableClient = matchInfo.compatibleClients.find(clientId => !leasedClientIds.has(clientId));
375
- if (!availableClient) {
376
- this.debugLog(`[processQueue] No available client for job ${matchInfo.job.jobId.substring(0, 8)}...`);
377
- continue; // No available clients for this job
378
- }
379
- this.debugLog(`[processQueue] Reserving job ${matchInfo.job.jobId.substring(0, 8)}... for client ${availableClient}`);
380
- const reservation = await this.queue.reserveById(matchInfo.job.jobId);
381
- if (reservation) {
382
- // Mark as leased/reserved for this cycle
383
- leasedClientIds.add(availableClient);
384
- reservedJobIds.add(matchInfo.job.jobId);
385
- assignedAnyJob = true;
386
- // Get the lease (which marks the client as busy)
387
- const lease = this.clientManager.claim(matchInfo.job, availableClient);
388
- if (lease) {
389
- this.debugLog(`[processQueue] Starting job ${matchInfo.job.jobId.substring(0, 8)}... on client ${availableClient}`);
390
- this.runJob({
391
- reservation,
392
- job: matchInfo.job,
393
- clientId: lease.clientId,
394
- release: lease.release
395
- }).catch((error) => {
396
- console.error("[WorkflowPool] Unhandled job error", error);
397
- });
398
- }
399
- else {
400
- // This should not happen since we checked canClientRunJob, but handle defensively
401
- console.error(`[processQueue.processQueue] CRITICAL: Failed to claim client ${availableClient} for job ${matchInfo.job.jobId} after successful check.`);
402
- await this.queue.retry(reservation.reservationId, { delayMs: matchInfo.job.options.retryDelayMs });
403
- }
404
- }
405
- else {
406
- this.debugLog(`[processQueue] Failed to reserve job ${matchInfo.job.jobId.substring(0, 8)}...`);
407
- }
408
- }
409
- this.debugLog(`[processQueue] Assigned any job in this iteration: ${assignedAnyJob}`);
410
- // If we didn't assign any jobs this iteration, no point continuing
411
- if (!assignedAnyJob) {
412
- this.debugLog("[processQueue] No jobs assigned, breaking");
413
- break;
414
- }
415
- }
416
- }
417
- finally {
418
- this.debugLog("[processQueue] Exiting, setting processing = false");
419
- this.processing = false;
420
- if (this.processQueued) {
421
- this.debugLog("[processQueue] Pending rerun detected, draining queue again");
422
- this.processQueued = false;
423
- void this.processQueue();
424
- }
425
- }
426
- }
427
- async runJob(ctx) {
428
- const { reservation, job, clientId, release } = ctx;
429
- let released = false;
430
- const safeRelease = (opts) => {
431
- if (released) {
432
- return;
433
- }
434
- released = true;
435
- release(opts);
436
- };
437
- const managed = this.clientManager.getClient(clientId);
438
- const client = managed?.client;
439
- if (!client) {
440
- await this.queue.retry(reservation.reservationId, { delayMs: job.options.retryDelayMs });
441
- safeRelease({ success: false });
442
- return;
443
- }
444
- job.status = "running";
445
- job.clientId = clientId;
446
- job.attempts += 1;
447
- reservation.payload.attempts = job.attempts;
448
- job.startedAt = Date.now();
449
- // Don't dispatch job:started here - wait until we have promptId in onPending
450
- // this.dispatchEvent(new CustomEvent("job:started", { detail: { job } }));
451
- const workflowPayload = cloneDeep(reservation.payload.workflow);
452
- if (job.attachments?.length) {
453
- for (const attachment of job.attachments) {
454
- const filename = attachment.filename ?? `${job.jobId}-${attachment.nodeId}-${attachment.inputName}.bin`;
455
- const blob = attachment.file instanceof Buffer ? new Blob([new Uint8Array(attachment.file)]) : attachment.file;
456
- await client.ext.file.uploadImage(blob, filename, { override: true });
457
- const node = workflowPayload[attachment.nodeId];
458
- if (node?.inputs) {
459
- node.inputs[attachment.inputName] = filename;
460
- }
461
- }
462
- }
463
- const autoSeeds = this.applyAutoSeed(workflowPayload);
464
- let wfInstance = Workflow.from(workflowPayload);
465
- if (job.options.includeOutputs?.length) {
466
- for (const nodeId of job.options.includeOutputs) {
467
- if (nodeId) {
468
- wfInstance = wfInstance.output(nodeId);
469
- }
470
- }
471
- }
472
- wfInstance.inferDefaultOutputs?.();
473
- // Use stored metadata if available (from Workflow instance), otherwise extract from recreated instance
474
- const outputNodeIds = reservation.payload.workflowMeta?.outputNodeIds ??
475
- wfInstance.outputNodeIds ??
476
- job.options.includeOutputs ??
477
- [];
478
- const outputAliases = reservation.payload.workflowMeta?.outputAliases ?? wfInstance.outputAliases ?? {};
479
- let promptBuilder = new PromptBuilder(wfInstance.json, wfInstance.inputPaths ?? [], outputNodeIds);
480
- for (const nodeId of outputNodeIds) {
481
- const alias = outputAliases[nodeId] ?? nodeId;
482
- promptBuilder = promptBuilder.setOutputNode(alias, nodeId);
483
- }
484
- const wrapper = new CallWrapper(client, promptBuilder);
485
- // Setup profiling if enabled
486
- const profiler = this.opts.enableProfiling ? new JobProfiler(job.enqueuedAt, workflowPayload) : undefined;
487
- // Setup node execution timeout tracking
488
- const nodeExecutionTimeout = this.opts.nodeExecutionTimeoutMs ?? 300000; // 5 minutes default
489
- let nodeTimeoutId;
490
- let lastNodeStartTime;
491
- let currentExecutingNode = null;
492
- const resetNodeTimeout = (nodeName) => {
493
- if (nodeTimeoutId) {
494
- clearTimeout(nodeTimeoutId);
495
- nodeTimeoutId = undefined;
496
- }
497
- if (nodeExecutionTimeout > 0 && nodeName !== null) {
498
- lastNodeStartTime = Date.now();
499
- currentExecutingNode = nodeName || null;
500
- nodeTimeoutId = setTimeout(() => {
501
- const elapsed = Date.now() - (lastNodeStartTime || 0);
502
- const nodeInfo = currentExecutingNode ? ` (node: ${currentExecutingNode})` : "";
503
- completionError = new Error(`Node execution timeout: took longer than ${nodeExecutionTimeout}ms${nodeInfo}. ` +
504
- `Actual time: ${elapsed}ms. Server may be stuck or node is too slow for configured timeout.`);
505
- resolveCompletion?.();
506
- }, nodeExecutionTimeout);
507
- }
508
- };
509
- const clearNodeTimeout = () => {
510
- if (nodeTimeoutId) {
511
- clearTimeout(nodeTimeoutId);
512
- nodeTimeoutId = undefined;
513
- }
514
- currentExecutingNode = null;
515
- lastNodeStartTime = undefined;
516
- };
517
- // Setup profiling event listeners on the raw ComfyUI client
518
- if (profiler) {
519
- const onExecutionStart = (event) => {
520
- const promptId = event.detail?.prompt_id;
521
- if (promptId) {
522
- profiler.onExecutionStart(promptId);
523
- }
524
- };
525
- const onExecutionCached = (event) => {
526
- const nodes = event.detail?.nodes;
527
- if (Array.isArray(nodes)) {
528
- profiler.onCachedNodes(nodes.map(String));
529
- }
530
- };
531
- const onExecuting = (event) => {
532
- const node = event.detail?.node;
533
- if (node === null) {
534
- // Workflow completed
535
- profiler.onExecutionComplete();
536
- }
537
- else if (node !== undefined) {
538
- profiler.onNodeExecuting(String(node));
539
- }
540
- };
541
- const onExecutionError = (event) => {
542
- const detail = event.detail || {};
543
- if (detail.node !== undefined) {
544
- profiler.onNodeError(String(detail.node), detail.exception_message || "Execution error");
545
- }
546
- };
547
- // Attach listeners to client
548
- client.addEventListener("execution_start", onExecutionStart);
549
- client.addEventListener("execution_cached", onExecutionCached);
550
- client.addEventListener("executing", onExecuting);
551
- client.addEventListener("execution_error", onExecutionError);
552
- // Cleanup function to remove listeners
553
- const cleanupProfiler = () => {
554
- client.removeEventListener("execution_start", onExecutionStart);
555
- client.removeEventListener("execution_cached", onExecutionCached);
556
- client.removeEventListener("executing", onExecuting);
557
- client.removeEventListener("execution_error", onExecutionError);
558
- };
559
- // Ensure cleanup happens when job finishes
560
- wrapper.onFinished(() => cleanupProfiler());
561
- wrapper.onFailed(() => cleanupProfiler());
562
- }
563
- // Setup node execution timeout listeners (always active if timeout > 0)
564
- const onNodeExecuting = (event) => {
565
- const node = event.detail?.node;
566
- if (node === null) {
567
- // Workflow completed - clear timeout
568
- clearNodeTimeout();
569
- }
570
- else if (node !== undefined) {
571
- // New node started - reset timeout
572
- resetNodeTimeout(String(node));
573
- }
574
- };
575
- const onNodeProgress = (event) => {
576
- // Progress event means node is still working - reset timeout
577
- if (event.detail?.node) {
578
- resetNodeTimeout(String(event.detail.node));
579
- }
580
- };
581
- const onExecutionStarted = (event) => {
582
- // Execution started - reset timeout for first node
583
- resetNodeTimeout("execution_start");
584
- };
585
- if (nodeExecutionTimeout > 0) {
586
- client.addEventListener("execution_start", onExecutionStarted);
587
- client.addEventListener("executing", onNodeExecuting);
588
- client.addEventListener("progress", onNodeProgress);
589
- }
590
- const cleanupNodeTimeout = () => {
591
- clearNodeTimeout();
592
- if (nodeExecutionTimeout > 0) {
593
- client.removeEventListener("execution_start", onExecutionStarted);
594
- client.removeEventListener("executing", onNodeExecuting);
595
- client.removeEventListener("progress", onNodeProgress);
596
- }
597
- };
598
- let pendingSettled = false;
599
- let resolvePending;
600
- let rejectPending;
601
- const pendingPromise = new Promise((resolve, reject) => {
602
- resolvePending = () => {
603
- if (!pendingSettled) {
604
- pendingSettled = true;
605
- resolve();
606
- }
607
- };
608
- rejectPending = (err) => {
609
- if (!pendingSettled) {
610
- pendingSettled = true;
611
- reject(err);
612
- }
613
- };
614
- });
615
- let resolveCompletion;
616
- let completionError;
617
- // completionPromise is used to track when the wrapper completes (success or failure)
618
- // It's resolved in onFinished and onFailed handlers
619
- const completionPromise = new Promise((resolve) => {
620
- resolveCompletion = resolve;
621
- });
622
- let jobStartedDispatched = false;
623
- wrapper.onProgress((progress, promptId) => {
624
- if (!job.promptId && promptId) {
625
- job.promptId = promptId;
626
- }
627
- // Dispatch job:started on first progress update with promptId
628
- if (!jobStartedDispatched && job.promptId) {
629
- jobStartedDispatched = true;
630
- this.dispatchEvent(new CustomEvent("job:started", { detail: { job } }));
631
- }
632
- // Feed progress to profiler
633
- if (profiler) {
634
- profiler.onProgress(progress);
635
- }
636
- this.dispatchEvent(new CustomEvent("job:progress", {
637
- detail: { jobId: job.jobId, clientId, progress }
638
- }));
639
- });
640
- wrapper.onPreview((blob, promptId) => {
641
- if (!job.promptId && promptId) {
642
- job.promptId = promptId;
643
- }
644
- // Dispatch job:started on first preview with promptId
645
- if (!jobStartedDispatched && job.promptId) {
646
- jobStartedDispatched = true;
647
- this.dispatchEvent(new CustomEvent("job:started", { detail: { job } }));
648
- }
649
- this.dispatchEvent(new CustomEvent("job:preview", {
650
- detail: { jobId: job.jobId, clientId, blob }
651
- }));
652
- });
653
- wrapper.onPreviewMeta((payload, promptId) => {
654
- if (!job.promptId && promptId) {
655
- job.promptId = promptId;
656
- }
657
- // Dispatch job:started on first preview_meta with promptId
658
- if (!jobStartedDispatched && job.promptId) {
659
- jobStartedDispatched = true;
660
- this.dispatchEvent(new CustomEvent("job:started", { detail: { job } }));
661
- }
662
- this.dispatchEvent(new CustomEvent("job:preview_meta", {
663
- detail: { jobId: job.jobId, clientId, payload }
664
- }));
665
- });
666
- wrapper.onOutput((key, data, promptId) => {
667
- if (!job.promptId && promptId) {
668
- job.promptId = promptId;
669
- }
670
- this.dispatchEvent(new CustomEvent("job:output", {
671
- detail: { jobId: job.jobId, clientId, key: String(key), data }
672
- }));
673
- });
674
- wrapper.onPending((promptId) => {
675
- if (!job.promptId && promptId) {
676
- job.promptId = promptId;
677
- }
678
- // Don't dispatch job:started here - wait for first progress/preview with promptId
679
- this.dispatchEvent(new CustomEvent("job:accepted", { detail: { job } }));
680
- resolvePending?.();
681
- });
682
- wrapper.onStart((promptId) => {
683
- if (!job.promptId && promptId) {
684
- job.promptId = promptId;
685
- }
686
- });
687
- wrapper.onFinished((data, promptId) => {
688
- if (!job.promptId && promptId) {
689
- job.promptId = promptId;
690
- }
691
- job.status = "completed";
692
- job.lastError = undefined;
693
- const resultPayload = {};
694
- for (const nodeId of outputNodeIds) {
695
- const alias = outputAliases[nodeId] ?? nodeId;
696
- // CallWrapper uses alias keys when mapOutputKeys is configured, fallback to nodeId
697
- const nodeResult = data[alias];
698
- const fallbackResult = data[nodeId];
699
- const finalResult = nodeResult !== undefined ? nodeResult : fallbackResult;
700
- resultPayload[alias] = finalResult;
701
- }
702
- resultPayload._nodes = [...outputNodeIds];
703
- resultPayload._aliases = { ...outputAliases };
704
- if (job.promptId) {
705
- resultPayload._promptId = job.promptId;
706
- }
707
- if (Object.keys(autoSeeds).length) {
708
- resultPayload._autoSeeds = { ...autoSeeds };
709
- }
710
- job.result = resultPayload;
711
- job.completedAt = Date.now();
712
- this.clearJobFailures(job.jobId);
713
- // Cleanup timeouts
714
- cleanupNodeTimeout();
715
- // Attach profiling stats if profiling was enabled
716
- if (profiler) {
717
- job.profileStats = profiler.getStats();
718
- }
719
- completionError = undefined;
720
- this.dispatchEvent(new CustomEvent("job:completed", { detail: { job } }));
721
- safeRelease({ success: true });
722
- resolveCompletion?.();
723
- });
724
- wrapper.onFailed((error, promptId) => {
725
- this.debugLog("[debug] wrapper.onFailed", job.jobId, error.name);
726
- if (!job.promptId && promptId) {
727
- job.promptId = promptId;
728
- }
729
- job.lastError = error;
730
- // Cleanup timeouts
731
- cleanupNodeTimeout();
732
- rejectPending?.(error);
733
- completionError = error;
734
- this.debugLog("[debug] resolveCompletion available", Boolean(resolveCompletion));
735
- safeRelease({ success: false });
736
- resolveCompletion?.();
737
- });
738
- try {
739
- // Start the workflow execution
740
- const exec = wrapper.run();
741
- // Add timeout for execution start to prevent jobs getting stuck
742
- const executionStartTimeout = this.opts.executionStartTimeoutMs ?? 5000;
743
- let pendingTimeoutId;
744
- if (executionStartTimeout > 0) {
745
- const pendingWithTimeout = Promise.race([
746
- pendingPromise,
747
- new Promise((_, reject) => {
748
- pendingTimeoutId = setTimeout(() => {
749
- reject(new Error(`Execution failed to start within ${executionStartTimeout}ms. ` +
750
- `Server may be stuck or unresponsive.`));
751
- }, executionStartTimeout);
752
- })
753
- ]);
754
- await pendingWithTimeout;
755
- }
756
- else {
757
- await pendingPromise;
758
- }
759
- if (executionStartTimeout > 0) {
760
- clearTimeout(pendingTimeoutId);
761
- }
762
- this.activeJobs.set(job.jobId, {
763
- reservation,
764
- job,
765
- clientId,
766
- release: (opts) => safeRelease(opts),
767
- cancel: async () => {
768
- try {
769
- wrapper.cancel("workflow pool cancel");
770
- if (job.promptId) {
771
- await client.ext.queue.interrupt(job.promptId);
772
- }
773
- }
774
- finally {
775
- this.activeJobs.delete(job.jobId);
776
- await this.queue.discard(reservation.reservationId, new Error("cancelled"));
777
- safeRelease({ success: false });
778
- }
779
- }
780
- });
781
- const result = await exec;
782
- // Wait for the wrapper to complete (onFinished or onFailed callback)
783
- await completionPromise;
784
- if (result === false) {
785
- const errorToThrow = (completionError instanceof Error ? completionError : undefined) ??
786
- (job.lastError instanceof Error ? job.lastError : undefined) ??
787
- new Error("Execution failed");
788
- throw errorToThrow;
789
- }
790
- await this.queue.commit(reservation.reservationId);
791
- safeRelease({ success: true });
792
- }
793
- catch (error) {
794
- // Immediately release the client on any failure
795
- safeRelease({ success: false });
796
- const latestStatus = this.jobStore.get(job.jobId)?.status;
797
- if (latestStatus === "cancelled") {
798
- return;
799
- }
800
- job.lastError = error;
801
- job.status = "failed";
802
- const remainingAttempts = job.options.maxAttempts - job.attempts;
803
- const failureAnalysis = analyzeWorkflowFailure(error);
804
- this.rememberJobFailure(job, clientId, failureAnalysis);
805
- if (failureAnalysis.blockClient === "permanent") {
806
- this.addPermanentExclusion(job, clientId);
807
- reservation.payload.options.excludeClientIds = [...(job.options.excludeClientIds ?? [])];
808
- }
809
- this.clientManager.recordFailure(clientId, job, error);
810
- const hasRetryPath = this.hasRetryPath(job);
811
- const willRetry = failureAnalysis.retryable && remainingAttempts > 0 && hasRetryPath;
812
- this.dispatchEvent(new CustomEvent("job:failed", {
813
- detail: { job, willRetry }
814
- }));
815
- if (willRetry) {
816
- const delay = this.opts.retryBackoffMs ?? job.options.retryDelayMs;
817
- this.dispatchEvent(new CustomEvent("job:retrying", { detail: { job, delayMs: delay } }));
818
- job.status = "queued";
819
- job.clientId = undefined;
820
- job.promptId = undefined;
821
- job.startedAt = undefined;
822
- job.completedAt = undefined;
823
- job.result = undefined;
824
- reservation.payload.options.excludeClientIds = [...(job.options.excludeClientIds ?? [])];
825
- await this.queue.retry(reservation.reservationId, { delayMs: delay });
826
- this.dispatchEvent(new CustomEvent("job:queued", { detail: { job } }));
827
- this.scheduleProcess(delay);
828
- }
829
- else {
830
- job.completedAt = Date.now();
831
- const finalError = !hasRetryPath && failureAnalysis.type === "client_incompatible" && this.jobFailureAnalysis.has(job.jobId)
832
- ? this.createWorkflowNotSupportedError(job, error)
833
- : error;
834
- job.lastError = finalError;
835
- await this.queue.discard(reservation.reservationId, finalError);
836
- this.clearJobFailures(job.jobId);
837
- }
838
- }
839
- finally {
840
- this.activeJobs.delete(job.jobId);
841
- this.debugLog(`[runJob.finally] Job ${job.jobId.substring(0, 8)}... completed, calling processQueue()`);
842
- void this.processQueue();
843
- }
844
- }
845
- }
1
+ import { randomUUID } from "node:crypto";
2
+ import { TypedEventTarget } from "../typed-event-target.js";
3
+ import { Workflow } from "../workflow.js";
4
+ import { PromptBuilder } from "../prompt-builder.js";
5
+ import { CallWrapper } from "../call-wrapper.js";
6
+ import { MemoryQueueAdapter } from "./queue/adapters/memory.js";
7
+ import { SmartFailoverStrategy } from "./failover/SmartFailoverStrategy.js";
8
+ import { ClientManager } from "./client/ClientManager.js";
9
+ import { hashWorkflow } from "./utils/hash.js";
10
+ import { cloneDeep } from "./utils/clone.js";
11
+ import { JobProfiler } from "./profiling/JobProfiler.js";
12
+ import { analyzeWorkflowFailure } from "./utils/failure-analysis.js";
13
+ import { WorkflowNotSupportedError } from "../types/error.js";
14
/** Attempts allowed per job when the caller does not pass `maxAttempts`. */
const DEFAULT_MAX_ATTEMPTS = 3;
/** Retry delay in milliseconds used when the caller does not pass `retryDelayMs`. */
const DEFAULT_RETRY_DELAY = 1_000;
16
+ export class WorkflowPool extends TypedEventTarget {
17
+ queue;
18
+ strategy;
19
+ clientManager;
20
+ opts;
21
+ jobStore = new Map();
22
+ jobFailureAnalysis = new Map();
23
+ affinities = new Map();
24
+ initPromise;
25
+ processing = false;
26
+ processQueued = false;
27
+ activeJobs = new Map();
28
+ queueDebug = process.env.WORKFLOW_POOL_DEBUG === "1";
29
+ debugLog(...args) {
30
+ if (this.queueDebug) {
31
+ console.log(...args);
32
+ }
33
+ }
34
+ constructor(clients, opts) {
35
+ super();
36
+ this.strategy = opts?.failoverStrategy ?? new SmartFailoverStrategy();
37
+ this.queue = opts?.queueAdapter ?? new MemoryQueueAdapter();
38
+ this.clientManager = new ClientManager(this.strategy, {
39
+ healthCheckIntervalMs: opts?.healthCheckIntervalMs ?? 30000
40
+ });
41
+ this.opts = opts ?? {};
42
+ if (opts?.workflowAffinities) {
43
+ for (const affinity of opts.workflowAffinities) {
44
+ this.affinities.set(affinity.workflowHash, affinity);
45
+ }
46
+ }
47
+ this.clientManager.on("client:state", (ev) => {
48
+ this.dispatchEvent(new CustomEvent("client:state", { detail: ev.detail }));
49
+ });
50
+ this.clientManager.on("client:blocked_workflow", (ev) => {
51
+ this.dispatchEvent(new CustomEvent("client:blocked_workflow", { detail: ev.detail }));
52
+ });
53
+ this.clientManager.on("client:unblocked_workflow", (ev) => {
54
+ this.dispatchEvent(new CustomEvent("client:unblocked_workflow", { detail: ev.detail }));
55
+ });
56
+ this.initPromise = this.clientManager
57
+ .initialize(clients)
58
+ .then(() => {
59
+ this.dispatchEvent(new CustomEvent("pool:ready", {
60
+ detail: { clientIds: this.clientManager.list().map((c) => c.id) }
61
+ }));
62
+ })
63
+ .catch((error) => {
64
+ this.dispatchEvent(new CustomEvent("pool:error", { detail: { error } }));
65
+ });
66
+ }
67
+ async ready() {
68
+ await this.initPromise;
69
+ }
70
+ setAffinity(affinity) {
71
+ this.affinities.set(affinity.workflowHash, affinity);
72
+ }
73
+ removeAffinity(workflowHash) {
74
+ return this.affinities.delete(workflowHash);
75
+ }
76
+ getAffinities() {
77
+ return Array.from(this.affinities.values());
78
+ }
79
+ async enqueue(workflowInput, options) {
80
+ await this.ready();
81
+ const workflowJson = this.normalizeWorkflow(workflowInput);
82
+ // Use the workflow's pre-computed structureHash if available (from Workflow instance)
83
+ // Otherwise compute it from the JSON
84
+ let workflowHash;
85
+ if (workflowInput instanceof Workflow) {
86
+ workflowHash = workflowInput.structureHash ?? hashWorkflow(workflowJson);
87
+ }
88
+ else {
89
+ workflowHash = hashWorkflow(workflowJson);
90
+ }
91
+ const jobId = options?.jobId ?? this.generateJobId();
92
+ // Extract workflow metadata (outputAliases, outputNodeIds, etc.) if input is a Workflow instance
93
+ let workflowMeta;
94
+ if (workflowInput instanceof Workflow) {
95
+ workflowMeta = {
96
+ outputNodeIds: workflowInput.outputNodeIds ?? [],
97
+ outputAliases: workflowInput.outputAliases ?? {}
98
+ };
99
+ }
100
+ const affinity = this.affinities.get(workflowHash);
101
+ const preferredClientIds = options?.preferredClientIds
102
+ ? [...options.preferredClientIds]
103
+ : affinity?.preferredClientIds
104
+ ? [...affinity.preferredClientIds]
105
+ : [];
106
+ const excludeClientIds = options?.excludeClientIds
107
+ ? [...options.excludeClientIds]
108
+ : affinity?.excludeClientIds
109
+ ? [...affinity.excludeClientIds]
110
+ : [];
111
+ const payload = {
112
+ jobId,
113
+ workflow: workflowJson,
114
+ workflowHash,
115
+ attempts: 0,
116
+ enqueuedAt: Date.now(),
117
+ workflowMeta,
118
+ options: {
119
+ maxAttempts: options?.maxAttempts ?? DEFAULT_MAX_ATTEMPTS,
120
+ retryDelayMs: options?.retryDelayMs ?? DEFAULT_RETRY_DELAY,
121
+ priority: options?.priority ?? 0,
122
+ preferredClientIds: preferredClientIds,
123
+ excludeClientIds: excludeClientIds,
124
+ metadata: options?.metadata ?? {},
125
+ includeOutputs: options?.includeOutputs ?? []
126
+ },
127
+ timeouts: {
128
+ executionStartTimeoutMs: options?.executionStartTimeoutMs,
129
+ nodeExecutionTimeoutMs: options?.nodeExecutionTimeoutMs
130
+ }
131
+ };
132
+ const record = {
133
+ ...payload,
134
+ options: {
135
+ ...payload.options,
136
+ preferredClientIds: payload.options.preferredClientIds ? [...payload.options.preferredClientIds] : [],
137
+ excludeClientIds: payload.options.excludeClientIds ? [...payload.options.excludeClientIds] : [],
138
+ includeOutputs: payload.options.includeOutputs ? [...payload.options.includeOutputs] : []
139
+ },
140
+ attachments: options?.attachments,
141
+ status: "queued"
142
+ };
143
+ this.jobStore.set(jobId, record);
144
+ await this.queue.enqueue(payload, { priority: payload.options.priority });
145
+ this.dispatchEvent(new CustomEvent("job:queued", { detail: { job: record } }));
146
+ void this.processQueue();
147
+ return jobId;
148
+ }
149
+ getJob(jobId) {
150
+ return this.jobStore.get(jobId);
151
+ }
152
+ async cancel(jobId) {
153
+ const record = this.jobStore.get(jobId);
154
+ if (!record) {
155
+ return false;
156
+ }
157
+ if (record.status === "queued") {
158
+ const removed = await this.queue.remove(jobId);
159
+ if (removed) {
160
+ record.status = "cancelled";
161
+ record.completedAt = Date.now();
162
+ this.clearJobFailures(jobId);
163
+ this.dispatchEvent(new CustomEvent("job:cancelled", { detail: { job: record } }));
164
+ return true;
165
+ }
166
+ }
167
+ const active = this.activeJobs.get(jobId);
168
+ if (active?.cancel) {
169
+ await active.cancel();
170
+ record.status = "cancelled";
171
+ record.completedAt = Date.now();
172
+ this.clearJobFailures(jobId);
173
+ this.dispatchEvent(new CustomEvent("job:cancelled", { detail: { job: record } }));
174
+ return true;
175
+ }
176
+ return false;
177
+ }
178
+ async shutdown() {
179
+ this.clientManager.destroy();
180
+ await this.queue.shutdown();
181
+ for (const [, ctx] of Array.from(this.activeJobs)) {
182
+ ctx.release({ success: false });
183
+ }
184
+ this.activeJobs.clear();
185
+ }
186
+ async getQueueStats() {
187
+ return this.queue.stats();
188
+ }
189
+ normalizeWorkflow(input) {
190
+ if (typeof input === "string") {
191
+ return JSON.parse(input);
192
+ }
193
+ if (input instanceof Workflow) {
194
+ return cloneDeep(input.json ?? {});
195
+ }
196
+ if (typeof input?.toJSON === "function") {
197
+ return cloneDeep(input.toJSON());
198
+ }
199
+ return cloneDeep(input);
200
+ }
201
+ generateJobId() {
202
+ try {
203
+ return randomUUID();
204
+ }
205
+ catch {
206
+ return WorkflowPool.fallbackId();
207
+ }
208
+ }
209
+ static fallbackId() {
210
+ return globalThis.crypto && "randomUUID" in globalThis.crypto
211
+ ? globalThis.crypto.randomUUID()
212
+ : `job_${Math.random().toString(36).slice(2, 10)}`;
213
+ }
214
+ scheduleProcess(delayMs) {
215
+ const wait = Math.max(delayMs, 10);
216
+ setTimeout(() => {
217
+ void this.processQueue();
218
+ }, wait);
219
+ }
220
+ applyAutoSeed(workflow) {
221
+ const autoSeeds = {};
222
+ for (const [nodeId, nodeValue] of Object.entries(workflow)) {
223
+ if (!nodeValue || typeof nodeValue !== "object")
224
+ continue;
225
+ const inputs = nodeValue.inputs;
226
+ if (!inputs || typeof inputs !== "object")
227
+ continue;
228
+ if (typeof inputs.seed === "number" && inputs.seed === -1) {
229
+ const val = Math.floor(Math.random() * 2_147_483_647);
230
+ inputs.seed = val;
231
+ autoSeeds[nodeId] = val;
232
+ }
233
+ }
234
+ return autoSeeds;
235
+ }
236
+ rememberJobFailure(job, clientId, analysis) {
237
+ let map = this.jobFailureAnalysis.get(job.jobId);
238
+ if (!map) {
239
+ map = new Map();
240
+ this.jobFailureAnalysis.set(job.jobId, map);
241
+ }
242
+ map.set(clientId, analysis);
243
+ }
244
+ clearJobFailures(jobId) {
245
+ this.jobFailureAnalysis.delete(jobId);
246
+ }
247
+ collectFailureReasons(jobId) {
248
+ const map = this.jobFailureAnalysis.get(jobId);
249
+ if (!map) {
250
+ return {};
251
+ }
252
+ const reasons = {};
253
+ for (const [clientId, analysis] of map.entries()) {
254
+ reasons[clientId] = analysis.reason;
255
+ }
256
+ return reasons;
257
+ }
258
+ addPermanentExclusion(job, clientId) {
259
+ if (!job.options.excludeClientIds) {
260
+ job.options.excludeClientIds = [];
261
+ }
262
+ if (!job.options.excludeClientIds.includes(clientId)) {
263
+ job.options.excludeClientIds.push(clientId);
264
+ }
265
+ }
266
+ hasRetryPath(job) {
267
+ const map = this.jobFailureAnalysis.get(job.jobId);
268
+ const exclude = new Set(job.options.excludeClientIds ?? []);
269
+ const preferred = job.options.preferredClientIds?.length ? new Set(job.options.preferredClientIds) : null;
270
+ for (const client of this.clientManager.list()) {
271
+ if (preferred && !preferred.has(client.id)) {
272
+ continue;
273
+ }
274
+ if (exclude.has(client.id)) {
275
+ continue;
276
+ }
277
+ const analysis = map?.get(client.id);
278
+ if (analysis?.blockClient === "permanent") {
279
+ continue;
280
+ }
281
+ return true;
282
+ }
283
+ return false;
284
+ }
285
+ createWorkflowNotSupportedError(job, cause) {
286
+ const reasons = this.collectFailureReasons(job.jobId);
287
+ const message = `Workflow ${job.workflowHash} is not supported by any connected clients`;
288
+ return new WorkflowNotSupportedError(message, {
289
+ workflowHash: job.workflowHash,
290
+ reasons,
291
+ cause
292
+ });
293
+ }
294
+ async processQueue() {
295
+ this.debugLog("[processQueue] Called");
296
+ if (this.processing) {
297
+ this.debugLog("[processQueue] Already processing, returning early");
298
+ this.processQueued = true;
299
+ return;
300
+ }
301
+ this.processing = true;
302
+ try {
303
+ // Continue processing until no more jobs can be assigned
304
+ let iteration = 0;
305
+ while (true) {
306
+ iteration++;
307
+ this.debugLog(`[processQueue] Iteration ${iteration}`);
308
+ const idleClients = this.clientManager.list().filter((c) => this.clientManager.isClientStable(c));
309
+ this.debugLog(`[processQueue] Idle clients: [${idleClients.map((c) => c.id).join(", ")}] (${idleClients.length})`);
310
+ if (!idleClients.length) {
311
+ this.debugLog("[processQueue] No idle clients, breaking");
312
+ break; // No idle clients available
313
+ }
314
+ const waitingJobs = await this.queue.peek(100); // Peek at top 100 jobs
315
+ this.debugLog(`[processQueue] Waiting jobs in queue: ${waitingJobs.length}`);
316
+ if (!waitingJobs.length) {
317
+ this.debugLog("[processQueue] No waiting jobs, breaking");
318
+ break; // No jobs in queue
319
+ }
320
+ const leasedClientIds = new Set();
321
+ const reservedJobIds = new Set();
322
+ const jobMatchInfos = [];
323
+ for (const jobPayload of waitingJobs) {
324
+ const job = this.jobStore.get(jobPayload.jobId);
325
+ if (!job) {
326
+ this.debugLog(`[processQueue] Job ${jobPayload.jobId} not in jobStore, skipping`);
327
+ continue;
328
+ }
329
+ const compatibleClients = idleClients
330
+ .filter((client) => {
331
+ const canRun = this.clientManager.canClientRunJob(client, job);
332
+ if (!canRun) {
333
+ this.debugLog(`[processQueue] Job ${job.jobId.substring(0, 8)}... NOT compatible with ${client.id}. Checking why...`);
334
+ this.debugLog(`[processQueue] - preferredClientIds: ${JSON.stringify(job.options.preferredClientIds)}`);
335
+ this.debugLog(`[processQueue] - excludeClientIds: ${JSON.stringify(job.options.excludeClientIds)}`);
336
+ this.debugLog(`[processQueue] - client.id: ${client.id}`);
337
+ }
338
+ return canRun;
339
+ })
340
+ .map((client) => client.id);
341
+ this.debugLog(`[processQueue] Job ${job.jobId.substring(0, 8)}... compatible with: [${compatibleClients.join(", ")}] (selectivity=${compatibleClients.length})`);
342
+ if (compatibleClients.length > 0) {
343
+ jobMatchInfos.push({
344
+ jobPayload,
345
+ job,
346
+ compatibleClients,
347
+ selectivity: compatibleClients.length
348
+ });
349
+ }
350
+ }
351
+ this.debugLog(`[processQueue] Found ${jobMatchInfos.length} compatible job matches`);
352
+ if (jobMatchInfos.length === 0) {
353
+ this.debugLog("[processQueue] No compatible jobs for idle clients, breaking");
354
+ break; // No compatible jobs for idle clients
355
+ }
356
+ // Sort jobs by priority first, then selectivity, to maximize throughput
357
+ // 1. Higher priority jobs execute first (explicit user priority)
358
+ // 2. More selective jobs (fewer compatible clients) assigned first within same priority
359
+ // 3. Earlier queue position as final tiebreaker
360
+ jobMatchInfos.sort((a, b) => {
361
+ // Primary: priority (higher priority = higher precedence)
362
+ const aPriority = a.job.options.priority ?? 0;
363
+ const bPriority = b.job.options.priority ?? 0;
364
+ if (aPriority !== bPriority) {
365
+ return bPriority - aPriority; // Higher priority first
366
+ }
367
+ // Secondary: selectivity (fewer compatible clients = higher precedence)
368
+ if (a.selectivity !== b.selectivity) {
369
+ return a.selectivity - b.selectivity;
370
+ }
371
+ // Tertiary: maintain queue order (earlier jobs first)
372
+ const aIndex = waitingJobs.indexOf(a.jobPayload);
373
+ const bIndex = waitingJobs.indexOf(b.jobPayload);
374
+ return aIndex - bIndex;
375
+ });
376
+ // Assign jobs to clients using the selectivity-based ordering
377
+ let assignedAnyJob = false;
378
+ for (const matchInfo of jobMatchInfos) {
379
+ if (reservedJobIds.has(matchInfo.job.jobId))
380
+ continue;
381
+ // Find first available compatible client
382
+ const availableClient = matchInfo.compatibleClients.find((clientId) => !leasedClientIds.has(clientId));
383
+ if (!availableClient) {
384
+ this.debugLog(`[processQueue] No available client for job ${matchInfo.job.jobId.substring(0, 8)}...`);
385
+ continue; // No available clients for this job
386
+ }
387
+ this.debugLog(`[processQueue] Reserving job ${matchInfo.job.jobId.substring(0, 8)}... for client ${availableClient}`);
388
+ const reservation = await this.queue.reserveById(matchInfo.job.jobId);
389
+ if (reservation) {
390
+ // Mark as leased/reserved for this cycle
391
+ leasedClientIds.add(availableClient);
392
+ reservedJobIds.add(matchInfo.job.jobId);
393
+ assignedAnyJob = true;
394
+ // Get the lease (which marks the client as busy)
395
+ const lease = this.clientManager.claim(matchInfo.job, availableClient);
396
+ if (lease) {
397
+ this.debugLog(`[processQueue] Starting job ${matchInfo.job.jobId.substring(0, 8)}... on client ${availableClient}`);
398
+ this.runJob({
399
+ reservation,
400
+ job: matchInfo.job,
401
+ clientId: lease.clientId,
402
+ release: lease.release
403
+ }).catch((error) => {
404
+ console.error("[WorkflowPool] Unhandled job error", error);
405
+ });
406
+ }
407
+ else {
408
+ // This should not happen since we checked canClientRunJob, but handle defensively
409
+ console.error(`[processQueue.processQueue] CRITICAL: Failed to claim client ${availableClient} for job ${matchInfo.job.jobId} after successful check.`);
410
+ await this.queue.retry(reservation.reservationId, { delayMs: matchInfo.job.options.retryDelayMs });
411
+ }
412
+ }
413
+ else {
414
+ this.debugLog(`[processQueue] Failed to reserve job ${matchInfo.job.jobId.substring(0, 8)}...`);
415
+ }
416
+ }
417
+ this.debugLog(`[processQueue] Assigned any job in this iteration: ${assignedAnyJob}`);
418
+ // If we didn't assign any jobs this iteration, no point continuing
419
+ if (!assignedAnyJob) {
420
+ this.debugLog("[processQueue] No jobs assigned, breaking");
421
+ break;
422
+ }
423
+ }
424
+ }
425
+ finally {
426
+ this.debugLog("[processQueue] Exiting, setting processing = false");
427
+ this.processing = false;
428
+ if (this.processQueued) {
429
+ this.debugLog("[processQueue] Pending rerun detected, draining queue again");
430
+ this.processQueued = false;
431
+ void this.processQueue();
432
+ }
433
+ }
434
+ }
435
+ async runJob(ctx) {
436
+ const { reservation, job, clientId, release } = ctx;
437
+ let released = false;
438
+ const safeRelease = (opts) => {
439
+ if (released) {
440
+ return;
441
+ }
442
+ released = true;
443
+ release(opts);
444
+ };
445
+ const managed = this.clientManager.getClient(clientId);
446
+ const client = managed?.client;
447
+ if (!client) {
448
+ await this.queue.retry(reservation.reservationId, { delayMs: job.options.retryDelayMs });
449
+ safeRelease({ success: false });
450
+ return;
451
+ }
452
+ job.status = "running";
453
+ job.clientId = clientId;
454
+ job.attempts += 1;
455
+ reservation.payload.attempts = job.attempts;
456
+ job.startedAt = Date.now();
457
+ // Don't dispatch job:started here - wait until we have promptId in onPending
458
+ // this.dispatchEvent(new CustomEvent("job:started", { detail: { job } }));
459
+ const workflowPayload = cloneDeep(reservation.payload.workflow);
460
+ if (job.attachments?.length) {
461
+ for (const attachment of job.attachments) {
462
+ const filename = attachment.filename ?? `${job.jobId}-${attachment.nodeId}-${attachment.inputName}.bin`;
463
+ const blob = attachment.file instanceof Buffer ? new Blob([new Uint8Array(attachment.file)]) : attachment.file;
464
+ await client.ext.file.uploadImage(blob, filename, { override: true });
465
+ const node = workflowPayload[attachment.nodeId];
466
+ if (node?.inputs) {
467
+ node.inputs[attachment.inputName] = filename;
468
+ }
469
+ }
470
+ }
471
+ const autoSeeds = this.applyAutoSeed(workflowPayload);
472
+ let wfInstance = Workflow.from(workflowPayload);
473
+ if (job.options.includeOutputs?.length) {
474
+ for (const nodeId of job.options.includeOutputs) {
475
+ if (nodeId) {
476
+ wfInstance = wfInstance.output(nodeId);
477
+ }
478
+ }
479
+ }
480
+ wfInstance.inferDefaultOutputs?.();
481
+ // Use stored metadata if available (from Workflow instance), otherwise extract from recreated instance
482
+ const outputNodeIds = reservation.payload.workflowMeta?.outputNodeIds ??
483
+ wfInstance.outputNodeIds ??
484
+ job.options.includeOutputs ??
485
+ [];
486
+ const outputAliases = reservation.payload.workflowMeta?.outputAliases ?? wfInstance.outputAliases ?? {};
487
+ let promptBuilder = new PromptBuilder(wfInstance.json, wfInstance.inputPaths ?? [], outputNodeIds);
488
+ for (const nodeId of outputNodeIds) {
489
+ const alias = outputAliases[nodeId] ?? nodeId;
490
+ promptBuilder = promptBuilder.setOutputNode(alias, nodeId);
491
+ }
492
+ const wrapper = new CallWrapper(client, promptBuilder);
493
+ // Setup profiling if enabled
494
+ const profiler = this.opts.enableProfiling ? new JobProfiler(job.enqueuedAt, workflowPayload) : undefined;
495
+ // Setup node execution timeout tracking
496
+ // Use per-job timeout override if specified, otherwise use pool default
497
+ const nodeExecutionTimeout = job.timeouts?.nodeExecutionTimeoutMs ?? this.opts.nodeExecutionTimeoutMs ?? 300000; // 5 minutes default
498
+ let nodeTimeoutId;
499
+ let lastNodeStartTime;
500
+ let currentExecutingNode = null;
501
+ const resetNodeTimeout = (nodeName) => {
502
+ if (nodeTimeoutId) {
503
+ clearTimeout(nodeTimeoutId);
504
+ nodeTimeoutId = undefined;
505
+ }
506
+ if (nodeExecutionTimeout > 0 && nodeName !== null) {
507
+ lastNodeStartTime = Date.now();
508
+ currentExecutingNode = nodeName || null;
509
+ nodeTimeoutId = setTimeout(() => {
510
+ const elapsed = Date.now() - (lastNodeStartTime || 0);
511
+ const nodeInfo = currentExecutingNode ? ` (node: ${currentExecutingNode})` : "";
512
+ completionError = new Error(`Node execution timeout: took longer than ${nodeExecutionTimeout}ms${nodeInfo}. ` +
513
+ `Actual time: ${elapsed}ms. Server may be stuck or node is too slow for configured timeout.`);
514
+ resolveCompletion?.();
515
+ }, nodeExecutionTimeout);
516
+ }
517
+ };
518
+ const clearNodeTimeout = () => {
519
+ if (nodeTimeoutId) {
520
+ clearTimeout(nodeTimeoutId);
521
+ nodeTimeoutId = undefined;
522
+ }
523
+ currentExecutingNode = null;
524
+ lastNodeStartTime = undefined;
525
+ };
526
+ // Setup profiling event listeners on the raw ComfyUI client
527
+ if (profiler) {
528
+ const onExecutionStart = (event) => {
529
+ const promptId = event.detail?.prompt_id;
530
+ if (promptId) {
531
+ profiler.onExecutionStart(promptId);
532
+ }
533
+ };
534
+ const onExecutionCached = (event) => {
535
+ const nodes = event.detail?.nodes;
536
+ if (Array.isArray(nodes)) {
537
+ profiler.onCachedNodes(nodes.map(String));
538
+ }
539
+ };
540
+ const onExecuting = (event) => {
541
+ const node = event.detail?.node;
542
+ if (node === null) {
543
+ // Workflow completed
544
+ profiler.onExecutionComplete();
545
+ }
546
+ else if (node !== undefined) {
547
+ profiler.onNodeExecuting(String(node));
548
+ }
549
+ };
550
+ const onExecutionError = (event) => {
551
+ const detail = event.detail || {};
552
+ if (detail.node !== undefined) {
553
+ profiler.onNodeError(String(detail.node), detail.exception_message || "Execution error");
554
+ }
555
+ };
556
+ // Attach listeners to client
557
+ client.addEventListener("execution_start", onExecutionStart);
558
+ client.addEventListener("execution_cached", onExecutionCached);
559
+ client.addEventListener("executing", onExecuting);
560
+ client.addEventListener("execution_error", onExecutionError);
561
+ // Cleanup function to remove listeners
562
+ const cleanupProfiler = () => {
563
+ client.removeEventListener("execution_start", onExecutionStart);
564
+ client.removeEventListener("execution_cached", onExecutionCached);
565
+ client.removeEventListener("executing", onExecuting);
566
+ client.removeEventListener("execution_error", onExecutionError);
567
+ };
568
+ // Ensure cleanup happens when job finishes
569
+ wrapper.onFinished(() => cleanupProfiler());
570
+ wrapper.onFailed(() => cleanupProfiler());
571
+ }
572
+ // Setup node execution timeout listeners (always active if timeout > 0)
573
+ const onNodeExecuting = (event) => {
574
+ const node = event.detail?.node;
575
+ if (node === null) {
576
+ // Workflow completed - clear timeout
577
+ clearNodeTimeout();
578
+ }
579
+ else if (node !== undefined) {
580
+ // New node started - reset timeout
581
+ resetNodeTimeout(String(node));
582
+ }
583
+ };
584
+ const onNodeProgress = (event) => {
585
+ // Progress event means node is still working - reset timeout
586
+ if (event.detail?.node) {
587
+ resetNodeTimeout(String(event.detail.node));
588
+ }
589
+ };
590
+ const onExecutionStarted = (event) => {
591
+ // Execution started - reset timeout for first node
592
+ resetNodeTimeout("execution_start");
593
+ };
594
+ if (nodeExecutionTimeout > 0) {
595
+ client.addEventListener("execution_start", onExecutionStarted);
596
+ client.addEventListener("executing", onNodeExecuting);
597
+ client.addEventListener("progress", onNodeProgress);
598
+ }
599
+ const cleanupNodeTimeout = () => {
600
+ clearNodeTimeout();
601
+ if (nodeExecutionTimeout > 0) {
602
+ client.removeEventListener("execution_start", onExecutionStarted);
603
+ client.removeEventListener("executing", onNodeExecuting);
604
+ client.removeEventListener("progress", onNodeProgress);
605
+ }
606
+ };
607
+ let pendingSettled = false;
608
+ let resolvePending;
609
+ let rejectPending;
610
+ const pendingPromise = new Promise((resolve, reject) => {
611
+ resolvePending = () => {
612
+ if (!pendingSettled) {
613
+ pendingSettled = true;
614
+ resolve();
615
+ }
616
+ };
617
+ rejectPending = (err) => {
618
+ if (!pendingSettled) {
619
+ pendingSettled = true;
620
+ reject(err);
621
+ }
622
+ };
623
+ });
624
+ let resolveCompletion;
625
+ let completionError;
626
+ // completionPromise is used to track when the wrapper completes (success or failure)
627
+ // It's resolved in onFinished and onFailed handlers
628
+ const completionPromise = new Promise((resolve) => {
629
+ resolveCompletion = resolve;
630
+ });
631
+ let jobStartedDispatched = false;
632
+ wrapper.onProgress((progress, promptId) => {
633
+ if (!job.promptId && promptId) {
634
+ job.promptId = promptId;
635
+ }
636
+ // Dispatch job:started on first progress update with promptId
637
+ if (!jobStartedDispatched && job.promptId) {
638
+ jobStartedDispatched = true;
639
+ this.dispatchEvent(new CustomEvent("job:started", { detail: { job } }));
640
+ }
641
+ // Feed progress to profiler
642
+ if (profiler) {
643
+ profiler.onProgress(progress);
644
+ }
645
+ this.dispatchEvent(new CustomEvent("job:progress", {
646
+ detail: { jobId: job.jobId, clientId, progress }
647
+ }));
648
+ });
649
+ wrapper.onPreview((blob, promptId) => {
650
+ if (!job.promptId && promptId) {
651
+ job.promptId = promptId;
652
+ }
653
+ // Dispatch job:started on first preview with promptId
654
+ if (!jobStartedDispatched && job.promptId) {
655
+ jobStartedDispatched = true;
656
+ this.dispatchEvent(new CustomEvent("job:started", { detail: { job } }));
657
+ }
658
+ this.dispatchEvent(new CustomEvent("job:preview", {
659
+ detail: { jobId: job.jobId, clientId, blob }
660
+ }));
661
+ });
662
+ wrapper.onPreviewMeta((payload, promptId) => {
663
+ if (!job.promptId && promptId) {
664
+ job.promptId = promptId;
665
+ }
666
+ // Dispatch job:started on first preview_meta with promptId
667
+ if (!jobStartedDispatched && job.promptId) {
668
+ jobStartedDispatched = true;
669
+ this.dispatchEvent(new CustomEvent("job:started", { detail: { job } }));
670
+ }
671
+ this.dispatchEvent(new CustomEvent("job:preview_meta", {
672
+ detail: { jobId: job.jobId, clientId, payload }
673
+ }));
674
+ });
675
+ wrapper.onOutput((key, data, promptId) => {
676
+ if (!job.promptId && promptId) {
677
+ job.promptId = promptId;
678
+ }
679
+ this.dispatchEvent(new CustomEvent("job:output", {
680
+ detail: { jobId: job.jobId, clientId, key: String(key), data }
681
+ }));
682
+ });
683
+ wrapper.onPending((promptId) => {
684
+ if (!job.promptId && promptId) {
685
+ job.promptId = promptId;
686
+ }
687
+ // Don't dispatch job:started here - wait for first progress/preview with promptId
688
+ this.dispatchEvent(new CustomEvent("job:accepted", { detail: { job } }));
689
+ resolvePending?.();
690
+ });
691
+ wrapper.onStart((promptId) => {
692
+ if (!job.promptId && promptId) {
693
+ job.promptId = promptId;
694
+ }
695
+ });
696
+ wrapper.onFinished((data, promptId) => {
697
+ if (!job.promptId && promptId) {
698
+ job.promptId = promptId;
699
+ }
700
+ job.status = "completed";
701
+ job.lastError = undefined;
702
+ const resultPayload = {};
703
+ for (const nodeId of outputNodeIds) {
704
+ const alias = outputAliases[nodeId] ?? nodeId;
705
+ // CallWrapper uses alias keys when mapOutputKeys is configured, fallback to nodeId
706
+ const nodeResult = data[alias];
707
+ const fallbackResult = data[nodeId];
708
+ const finalResult = nodeResult !== undefined ? nodeResult : fallbackResult;
709
+ resultPayload[alias] = finalResult;
710
+ }
711
+ resultPayload._nodes = [...outputNodeIds];
712
+ resultPayload._aliases = { ...outputAliases };
713
+ if (job.promptId) {
714
+ resultPayload._promptId = job.promptId;
715
+ }
716
+ if (Object.keys(autoSeeds).length) {
717
+ resultPayload._autoSeeds = { ...autoSeeds };
718
+ }
719
+ job.result = resultPayload;
720
+ job.completedAt = Date.now();
721
+ this.clearJobFailures(job.jobId);
722
+ // Cleanup timeouts
723
+ cleanupNodeTimeout();
724
+ // Attach profiling stats if profiling was enabled
725
+ if (profiler) {
726
+ job.profileStats = profiler.getStats();
727
+ }
728
+ completionError = undefined;
729
+ this.dispatchEvent(new CustomEvent("job:completed", { detail: { job } }));
730
+ safeRelease({ success: true });
731
+ resolveCompletion?.();
732
+ });
733
+ wrapper.onFailed((error, promptId) => {
734
+ this.debugLog("[debug] wrapper.onFailed", job.jobId, error.name);
735
+ if (!job.promptId && promptId) {
736
+ job.promptId = promptId;
737
+ }
738
+ job.lastError = error;
739
+ // Cleanup timeouts
740
+ cleanupNodeTimeout();
741
+ rejectPending?.(error);
742
+ completionError = error;
743
+ this.debugLog("[debug] resolveCompletion available", Boolean(resolveCompletion));
744
+ safeRelease({ success: false });
745
+ resolveCompletion?.();
746
+ });
747
+ try {
748
+ // Start the workflow execution
749
+ const exec = wrapper.run();
750
+ // Add timeout for execution start to prevent jobs getting stuck
751
+ // Use per-job timeout override if specified, otherwise use pool default
752
+ const executionStartTimeout = job.timeouts?.executionStartTimeoutMs ?? this.opts.executionStartTimeoutMs ?? 5000;
753
+ let pendingTimeoutId;
754
+ if (executionStartTimeout > 0) {
755
+ const pendingWithTimeout = Promise.race([
756
+ pendingPromise,
757
+ new Promise((_, reject) => {
758
+ pendingTimeoutId = setTimeout(() => {
759
+ reject(new Error(`Execution failed to start within ${executionStartTimeout}ms. ` +
760
+ `Server may be stuck or unresponsive.`));
761
+ }, executionStartTimeout);
762
+ })
763
+ ]);
764
+ await pendingWithTimeout;
765
+ }
766
+ else {
767
+ await pendingPromise;
768
+ }
769
+ if (executionStartTimeout > 0) {
770
+ clearTimeout(pendingTimeoutId);
771
+ }
772
+ this.activeJobs.set(job.jobId, {
773
+ reservation,
774
+ job,
775
+ clientId,
776
+ release: (opts) => safeRelease(opts),
777
+ cancel: async () => {
778
+ try {
779
+ wrapper.cancel("workflow pool cancel");
780
+ if (job.promptId) {
781
+ await client.ext.queue.interrupt(job.promptId);
782
+ }
783
+ }
784
+ finally {
785
+ this.activeJobs.delete(job.jobId);
786
+ await this.queue.discard(reservation.reservationId, new Error("cancelled"));
787
+ safeRelease({ success: false });
788
+ }
789
+ }
790
+ });
791
+ const result = await exec;
792
+ // Wait for the wrapper to complete (onFinished or onFailed callback)
793
+ await completionPromise;
794
+ if (result === false) {
795
+ const errorToThrow = (completionError instanceof Error ? completionError : undefined) ??
796
+ (job.lastError instanceof Error ? job.lastError : undefined) ??
797
+ new Error("Execution failed");
798
+ throw errorToThrow;
799
+ }
800
+ await this.queue.commit(reservation.reservationId);
801
+ safeRelease({ success: true });
802
+ }
803
+ catch (error) {
804
+ // Immediately release the client on any failure
805
+ safeRelease({ success: false });
806
+ const latestStatus = this.jobStore.get(job.jobId)?.status;
807
+ if (latestStatus === "cancelled") {
808
+ return;
809
+ }
810
+ job.lastError = error;
811
+ job.status = "failed";
812
+ const remainingAttempts = job.options.maxAttempts - job.attempts;
813
+ const failureAnalysis = analyzeWorkflowFailure(error);
814
+ this.rememberJobFailure(job, clientId, failureAnalysis);
815
+ if (failureAnalysis.blockClient === "permanent") {
816
+ this.addPermanentExclusion(job, clientId);
817
+ reservation.payload.options.excludeClientIds = [...(job.options.excludeClientIds ?? [])];
818
+ }
819
+ this.clientManager.recordFailure(clientId, job, error);
820
+ const hasRetryPath = this.hasRetryPath(job);
821
+ const willRetry = failureAnalysis.retryable && remainingAttempts > 0 && hasRetryPath;
822
+ this.dispatchEvent(new CustomEvent("job:failed", {
823
+ detail: { job, willRetry }
824
+ }));
825
+ if (willRetry) {
826
+ const delay = this.opts.retryBackoffMs ?? job.options.retryDelayMs;
827
+ this.dispatchEvent(new CustomEvent("job:retrying", { detail: { job, delayMs: delay } }));
828
+ job.status = "queued";
829
+ job.clientId = undefined;
830
+ job.promptId = undefined;
831
+ job.startedAt = undefined;
832
+ job.completedAt = undefined;
833
+ job.result = undefined;
834
+ reservation.payload.options.excludeClientIds = [...(job.options.excludeClientIds ?? [])];
835
+ await this.queue.retry(reservation.reservationId, { delayMs: delay });
836
+ this.dispatchEvent(new CustomEvent("job:queued", { detail: { job } }));
837
+ this.scheduleProcess(delay);
838
+ }
839
+ else {
840
+ job.completedAt = Date.now();
841
+ const finalError = !hasRetryPath && failureAnalysis.type === "client_incompatible" && this.jobFailureAnalysis.has(job.jobId)
842
+ ? this.createWorkflowNotSupportedError(job, error)
843
+ : error;
844
+ job.lastError = finalError;
845
+ await this.queue.discard(reservation.reservationId, finalError);
846
+ this.clearJobFailures(job.jobId);
847
+ }
848
+ }
849
+ finally {
850
+ this.activeJobs.delete(job.jobId);
851
+ this.debugLog(`[runJob.finally] Job ${job.jobId.substring(0, 8)}... completed, calling processQueue()`);
852
+ void this.processQueue();
853
+ }
854
+ }
855
+ }
846
856
  //# sourceMappingURL=WorkflowPool.js.map