channel-worker 2.5.4 → 2.5.6

This diff shows the changes between publicly available versions of this package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -139,6 +139,26 @@ class CommandPoller {
139
139
  }
140
140
  // Strip the '_pw' suffix to derive the script name (upload_youtube_pw → upload_youtube).
141
141
  const scriptName = command.type.replace(/_pw$/, '');
142
+
143
+ // PER-PROFILE MUTEX — two _pw scripts can't share the same NST profile.
144
+ // The NST browser is launched once and shared via CDP; concurrent
145
+ // page.goto / navigation calls from sibling scripts abort each other
146
+ // (saw `net::ERR_ABORTED` on FB while YT was navigating during the same
147
+ // publish click). Different profiles still run in parallel — this lock
148
+ // only serializes within a profile. Wait up to 30 min with 5s polling
149
+ // (upload typically 1-3 min); stale-cleanup picks up zombies separately.
150
+ if (!this._pwInFlight) this._pwInFlight = new Map();
151
+ const MUTEX_MAX_WAIT_MS = 30 * 60 * 1000;
152
+ const waitStart = Date.now();
153
+ while (this._pwInFlight.has(profileId)) {
154
+ if (Date.now() - waitStart > MUTEX_MAX_WAIT_MS) {
155
+ await this.api.updateCommand(command._id, { status: 'failed', error: `pw mutex timeout — profile ${profileId} busy with ${this._pwInFlight.get(profileId)} for >30min` });
156
+ return;
157
+ }
158
+ console.log(`[commands/pw] ${command.type} waiting — profile ${profileId} busy with ${this._pwInFlight.get(profileId)}`);
159
+ await new Promise(r => setTimeout(r, 5000));
160
+ }
161
+ this._pwInFlight.set(profileId, command.type);
142
162
  console.log(`[commands/pw] ${command.type} → scripts/${scriptName}.js (profile=${profileId})`);
143
163
 
144
164
  // Lazy-init NST manager (same pattern as the other handlers).
@@ -177,6 +197,10 @@ class CommandPoller {
177
197
  } catch (err) {
178
198
  console.error(`[commands/pw] ${command.type} failed: ${err.message}`);
179
199
  await this.api.postCommandResult(command._id, { status: 'failed', error: String(err.message || err).slice(0, 500) });
200
+ } finally {
201
+ // Always release the per-profile mutex — even on throw — or sibling
202
+ // pw cmds for the same profile would hang forever.
203
+ if (this._pwInFlight) this._pwInFlight.delete(profileId);
180
204
  }
181
205
  }
182
206
 
@@ -1340,10 +1364,58 @@ class CommandPoller {
1340
1364
  // Pause on the Renderers tab; profile should stay closed until Resume.
1341
1365
  const isPaused = (r) => r && r.health_state === 'paused'
1342
1366
  && (!r.pause_until || new Date(r.pause_until).getTime() > Date.now());
1343
- const stillOffline = renderers.filter(r => !runningRenderers.includes(r) && !isPaused(r));
1367
+ let stillOffline = renderers.filter(r => !runningRenderers.includes(r) && !isPaused(r));
1344
1368
  const pausedCount = renderers.filter(isPaused).length;
1345
1369
  console.log(`[scene-dispatch] running=${runningRenderers.length} cap=${parallelLimit} (flowkit=${flowkitQ} dom=${domQ}) offline=${stillOffline.length} paused=${pausedCount} queue=${queueCount} names=[${runningRenderers.map(r=>r.name)}]`);
1346
1370
 
1371
+ // ROUND-ROBIN ROTATION — when a running renderer just finished its
1372
+ // scene (no in-flight cmd) AND there's at least one OFFLINE sibling
1373
+ // AND queue has more work, close the just-finished one so the next
1374
+ // cycle launches the sibling. Result: scenes alternate veo03 →
1375
+ // veo04 → veo03 → veo04 … instead of one renderer claiming
1376
+ // everything. Required for shared-Google-account safety: keeping a
1377
+ // browser idle on Flow page keeps refreshing tokens / pinging
1378
+ // telemetry → Veo flags "2 concurrent sessions per account" → captcha.
1379
+ // Closing it = 0 idle sessions, only 1 active at a time.
1380
+ if (queueCount > 0 && stillOffline.length > 0 && runningRenderers.length > 0) {
1381
+ const stoppedNames = [];
1382
+ for (const r of [...runningRenderers]) {
1383
+ // Skip externally-launched profiles (user opened manually via NST UI)
1384
+ // — we don't own their lifecycle.
1385
+ if (!this._profileLastActivity[r.nst_profile_id]) continue;
1386
+ try {
1387
+ const c = await this.api.rendererHasCommands(r.nst_profile_id);
1388
+ if (c > 0) continue; // busy → keep alive
1389
+ console.log(`[scene-dispatch] Round-robin: closing idle ${r.name} to rotate to sibling`);
1390
+ await this.nst.stopProfile(r.nst_profile_id);
1391
+ delete this._profileLastActivity[r.nst_profile_id];
1392
+ if (r.name) delete this._profileLastActivity[r.name.toLowerCase()];
1393
+ const idx = runningRenderers.indexOf(r);
1394
+ if (idx >= 0) runningRenderers.splice(idx, 1);
1395
+ stoppedNames.push(r.name);
1396
+ // The now-closed renderer becomes a fresh offline candidate for
1397
+ // future cycles (won't relaunch immediately — sibling goes first
1398
+ // because its last_command_assigned_at is older).
1399
+ stillOffline.push(r);
1400
+ } catch (e) {
1401
+ console.warn(`[scene-dispatch] Round-robin close failed for ${r.name}: ${e.message}`);
1402
+ }
1403
+ }
1404
+ if (stoppedNames.length) {
1405
+ console.log(`[scene-dispatch] Round-robin closed: [${stoppedNames.join(',')}] — next launch picks sibling`);
1406
+ }
1407
+ }
1408
+
1409
+ // Sort offline by last_command_assigned_at ASC (most idle first) so
1410
+ // the launcher picks the round-robin partner, not the one that just
1411
+ // finished. API populates last_command_assigned_at on every claim,
1412
+ // so this naturally implements turn-taking across siblings.
1413
+ stillOffline = stillOffline.slice().sort((a, b) => {
1414
+ const ta = a.last_command_assigned_at ? new Date(a.last_command_assigned_at).getTime() : 0;
1415
+ const tb = b.last_command_assigned_at ? new Date(b.last_command_assigned_at).getTime() : 0;
1416
+ return ta - tb;
1417
+ });
1418
+
1347
1419
  // Launch policy: STRICT parallel_limit cap on physical launches.
1348
1420
  // With pc2.parallel_limit=1 + 2 renderers (veo03/veo04 share same Veo
1349
1421
  // Google account), only ONE of them is launched at any time. The
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "channel-worker",
3
- "version": "2.5.4",
3
+ "version": "2.5.6",
4
4
  "description": "Channel Manager worker daemon — runs on remote machines to execute video pipeline jobs",
5
5
  "main": "lib/daemon.js",
6
6
  "bin": {