zyndo 0.3.3 → 0.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/sellerDaemon.js +46 -8
- package/package.json +1 -1
package/dist/sellerDaemon.js
CHANGED
|
@@ -23,7 +23,12 @@ const HEARTBEAT_INTERVAL_MS = 45_000;
|
|
|
23
23
|
// broker's authoritative task list. This catches missed `task.assigned`
|
|
24
24
|
// events caused by broker restarts (in-memory event queue reset), network
|
|
25
25
|
// partitions, or dropped SSE frames. 2 cycles at 25s = ~50s safety net.
|
|
26
|
-
|
|
26
|
+
// Periodic reconcile safety net. Every N poll cycles, check /agent/tasks for
|
|
27
|
+
// anything the daemon doesn't know about. Was 8 (~200s at 25s polls). Too
|
|
28
|
+
// slow — users saw multi-minute hire pickup delays after broker restarts
|
|
29
|
+
// broke the event cursor. 2 (~50s) is fast enough to be invisible while
|
|
30
|
+
// still being cheap. Incident 2026-04-14.
|
|
31
|
+
const RECONCILE_EVERY_N_POLLS = 2;
|
|
27
32
|
// ---------------------------------------------------------------------------
|
|
28
33
|
// Reconnect error classification (incident 2026-04-09 follow-up)
|
|
29
34
|
//
|
|
@@ -178,7 +183,21 @@ export async function startSellerDaemon(config, opts) {
|
|
|
178
183
|
logger.error(`Identity registration failed: ${err instanceof Error ? err.message : String(err)}. Deliveries will be unsigned.`);
|
|
179
184
|
}
|
|
180
185
|
}
|
|
181
|
-
|
|
186
|
+
// Reset the event cursor on every daemon startup. The persisted value from
|
|
187
|
+
// a previous session can be higher than the broker's in-memory event
|
|
188
|
+
// counter after a broker restart (Railway auto-deploys reset the counter),
|
|
189
|
+
// causing pollEvents to return empty indefinitely — tasks could only be
|
|
190
|
+
// recovered via the periodic reconcile loop, producing multi-minute hire
|
|
191
|
+
// pickup delays. Starting from 0 makes pollEvents instantly see any current
|
|
192
|
+
// event stream. The startup reconcile already catches any in-flight task
|
|
193
|
+
// the broker persisted, and the activeTasks Set dedupes any replayed
|
|
194
|
+
// events so no work is done twice. Incident 2026-04-14.
|
|
195
|
+
const persistedCursor = loadLastEventId();
|
|
196
|
+
if (persistedCursor > 0) {
|
|
197
|
+
logger.info(`Event cursor reset from ${persistedCursor} → 0 on startup (broker may have restarted; reconcile will recover in-flight tasks).`);
|
|
198
|
+
}
|
|
199
|
+
let lastEventId = 0;
|
|
200
|
+
saveLastEventId(0);
|
|
182
201
|
let lastHeartbeat = Date.now();
|
|
183
202
|
const activeTasks = new Set();
|
|
184
203
|
let pollsSinceReconcile = 0;
|
|
@@ -330,12 +349,23 @@ export async function startSellerDaemon(config, opts) {
|
|
|
330
349
|
logger.info(`Reconnected successfully (attempt ${attempt + 1}).`);
|
|
331
350
|
lastHeartbeat = Date.now();
|
|
332
351
|
reconnected = true;
|
|
333
|
-
//
|
|
334
|
-
//
|
|
335
|
-
//
|
|
336
|
-
//
|
|
337
|
-
//
|
|
338
|
-
//
|
|
352
|
+
// The broker almost certainly restarted (that's why the
|
|
353
|
+
// heartbeat failed in the first place). Its in-memory event
|
|
354
|
+
// counter has reset to 0 while our in-memory lastEventId is
|
|
355
|
+
// still at the old high value — every subsequent pollEvents
|
|
356
|
+
// call would return empty because we'd be asking for events
|
|
357
|
+
// "since <high>" from a stream that now starts at 1. Reset
|
|
358
|
+
// the cursor in memory AND on disk so the next poll sees the
|
|
359
|
+
// current event stream. Incident 2026-04-14.
|
|
360
|
+
if (lastEventId > 0) {
|
|
361
|
+
logger.info(`Event cursor reset ${lastEventId} → 0 after reconnect (broker restart suspected).`);
|
|
362
|
+
}
|
|
363
|
+
lastEventId = 0;
|
|
364
|
+
saveLastEventId(0);
|
|
365
|
+
// Also run an immediate reconcile so any task that was
|
|
366
|
+
// already assigned to us before the broker restarted (and
|
|
367
|
+
// is now past the event horizon) gets picked up without
|
|
368
|
+
// waiting for the periodic cycle.
|
|
339
369
|
pollsSinceReconcile = RECONCILE_EVERY_N_POLLS;
|
|
340
370
|
await reconcileTasks('reconnect');
|
|
341
371
|
break;
|
|
@@ -374,6 +404,14 @@ export async function startSellerDaemon(config, opts) {
|
|
|
374
404
|
logger.info(`Re-registered: agentId=${holder.current.agentId}`);
|
|
375
405
|
lastHeartbeat = Date.now();
|
|
376
406
|
reconnected = true;
|
|
407
|
+
// Same cursor reset as the reconnect branch — a fresh connect
|
|
408
|
+
// against a post-restart broker means the event stream is
|
|
409
|
+
// brand new, so ask for "since=0".
|
|
410
|
+
if (lastEventId > 0) {
|
|
411
|
+
logger.info(`Event cursor reset ${lastEventId} → 0 after re-register.`);
|
|
412
|
+
}
|
|
413
|
+
lastEventId = 0;
|
|
414
|
+
saveLastEventId(0);
|
|
377
415
|
pollsSinceReconcile = RECONCILE_EVERY_N_POLLS;
|
|
378
416
|
await reconcileTasks('re-register');
|
|
379
417
|
}
|