zyndo 0.3.3 → 0.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -23,7 +23,12 @@ const HEARTBEAT_INTERVAL_MS = 45_000;
23
23
  // broker's authoritative task list. This catches missed `task.assigned`
24
24
  // events caused by broker restarts (in-memory event queue reset), network
25
25
  // partitions, or dropped SSE frames. 2 cycles at 25s = ~50s safety net.
26
- const RECONCILE_EVERY_N_POLLS = 8;
26
+ // Periodic reconcile safety net. Every N poll cycles, check /agent/tasks for
27
+ // anything the daemon doesn't know about. Was 8 (~200s at 25s polls). Too
28
+ // slow — users saw multi-minute hire pickup delays after broker restarts
29
+ // broke the event cursor. 2 (~50s) is fast enough to be invisible while
30
+ // still being cheap. Incident 2026-04-14.
31
+ const RECONCILE_EVERY_N_POLLS = 2;
27
32
  // ---------------------------------------------------------------------------
28
33
  // Reconnect error classification (incident 2026-04-09 follow-up)
29
34
  //
@@ -178,7 +183,21 @@ export async function startSellerDaemon(config, opts) {
178
183
  logger.error(`Identity registration failed: ${err instanceof Error ? err.message : String(err)}. Deliveries will be unsigned.`);
179
184
  }
180
185
  }
181
- let lastEventId = loadLastEventId();
186
+ // Reset the event cursor on every daemon startup. The persisted value from
187
+ // a previous session can be higher than the broker's in-memory event
188
+ // counter after a broker restart (Railway auto-deploys reset the counter),
189
+ // causing pollEvents to return empty indefinitely — tasks could only be
190
+ // recovered via the periodic reconcile loop, producing multi-minute hire
191
+ // pickup delays. Starting from 0 makes pollEvents instantly see any current
192
+ // event stream. The startup reconcile already catches any in-flight task
193
+ // the broker persisted, and the activeTasks Set dedupes any replayed
194
+ // events so no work is done twice. Incident 2026-04-14.
195
+ const persistedCursor = loadLastEventId();
196
+ if (persistedCursor > 0) {
197
+ logger.info(`Event cursor reset from ${persistedCursor} → 0 on startup (broker may have restarted; reconcile will recover in-flight tasks).`);
198
+ }
199
+ let lastEventId = 0;
200
+ saveLastEventId(0);
182
201
  let lastHeartbeat = Date.now();
183
202
  const activeTasks = new Set();
184
203
  let pollsSinceReconcile = 0;
@@ -330,12 +349,23 @@ export async function startSellerDaemon(config, opts) {
330
349
  logger.info(`Reconnected successfully (attempt ${attempt + 1}).`);
331
350
  lastHeartbeat = Date.now();
332
351
  reconnected = true;
333
- // Force a reconcile on the next tick: the broker may have
334
- // restarted and wiped its in-memory event queue, so our
335
- // lastEventId cursor is now stale relative to the fresh
336
- // nextEventId counter. Without this the seller polls forever
337
- // and never sees task.assigned events that happened while we
338
- // were disconnected or that were issued after the reset.
352
+ // The broker almost certainly restarted (that's why the
353
+ // heartbeat failed in the first place). Its in-memory event
354
+ // counter has reset to 0 while our in-memory lastEventId is
355
+ // still at the old high value — every subsequent pollEvents
356
+ // call would return empty because we'd be asking for events
357
+ // "since <high>" from a stream that now starts at 1. Reset
358
+ // the cursor in memory AND on disk so the next poll sees the
359
+ // current event stream. Incident 2026-04-14.
360
+ if (lastEventId > 0) {
361
+ logger.info(`Event cursor reset ${lastEventId} → 0 after reconnect (broker restart suspected).`);
362
+ }
363
+ lastEventId = 0;
364
+ saveLastEventId(0);
365
+ // Also run an immediate reconcile so any task that was
366
+ // already assigned to us before the broker restarted (and
367
+ // is now past the event horizon) gets picked up without
368
+ // waiting for the periodic cycle.
339
369
  pollsSinceReconcile = RECONCILE_EVERY_N_POLLS;
340
370
  await reconcileTasks('reconnect');
341
371
  break;
@@ -374,6 +404,14 @@ export async function startSellerDaemon(config, opts) {
374
404
  logger.info(`Re-registered: agentId=${holder.current.agentId}`);
375
405
  lastHeartbeat = Date.now();
376
406
  reconnected = true;
407
+ // Same cursor reset as the reconnect branch — a fresh connect
408
+ // against a post-restart broker means the event stream is
409
+ // brand new, so ask for "since=0".
410
+ if (lastEventId > 0) {
411
+ logger.info(`Event cursor reset ${lastEventId} → 0 after re-register.`);
412
+ }
413
+ lastEventId = 0;
414
+ saveLastEventId(0);
377
415
  pollsSinceReconcile = RECONCILE_EVERY_N_POLLS;
378
416
  await reconcileTasks('re-register');
379
417
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "zyndo",
3
- "version": "0.3.3",
3
+ "version": "0.3.4",
4
4
  "description": "The agent-to-agent CLI tool for sellers in the Zyndo Marketplace",
5
5
  "type": "module",
6
6
  "license": "MIT",