zyndo 0.1.8 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,17 @@
1
1
  import type { SellerConfig } from './config.js';
2
+ /** Parse "Reconnect failed (NNN): ..." and return the status code or undefined. */
3
+ export declare function parseReconnectErrorStatus(err: unknown): number | undefined;
4
+ /**
5
+ * Permanent failures that will never succeed on retry: 400 (schema/validation
6
+ * error — the CLI payload itself is wrong for this broker), 404 (endpoint
7
+ * gone), 410 (reconnectToken invalid/expired — the broker explicitly told us
8
+ * to stop trying and start fresh).
9
+ *
10
+ * Transient failures worth retrying: 401 (may be a race with key rotation),
11
+ * 408/429/5xx, and everything else (including network errors where the status
12
+ * could not be parsed).
13
+ */
14
+ export declare function isPermanentReconnectFailure(err: unknown): boolean;
2
15
  export type DaemonLogger = Readonly<{
3
16
  info: (msg: string) => void;
4
17
  error: (msg: string) => void;
@@ -16,6 +16,44 @@ import { createAskBuyerTool } from './tools/askBuyer.js';
16
16
  import { loadState, saveState, deleteState, loadSession, saveSession } from './state.js';
17
17
  const POLL_INTERVAL_MS = 25_000;
18
18
  const HEARTBEAT_INTERVAL_MS = 45_000;
19
+ // ---------------------------------------------------------------------------
20
+ // Reconnect error classification (incident 2026-04-09 follow-up)
21
+ //
22
+ // When a reconnect attempt fails, we need to distinguish transient failures
23
+ // (network blip, 5xx, 401 on a rotated key) from permanent ones (4xx schema
24
+ // validation, 410 reconnect-token invalid). Transient failures should be
25
+ // retried with exponential backoff. Permanent failures should NOT be retried
26
+ // — instead the daemon should fall through to a fresh connect with the full
27
+ // registration payload (skills, categories, etc.) so the seller stays online.
28
+ //
29
+ // Before this classification, a permanent 400 ("Sellers must register at
30
+ // least one skill") would retry 5x, fail, continue the outer loop, and hit
31
+ // the same 400 on the next heartbeat cycle forever.
32
+ // ---------------------------------------------------------------------------
33
/**
 * Extract the HTTP status code from a reconnect failure.
 *
 * Looks for the text "Reconnect failed (NNN)" (exactly three digits inside the
 * parentheses) in the error's message and returns NNN as a number.
 *
 * The message is taken from, in order of preference:
 *   1. the `message` property, when `err` is an object whose `message` is a
 *      string (covers `Error` instances as well as error-like plain objects);
 *   2. `String(err)` for everything else (strings, numbers, objects with a
 *      custom `toString`, `null`, `undefined`).
 *
 * This function never throws: it is meant to be called from inside a `catch`
 * handler, so a value that cannot be inspected is simply reported as "status
 * unknown".
 *
 * @param err - The value caught from a failed reconnect attempt.
 * @returns The three-digit status code, or `undefined` when no status could
 *          be found in the message.
 */
export function parseReconnectErrorStatus(err) {
    let text;
    try {
        // Prefer a string `message` so error-like objects that are not
        // `Error` instances do not collapse to "[object Object]".
        const message = typeof err === 'object' && err !== null ? err.message : undefined;
        text = typeof message === 'string' ? message : String(err);
    }
    catch {
        // Reading `message` (getter/Proxy) or String() on a value that cannot
        // be converted to a primitive (e.g. a null-prototype object) throws.
        // Treat that as "status could not be parsed".
        return undefined;
    }
    const match = /Reconnect failed \((\d{3})\)/.exec(text);
    if (match === null) {
        return undefined;
    }
    const digits = match[1];
    return digits === undefined ? undefined : Number.parseInt(digits, 10);
}
41
/**
 * Decide whether a failed reconnect is permanent, i.e. retrying the same
 * reconnect request cannot succeed.
 *
 * Permanent statuses:
 *   - 400: schema/validation error (the CLI payload itself is wrong for this
 *          broker)
 *   - 404: endpoint gone
 *   - 410: reconnectToken invalid/expired (the broker explicitly told us to
 *          stop trying and start fresh)
 *
 * Everything else is treated as transient and worth retrying: 401 (may be a
 * race with key rotation), 408/429/5xx, and any error whose status could not
 * be parsed (including network errors).
 *
 * @param err - The value caught from a failed reconnect attempt.
 * @returns `true` for 400/404/410, `false` otherwise.
 */
export function isPermanentReconnectFailure(err) {
    switch (parseReconnectErrorStatus(err)) {
        case 400:
        case 404:
        case 410:
            return true;
        default:
            // Covers every other status as well as `undefined` (unparsed).
            return false;
    }
}
19
57
  const defaultLogger = {
20
58
  info: (msg) => process.stdout.write(`[zyndo] ${msg}\n`),
21
59
  error: (msg) => process.stderr.write(`[zyndo] ERROR: ${msg}\n`)
@@ -104,14 +142,16 @@ export async function startSellerDaemon(config, opts) {
104
142
  }
105
143
  catch {
106
144
  logger.info('Heartbeat failed, attempting reconnect...');
107
- // Bounded retry with exponential backoff. Previously a single
108
- // reconnect failure would `break` out of the main loop and kill
109
- // the daemon mid-task. A transient network blip or 401 is now
110
- // survivable — we retry up to 5 times (2s/4s/8s/16s/32s) before
111
- // giving up and restarting the outer loop iteration. If the
112
- // signal is aborted, exit cleanly. Incident 2026-04-09.
145
+ // Retry strategy (incident 2026-04-09):
146
+ // - Transient failures (5xx, 401, network blip): retry up to 5x
147
+ // with 2/4/8/16/32s backoff.
148
+ // - Permanent failures (400/404/410): abort retry immediately
149
+ // and fall through to a FRESH connect that re-registers skills.
150
+ // This handles the "Sellers must register at least one skill"
151
+ // endless-loop bug and the 410 reconnectToken-invalid case.
113
152
  const backoffs = [2_000, 4_000, 8_000, 16_000, 32_000];
114
153
  let reconnected = false;
154
+ let permanent = false;
115
155
  for (let attempt = 0; attempt < backoffs.length; attempt += 1) {
116
156
  if (signal !== undefined && signal.aborted)
117
157
  break;
@@ -129,16 +169,45 @@ export async function startSellerDaemon(config, opts) {
129
169
  }
130
170
  catch (reconnectError) {
131
171
  const msg = reconnectError instanceof Error ? reconnectError.message : String(reconnectError);
172
+ if (isPermanentReconnectFailure(reconnectError)) {
173
+ logger.error(`Reconnect permanently failed (${msg}). Falling through to fresh registration.`);
174
+ permanent = true;
175
+ break;
176
+ }
132
177
  logger.error(`Reconnect attempt ${attempt + 1}/${backoffs.length} failed: ${msg}`);
133
178
  if (attempt < backoffs.length - 1) {
134
179
  await new Promise((resolve) => setTimeout(resolve, backoffs[attempt]));
135
180
  }
136
181
  }
137
182
  }
138
- if (!reconnected) {
183
+ if (!reconnected && permanent && (signal === undefined || !signal.aborted)) {
184
+ // Permanent reconnect failure — re-register from scratch so the
185
+ // daemon stays alive. The new agentId will be different; the
186
+ // broker's account-scoped identity lookup (Fix C) should map
187
+ // the user's API key back to the same stable seller agentId.
188
+ try {
189
+ logger.info(`Re-registering as seller "${config.name}"...`);
190
+ session = await connect(config.bridgeUrl, config.apiKey, {
191
+ role: 'seller',
192
+ name: config.name,
193
+ description: config.description,
194
+ skills: [...config.skills],
195
+ categories: [...config.categories],
196
+ maxConcurrentTasks: config.maxConcurrentTasks
197
+ });
198
+ saveSession(session.agentId, session.reconnectToken);
199
+ logger.info(`Re-registered: agentId=${session.agentId}`);
200
+ lastHeartbeat = Date.now();
201
+ reconnected = true;
202
+ }
203
+ catch (freshErr) {
204
+ const msg = freshErr instanceof Error ? freshErr.message : String(freshErr);
205
+ logger.error(`Fresh re-registration failed: ${msg}. Will retry on next heartbeat cycle.`);
206
+ lastHeartbeat = Date.now();
207
+ }
208
+ }
209
+ if (!reconnected && !permanent) {
139
210
  logger.error('All reconnect attempts exhausted. Will retry on next heartbeat cycle.');
140
- // Reset the heartbeat clock so we don't spin on reconnect —
141
- // wait a full HEARTBEAT_INTERVAL_MS before trying again.
142
211
  lastHeartbeat = Date.now();
143
212
  }
144
213
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "zyndo",
3
- "version": "0.1.8",
3
+ "version": "0.2.0",
4
4
  "description": "The agent-to-agent CLI tool for sellers in the Zyndo Marketplace",
5
5
  "type": "module",
6
6
  "license": "MIT",