zyndo 0.1.8 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/sellerDaemon.d.ts +13 -0
- package/dist/sellerDaemon.js +78 -9
- package/package.json +1 -1
package/dist/sellerDaemon.d.ts
CHANGED
|
@@ -1,4 +1,17 @@
|
|
|
1
1
|
import type { SellerConfig } from './config.js';
|
|
2
|
+
/** Parse "Reconnect failed (NNN): ..." and return the status code or undefined. */
|
|
3
|
+
export declare function parseReconnectErrorStatus(err: unknown): number | undefined;
|
|
4
|
+
/**
|
|
5
|
+
* Permanent failures that will never succeed on retry: 400 (schema/validation
|
|
6
|
+
* error — the CLI payload itself is wrong for this broker), 404 (endpoint
|
|
7
|
+
* gone), 410 (reconnectToken invalid/expired — the broker explicitly told us
|
|
8
|
+
* to stop trying and start fresh).
|
|
9
|
+
*
|
|
10
|
+
* Transient failures worth retrying: 401 (may be a race with key rotation),
|
|
11
|
+
* 408/429/5xx, and everything else (including network errors where the status
|
|
12
|
+
* could not be parsed).
|
|
13
|
+
*/
|
|
14
|
+
export declare function isPermanentReconnectFailure(err: unknown): boolean;
|
|
2
15
|
export type DaemonLogger = Readonly<{
|
|
3
16
|
info: (msg: string) => void;
|
|
4
17
|
error: (msg: string) => void;
|
package/dist/sellerDaemon.js
CHANGED
|
@@ -16,6 +16,44 @@ import { createAskBuyerTool } from './tools/askBuyer.js';
|
|
|
16
16
|
import { loadState, saveState, deleteState, loadSession, saveSession } from './state.js';
|
|
17
17
|
const POLL_INTERVAL_MS = 25_000;
|
|
18
18
|
const HEARTBEAT_INTERVAL_MS = 45_000;
|
|
19
|
+
// ---------------------------------------------------------------------------
|
|
20
|
+
// Reconnect error classification (incident 2026-04-09 follow-up)
|
|
21
|
+
//
|
|
22
|
+
// When a reconnect attempt fails, we need to distinguish transient failures
|
|
23
|
+
// (network blip, 5xx, 401 on a rotated key) from permanent ones (400 schema
|
|
24
|
+
// validation, 410 reconnect-token invalid). Transient failures should be
|
|
25
|
+
// retried with exponential backoff. Permanent failures should NOT be retried
|
|
26
|
+
// — instead the daemon should fall through to a fresh connect with the full
|
|
27
|
+
// registration payload (skills, categories, etc.) so the seller stays online.
|
|
28
|
+
//
|
|
29
|
+
// Before this classification, a permanent 400 ("Sellers must register at
|
|
30
|
+
// least one skill") would retry 5x, fail, continue the outer loop, and hit
|
|
31
|
+
// the same 400 on the next heartbeat cycle forever.
|
|
32
|
+
// ---------------------------------------------------------------------------
|
|
33
|
+
/**
 * Extract the HTTP status code from a reconnect failure.
 *
 * The status is recovered by searching the error text for the pattern
 * "Reconnect failed (NNN)", where NNN is exactly three digits.
 *
 * The text searched is, in order of preference:
 *   1. `err.message` when `err` is an `Error`;
 *   2. `err.message` when `err` is any other object with a string `message`
 *      (e.g. a rejected plain object), so such values are not
 *      flattened to "[object Object]" and silently lose their status;
 *   3. `String(err)` for everything else.
 *
 * This function never throws: it is meant to be called from inside a `catch`
 * handler, so a value that cannot be stringified simply yields `undefined`.
 *
 * @param {unknown} err The value caught from a failed reconnect attempt.
 * @returns {number | undefined} The three-digit status code, or `undefined`
 *   when the text does not contain the expected pattern.
 */
export function parseReconnectErrorStatus(err) {
    let msg;
    if (err instanceof Error) {
        msg = err.message;
    }
    else if (typeof err === 'object' && err !== null && typeof err.message === 'string') {
        // Error-like object that is not an Error instance.
        msg = err.message;
    }
    else {
        try {
            msg = String(err);
        }
        catch {
            // e.g. an object with a null prototype cannot be converted to a
            // string; there is no status to recover from it.
            return undefined;
        }
    }
    const match = /Reconnect failed \((\d{3})\)/.exec(msg);
    if (match === null)
        return undefined;
    return Number.parseInt(match[1], 10);
}
|
|
41
|
+
/**
 * Report whether a reconnect failure is permanent, i.e. retrying the same
 * reconnect request is pointless.
 *
 * Permanent statuses:
 *   - 400: schema/validation error — the payload itself is wrong for this broker
 *   - 404: endpoint gone
 *   - 410: reconnectToken invalid/expired — stop trying and start fresh
 *
 * Every other outcome is treated as transient and worth retrying: 401 (may be
 * a race with key rotation), 408/429/5xx, and any error whose status could not
 * be parsed (such as a network error).
 *
 * @param {unknown} err The value caught from a failed reconnect attempt.
 * @returns {boolean} `true` only for status 400, 404 or 410.
 */
export function isPermanentReconnectFailure(err) {
    switch (parseReconnectErrorStatus(err)) {
        case 400:
        case 404:
        case 410:
            return true;
        default:
            // Covers an unparseable status (undefined) as well as all other codes.
            return false;
    }
}
|
|
19
57
|
const defaultLogger = {
|
|
20
58
|
info: (msg) => process.stdout.write(`[zyndo] ${msg}\n`),
|
|
21
59
|
error: (msg) => process.stderr.write(`[zyndo] ERROR: ${msg}\n`)
|
|
@@ -104,14 +142,16 @@ export async function startSellerDaemon(config, opts) {
|
|
|
104
142
|
}
|
|
105
143
|
catch {
|
|
106
144
|
logger.info('Heartbeat failed, attempting reconnect...');
|
|
107
|
-
//
|
|
108
|
-
//
|
|
109
|
-
//
|
|
110
|
-
//
|
|
111
|
-
//
|
|
112
|
-
//
|
|
145
|
+
// Retry strategy (incident 2026-04-09):
|
|
146
|
+
// - Transient failures (5xx, 401, network blip): retry up to 5x
|
|
147
|
+
// with 2/4/8/16/32s backoff.
|
|
148
|
+
// - Permanent failures (400/404/410): abort retry immediately
|
|
149
|
+
// and fall through to a FRESH connect that re-registers skills.
|
|
150
|
+
// This handles the "Sellers must register at least one skill"
|
|
151
|
+
// endless-loop bug and the 410 reconnectToken-invalid case.
|
|
113
152
|
const backoffs = [2_000, 4_000, 8_000, 16_000, 32_000];
|
|
114
153
|
let reconnected = false;
|
|
154
|
+
let permanent = false;
|
|
115
155
|
for (let attempt = 0; attempt < backoffs.length; attempt += 1) {
|
|
116
156
|
if (signal !== undefined && signal.aborted)
|
|
117
157
|
break;
|
|
@@ -129,16 +169,45 @@ export async function startSellerDaemon(config, opts) {
|
|
|
129
169
|
}
|
|
130
170
|
catch (reconnectError) {
|
|
131
171
|
const msg = reconnectError instanceof Error ? reconnectError.message : String(reconnectError);
|
|
172
|
+
if (isPermanentReconnectFailure(reconnectError)) {
|
|
173
|
+
logger.error(`Reconnect permanently failed (${msg}). Falling through to fresh registration.`);
|
|
174
|
+
permanent = true;
|
|
175
|
+
break;
|
|
176
|
+
}
|
|
132
177
|
logger.error(`Reconnect attempt ${attempt + 1}/${backoffs.length} failed: ${msg}`);
|
|
133
178
|
if (attempt < backoffs.length - 1) {
|
|
134
179
|
await new Promise((resolve) => setTimeout(resolve, backoffs[attempt]));
|
|
135
180
|
}
|
|
136
181
|
}
|
|
137
182
|
}
|
|
138
|
-
if (!reconnected) {
|
|
183
|
+
if (!reconnected && permanent && (signal === undefined || !signal.aborted)) {
|
|
184
|
+
// Permanent reconnect failure — re-register from scratch so the
|
|
185
|
+
// daemon stays alive. The new agentId will be different; the
|
|
186
|
+
// broker's account-scoped identity lookup (Fix C) should map
|
|
187
|
+
// the user's API key back to the same stable seller agentId.
|
|
188
|
+
try {
|
|
189
|
+
logger.info(`Re-registering as seller "${config.name}"...`);
|
|
190
|
+
session = await connect(config.bridgeUrl, config.apiKey, {
|
|
191
|
+
role: 'seller',
|
|
192
|
+
name: config.name,
|
|
193
|
+
description: config.description,
|
|
194
|
+
skills: [...config.skills],
|
|
195
|
+
categories: [...config.categories],
|
|
196
|
+
maxConcurrentTasks: config.maxConcurrentTasks
|
|
197
|
+
});
|
|
198
|
+
saveSession(session.agentId, session.reconnectToken);
|
|
199
|
+
logger.info(`Re-registered: agentId=${session.agentId}`);
|
|
200
|
+
lastHeartbeat = Date.now();
|
|
201
|
+
reconnected = true;
|
|
202
|
+
}
|
|
203
|
+
catch (freshErr) {
|
|
204
|
+
const msg = freshErr instanceof Error ? freshErr.message : String(freshErr);
|
|
205
|
+
logger.error(`Fresh re-registration failed: ${msg}. Will retry on next heartbeat cycle.`);
|
|
206
|
+
lastHeartbeat = Date.now();
|
|
207
|
+
}
|
|
208
|
+
}
|
|
209
|
+
if (!reconnected && !permanent) {
|
|
139
210
|
logger.error('All reconnect attempts exhausted. Will retry on next heartbeat cycle.');
|
|
140
|
-
// Reset the heartbeat clock so we don't spin on reconnect —
|
|
141
|
-
// wait a full HEARTBEAT_INTERVAL_MS before trying again.
|
|
142
211
|
lastHeartbeat = Date.now();
|
|
143
212
|
}
|
|
144
213
|
}
|