zyndo 0.3.2 → 0.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -42,6 +42,9 @@ export type TaskDetail = Readonly<{
42
42
  content: string;
43
43
  }>;
44
44
  createdAt: string;
45
+ revisionCount?: number;
46
+ pendingRevisionFeedback?: string;
47
+ pendingRevisionRequestedAt?: string;
45
48
  deliverablesSnapshot?: SkillDeliverablesSnapshot;
46
49
  }>;
47
50
  export type TaskMessage = Readonly<{
@@ -52,6 +55,29 @@ export type TaskMessage = Readonly<{
52
55
  content: string;
53
56
  createdAt: string;
54
57
  }>;
58
+ export type ReconnectOpts = Readonly<{
59
+ role: 'seller' | 'buyer';
60
+ name: string;
61
+ description: string;
62
+ }>;
63
+ export type SessionHolder = {
64
+ current: AgentSession;
65
+ /**
66
+ * API key used to authenticate reconnect calls. Empty string disables
67
+ * automatic reconnect on 401 (used by tests and MCP buyer bridges that
68
+ * don't own a long-running session).
69
+ */
70
+ apiKey: string;
71
+ reconnectOpts: ReconnectOpts;
72
+ };
73
+ export declare function createSessionHolder(session: AgentSession, apiKey: string, reconnectOpts: ReconnectOpts): SessionHolder;
74
+ /**
75
+ * Wrap a raw session in a holder for callers that don't need
76
+ * automatic reconnect-on-401 (e.g. MCP buyer-side bridges). Passing a bare
77
+ * holder means `withAuthRetry` will never attempt to reconnect — it will
78
+ * simply throw the original 401 error the way the old helpers did.
79
+ */
80
+ export declare function bareSessionHolder(session: AgentSession): SessionHolder;
55
81
  export declare function connect(bridgeUrl: string, apiKey: string, opts: {
56
82
  role: 'seller' | 'buyer';
57
83
  name: string;
@@ -71,19 +97,19 @@ export declare function reconnect(session: AgentSession, apiKey: string, opts?:
71
97
  name?: string;
72
98
  description?: string;
73
99
  }): Promise<AgentSession>;
74
- export declare function heartbeat(session: AgentSession): Promise<void>;
75
- export declare function pollEvents(session: AgentSession, ack?: number): Promise<ReadonlyArray<AgentEvent>>;
76
- export declare function acceptTask(session: AgentSession, taskId: string): Promise<void>;
100
+ export declare function heartbeat(holder: SessionHolder): Promise<void>;
101
+ export declare function pollEvents(holder: SessionHolder, ack?: number): Promise<ReadonlyArray<AgentEvent>>;
102
+ export declare function acceptTask(holder: SessionHolder, taskId: string): Promise<void>;
77
103
  /**
78
104
  * Slice 3b — register the seller's Ed25519 public key with the broker so
79
105
  * subsequent signed deliveries can be verified. Safe to call on every daemon
80
106
  * start; the broker upserts and tracks rotations.
81
107
  */
82
- export declare function registerIdentity(session: AgentSession, publicKeyB64: string): Promise<void>;
83
- export declare function deliverTask(session: AgentSession, taskId: string, content: string, signatureB64?: string): Promise<void>;
84
- export declare function sendTaskMessage(session: AgentSession, taskId: string, type: 'question' | 'answer' | 'info', content: string): Promise<void>;
85
- export declare function getTaskMessages(session: AgentSession, taskId: string): Promise<ReadonlyArray<TaskMessage>>;
86
- export declare function getTaskDetail(session: AgentSession, taskId: string): Promise<TaskDetail | undefined>;
108
+ export declare function registerIdentity(holder: SessionHolder, publicKeyB64: string): Promise<void>;
109
+ export declare function deliverTask(holder: SessionHolder, taskId: string, content: string, signatureB64?: string): Promise<void>;
110
+ export declare function sendTaskMessage(holder: SessionHolder, taskId: string, type: 'question' | 'answer' | 'info', content: string): Promise<void>;
111
+ export declare function getTaskMessages(holder: SessionHolder, taskId: string): Promise<ReadonlyArray<TaskMessage>>;
112
+ export declare function getTaskDetail(holder: SessionHolder, taskId: string): Promise<TaskDetail | undefined>;
87
113
  /**
88
114
  * List every task the broker has on record for this agent (both as buyer and
89
115
  * seller). Used by the seller daemon to reconcile against its event cursor:
@@ -93,4 +119,4 @@ export declare function getTaskDetail(session: AgentSession, taskId: string): Pr
93
119
  * seller can appear "online" (heartbeat OK, polling OK) while silently leaving
94
120
  * a buyer's task stuck forever.
95
121
  */
96
- export declare function listAgentTasks(session: AgentSession): Promise<ReadonlyArray<TaskDetail>>;
122
+ export declare function listAgentTasks(holder: SessionHolder): Promise<ReadonlyArray<TaskDetail>>;
@@ -1,11 +1,44 @@
1
1
  // ---------------------------------------------------------------------------
2
2
  // Connection module — connect, heartbeat, poll events, deliver
3
+ //
4
+ // Every network helper takes a SessionHolder (a mutable wrapper around the
5
+ // current AgentSession) instead of the raw session. This makes broker
6
+ // restart-mid-task survivable:
7
+ //
8
+ // 1. Seller daemon creates ONE holder at startup.
9
+ // 2. All handlers (handleTask, handleRevision, handleMessage) receive that
10
+ // holder by reference and call network helpers with it.
11
+ // 3. On any 401 response, `withAuthRetry` reconnects, mutates the holder's
12
+ // `.current` in place, and retries the original call once.
13
+ // 4. In-flight handlers see the new token on their next network call
14
+ // because they hold the same holder reference.
15
+ //
16
+ // Before this change, `session` was passed by value to each handler. The
17
+ // outer heartbeat loop could reconnect and update its own `session` variable,
18
+ // but the already-running handler still held the stale object in its
19
+ // closure and died with 401 on deliver. Incident 2026-04-14.
3
20
  // ---------------------------------------------------------------------------
4
21
  // Mirror of @zyndo/contracts MAX_AGENT_MESSAGE_CHARS. The CLI ships as a
5
22
  // standalone npm package, so we duplicate this small constant rather than
6
23
  // pulling in the contracts workspace as a dependency. Keep these values in
7
24
  // sync if the broker-side limit ever changes.
8
25
  export const MAX_AGENT_MESSAGE_CHARS = 50_000;
26
+ export function createSessionHolder(session, apiKey, reconnectOpts) {
27
+ return { current: session, apiKey, reconnectOpts };
28
+ }
29
+ /**
30
+ * Wrap a raw session in a holder for callers that don't need
31
+ * automatic reconnect-on-401 (e.g. MCP buyer-side bridges). Passing a bare
32
+ * holder means `withAuthRetry` will never attempt to reconnect — it will
33
+ * simply throw the original 401 error the way the old helpers did.
34
+ */
35
+ export function bareSessionHolder(session) {
36
+ return {
37
+ current: session,
38
+ apiKey: '',
39
+ reconnectOpts: { role: 'buyer', name: '', description: '' }
40
+ };
41
+ }
9
42
  // ---------------------------------------------------------------------------
10
43
  // HTTP helpers
11
44
  // ---------------------------------------------------------------------------
@@ -22,7 +55,52 @@ async function jsonGet(url, token) {
22
55
  return fetch(url, { headers });
23
56
  }
24
57
  // ---------------------------------------------------------------------------
25
- // Connect
58
+ // withAuthRetry — shared 401-retry wrapper used by every network helper
59
+ // ---------------------------------------------------------------------------
60
+ /**
61
+ * Make an authenticated request. If the broker returns 401, attempt a
62
+ * single reconnect, mutate the holder's `.current` in place, and retry
63
+ * the call once with the fresh token.
64
+ *
65
+ * - Never loops. At most one retry.
66
+ * - If the holder has no `apiKey` (bare holder), 401 is passed through
67
+ * to the caller unchanged — this preserves the pre-holder semantics for
68
+ * MCP buyer bridges and unit tests.
69
+ * - The first 401 triggers a reconnect attempt; if that reconnect itself
70
+ * throws, a new error describing both the 401 and the reconnect failure is thrown.
71
+ * - Non-401 error statuses are NEVER retried.
72
+ */
73
+ async function withAuthRetry(holder, label, doCall) {
74
+ const firstRes = await doCall(holder.current.token);
75
+ if (firstRes.status !== 401)
76
+ return firstRes;
77
+ if (holder.apiKey.length === 0)
78
+ return firstRes;
79
+ // Drain the body so the socket can be reused.
80
+ try {
81
+ await firstRes.text();
82
+ }
83
+ catch { /* ignore */ }
84
+ try {
85
+ const fresh = await reconnect(holder.current, holder.apiKey, {
86
+ role: holder.reconnectOpts.role,
87
+ name: holder.reconnectOpts.name,
88
+ description: holder.reconnectOpts.description
89
+ });
90
+ holder.current = fresh;
91
+ process.stderr.write(`[zyndo] ${label}: session refreshed after 401, retrying once\n`);
92
+ }
93
+ catch (reconnectErr) {
94
+ const msg = reconnectErr instanceof Error ? reconnectErr.message : String(reconnectErr);
95
+ process.stderr.write(`[zyndo] ${label}: reconnect-on-401 failed: ${msg}\n`);
96
+ // Surface the reconnect error; the caller can decide whether to bubble
97
+ // it up or swallow it (pollEvents, for example, soft-fails).
98
+ throw new Error(`${label} 401 and reconnect failed: ${msg}`);
99
+ }
100
+ return doCall(holder.current.token);
101
+ }
102
+ // ---------------------------------------------------------------------------
103
+ // Connect (no holder — creates one)
26
104
  // ---------------------------------------------------------------------------
27
105
  export async function connect(bridgeUrl, apiKey, opts) {
28
106
  const headers = {
@@ -45,14 +123,14 @@ export async function connect(bridgeUrl, apiKey, opts) {
45
123
  body: JSON.stringify(body)
46
124
  });
47
125
  if (!res.ok) {
48
- const body = await res.text();
49
- throw new Error(`Connect failed (${res.status}): ${body}`);
126
+ const errBody = await res.text();
127
+ throw new Error(`Connect failed (${res.status}): ${errBody}`);
50
128
  }
51
129
  const data = (await res.json());
52
130
  return { agentId: data.agentId, token: data.token, reconnectToken: data.reconnectToken, bridgeUrl };
53
131
  }
54
132
  // ---------------------------------------------------------------------------
55
- // Reconnect
133
+ // Reconnect — called by withAuthRetry and by the seller daemon outer loop
56
134
  // ---------------------------------------------------------------------------
57
135
  export async function reconnect(session, apiKey, opts) {
58
136
  // The broker's /agent/connect endpoint is gated on x-zyndo-api-key when
@@ -82,27 +160,36 @@ export async function reconnect(session, apiKey, opts) {
82
160
  // ---------------------------------------------------------------------------
83
161
  // Heartbeat
84
162
  // ---------------------------------------------------------------------------
85
- export async function heartbeat(session) {
86
- const res = await jsonPost(`${session.bridgeUrl}/agent/heartbeat`, {}, session.token);
87
- if (!res.ok && res.status === 401) {
88
- throw new Error('Heartbeat failed: session expired. Attempting reconnect.');
163
+ export async function heartbeat(holder) {
164
+ const res = await withAuthRetry(holder, 'heartbeat', (token) => jsonPost(`${holder.current.bridgeUrl}/agent/heartbeat`, {}, token));
165
+ if (!res.ok) {
166
+ // A 401 after a successful retry cycle still reaches here, and any other
167
+ // non-ok status. The outer daemon heartbeat loop will catch this and
168
+ // attempt a fresh reconnect/re-register cycle as a last resort.
169
+ throw new Error(`Heartbeat failed (${res.status}): ${await res.text().catch(() => '<no body>')}`);
89
170
  }
90
171
  }
91
172
  // ---------------------------------------------------------------------------
92
- // Poll events
173
+ // Poll events — soft-fails on any error (returns empty array) because the
174
+ // seller daemon polls on a tick and can't die on a single failed poll.
93
175
  // ---------------------------------------------------------------------------
94
- export async function pollEvents(session, ack = 0) {
95
- const res = await jsonGet(`${session.bridgeUrl}/agent/events?since=${ack}`, session.token);
96
- if (!res.ok)
176
+ export async function pollEvents(holder, ack = 0) {
177
+ try {
178
+ const res = await withAuthRetry(holder, 'pollEvents', (token) => jsonGet(`${holder.current.bridgeUrl}/agent/events?since=${ack}`, token));
179
+ if (!res.ok)
180
+ return [];
181
+ const data = (await res.json());
182
+ return data.events;
183
+ }
184
+ catch {
97
185
  return [];
98
- const data = (await res.json());
99
- return data.events;
186
+ }
100
187
  }
101
188
  // ---------------------------------------------------------------------------
102
189
  // Task operations
103
190
  // ---------------------------------------------------------------------------
104
- export async function acceptTask(session, taskId) {
105
- const res = await jsonPost(`${session.bridgeUrl}/agent/tasks/${taskId}/accept`, {}, session.token);
191
+ export async function acceptTask(holder, taskId) {
192
+ const res = await withAuthRetry(holder, `accept task ${taskId}`, (token) => jsonPost(`${holder.current.bridgeUrl}/agent/tasks/${taskId}/accept`, {}, token));
106
193
  if (!res.ok) {
107
194
  throw new Error(`Accept failed (${res.status}): ${await res.text()}`);
108
195
  }
@@ -112,23 +199,23 @@ export async function acceptTask(session, taskId) {
112
199
  * subsequent signed deliveries can be verified. Safe to call on every daemon
113
200
  * start; the broker upserts and tracks rotations.
114
201
  */
115
- export async function registerIdentity(session, publicKeyB64) {
116
- const res = await jsonPost(`${session.bridgeUrl}/agent/identity/register`, { publicKeyB64 }, session.token);
202
+ export async function registerIdentity(holder, publicKeyB64) {
203
+ const res = await withAuthRetry(holder, 'registerIdentity', (token) => jsonPost(`${holder.current.bridgeUrl}/agent/identity/register`, { publicKeyB64 }, token));
117
204
  if (!res.ok) {
118
205
  throw new Error(`Identity register failed (${res.status}): ${await res.text()}`);
119
206
  }
120
207
  }
121
- export async function deliverTask(session, taskId, content, signatureB64) {
208
+ export async function deliverTask(holder, taskId, content, signatureB64) {
122
209
  const body = { output: { type: 'text', content } };
123
210
  if (signatureB64 !== undefined) {
124
211
  body.signature = signatureB64;
125
212
  }
126
- const res = await jsonPost(`${session.bridgeUrl}/agent/tasks/${taskId}/deliver`, body, session.token);
213
+ const res = await withAuthRetry(holder, `deliver task ${taskId}`, (token) => jsonPost(`${holder.current.bridgeUrl}/agent/tasks/${taskId}/deliver`, body, token));
127
214
  if (!res.ok) {
128
215
  throw new Error(`Deliver failed (${res.status}): ${await res.text()}`);
129
216
  }
130
217
  }
131
- export async function sendTaskMessage(session, taskId, type, content) {
218
+ export async function sendTaskMessage(holder, taskId, type, content) {
132
219
  // Pre-flight guard: fail fast locally instead of round-tripping to the broker
133
220
  // when the message obviously exceeds the marketplace limit. The broker will
134
221
  // also reject this — this is just for faster, clearer feedback to the agent.
@@ -136,20 +223,20 @@ export async function sendTaskMessage(session, taskId, type, content) {
136
223
  throw new Error(`Message content is ${content.length} characters, which exceeds the ${MAX_AGENT_MESSAGE_CHARS}-character marketplace limit. ` +
137
224
  `Trim the message before sending. Long context should be summarized or attached as a file delivery, not pasted into a chat message.`);
138
225
  }
139
- const res = await jsonPost(`${session.bridgeUrl}/agent/tasks/${taskId}/messages`, { type, content }, session.token);
226
+ const res = await withAuthRetry(holder, `send message ${taskId}`, (token) => jsonPost(`${holder.current.bridgeUrl}/agent/tasks/${taskId}/messages`, { type, content }, token));
140
227
  if (!res.ok) {
141
228
  throw new Error(`Send message failed (${res.status}): ${await res.text()}`);
142
229
  }
143
230
  }
144
- export async function getTaskMessages(session, taskId) {
145
- const res = await jsonGet(`${session.bridgeUrl}/agent/tasks/${taskId}/messages`, session.token);
231
+ export async function getTaskMessages(holder, taskId) {
232
+ const res = await withAuthRetry(holder, `get messages ${taskId}`, (token) => jsonGet(`${holder.current.bridgeUrl}/agent/tasks/${taskId}/messages`, token));
146
233
  if (!res.ok)
147
234
  return [];
148
235
  const data = (await res.json());
149
236
  return data.messages;
150
237
  }
151
- export async function getTaskDetail(session, taskId) {
152
- const res = await jsonGet(`${session.bridgeUrl}/agent/tasks/${taskId}`, session.token);
238
+ export async function getTaskDetail(holder, taskId) {
239
+ const res = await withAuthRetry(holder, `get task ${taskId}`, (token) => jsonGet(`${holder.current.bridgeUrl}/agent/tasks/${taskId}`, token));
153
240
  if (!res.ok)
154
241
  return undefined;
155
242
  return (await res.json());
@@ -163,8 +250,8 @@ export async function getTaskDetail(session, taskId) {
163
250
  * seller can appear "online" (heartbeat OK, polling OK) while silently leaving
164
251
  * a buyer's task stuck forever.
165
252
  */
166
- export async function listAgentTasks(session) {
167
- const res = await jsonGet(`${session.bridgeUrl}/agent/tasks`, session.token);
253
+ export async function listAgentTasks(holder) {
254
+ const res = await withAuthRetry(holder, 'list tasks', (token) => jsonGet(`${holder.current.bridgeUrl}/agent/tasks`, token));
168
255
  if (!res.ok)
169
256
  return [];
170
257
  const data = (await res.json());
@@ -5,7 +5,7 @@
5
5
  // Streamable HTTP transport (mcpHttpServer.ts). All handlers are pure
6
6
  // functions that take a McpSessionState instead of reading module-level vars.
7
7
  // ---------------------------------------------------------------------------
8
- import { connect, reconnect, pollEvents } from '../connection.js';
8
+ import { connect, reconnect, pollEvents, bareSessionHolder } from '../connection.js';
9
9
  // ---------------------------------------------------------------------------
10
10
  // Tool definitions
11
11
  // ---------------------------------------------------------------------------
@@ -268,7 +268,7 @@ async function handleHire(state, args) {
268
268
  async function handleCheckTasks(state) {
269
269
  if (state.agentSession === undefined)
270
270
  return JSON.stringify({ error: 'Not connected. Call zyndo_connect first.' });
271
- const events = await pollEvents(state.agentSession, state.lastEventId);
271
+ const events = await pollEvents(bareSessionHolder(state.agentSession), state.lastEventId);
272
272
  const taskEvents = {};
273
273
  for (const event of events) {
274
274
  if (event.eventId > state.lastEventId)
@@ -5,7 +5,7 @@
5
5
  // Manages a single McpSessionState and heartbeat timer for the stdio session.
6
6
  // ---------------------------------------------------------------------------
7
7
  import { createInterface } from 'node:readline';
8
- import { heartbeat } from '../connection.js';
8
+ import { heartbeat, bareSessionHolder } from '../connection.js';
9
9
  import { handleMcpMethod, mcpError } from './mcpCore.js';
10
10
  // ---------------------------------------------------------------------------
11
11
  // Heartbeat with auto-reconnect
@@ -15,7 +15,7 @@ function startHeartbeat(state) {
15
15
  if (state.agentSession === undefined)
16
16
  return;
17
17
  try {
18
- await heartbeat(state.agentSession);
18
+ await heartbeat(bareSessionHolder(state.agentSession));
19
19
  }
20
20
  catch {
21
21
  // Session expired — the next tool call will trigger auto-reconnect via mcpCore
@@ -2,7 +2,7 @@
2
2
  // Seller daemon — poll for tasks, run agent loop, deliver results
3
3
  // ---------------------------------------------------------------------------
4
4
  import { resolve } from 'node:path';
5
- import { connect, reconnect, heartbeat, pollEvents, acceptTask, deliverTask, sendTaskMessage, getTaskMessages, getTaskDetail, listAgentTasks, registerIdentity } from './connection.js';
5
+ import { connect, reconnect, createSessionHolder, heartbeat, pollEvents, acceptTask, deliverTask, sendTaskMessage, getTaskMessages, getTaskDetail, listAgentTasks, registerIdentity } from './connection.js';
6
6
  import { ensureIdentityKeypair, signDelivery } from './identity.js';
7
7
  import { composeSystemPrompt, truncateToContract } from './scopeContract.js';
8
8
  import { runAgentLoop } from './agentLoop.js';
@@ -16,14 +16,19 @@ import { createBashTool } from './tools/bash.js';
16
16
  import { createGlobTool } from './tools/glob.js';
17
17
  import { createGrepTool } from './tools/grep.js';
18
18
  import { createAskBuyerTool } from './tools/askBuyer.js';
19
- import { loadState, saveState, deleteState, loadSession, saveSession, loadLastEventId, saveLastEventId } from './state.js';
19
+ import { loadState, saveState, deleteState, loadSession, saveSession, loadLastEventId, saveLastEventId, savePendingDelivery, loadPendingDelivery, deletePendingDelivery, incrementPendingDeliveryAttempts } from './state.js';
20
20
  const POLL_INTERVAL_MS = 25_000;
21
21
  const HEARTBEAT_INTERVAL_MS = 45_000;
22
22
  // Every N poll cycles, reconcile the local active-task set against the
23
23
  // broker's authoritative task list. This catches missed `task.assigned`
24
24
  // events caused by broker restarts (in-memory event queue reset), network
25
25
  // partitions, or dropped SSE frames. 8 cycles at 25s = ~200s safety net.
26
- const RECONCILE_EVERY_N_POLLS = 8;
26
+ // Periodic reconcile safety net. Every N poll cycles, check /agent/tasks for
27
+ // anything the daemon doesn't know about. Was 8 (~200s at 25s polls). Too
28
+ // slow — users saw multi-minute hire pickup delays after broker restarts
29
+ // broke the event cursor. 2 (~50s) is fast enough to be invisible while
30
+ // still being cheap. Incident 2026-04-14.
31
+ const RECONCILE_EVERY_N_POLLS = 2;
27
32
  // ---------------------------------------------------------------------------
28
33
  // Reconnect error classification (incident 2026-04-09 follow-up)
29
34
  //
@@ -154,19 +159,45 @@ export async function startSellerDaemon(config, opts) {
154
159
  }
155
160
  // Persist session for future restarts
156
161
  saveSession(session.agentId, session.reconnectToken);
162
+ // Wrap the session in a mutable holder. Every network helper reads
163
+ // holder.current.token at call time. When a broker restart invalidates
164
+ // the in-memory token map, withAuthRetry inside each helper calls
165
+ // reconnect(), mutates holder.current in place, and retries once. Any
166
+ // in-flight task handler that was mid-call sees the new token on its
167
+ // next network operation because the handler holds the SAME holder
168
+ // reference. Incident 2026-04-14.
169
+ const holder = createSessionHolder(session, config.apiKey, {
170
+ role: 'seller',
171
+ name: config.name,
172
+ description: config.description
173
+ });
157
174
  // Slice 3b — register the Ed25519 public key with the broker. Failures
158
175
  // are logged but non-fatal because signing is soft-enforced until
159
176
  // rollout > 80%.
160
177
  if (identityPublicKeyB64 !== undefined) {
161
178
  try {
162
- await registerIdentity(session, identityPublicKeyB64);
179
+ await registerIdentity(holder, identityPublicKeyB64);
163
180
  logger.info('Identity public key registered with broker.');
164
181
  }
165
182
  catch (err) {
166
183
  logger.error(`Identity registration failed: ${err instanceof Error ? err.message : String(err)}. Deliveries will be unsigned.`);
167
184
  }
168
185
  }
169
- let lastEventId = loadLastEventId();
186
+ // Reset the event cursor on every daemon startup. The persisted value from
187
+ // a previous session can be higher than the broker's in-memory event
188
+ // counter after a broker restart (Railway auto-deploys reset the counter),
189
+ // causing pollEvents to return empty indefinitely — tasks could only be
190
+ // recovered via the periodic reconcile loop, producing multi-minute hire
191
+ // pickup delays. Starting from 0 makes pollEvents instantly see any current
192
+ // event stream. The startup reconcile already catches any in-flight task
193
+ // the broker persisted, and the activeTasks Set dedupes any replayed
194
+ // events so no work is done twice. Incident 2026-04-14.
195
+ const persistedCursor = loadLastEventId();
196
+ if (persistedCursor > 0) {
197
+ logger.info(`Event cursor reset from ${persistedCursor} → 0 on startup (broker may have restarted; reconcile will recover in-flight tasks).`);
198
+ }
199
+ let lastEventId = 0;
200
+ saveLastEventId(0);
170
201
  let lastHeartbeat = Date.now();
171
202
  const activeTasks = new Set();
172
203
  let pollsSinceReconcile = 0;
@@ -185,14 +216,11 @@ export async function startSellerDaemon(config, opts) {
185
216
  * buyer; we just re-add to activeTasks so we do not re-accept).
186
217
  */
187
218
  async function reconcileTasks(reason) {
188
- const activeSession = session;
189
- if (activeSession === undefined)
190
- return;
191
219
  try {
192
- const tasks = await listAgentTasks(activeSession);
220
+ const tasks = await listAgentTasks(holder);
193
221
  let picked = 0;
194
222
  for (const task of tasks) {
195
- if (task.sellerAgentId !== activeSession.agentId)
223
+ if (task.sellerAgentId !== holder.current.agentId)
196
224
  continue;
197
225
  if (activeTasks.has(task.taskId))
198
226
  continue;
@@ -204,18 +232,65 @@ export async function startSellerDaemon(config, opts) {
204
232
  activeTasks.add(task.taskId);
205
233
  picked += 1;
206
234
  logger.info(`Reconcile (${reason}): picking up stuck submitted task ${task.taskId}`);
207
- handleTask(activeSession, task.taskId, config, logger, identityPrivateKey)
235
+ handleTask(holder, task.taskId, config, logger, identityPrivateKey)
208
236
  .catch((err) => {
209
237
  logger.error(`Reconciled task ${task.taskId} failed: ${err instanceof Error ? err.message : String(err)}`);
210
238
  })
211
239
  .finally(() => activeTasks.delete(task.taskId));
212
240
  continue;
213
241
  }
242
+ // `working` tasks with a pending delivery on disk are stuck deliveries
243
+ // from a previous run where deliverTask threw (network blip, broker
244
+ // mid-restart, 401 after the retry budget). Re-enter handleTask so it
245
+ // reads the pending delivery from disk and re-attempts the exact same
246
+ // bytes, no codex re-spawn.
247
+ if (task.state === 'working' && loadPendingDelivery(task.taskId) !== undefined) {
248
+ if (activeTasks.size >= config.maxConcurrentTasks) {
249
+ logger.info(`Reconcile: task ${task.taskId} has pending delivery but seller at capacity, will retry.`);
250
+ continue;
251
+ }
252
+ activeTasks.add(task.taskId);
253
+ picked += 1;
254
+ logger.info(`Reconcile (${reason}): retrying stuck pending delivery for task ${task.taskId}`);
255
+ handleTask(holder, task.taskId, config, logger, identityPrivateKey)
256
+ .catch((err) => {
257
+ logger.error(`Reconciled pending delivery ${task.taskId} failed: ${err instanceof Error ? err.message : String(err)}`);
258
+ })
259
+ .finally(() => activeTasks.delete(task.taskId));
260
+ continue;
261
+ }
262
+ // `working` tasks with `pendingRevisionFeedback` set on the broker side
263
+ // are stuck revisions from a lost `task.revision.requested` event. The
264
+ // broker stamps the feedback onto the task record in the same
265
+ // transaction that emits the event; if the event stream drops frames
266
+ // or the seller's cursor is ahead of the broker's reset-in-memory
267
+ // counter, the seller never sees the event. Reconcile picks it up by
268
+ // reading the feedback straight from the task detail. Incident
269
+ // 2026-04-14 — user requested a revision on a legacy task and the
270
+ // seller daemon sat idle for 8 minutes because this path didn't exist.
271
+ if (task.state === 'working'
272
+ && typeof task.pendingRevisionFeedback === 'string'
273
+ && task.pendingRevisionFeedback.length > 0) {
274
+ if (activeTasks.size >= config.maxConcurrentTasks) {
275
+ logger.info(`Reconcile: task ${task.taskId} has stuck revision but seller at capacity, will retry.`);
276
+ continue;
277
+ }
278
+ activeTasks.add(task.taskId);
279
+ picked += 1;
280
+ const feedback = task.pendingRevisionFeedback;
281
+ logger.info(`Reconcile (${reason}): picking up stuck revision for task ${task.taskId} (feedback=${feedback.slice(0, 80)}${feedback.length > 80 ? '…' : ''})`);
282
+ handleRevision(holder, task.taskId, feedback, config, logger, identityPrivateKey)
283
+ .catch((err) => {
284
+ logger.error(`Reconciled revision ${task.taskId} failed: ${err instanceof Error ? err.message : String(err)}`);
285
+ })
286
+ .finally(() => activeTasks.delete(task.taskId));
287
+ continue;
288
+ }
214
289
  if (task.state === 'input-required' && task.inputType === 'question') {
215
290
  activeTasks.add(task.taskId);
216
291
  picked += 1;
217
292
  logger.info(`Reconcile (${reason}): picking up input-required task ${task.taskId}`);
218
- handleBuyerMessage(activeSession, task.taskId, config, logger, identityPrivateKey)
293
+ handleBuyerMessage(holder, task.taskId, config, logger, identityPrivateKey)
219
294
  .catch((err) => {
220
295
  logger.error(`Reconciled message for ${task.taskId} failed: ${err instanceof Error ? err.message : String(err)}`);
221
296
  })
@@ -240,8 +315,13 @@ export async function startSellerDaemon(config, opts) {
240
315
  // Heartbeat
241
316
  if (Date.now() - lastHeartbeat >= HEARTBEAT_INTERVAL_MS) {
242
317
  try {
243
- await heartbeat(session);
318
+ await heartbeat(holder);
244
319
  lastHeartbeat = Date.now();
320
+ // Persist the (possibly refreshed) session on every successful
321
+ // heartbeat so a daemon restart always picks up the freshest
322
+ // reconnect token. withAuthRetry may have silently refreshed
323
+ // holder.current mid-heartbeat.
324
+ saveSession(holder.current.agentId, holder.current.reconnectToken);
245
325
  }
246
326
  catch {
247
327
  logger.info('Heartbeat failed, attempting reconnect...');
@@ -259,21 +339,33 @@ export async function startSellerDaemon(config, opts) {
259
339
  if (signal !== undefined && signal.aborted)
260
340
  break;
261
341
  try {
262
- session = await reconnect(session, config.apiKey, {
342
+ const fresh = await reconnect(holder.current, config.apiKey, {
263
343
  role: 'seller',
264
344
  name: config.name,
265
345
  description: config.description
266
346
  });
267
- saveSession(session.agentId, session.reconnectToken);
347
+ holder.current = fresh;
348
+ saveSession(holder.current.agentId, holder.current.reconnectToken);
268
349
  logger.info(`Reconnected successfully (attempt ${attempt + 1}).`);
269
350
  lastHeartbeat = Date.now();
270
351
  reconnected = true;
271
- // Force a reconcile on the next tick: the broker may have
272
- // restarted and wiped its in-memory event queue, so our
273
- // lastEventId cursor is now stale relative to the fresh
274
- // nextEventId counter. Without this the seller polls forever
275
- // and never sees task.assigned events that happened while we
276
- // were disconnected or that were issued after the reset.
352
+ // The broker almost certainly restarted (that's why the
353
+ // heartbeat failed in the first place). Its in-memory event
354
+ // counter has reset to 0 while our in-memory lastEventId is
355
+ // still at the old high value every subsequent pollEvents
356
+ // call would return empty because we'd be asking for events
357
+ // "since <high>" from a stream that now starts at 1. Reset
358
+ // the cursor in memory AND on disk so the next poll sees the
359
+ // current event stream. Incident 2026-04-14.
360
+ if (lastEventId > 0) {
361
+ logger.info(`Event cursor reset ${lastEventId} → 0 after reconnect (broker restart suspected).`);
362
+ }
363
+ lastEventId = 0;
364
+ saveLastEventId(0);
365
+ // Also run an immediate reconcile so any task that was
366
+ // already assigned to us before the broker restarted (and
367
+ // is now past the event horizon) gets picked up without
368
+ // waiting for the periodic cycle.
277
369
  pollsSinceReconcile = RECONCILE_EVERY_N_POLLS;
278
370
  await reconcileTasks('reconnect');
279
371
  break;
@@ -298,7 +390,7 @@ export async function startSellerDaemon(config, opts) {
298
390
  // the user's API key back to the same stable seller agentId.
299
391
  try {
300
392
  logger.info(`Re-registering as seller "${config.name}"...`);
301
- session = await connect(config.bridgeUrl, config.apiKey, {
393
+ const freshRegister = await connect(config.bridgeUrl, config.apiKey, {
302
394
  role: 'seller',
303
395
  name: config.name,
304
396
  description: config.description,
@@ -307,10 +399,19 @@ export async function startSellerDaemon(config, opts) {
307
399
  maxConcurrentTasks: config.maxConcurrentTasks,
308
400
  sellerSlug: config.id
309
401
  });
310
- saveSession(session.agentId, session.reconnectToken);
311
- logger.info(`Re-registered: agentId=${session.agentId}`);
402
+ holder.current = freshRegister;
403
+ saveSession(holder.current.agentId, holder.current.reconnectToken);
404
+ logger.info(`Re-registered: agentId=${holder.current.agentId}`);
312
405
  lastHeartbeat = Date.now();
313
406
  reconnected = true;
407
+ // Same cursor reset as the reconnect branch — a fresh connect
408
+ // against a post-restart broker means the event stream is
409
+ // brand new, so ask for "since=0".
410
+ if (lastEventId > 0) {
411
+ logger.info(`Event cursor reset ${lastEventId} → 0 after re-register.`);
412
+ }
413
+ lastEventId = 0;
414
+ saveLastEventId(0);
314
415
  pollsSinceReconcile = RECONCILE_EVERY_N_POLLS;
315
416
  await reconcileTasks('re-register');
316
417
  }
@@ -327,7 +428,7 @@ export async function startSellerDaemon(config, opts) {
327
428
  }
328
429
  }
329
430
  // Poll events
330
- const events = await pollEvents(session, lastEventId);
431
+ const events = await pollEvents(holder, lastEventId);
331
432
  let minDeferredEventId;
332
433
  for (const event of events) {
333
434
  if (event.type === 'task.assigned') {
@@ -349,7 +450,7 @@ export async function startSellerDaemon(config, opts) {
349
450
  if (event.eventId > lastEventId)
350
451
  lastEventId = event.eventId;
351
452
  logger.info(`Task assigned: ${taskId} (active: ${activeTasks.size}/${config.maxConcurrentTasks})`);
352
- handleTask(session, taskId, config, logger, identityPrivateKey)
453
+ handleTask(holder, taskId, config, logger, identityPrivateKey)
353
454
  .catch((err) => {
354
455
  logger.error(`Task ${taskId} failed: ${err instanceof Error ? err.message : String(err)}`);
355
456
  })
@@ -366,7 +467,7 @@ export async function startSellerDaemon(config, opts) {
366
467
  continue; // Task handler is already running
367
468
  activeTasks.add(taskId);
368
469
  logger.info(`Message received for task: ${taskId}`);
369
- handleBuyerMessage(session, taskId, config, logger, identityPrivateKey)
470
+ handleBuyerMessage(holder, taskId, config, logger, identityPrivateKey)
370
471
  .catch((err) => {
371
472
  logger.error(`Message handling for ${taskId} failed: ${err instanceof Error ? err.message : String(err)}`);
372
473
  })
@@ -379,7 +480,7 @@ export async function startSellerDaemon(config, opts) {
379
480
  continue;
380
481
  activeTasks.add(taskId);
381
482
  logger.info(`Revision requested for task: ${taskId}`);
382
- handleRevision(session, taskId, feedback, config, logger, identityPrivateKey)
483
+ handleRevision(holder, taskId, feedback, config, logger, identityPrivateKey)
383
484
  .catch((err) => {
384
485
  logger.error(`Revision for ${taskId} failed: ${err instanceof Error ? err.message : String(err)}`);
385
486
  })
@@ -432,9 +533,40 @@ export async function startSellerDaemon(config, opts) {
432
533
  // ---------------------------------------------------------------------------
433
534
  // Task handler
434
535
  // ---------------------------------------------------------------------------
435
- async function handleTask(session, taskId, config, logger, identityPrivateKey) {
536
+ async function handleTask(holder, taskId, config, logger, identityPrivateKey) {
537
+ // Recovery fast-path: if a pending delivery is already on disk for this
538
+ // task (from a previous run where codex finished but deliverTask failed),
539
+ // skip codex entirely and retry the same bytes. Incident 2026-04-14.
540
+ const pending = loadPendingDelivery(taskId);
541
+ if (pending !== undefined) {
542
+ logger.info(`Task ${taskId}: pending delivery found on disk (${pending.output.length} chars produced at ${pending.producedAt}, attempt ${pending.attempts + 1}), skipping codex and delivering cached bytes directly.`);
543
+ // Ensure the task is in a state the broker will accept delivery on.
544
+ // If we never accepted or already moved to working, accept is idempotent
545
+ // at the 409 level.
546
+ try {
547
+ await acceptTask(holder, taskId);
548
+ }
549
+ catch (err) {
550
+ const msg = err instanceof Error ? err.message : String(err);
551
+ if (!msg.includes('409')) {
552
+ logger.info(`Task ${taskId}: acceptTask during pending replay returned ${msg.slice(0, 120)}; proceeding to deliver anyway.`);
553
+ }
554
+ }
555
+ incrementPendingDeliveryAttempts(taskId);
556
+ try {
557
+ await deliverTask(holder, taskId, pending.output, pending.signature);
558
+ deletePendingDelivery(taskId);
559
+ logger.info(`Task ${taskId}: cached delivery accepted by broker.`);
560
+ saveState({ taskId, messages: [], claudeCodeContext: '', originalContext: '', lastDelivery: pending.output });
561
+ }
562
+ catch (err) {
563
+ logger.error(`Task ${taskId}: cached delivery failed (${err instanceof Error ? err.message : String(err)}). Pending file retained for next reconcile cycle.`);
564
+ throw err;
565
+ }
566
+ return;
567
+ }
436
568
  try {
437
- await acceptTask(session, taskId);
569
+ await acceptTask(holder, taskId);
438
570
  }
439
571
  catch (err) {
440
572
  const msg = err instanceof Error ? err.message : String(err);
@@ -446,9 +578,9 @@ async function handleTask(session, taskId, config, logger, identityPrivateKey) {
446
578
  }
447
579
  logger.info(`Task ${taskId}: accepted, starting work...`);
448
580
  // Get task context from task detail first, then messages as supplement
449
- const detail = await getTaskDetail(session, taskId);
581
+ const detail = await getTaskDetail(holder, taskId);
450
582
  const taskContext = detail?.context ?? '';
451
- const messages = await getTaskMessages(session, taskId);
583
+ const messages = await getTaskMessages(holder, taskId);
452
584
  const messageContext = messages.map((m) => m.content).join('\n');
453
585
  const context = taskContext || messageContext || 'Task assigned. No additional context provided.';
454
586
  // Compose the system prompt with the BOUND CONTRACT + REFUSAL PROTOCOL
@@ -467,14 +599,14 @@ async function handleTask(session, taskId, config, logger, identityPrivateKey) {
467
599
  : await runAgentLoop(createProvider(config.provider, config.model, config.providerApiKey), loadTools(config.allowedTools, config.workingDirectory), context, { systemPrompt: scopedSystemPrompt, taskId });
468
600
  if (result.timedOut === true) {
469
601
  logger.error(`Task ${taskId}: agent timed out or errored, notifying buyer.`);
470
- await sendTaskMessage(session, taskId, 'info', 'The seller agent encountered a timeout processing this task. Please allow more time or simplify the request. The task remains assigned.');
602
+ await sendTaskMessage(holder, taskId, 'info', 'The seller agent encountered a timeout processing this task. Please allow more time or simplify the request. The task remains assigned.');
471
603
  saveState({ taskId, messages: [], claudeCodeContext: context, originalContext: context });
472
604
  return;
473
605
  }
474
606
  if (result.paused) {
475
607
  logger.info(`Task ${taskId}: paused, asking buyer: "${result.pendingQuestion?.slice(0, 100)}..."`);
476
608
  saveState({ taskId, messages: [], claudeCodeContext: context, pendingQuestion: result.pendingQuestion });
477
- await sendTaskMessage(session, taskId, 'question', result.pendingQuestion ?? 'Could you clarify?');
609
+ await sendTaskMessage(holder, taskId, 'question', result.pendingQuestion ?? 'Could you clarify?');
478
610
  return;
479
611
  }
480
612
  // Output-side truncation: count produced units, clamp to contract, log a
@@ -485,21 +617,57 @@ async function handleTask(session, taskId, config, logger, identityPrivateKey) {
485
617
  if (truncated.events.length > 0) {
486
618
  for (const ev of truncated.events) {
487
619
  logger.info(`Task ${taskId}: scope truncation — item="${ev.itemName}" produced=${ev.producedQuantity} allowed=${ev.allowedQuantity}`);
488
- await sendTaskMessage(session, taskId, 'info', `[scope-guard] Your brief requested more "${ev.itemName}" than the listing delivers. The seller agent produced ${ev.producedQuantity} but the contract allows only ${ev.allowedQuantity}. The delivery has been trimmed to the contracted quantity. To get more, hire again with a listing that covers the extra volume.`);
620
+ await sendTaskMessage(holder, taskId, 'info', `[scope-guard] Your brief requested more "${ev.itemName}" than the listing delivers. The seller agent produced ${ev.producedQuantity} but the contract allows only ${ev.allowedQuantity}. The delivery has been trimmed to the contracted quantity. To get more, hire again with a listing that covers the extra volume.`);
489
621
  }
490
622
  }
491
623
  logger.info(`Task ${taskId}: delivering result (${finalOutput.length} chars)`);
492
624
  const signature = identityPrivateKey !== undefined
493
625
  ? signDelivery({ taskId, content: finalOutput, privateKey: identityPrivateKey })
494
626
  : undefined;
495
- await deliverTask(session, taskId, finalOutput, signature);
627
+ // Persist the produced bytes to disk BEFORE calling deliverTask. If the
628
+ // deliver fails (401, network blip, broker mid-restart), the reconcile
629
+ // loop will re-enter handleTask, find this file, and retry the exact
630
+ // same bytes — no codex re-spawn, no regeneration cost. Incident 2026-04-14.
631
+ savePendingDelivery({
632
+ taskId,
633
+ output: finalOutput,
634
+ ...(signature !== undefined ? { signature } : {}),
635
+ producedAt: new Date().toISOString(),
636
+ attempts: 0
637
+ });
638
+ try {
639
+ await deliverTask(holder, taskId, finalOutput, signature);
640
+ }
641
+ catch (err) {
642
+ incrementPendingDeliveryAttempts(taskId);
643
+ logger.error(`Task ${taskId}: initial deliver failed (${err instanceof Error ? err.message : String(err)}). Pending delivery file retained for reconcile retry.`);
644
+ throw err;
645
+ }
646
+ deletePendingDelivery(taskId);
496
647
  // Preserve state for potential revision — only deleted on terminal events
497
648
  saveState({ taskId, messages: [], claudeCodeContext: context, originalContext: context, lastDelivery: finalOutput });
498
649
  }
499
650
  // ---------------------------------------------------------------------------
500
651
  // Revision handler
501
652
  // ---------------------------------------------------------------------------
502
- async function handleRevision(session, taskId, feedback, config, logger, identityPrivateKey) {
653
+ async function handleRevision(holder, taskId, feedback, config, logger, identityPrivateKey) {
654
+ // Recovery fast-path: same as handleTask — if a pending revision delivery
655
+ // exists on disk, deliver cached bytes instead of re-running codex.
656
+ const pending = loadPendingDelivery(taskId);
657
+ if (pending !== undefined) {
658
+ logger.info(`Task ${taskId}: pending revision delivery found on disk (${pending.output.length} chars produced at ${pending.producedAt}, attempt ${pending.attempts + 1}), delivering cached bytes directly.`);
659
+ incrementPendingDeliveryAttempts(taskId);
660
+ try {
661
+ await deliverTask(holder, taskId, pending.output, pending.signature);
662
+ deletePendingDelivery(taskId);
663
+ logger.info(`Task ${taskId}: cached revision delivery accepted by broker.`);
664
+ }
665
+ catch (err) {
666
+ logger.error(`Task ${taskId}: cached revision delivery failed (${err instanceof Error ? err.message : String(err)}). Pending file retained for next reconcile cycle.`);
667
+ throw err;
668
+ }
669
+ return;
670
+ }
503
671
  const savedState = loadState(taskId);
504
672
  // Use original task context (not compounded revision context) to avoid quadratic growth
505
673
  const originalContext = savedState?.originalContext ?? savedState?.claudeCodeContext ?? '';
@@ -519,21 +687,21 @@ async function handleRevision(session, taskId, feedback, config, logger, identit
519
687
  'Revise your work based on the buyer feedback above. Output the complete updated deliverable.'
520
688
  ].join('\n');
521
689
  // Re-fetch the frozen snapshot so revisions stay bound to the contract.
522
- const revisionDetail = await getTaskDetail(session, taskId);
690
+ const revisionDetail = await getTaskDetail(holder, taskId);
523
691
  const revisionSystemPrompt = composeSystemPrompt(config.systemPrompt, revisionDetail?.deliverablesSnapshot);
524
692
  const result = config.provider === 'claude-code'
525
693
  ? await runClaudeCodeTask(revisionContext, { ...config, systemPrompt: revisionSystemPrompt }, logger)
526
694
  : await runAgentLoop(createProvider(config.provider, config.model, config.providerApiKey), loadTools(config.allowedTools, config.workingDirectory), revisionContext, { systemPrompt: revisionSystemPrompt, taskId });
527
695
  if (result.timedOut === true) {
528
696
  logger.error(`Task ${taskId}: revision timed out, notifying buyer.`);
529
- await sendTaskMessage(session, taskId, 'info', 'The seller agent timed out while working on the revision. Please allow more time or simplify the request.');
697
+ await sendTaskMessage(holder, taskId, 'info', 'The seller agent timed out while working on the revision. Please allow more time or simplify the request.');
530
698
  saveState({ taskId, messages: [], claudeCodeContext: revisionContext, originalContext });
531
699
  return;
532
700
  }
533
701
  if (result.paused) {
534
702
  logger.info(`Task ${taskId}: revision paused, asking buyer...`);
535
703
  saveState({ taskId, messages: [], claudeCodeContext: revisionContext, originalContext, pendingQuestion: result.pendingQuestion });
536
- await sendTaskMessage(session, taskId, 'question', result.pendingQuestion ?? 'Could you clarify?');
704
+ await sendTaskMessage(holder, taskId, 'question', result.pendingQuestion ?? 'Could you clarify?');
537
705
  return;
538
706
  }
539
707
  const truncatedRevision = truncateToContract(result.output, revisionDetail?.deliverablesSnapshot);
@@ -541,14 +709,32 @@ async function handleRevision(session, taskId, feedback, config, logger, identit
541
709
  if (truncatedRevision.events.length > 0) {
542
710
  for (const ev of truncatedRevision.events) {
543
711
  logger.info(`Task ${taskId}: revision scope truncation — item="${ev.itemName}" produced=${ev.producedQuantity} allowed=${ev.allowedQuantity}`);
544
- await sendTaskMessage(session, taskId, 'info', `[scope-guard] Revision trimmed: "${ev.itemName}" produced ${ev.producedQuantity}, contract allows ${ev.allowedQuantity}.`);
712
+ await sendTaskMessage(holder, taskId, 'info', `[scope-guard] Revision trimmed: "${ev.itemName}" produced ${ev.producedQuantity}, contract allows ${ev.allowedQuantity}.`);
545
713
  }
546
714
  }
547
715
  logger.info(`Task ${taskId}: delivering revision (${finalRevisionOutput.length} chars)`);
548
716
  const revisionSignature = identityPrivateKey !== undefined
549
717
  ? signDelivery({ taskId, content: finalRevisionOutput, privateKey: identityPrivateKey })
550
718
  : undefined;
551
- await deliverTask(session, taskId, finalRevisionOutput, revisionSignature);
719
+ // Persist produced revision bytes before calling deliverTask. Same
720
+ // rationale as handleTask: a broker restart or 401 after retry should
721
+ // not waste another codex run on regeneration.
722
+ savePendingDelivery({
723
+ taskId,
724
+ output: finalRevisionOutput,
725
+ ...(revisionSignature !== undefined ? { signature: revisionSignature } : {}),
726
+ producedAt: new Date().toISOString(),
727
+ attempts: 0
728
+ });
729
+ try {
730
+ await deliverTask(holder, taskId, finalRevisionOutput, revisionSignature);
731
+ }
732
+ catch (err) {
733
+ incrementPendingDeliveryAttempts(taskId);
734
+ logger.error(`Task ${taskId}: initial revision deliver failed (${err instanceof Error ? err.message : String(err)}). Pending delivery file retained for reconcile retry.`);
735
+ throw err;
736
+ }
737
+ deletePendingDelivery(taskId);
552
738
  saveState({ taskId, messages: [], claudeCodeContext: revisionContext, originalContext, lastDelivery: finalRevisionOutput });
553
739
  }
554
740
  // ---------------------------------------------------------------------------
@@ -564,7 +750,7 @@ function buildResumedContext(priorContext, priorQuestion, buyerAnswer) {
564
750
  'Continue the task with this new information.'
565
751
  ].join('\n');
566
752
  }
567
- async function handleBuyerMessage(session, taskId, config, logger, identityPrivateKey) {
753
+ async function handleBuyerMessage(holder, taskId, config, logger, identityPrivateKey) {
568
754
  const savedState = loadState(taskId);
569
755
  if (savedState === undefined) {
570
756
  logger.info(`Task ${taskId}: no saved state found, ignoring message.`);
@@ -574,10 +760,10 @@ async function handleBuyerMessage(session, taskId, config, logger, identityPriva
574
760
  // Guide them to use zyndo_request_revision for the formal revision flow.
575
761
  if (savedState.lastDelivery !== undefined && savedState.pendingQuestion === undefined) {
576
762
  logger.info(`Task ${taskId}: buyer sent message after delivery, guiding to zyndo_request_revision.`);
577
- await sendTaskMessage(session, taskId, 'info', 'I received your message. To get a revised delivery, please use the revision action (zyndo_request_revision) with your feedback. I will then rework and re-deliver.');
763
+ await sendTaskMessage(holder, taskId, 'info', 'I received your message. To get a revised delivery, please use the revision action (zyndo_request_revision) with your feedback. I will then rework and re-deliver.');
578
764
  return;
579
765
  }
580
- const messages = await getTaskMessages(session, taskId);
766
+ const messages = await getTaskMessages(holder, taskId);
581
767
  const lastMessage = messages[messages.length - 1];
582
768
  if (lastMessage === undefined)
583
769
  return;
@@ -596,7 +782,7 @@ async function handleBuyerMessage(session, taskId, config, logger, identityPriva
596
782
  }
597
783
  if (result.timedOut === true) {
598
784
  logger.error(`Task ${taskId}: message-resume timed out, notifying buyer.`);
599
- await sendTaskMessage(session, taskId, 'info', 'The seller agent timed out while processing your response. Please allow more time or simplify the request.');
785
+ await sendTaskMessage(holder, taskId, 'info', 'The seller agent timed out while processing your response. Please allow more time or simplify the request.');
600
786
  return;
601
787
  }
602
788
  if (result.paused) {
@@ -605,14 +791,30 @@ async function handleBuyerMessage(session, taskId, config, logger, identityPriva
605
791
  ? buildResumedContext(savedState.claudeCodeContext ?? '', savedState.pendingQuestion, lastMessage.content)
606
792
  : savedState.claudeCodeContext ?? '';
607
793
  saveState({ taskId, messages: [], claudeCodeContext: updatedContext, pendingQuestion: result.pendingQuestion });
608
- await sendTaskMessage(session, taskId, 'question', result.pendingQuestion ?? 'Could you clarify?');
794
+ await sendTaskMessage(holder, taskId, 'question', result.pendingQuestion ?? 'Could you clarify?');
609
795
  return;
610
796
  }
611
797
  logger.info(`Task ${taskId}: delivering result (${result.output.length} chars)`);
612
798
  const resumedSignature = identityPrivateKey !== undefined
613
799
  ? signDelivery({ taskId, content: result.output, privateKey: identityPrivateKey })
614
800
  : undefined;
615
- await deliverTask(session, taskId, result.output, resumedSignature);
801
+ // Persist before deliver, same pattern as handleTask / handleRevision.
802
+ savePendingDelivery({
803
+ taskId,
804
+ output: result.output,
805
+ ...(resumedSignature !== undefined ? { signature: resumedSignature } : {}),
806
+ producedAt: new Date().toISOString(),
807
+ attempts: 0
808
+ });
809
+ try {
810
+ await deliverTask(holder, taskId, result.output, resumedSignature);
811
+ }
812
+ catch (err) {
813
+ incrementPendingDeliveryAttempts(taskId);
814
+ logger.error(`Task ${taskId}: initial resumed deliver failed (${err instanceof Error ? err.message : String(err)}). Pending delivery file retained.`);
815
+ throw err;
816
+ }
817
+ deletePendingDelivery(taskId);
616
818
  const resumedContext = config.provider === 'claude-code'
617
819
  ? buildResumedContext(savedState.claudeCodeContext ?? '', savedState.pendingQuestion, lastMessage.content)
618
820
  : savedState.claudeCodeContext ?? '';
package/dist/state.d.ts CHANGED
@@ -21,3 +21,14 @@ export type PersistedSession = Readonly<{
21
21
  export declare function saveSession(agentId: string, reconnectToken: string): void;
22
22
  export declare function loadSession(): PersistedSession | undefined;
23
23
  export declare function clearSession(): void;
24
+ export type PendingDelivery = Readonly<{
25
+ taskId: string;
26
+ output: string;
27
+ signature?: string;
28
+ producedAt: string;
29
+ attempts: number;
30
+ }>;
31
+ export declare function savePendingDelivery(payload: PendingDelivery): void;
32
+ export declare function loadPendingDelivery(taskId: string): PendingDelivery | undefined;
33
+ export declare function deletePendingDelivery(taskId: string): void;
34
+ export declare function incrementPendingDeliveryAttempts(taskId: string): void;
package/dist/state.js CHANGED
@@ -84,3 +84,38 @@ export function clearSession() {
84
84
  unlinkSync(getSessionFile());
85
85
  }
86
86
  }
87
+ const PENDING_DELIVERY_DIR = resolve(getBaseDir(), 'pending-deliveries');
88
+ function pendingDeliveryFile(taskId) {
89
+ return resolve(PENDING_DELIVERY_DIR, `task-${sanitizeTaskId(taskId)}.json`);
90
+ }
91
+ export function savePendingDelivery(payload) {
92
+ mkdirSync(PENDING_DELIVERY_DIR, { recursive: true });
93
+ writeFileSync(pendingDeliveryFile(payload.taskId), JSON.stringify(payload, null, 2), 'utf-8');
94
+ }
95
+ export function loadPendingDelivery(taskId) {
96
+ const path = pendingDeliveryFile(taskId);
97
+ if (!existsSync(path))
98
+ return undefined;
99
+ try {
100
+ const raw = readFileSync(path, 'utf-8');
101
+ const parsed = JSON.parse(raw);
102
+ if (typeof parsed.taskId !== 'string' || typeof parsed.output !== 'string')
103
+ return undefined;
104
+ return parsed;
105
+ }
106
+ catch {
107
+ return undefined;
108
+ }
109
+ }
110
+ export function deletePendingDelivery(taskId) {
111
+ const path = pendingDeliveryFile(taskId);
112
+ if (existsSync(path)) {
113
+ unlinkSync(path);
114
+ }
115
+ }
116
+ export function incrementPendingDeliveryAttempts(taskId) {
117
+ const existing = loadPendingDelivery(taskId);
118
+ if (existing === undefined)
119
+ return;
120
+ savePendingDelivery({ ...existing, attempts: existing.attempts + 1 });
121
+ }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "zyndo",
3
- "version": "0.3.2",
3
+ "version": "0.3.4",
4
4
  "description": "The agent-to-agent CLI tool for sellers in the Zyndo Marketplace",
5
5
  "type": "module",
6
6
  "license": "MIT",