zyndo 0.3.2 → 0.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/connection.d.ts +35 -9
- package/dist/connection.js +115 -28
- package/dist/mcp/mcpCore.js +2 -2
- package/dist/mcp/mcpServer.js +2 -2
- package/dist/sellerDaemon.js +250 -48
- package/dist/state.d.ts +11 -0
- package/dist/state.js +35 -0
- package/package.json +1 -1
package/dist/connection.d.ts
CHANGED
|
@@ -42,6 +42,9 @@ export type TaskDetail = Readonly<{
|
|
|
42
42
|
content: string;
|
|
43
43
|
}>;
|
|
44
44
|
createdAt: string;
|
|
45
|
+
revisionCount?: number;
|
|
46
|
+
pendingRevisionFeedback?: string;
|
|
47
|
+
pendingRevisionRequestedAt?: string;
|
|
45
48
|
deliverablesSnapshot?: SkillDeliverablesSnapshot;
|
|
46
49
|
}>;
|
|
47
50
|
export type TaskMessage = Readonly<{
|
|
@@ -52,6 +55,29 @@ export type TaskMessage = Readonly<{
|
|
|
52
55
|
content: string;
|
|
53
56
|
createdAt: string;
|
|
54
57
|
}>;
|
|
58
|
+
export type ReconnectOpts = Readonly<{
|
|
59
|
+
role: 'seller' | 'buyer';
|
|
60
|
+
name: string;
|
|
61
|
+
description: string;
|
|
62
|
+
}>;
|
|
63
|
+
export type SessionHolder = {
|
|
64
|
+
current: AgentSession;
|
|
65
|
+
/**
|
|
66
|
+
* API key used to authenticate reconnect calls. Empty string disables
|
|
67
|
+
* automatic reconnect on 401 (used by tests and MCP buyer bridges that
|
|
68
|
+
* don't own a long-running session).
|
|
69
|
+
*/
|
|
70
|
+
apiKey: string;
|
|
71
|
+
reconnectOpts: ReconnectOpts;
|
|
72
|
+
};
|
|
73
|
+
export declare function createSessionHolder(session: AgentSession, apiKey: string, reconnectOpts: ReconnectOpts): SessionHolder;
|
|
74
|
+
/**
|
|
75
|
+
* Wrap a raw session in a holder for callers that don't need
|
|
76
|
+
* automatic reconnect-on-401 (e.g. MCP buyer-side bridges). Passing a bare
|
|
77
|
+
* holder means `withAuthRetry` will never attempt to reconnect — it will
|
|
78
|
+
* simply pass the 401 response through for the calling helper to handle as before.
|
|
79
|
+
*/
|
|
80
|
+
export declare function bareSessionHolder(session: AgentSession): SessionHolder;
|
|
55
81
|
export declare function connect(bridgeUrl: string, apiKey: string, opts: {
|
|
56
82
|
role: 'seller' | 'buyer';
|
|
57
83
|
name: string;
|
|
@@ -71,19 +97,19 @@ export declare function reconnect(session: AgentSession, apiKey: string, opts?:
|
|
|
71
97
|
name?: string;
|
|
72
98
|
description?: string;
|
|
73
99
|
}): Promise<AgentSession>;
|
|
74
|
-
export declare function heartbeat(
|
|
75
|
-
export declare function pollEvents(
|
|
76
|
-
export declare function acceptTask(
|
|
100
|
+
export declare function heartbeat(holder: SessionHolder): Promise<void>;
|
|
101
|
+
export declare function pollEvents(holder: SessionHolder, ack?: number): Promise<ReadonlyArray<AgentEvent>>;
|
|
102
|
+
export declare function acceptTask(holder: SessionHolder, taskId: string): Promise<void>;
|
|
77
103
|
/**
|
|
78
104
|
* Slice 3b — register the seller's Ed25519 public key with the broker so
|
|
79
105
|
* subsequent signed deliveries can be verified. Safe to call on every daemon
|
|
80
106
|
* start; the broker upserts and tracks rotations.
|
|
81
107
|
*/
|
|
82
|
-
export declare function registerIdentity(
|
|
83
|
-
export declare function deliverTask(
|
|
84
|
-
export declare function sendTaskMessage(
|
|
85
|
-
export declare function getTaskMessages(
|
|
86
|
-
export declare function getTaskDetail(
|
|
108
|
+
export declare function registerIdentity(holder: SessionHolder, publicKeyB64: string): Promise<void>;
|
|
109
|
+
export declare function deliverTask(holder: SessionHolder, taskId: string, content: string, signatureB64?: string): Promise<void>;
|
|
110
|
+
export declare function sendTaskMessage(holder: SessionHolder, taskId: string, type: 'question' | 'answer' | 'info', content: string): Promise<void>;
|
|
111
|
+
export declare function getTaskMessages(holder: SessionHolder, taskId: string): Promise<ReadonlyArray<TaskMessage>>;
|
|
112
|
+
export declare function getTaskDetail(holder: SessionHolder, taskId: string): Promise<TaskDetail | undefined>;
|
|
87
113
|
/**
|
|
88
114
|
* List every task the broker has on record for this agent (both as buyer and
|
|
89
115
|
* seller). Used by the seller daemon to reconcile against its event cursor:
|
|
@@ -93,4 +119,4 @@ export declare function getTaskDetail(session: AgentSession, taskId: string): Pr
|
|
|
93
119
|
* seller can appear "online" (heartbeat OK, polling OK) while silently leaving
|
|
94
120
|
* a buyer's task stuck forever.
|
|
95
121
|
*/
|
|
96
|
-
export declare function listAgentTasks(
|
|
122
|
+
export declare function listAgentTasks(holder: SessionHolder): Promise<ReadonlyArray<TaskDetail>>;
|
package/dist/connection.js
CHANGED
|
@@ -1,11 +1,44 @@
|
|
|
1
1
|
// ---------------------------------------------------------------------------
|
|
2
2
|
// Connection module — connect, heartbeat, poll events, deliver
|
|
3
|
+
//
|
|
4
|
+
// Every network helper takes a SessionHolder (a mutable wrapper around the
|
|
5
|
+
// current AgentSession) instead of the raw session. This makes broker
|
|
6
|
+
// restart-mid-task survivable:
|
|
7
|
+
//
|
|
8
|
+
// 1. Seller daemon creates ONE holder at startup.
|
|
9
|
+
// 2. All handlers (handleTask, handleRevision, handleMessage) receive that
|
|
10
|
+
// holder by reference and call network helpers with it.
|
|
11
|
+
// 3. On any 401 response, `withAuthRetry` reconnects, mutates the holder's
|
|
12
|
+
// `.current` in place, and retries the original call once.
|
|
13
|
+
// 4. In-flight handlers see the new token on their next network call
|
|
14
|
+
// because they hold the same holder reference.
|
|
15
|
+
//
|
|
16
|
+
// Before this change, `session` was passed by value to each handler. The
|
|
17
|
+
// outer heartbeat loop could reconnect and update its own `session` variable,
|
|
18
|
+
// but the already-running handler still held the stale object in its
|
|
19
|
+
// closure and died with 401 on deliver. Incident 2026-04-14.
|
|
3
20
|
// ---------------------------------------------------------------------------
|
|
4
21
|
// Mirror of @zyndo/contracts MAX_AGENT_MESSAGE_CHARS. The CLI ships as a
|
|
5
22
|
// standalone npm package, so we duplicate this small constant rather than
|
|
6
23
|
// pulling in the contracts workspace as a dependency. Keep these values in
|
|
7
24
|
// sync if the broker-side limit ever changes.
|
|
8
25
|
export const MAX_AGENT_MESSAGE_CHARS = 50_000;
|
|
26
|
+
export function createSessionHolder(session, apiKey, reconnectOpts) {
|
|
27
|
+
return { current: session, apiKey, reconnectOpts };
|
|
28
|
+
}
|
|
29
|
+
/**
|
|
30
|
+
* Wrap a raw session in a holder for callers that don't need
|
|
31
|
+
* automatic reconnect-on-401 (e.g. MCP buyer-side bridges). Passing a bare
|
|
32
|
+
* holder means `withAuthRetry` will never attempt to reconnect — it will
|
|
33
|
+
* simply pass the 401 response through for the calling helper to handle as before.
|
|
34
|
+
*/
|
|
35
|
+
export function bareSessionHolder(session) {
|
|
36
|
+
return {
|
|
37
|
+
current: session,
|
|
38
|
+
apiKey: '',
|
|
39
|
+
reconnectOpts: { role: 'buyer', name: '', description: '' }
|
|
40
|
+
};
|
|
41
|
+
}
|
|
9
42
|
// ---------------------------------------------------------------------------
|
|
10
43
|
// HTTP helpers
|
|
11
44
|
// ---------------------------------------------------------------------------
|
|
@@ -22,7 +55,52 @@ async function jsonGet(url, token) {
|
|
|
22
55
|
return fetch(url, { headers });
|
|
23
56
|
}
|
|
24
57
|
// ---------------------------------------------------------------------------
|
|
25
|
-
//
|
|
58
|
+
// withAuthRetry — shared 401-retry wrapper used by every network helper
|
|
59
|
+
// ---------------------------------------------------------------------------
|
|
60
|
+
/**
|
|
61
|
+
* Make an authenticated request. If the broker returns 401, attempt a
|
|
62
|
+
* single reconnect, mutate the holder's `.current` in place, and retry
|
|
63
|
+
* the call once with the fresh token.
|
|
64
|
+
*
|
|
65
|
+
* - Never loops. At most one retry.
|
|
66
|
+
* - If the holder has no `apiKey` (bare holder), 401 is passed through
|
|
67
|
+
* to the caller unchanged — this preserves the pre-holder semantics for
|
|
68
|
+
* MCP buyer bridges and unit tests.
|
|
69
|
+
* - The first 401 triggers a reconnect attempt; if that reconnect itself
|
|
70
|
+
* throws, a new Error naming both the 401 and the reconnect failure is thrown.
|
|
71
|
+
* - Non-401 error statuses are NEVER retried.
|
|
72
|
+
*/
|
|
73
|
+
async function withAuthRetry(holder, label, doCall) {
|
|
74
|
+
const firstRes = await doCall(holder.current.token);
|
|
75
|
+
if (firstRes.status !== 401)
|
|
76
|
+
return firstRes;
|
|
77
|
+
if (holder.apiKey.length === 0)
|
|
78
|
+
return firstRes;
|
|
79
|
+
// Drain the body so the socket can be reused.
|
|
80
|
+
try {
|
|
81
|
+
await firstRes.text();
|
|
82
|
+
}
|
|
83
|
+
catch { /* ignore */ }
|
|
84
|
+
try {
|
|
85
|
+
const fresh = await reconnect(holder.current, holder.apiKey, {
|
|
86
|
+
role: holder.reconnectOpts.role,
|
|
87
|
+
name: holder.reconnectOpts.name,
|
|
88
|
+
description: holder.reconnectOpts.description
|
|
89
|
+
});
|
|
90
|
+
holder.current = fresh;
|
|
91
|
+
process.stderr.write(`[zyndo] ${label}: session refreshed after 401, retrying once\n`);
|
|
92
|
+
}
|
|
93
|
+
catch (reconnectErr) {
|
|
94
|
+
const msg = reconnectErr instanceof Error ? reconnectErr.message : String(reconnectErr);
|
|
95
|
+
process.stderr.write(`[zyndo] ${label}: reconnect-on-401 failed: ${msg}\n`);
|
|
96
|
+
// Surface the reconnect error; the caller can decide whether to bubble
|
|
97
|
+
// it up or swallow it (pollEvents, for example, soft-fails).
|
|
98
|
+
throw new Error(`${label} 401 and reconnect failed: ${msg}`);
|
|
99
|
+
}
|
|
100
|
+
return doCall(holder.current.token);
|
|
101
|
+
}
|
|
102
|
+
// ---------------------------------------------------------------------------
|
|
103
|
+
// Connect (no holder — creates one)
|
|
26
104
|
// ---------------------------------------------------------------------------
|
|
27
105
|
export async function connect(bridgeUrl, apiKey, opts) {
|
|
28
106
|
const headers = {
|
|
@@ -45,14 +123,14 @@ export async function connect(bridgeUrl, apiKey, opts) {
|
|
|
45
123
|
body: JSON.stringify(body)
|
|
46
124
|
});
|
|
47
125
|
if (!res.ok) {
|
|
48
|
-
const
|
|
49
|
-
throw new Error(`Connect failed (${res.status}): ${
|
|
126
|
+
const errBody = await res.text();
|
|
127
|
+
throw new Error(`Connect failed (${res.status}): ${errBody}`);
|
|
50
128
|
}
|
|
51
129
|
const data = (await res.json());
|
|
52
130
|
return { agentId: data.agentId, token: data.token, reconnectToken: data.reconnectToken, bridgeUrl };
|
|
53
131
|
}
|
|
54
132
|
// ---------------------------------------------------------------------------
|
|
55
|
-
// Reconnect
|
|
133
|
+
// Reconnect — called by withAuthRetry and by the seller daemon outer loop
|
|
56
134
|
// ---------------------------------------------------------------------------
|
|
57
135
|
export async function reconnect(session, apiKey, opts) {
|
|
58
136
|
// The broker's /agent/connect endpoint is gated on x-zyndo-api-key when
|
|
@@ -82,27 +160,36 @@ export async function reconnect(session, apiKey, opts) {
|
|
|
82
160
|
// ---------------------------------------------------------------------------
|
|
83
161
|
// Heartbeat
|
|
84
162
|
// ---------------------------------------------------------------------------
|
|
85
|
-
export async function heartbeat(
|
|
86
|
-
const res = await jsonPost(`${
|
|
87
|
-
if (!res.ok
|
|
88
|
-
|
|
163
|
+
export async function heartbeat(holder) {
|
|
164
|
+
const res = await withAuthRetry(holder, 'heartbeat', (token) => jsonPost(`${holder.current.bridgeUrl}/agent/heartbeat`, {}, token));
|
|
165
|
+
if (!res.ok) {
|
|
166
|
+
// A 401 after a successful retry cycle still reaches here, as does any other
|
|
167
|
+
// non-ok status. The outer daemon heartbeat loop will catch this and
|
|
168
|
+
// attempt a fresh reconnect/re-register cycle as a last resort.
|
|
169
|
+
throw new Error(`Heartbeat failed (${res.status}): ${await res.text().catch(() => '<no body>')}`);
|
|
89
170
|
}
|
|
90
171
|
}
|
|
91
172
|
// ---------------------------------------------------------------------------
|
|
92
|
-
// Poll events
|
|
173
|
+
// Poll events — soft-fails on any error (returns empty array) because the
|
|
174
|
+
// seller daemon polls on a tick and can't die on a single failed poll.
|
|
93
175
|
// ---------------------------------------------------------------------------
|
|
94
|
-
export async function pollEvents(
|
|
95
|
-
|
|
96
|
-
|
|
176
|
+
export async function pollEvents(holder, ack = 0) {
|
|
177
|
+
try {
|
|
178
|
+
const res = await withAuthRetry(holder, 'pollEvents', (token) => jsonGet(`${holder.current.bridgeUrl}/agent/events?since=${ack}`, token));
|
|
179
|
+
if (!res.ok)
|
|
180
|
+
return [];
|
|
181
|
+
const data = (await res.json());
|
|
182
|
+
return data.events;
|
|
183
|
+
}
|
|
184
|
+
catch {
|
|
97
185
|
return [];
|
|
98
|
-
|
|
99
|
-
return data.events;
|
|
186
|
+
}
|
|
100
187
|
}
|
|
101
188
|
// ---------------------------------------------------------------------------
|
|
102
189
|
// Task operations
|
|
103
190
|
// ---------------------------------------------------------------------------
|
|
104
|
-
export async function acceptTask(
|
|
105
|
-
const res = await jsonPost(`${
|
|
191
|
+
export async function acceptTask(holder, taskId) {
|
|
192
|
+
const res = await withAuthRetry(holder, `accept task ${taskId}`, (token) => jsonPost(`${holder.current.bridgeUrl}/agent/tasks/${taskId}/accept`, {}, token));
|
|
106
193
|
if (!res.ok) {
|
|
107
194
|
throw new Error(`Accept failed (${res.status}): ${await res.text()}`);
|
|
108
195
|
}
|
|
@@ -112,23 +199,23 @@ export async function acceptTask(session, taskId) {
|
|
|
112
199
|
* subsequent signed deliveries can be verified. Safe to call on every daemon
|
|
113
200
|
* start; the broker upserts and tracks rotations.
|
|
114
201
|
*/
|
|
115
|
-
export async function registerIdentity(
|
|
116
|
-
const res = await jsonPost(`${
|
|
202
|
+
export async function registerIdentity(holder, publicKeyB64) {
|
|
203
|
+
const res = await withAuthRetry(holder, 'registerIdentity', (token) => jsonPost(`${holder.current.bridgeUrl}/agent/identity/register`, { publicKeyB64 }, token));
|
|
117
204
|
if (!res.ok) {
|
|
118
205
|
throw new Error(`Identity register failed (${res.status}): ${await res.text()}`);
|
|
119
206
|
}
|
|
120
207
|
}
|
|
121
|
-
export async function deliverTask(
|
|
208
|
+
export async function deliverTask(holder, taskId, content, signatureB64) {
|
|
122
209
|
const body = { output: { type: 'text', content } };
|
|
123
210
|
if (signatureB64 !== undefined) {
|
|
124
211
|
body.signature = signatureB64;
|
|
125
212
|
}
|
|
126
|
-
const res = await jsonPost(`${
|
|
213
|
+
const res = await withAuthRetry(holder, `deliver task ${taskId}`, (token) => jsonPost(`${holder.current.bridgeUrl}/agent/tasks/${taskId}/deliver`, body, token));
|
|
127
214
|
if (!res.ok) {
|
|
128
215
|
throw new Error(`Deliver failed (${res.status}): ${await res.text()}`);
|
|
129
216
|
}
|
|
130
217
|
}
|
|
131
|
-
export async function sendTaskMessage(
|
|
218
|
+
export async function sendTaskMessage(holder, taskId, type, content) {
|
|
132
219
|
// Pre-flight guard: fail fast locally instead of round-tripping to the broker
|
|
133
220
|
// when the message obviously exceeds the marketplace limit. The broker will
|
|
134
221
|
// also reject this — this is just for faster, clearer feedback to the agent.
|
|
@@ -136,20 +223,20 @@ export async function sendTaskMessage(session, taskId, type, content) {
|
|
|
136
223
|
throw new Error(`Message content is ${content.length} characters, which exceeds the ${MAX_AGENT_MESSAGE_CHARS}-character marketplace limit. ` +
|
|
137
224
|
`Trim the message before sending. Long context should be summarized or attached as a file delivery, not pasted into a chat message.`);
|
|
138
225
|
}
|
|
139
|
-
const res = await jsonPost(`${
|
|
226
|
+
const res = await withAuthRetry(holder, `send message ${taskId}`, (token) => jsonPost(`${holder.current.bridgeUrl}/agent/tasks/${taskId}/messages`, { type, content }, token));
|
|
140
227
|
if (!res.ok) {
|
|
141
228
|
throw new Error(`Send message failed (${res.status}): ${await res.text()}`);
|
|
142
229
|
}
|
|
143
230
|
}
|
|
144
|
-
export async function getTaskMessages(
|
|
145
|
-
const res = await jsonGet(`${
|
|
231
|
+
export async function getTaskMessages(holder, taskId) {
|
|
232
|
+
const res = await withAuthRetry(holder, `get messages ${taskId}`, (token) => jsonGet(`${holder.current.bridgeUrl}/agent/tasks/${taskId}/messages`, token));
|
|
146
233
|
if (!res.ok)
|
|
147
234
|
return [];
|
|
148
235
|
const data = (await res.json());
|
|
149
236
|
return data.messages;
|
|
150
237
|
}
|
|
151
|
-
export async function getTaskDetail(
|
|
152
|
-
const res = await jsonGet(`${
|
|
238
|
+
export async function getTaskDetail(holder, taskId) {
|
|
239
|
+
const res = await withAuthRetry(holder, `get task ${taskId}`, (token) => jsonGet(`${holder.current.bridgeUrl}/agent/tasks/${taskId}`, token));
|
|
153
240
|
if (!res.ok)
|
|
154
241
|
return undefined;
|
|
155
242
|
return (await res.json());
|
|
@@ -163,8 +250,8 @@ export async function getTaskDetail(session, taskId) {
|
|
|
163
250
|
* seller can appear "online" (heartbeat OK, polling OK) while silently leaving
|
|
164
251
|
* a buyer's task stuck forever.
|
|
165
252
|
*/
|
|
166
|
-
export async function listAgentTasks(
|
|
167
|
-
const res = await jsonGet(`${
|
|
253
|
+
export async function listAgentTasks(holder) {
|
|
254
|
+
const res = await withAuthRetry(holder, 'list tasks', (token) => jsonGet(`${holder.current.bridgeUrl}/agent/tasks`, token));
|
|
168
255
|
if (!res.ok)
|
|
169
256
|
return [];
|
|
170
257
|
const data = (await res.json());
|
package/dist/mcp/mcpCore.js
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
// Streamable HTTP transport (mcpHttpServer.ts). All handlers are pure
|
|
6
6
|
// functions that take a McpSessionState instead of reading module-level vars.
|
|
7
7
|
// ---------------------------------------------------------------------------
|
|
8
|
-
import { connect, reconnect, pollEvents } from '../connection.js';
|
|
8
|
+
import { connect, reconnect, pollEvents, bareSessionHolder } from '../connection.js';
|
|
9
9
|
// ---------------------------------------------------------------------------
|
|
10
10
|
// Tool definitions
|
|
11
11
|
// ---------------------------------------------------------------------------
|
|
@@ -268,7 +268,7 @@ async function handleHire(state, args) {
|
|
|
268
268
|
async function handleCheckTasks(state) {
|
|
269
269
|
if (state.agentSession === undefined)
|
|
270
270
|
return JSON.stringify({ error: 'Not connected. Call zyndo_connect first.' });
|
|
271
|
-
const events = await pollEvents(state.agentSession, state.lastEventId);
|
|
271
|
+
const events = await pollEvents(bareSessionHolder(state.agentSession), state.lastEventId);
|
|
272
272
|
const taskEvents = {};
|
|
273
273
|
for (const event of events) {
|
|
274
274
|
if (event.eventId > state.lastEventId)
|
package/dist/mcp/mcpServer.js
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
// Manages a single McpSessionState and heartbeat timer for the stdio session.
|
|
6
6
|
// ---------------------------------------------------------------------------
|
|
7
7
|
import { createInterface } from 'node:readline';
|
|
8
|
-
import { heartbeat } from '../connection.js';
|
|
8
|
+
import { heartbeat, bareSessionHolder } from '../connection.js';
|
|
9
9
|
import { handleMcpMethod, mcpError } from './mcpCore.js';
|
|
10
10
|
// ---------------------------------------------------------------------------
|
|
11
11
|
// Heartbeat with auto-reconnect
|
|
@@ -15,7 +15,7 @@ function startHeartbeat(state) {
|
|
|
15
15
|
if (state.agentSession === undefined)
|
|
16
16
|
return;
|
|
17
17
|
try {
|
|
18
|
-
await heartbeat(state.agentSession);
|
|
18
|
+
await heartbeat(bareSessionHolder(state.agentSession));
|
|
19
19
|
}
|
|
20
20
|
catch {
|
|
21
21
|
// Session expired — the next tool call will trigger auto-reconnect via mcpCore
|
package/dist/sellerDaemon.js
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
// Seller daemon — poll for tasks, run agent loop, deliver results
|
|
3
3
|
// ---------------------------------------------------------------------------
|
|
4
4
|
import { resolve } from 'node:path';
|
|
5
|
-
import { connect, reconnect, heartbeat, pollEvents, acceptTask, deliverTask, sendTaskMessage, getTaskMessages, getTaskDetail, listAgentTasks, registerIdentity } from './connection.js';
|
|
5
|
+
import { connect, reconnect, createSessionHolder, heartbeat, pollEvents, acceptTask, deliverTask, sendTaskMessage, getTaskMessages, getTaskDetail, listAgentTasks, registerIdentity } from './connection.js';
|
|
6
6
|
import { ensureIdentityKeypair, signDelivery } from './identity.js';
|
|
7
7
|
import { composeSystemPrompt, truncateToContract } from './scopeContract.js';
|
|
8
8
|
import { runAgentLoop } from './agentLoop.js';
|
|
@@ -16,14 +16,19 @@ import { createBashTool } from './tools/bash.js';
|
|
|
16
16
|
import { createGlobTool } from './tools/glob.js';
|
|
17
17
|
import { createGrepTool } from './tools/grep.js';
|
|
18
18
|
import { createAskBuyerTool } from './tools/askBuyer.js';
|
|
19
|
-
import { loadState, saveState, deleteState, loadSession, saveSession, loadLastEventId, saveLastEventId } from './state.js';
|
|
19
|
+
import { loadState, saveState, deleteState, loadSession, saveSession, loadLastEventId, saveLastEventId, savePendingDelivery, loadPendingDelivery, deletePendingDelivery, incrementPendingDeliveryAttempts } from './state.js';
|
|
20
20
|
const POLL_INTERVAL_MS = 25_000;
|
|
21
21
|
const HEARTBEAT_INTERVAL_MS = 45_000;
|
|
22
22
|
// Every N poll cycles, reconcile the local active-task set against the
|
|
23
23
|
// broker's authoritative task list. This catches missed `task.assigned`
|
|
24
24
|
// events caused by broker restarts (in-memory event queue reset), network
|
|
25
25
|
// partitions, or dropped SSE frames. 2 cycles at 25s = ~50s safety net.
|
|
26
|
-
|
|
26
|
+
// Periodic reconcile safety net. Every N poll cycles, check /agent/tasks for
|
|
27
|
+
// anything the daemon doesn't know about. Was 8 (~200s at 25s polls). Too
|
|
28
|
+
// slow — users saw multi-minute hire pickup delays after broker restarts
|
|
29
|
+
// broke the event cursor. 2 (~50s) is fast enough to be invisible while
|
|
30
|
+
// still being cheap. Incident 2026-04-14.
|
|
31
|
+
const RECONCILE_EVERY_N_POLLS = 2;
|
|
27
32
|
// ---------------------------------------------------------------------------
|
|
28
33
|
// Reconnect error classification (incident 2026-04-09 follow-up)
|
|
29
34
|
//
|
|
@@ -154,19 +159,45 @@ export async function startSellerDaemon(config, opts) {
|
|
|
154
159
|
}
|
|
155
160
|
// Persist session for future restarts
|
|
156
161
|
saveSession(session.agentId, session.reconnectToken);
|
|
162
|
+
// Wrap the session in a mutable holder. Every network helper reads
|
|
163
|
+
// holder.current.token at call time. When a broker restart invalidates
|
|
164
|
+
// the in-memory token map, withAuthRetry inside each helper calls
|
|
165
|
+
// reconnect(), mutates holder.current in place, and retries once. Any
|
|
166
|
+
// in-flight task handler that was mid-call sees the new token on its
|
|
167
|
+
// next network operation because the handler holds the SAME holder
|
|
168
|
+
// reference. Incident 2026-04-14.
|
|
169
|
+
const holder = createSessionHolder(session, config.apiKey, {
|
|
170
|
+
role: 'seller',
|
|
171
|
+
name: config.name,
|
|
172
|
+
description: config.description
|
|
173
|
+
});
|
|
157
174
|
// Slice 3b — register the Ed25519 public key with the broker. Failures
|
|
158
175
|
// are logged but non-fatal because signing is soft-enforced until
|
|
159
176
|
// rollout > 80%.
|
|
160
177
|
if (identityPublicKeyB64 !== undefined) {
|
|
161
178
|
try {
|
|
162
|
-
await registerIdentity(
|
|
179
|
+
await registerIdentity(holder, identityPublicKeyB64);
|
|
163
180
|
logger.info('Identity public key registered with broker.');
|
|
164
181
|
}
|
|
165
182
|
catch (err) {
|
|
166
183
|
logger.error(`Identity registration failed: ${err instanceof Error ? err.message : String(err)}. Deliveries will be unsigned.`);
|
|
167
184
|
}
|
|
168
185
|
}
|
|
169
|
-
|
|
186
|
+
// Reset the event cursor on every daemon startup. The persisted value from
|
|
187
|
+
// a previous session can be higher than the broker's in-memory event
|
|
188
|
+
// counter after a broker restart (Railway auto-deploys reset the counter),
|
|
189
|
+
// causing pollEvents to return empty indefinitely — tasks could only be
|
|
190
|
+
// recovered via the periodic reconcile loop, producing multi-minute hire
|
|
191
|
+
// pickup delays. Starting from 0 makes pollEvents instantly see any current
|
|
192
|
+
// event stream. The startup reconcile already catches any in-flight task
|
|
193
|
+
// the broker persisted, and the activeTasks Set dedupes any replayed
|
|
194
|
+
// events so no work is done twice. Incident 2026-04-14.
|
|
195
|
+
const persistedCursor = loadLastEventId();
|
|
196
|
+
if (persistedCursor > 0) {
|
|
197
|
+
logger.info(`Event cursor reset from ${persistedCursor} → 0 on startup (broker may have restarted; reconcile will recover in-flight tasks).`);
|
|
198
|
+
}
|
|
199
|
+
let lastEventId = 0;
|
|
200
|
+
saveLastEventId(0);
|
|
170
201
|
let lastHeartbeat = Date.now();
|
|
171
202
|
const activeTasks = new Set();
|
|
172
203
|
let pollsSinceReconcile = 0;
|
|
@@ -185,14 +216,11 @@ export async function startSellerDaemon(config, opts) {
|
|
|
185
216
|
* buyer; we just re-add to activeTasks so we do not re-accept).
|
|
186
217
|
*/
|
|
187
218
|
async function reconcileTasks(reason) {
|
|
188
|
-
const activeSession = session;
|
|
189
|
-
if (activeSession === undefined)
|
|
190
|
-
return;
|
|
191
219
|
try {
|
|
192
|
-
const tasks = await listAgentTasks(
|
|
220
|
+
const tasks = await listAgentTasks(holder);
|
|
193
221
|
let picked = 0;
|
|
194
222
|
for (const task of tasks) {
|
|
195
|
-
if (task.sellerAgentId !==
|
|
223
|
+
if (task.sellerAgentId !== holder.current.agentId)
|
|
196
224
|
continue;
|
|
197
225
|
if (activeTasks.has(task.taskId))
|
|
198
226
|
continue;
|
|
@@ -204,18 +232,65 @@ export async function startSellerDaemon(config, opts) {
|
|
|
204
232
|
activeTasks.add(task.taskId);
|
|
205
233
|
picked += 1;
|
|
206
234
|
logger.info(`Reconcile (${reason}): picking up stuck submitted task ${task.taskId}`);
|
|
207
|
-
handleTask(
|
|
235
|
+
handleTask(holder, task.taskId, config, logger, identityPrivateKey)
|
|
208
236
|
.catch((err) => {
|
|
209
237
|
logger.error(`Reconciled task ${task.taskId} failed: ${err instanceof Error ? err.message : String(err)}`);
|
|
210
238
|
})
|
|
211
239
|
.finally(() => activeTasks.delete(task.taskId));
|
|
212
240
|
continue;
|
|
213
241
|
}
|
|
242
|
+
// `working` tasks with a pending delivery on disk are stuck deliveries
|
|
243
|
+
// from a previous run where deliverTask threw (network blip, broker
|
|
244
|
+
// mid-restart, 401 after the retry budget). Re-enter handleTask so it
|
|
245
|
+
// reads the pending delivery from disk and re-attempts the exact same
|
|
246
|
+
// bytes, no codex re-spawn.
|
|
247
|
+
if (task.state === 'working' && loadPendingDelivery(task.taskId) !== undefined) {
|
|
248
|
+
if (activeTasks.size >= config.maxConcurrentTasks) {
|
|
249
|
+
logger.info(`Reconcile: task ${task.taskId} has pending delivery but seller at capacity, will retry.`);
|
|
250
|
+
continue;
|
|
251
|
+
}
|
|
252
|
+
activeTasks.add(task.taskId);
|
|
253
|
+
picked += 1;
|
|
254
|
+
logger.info(`Reconcile (${reason}): retrying stuck pending delivery for task ${task.taskId}`);
|
|
255
|
+
handleTask(holder, task.taskId, config, logger, identityPrivateKey)
|
|
256
|
+
.catch((err) => {
|
|
257
|
+
logger.error(`Reconciled pending delivery ${task.taskId} failed: ${err instanceof Error ? err.message : String(err)}`);
|
|
258
|
+
})
|
|
259
|
+
.finally(() => activeTasks.delete(task.taskId));
|
|
260
|
+
continue;
|
|
261
|
+
}
|
|
262
|
+
// `working` tasks with `pendingRevisionFeedback` set on the broker side
|
|
263
|
+
// are stuck revisions from a lost `task.revision.requested` event. The
|
|
264
|
+
// broker stamps the feedback onto the task record in the same
|
|
265
|
+
// transaction that emits the event; if the event stream drops frames
|
|
266
|
+
// or the seller's cursor is ahead of the broker's reset-in-memory
|
|
267
|
+
// counter, the seller never sees the event. Reconcile picks it up by
|
|
268
|
+
// reading the feedback straight from the task detail. Incident
|
|
269
|
+
// 2026-04-14 — user requested a revision on a legacy task and the
|
|
270
|
+
// seller daemon sat idle for 8 minutes because this path didn't exist.
|
|
271
|
+
if (task.state === 'working'
|
|
272
|
+
&& typeof task.pendingRevisionFeedback === 'string'
|
|
273
|
+
&& task.pendingRevisionFeedback.length > 0) {
|
|
274
|
+
if (activeTasks.size >= config.maxConcurrentTasks) {
|
|
275
|
+
logger.info(`Reconcile: task ${task.taskId} has stuck revision but seller at capacity, will retry.`);
|
|
276
|
+
continue;
|
|
277
|
+
}
|
|
278
|
+
activeTasks.add(task.taskId);
|
|
279
|
+
picked += 1;
|
|
280
|
+
const feedback = task.pendingRevisionFeedback;
|
|
281
|
+
logger.info(`Reconcile (${reason}): picking up stuck revision for task ${task.taskId} (feedback=${feedback.slice(0, 80)}${feedback.length > 80 ? '…' : ''})`);
|
|
282
|
+
handleRevision(holder, task.taskId, feedback, config, logger, identityPrivateKey)
|
|
283
|
+
.catch((err) => {
|
|
284
|
+
logger.error(`Reconciled revision ${task.taskId} failed: ${err instanceof Error ? err.message : String(err)}`);
|
|
285
|
+
})
|
|
286
|
+
.finally(() => activeTasks.delete(task.taskId));
|
|
287
|
+
continue;
|
|
288
|
+
}
|
|
214
289
|
if (task.state === 'input-required' && task.inputType === 'question') {
|
|
215
290
|
activeTasks.add(task.taskId);
|
|
216
291
|
picked += 1;
|
|
217
292
|
logger.info(`Reconcile (${reason}): picking up input-required task ${task.taskId}`);
|
|
218
|
-
handleBuyerMessage(
|
|
293
|
+
handleBuyerMessage(holder, task.taskId, config, logger, identityPrivateKey)
|
|
219
294
|
.catch((err) => {
|
|
220
295
|
logger.error(`Reconciled message for ${task.taskId} failed: ${err instanceof Error ? err.message : String(err)}`);
|
|
221
296
|
})
|
|
@@ -240,8 +315,13 @@ export async function startSellerDaemon(config, opts) {
|
|
|
240
315
|
// Heartbeat
|
|
241
316
|
if (Date.now() - lastHeartbeat >= HEARTBEAT_INTERVAL_MS) {
|
|
242
317
|
try {
|
|
243
|
-
await heartbeat(
|
|
318
|
+
await heartbeat(holder);
|
|
244
319
|
lastHeartbeat = Date.now();
|
|
320
|
+
// Persist the (possibly refreshed) session on every successful
|
|
321
|
+
// heartbeat so a daemon restart always picks up the freshest
|
|
322
|
+
// reconnect token. withAuthRetry may have silently refreshed
|
|
323
|
+
// holder.current mid-heartbeat.
|
|
324
|
+
saveSession(holder.current.agentId, holder.current.reconnectToken);
|
|
245
325
|
}
|
|
246
326
|
catch {
|
|
247
327
|
logger.info('Heartbeat failed, attempting reconnect...');
|
|
@@ -259,21 +339,33 @@ export async function startSellerDaemon(config, opts) {
|
|
|
259
339
|
if (signal !== undefined && signal.aborted)
|
|
260
340
|
break;
|
|
261
341
|
try {
|
|
262
|
-
|
|
342
|
+
const fresh = await reconnect(holder.current, config.apiKey, {
|
|
263
343
|
role: 'seller',
|
|
264
344
|
name: config.name,
|
|
265
345
|
description: config.description
|
|
266
346
|
});
|
|
267
|
-
|
|
347
|
+
holder.current = fresh;
|
|
348
|
+
saveSession(holder.current.agentId, holder.current.reconnectToken);
|
|
268
349
|
logger.info(`Reconnected successfully (attempt ${attempt + 1}).`);
|
|
269
350
|
lastHeartbeat = Date.now();
|
|
270
351
|
reconnected = true;
|
|
271
|
-
//
|
|
272
|
-
//
|
|
273
|
-
//
|
|
274
|
-
//
|
|
275
|
-
//
|
|
276
|
-
//
|
|
352
|
+
// The broker almost certainly restarted (that's why the
|
|
353
|
+
// heartbeat failed in the first place). Its in-memory event
|
|
354
|
+
// counter has reset to 0 while our in-memory lastEventId is
|
|
355
|
+
// still at the old high value — every subsequent pollEvents
|
|
356
|
+
// call would return empty because we'd be asking for events
|
|
357
|
+
// "since <high>" from a stream that now starts at 1. Reset
|
|
358
|
+
// the cursor in memory AND on disk so the next poll sees the
|
|
359
|
+
// current event stream. Incident 2026-04-14.
|
|
360
|
+
if (lastEventId > 0) {
|
|
361
|
+
logger.info(`Event cursor reset ${lastEventId} → 0 after reconnect (broker restart suspected).`);
|
|
362
|
+
}
|
|
363
|
+
lastEventId = 0;
|
|
364
|
+
saveLastEventId(0);
|
|
365
|
+
// Also run an immediate reconcile so any task that was
|
|
366
|
+
// already assigned to us before the broker restarted (and
|
|
367
|
+
// is now past the event horizon) gets picked up without
|
|
368
|
+
// waiting for the periodic cycle.
|
|
277
369
|
pollsSinceReconcile = RECONCILE_EVERY_N_POLLS;
|
|
278
370
|
await reconcileTasks('reconnect');
|
|
279
371
|
break;
|
|
@@ -298,7 +390,7 @@ export async function startSellerDaemon(config, opts) {
|
|
|
298
390
|
// the user's API key back to the same stable seller agentId.
|
|
299
391
|
try {
|
|
300
392
|
logger.info(`Re-registering as seller "${config.name}"...`);
|
|
301
|
-
|
|
393
|
+
const freshRegister = await connect(config.bridgeUrl, config.apiKey, {
|
|
302
394
|
role: 'seller',
|
|
303
395
|
name: config.name,
|
|
304
396
|
description: config.description,
|
|
@@ -307,10 +399,19 @@ export async function startSellerDaemon(config, opts) {
|
|
|
307
399
|
maxConcurrentTasks: config.maxConcurrentTasks,
|
|
308
400
|
sellerSlug: config.id
|
|
309
401
|
});
|
|
310
|
-
|
|
311
|
-
|
|
402
|
+
holder.current = freshRegister;
|
|
403
|
+
saveSession(holder.current.agentId, holder.current.reconnectToken);
|
|
404
|
+
logger.info(`Re-registered: agentId=${holder.current.agentId}`);
|
|
312
405
|
lastHeartbeat = Date.now();
|
|
313
406
|
reconnected = true;
|
|
407
|
+
// Same cursor reset as the reconnect branch — a fresh connect
|
|
408
|
+
// against a post-restart broker means the event stream is
|
|
409
|
+
// brand new, so ask for "since=0".
|
|
410
|
+
if (lastEventId > 0) {
|
|
411
|
+
logger.info(`Event cursor reset ${lastEventId} → 0 after re-register.`);
|
|
412
|
+
}
|
|
413
|
+
lastEventId = 0;
|
|
414
|
+
saveLastEventId(0);
|
|
314
415
|
pollsSinceReconcile = RECONCILE_EVERY_N_POLLS;
|
|
315
416
|
await reconcileTasks('re-register');
|
|
316
417
|
}
|
|
@@ -327,7 +428,7 @@ export async function startSellerDaemon(config, opts) {
|
|
|
327
428
|
}
|
|
328
429
|
}
|
|
329
430
|
// Poll events
|
|
330
|
-
const events = await pollEvents(
|
|
431
|
+
const events = await pollEvents(holder, lastEventId);
|
|
331
432
|
let minDeferredEventId;
|
|
332
433
|
for (const event of events) {
|
|
333
434
|
if (event.type === 'task.assigned') {
|
|
@@ -349,7 +450,7 @@ export async function startSellerDaemon(config, opts) {
|
|
|
349
450
|
if (event.eventId > lastEventId)
|
|
350
451
|
lastEventId = event.eventId;
|
|
351
452
|
logger.info(`Task assigned: ${taskId} (active: ${activeTasks.size}/${config.maxConcurrentTasks})`);
|
|
352
|
-
handleTask(
|
|
453
|
+
handleTask(holder, taskId, config, logger, identityPrivateKey)
|
|
353
454
|
.catch((err) => {
|
|
354
455
|
logger.error(`Task ${taskId} failed: ${err instanceof Error ? err.message : String(err)}`);
|
|
355
456
|
})
|
|
@@ -366,7 +467,7 @@ export async function startSellerDaemon(config, opts) {
|
|
|
366
467
|
continue; // Task handler is already running
|
|
367
468
|
activeTasks.add(taskId);
|
|
368
469
|
logger.info(`Message received for task: ${taskId}`);
|
|
369
|
-
handleBuyerMessage(
|
|
470
|
+
handleBuyerMessage(holder, taskId, config, logger, identityPrivateKey)
|
|
370
471
|
.catch((err) => {
|
|
371
472
|
logger.error(`Message handling for ${taskId} failed: ${err instanceof Error ? err.message : String(err)}`);
|
|
372
473
|
})
|
|
@@ -379,7 +480,7 @@ export async function startSellerDaemon(config, opts) {
|
|
|
379
480
|
continue;
|
|
380
481
|
activeTasks.add(taskId);
|
|
381
482
|
logger.info(`Revision requested for task: ${taskId}`);
|
|
382
|
-
handleRevision(
|
|
483
|
+
handleRevision(holder, taskId, feedback, config, logger, identityPrivateKey)
|
|
383
484
|
.catch((err) => {
|
|
384
485
|
logger.error(`Revision for ${taskId} failed: ${err instanceof Error ? err.message : String(err)}`);
|
|
385
486
|
})
|
|
@@ -432,9 +533,40 @@ export async function startSellerDaemon(config, opts) {
|
|
|
432
533
|
// ---------------------------------------------------------------------------
|
|
433
534
|
// Task handler
|
|
434
535
|
// ---------------------------------------------------------------------------
|
|
435
|
-
async function handleTask(
|
|
536
|
+
async function handleTask(holder, taskId, config, logger, identityPrivateKey) {
|
|
537
|
+
// Recovery fast-path: if a pending delivery is already on disk for this
|
|
538
|
+
// task (from a previous run where codex finished but deliverTask failed),
|
|
539
|
+
// skip codex entirely and retry the same bytes. Incident 2026-04-14.
|
|
540
|
+
const pending = loadPendingDelivery(taskId);
|
|
541
|
+
if (pending !== undefined) {
|
|
542
|
+
logger.info(`Task ${taskId}: pending delivery found on disk (${pending.output.length} chars produced at ${pending.producedAt}, attempt ${pending.attempts + 1}), skipping codex and delivering cached bytes directly.`);
|
|
543
|
+
// Ensure the task is in a state the broker will accept delivery on.
|
|
544
|
+
// If we never accepted or already moved to working, accept is idempotent
|
|
545
|
+
// at the 409 level.
|
|
546
|
+
try {
|
|
547
|
+
await acceptTask(holder, taskId);
|
|
548
|
+
}
|
|
549
|
+
catch (err) {
|
|
550
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
551
|
+
if (!msg.includes('409')) {
|
|
552
|
+
logger.info(`Task ${taskId}: acceptTask during pending replay returned ${msg.slice(0, 120)}; proceeding to deliver anyway.`);
|
|
553
|
+
}
|
|
554
|
+
}
|
|
555
|
+
incrementPendingDeliveryAttempts(taskId);
|
|
556
|
+
try {
|
|
557
|
+
await deliverTask(holder, taskId, pending.output, pending.signature);
|
|
558
|
+
deletePendingDelivery(taskId);
|
|
559
|
+
logger.info(`Task ${taskId}: cached delivery accepted by broker.`);
|
|
560
|
+
saveState({ taskId, messages: [], claudeCodeContext: '', originalContext: '', lastDelivery: pending.output });
|
|
561
|
+
}
|
|
562
|
+
catch (err) {
|
|
563
|
+
logger.error(`Task ${taskId}: cached delivery failed (${err instanceof Error ? err.message : String(err)}). Pending file retained for next reconcile cycle.`);
|
|
564
|
+
throw err;
|
|
565
|
+
}
|
|
566
|
+
return;
|
|
567
|
+
}
|
|
436
568
|
try {
|
|
437
|
-
await acceptTask(
|
|
569
|
+
await acceptTask(holder, taskId);
|
|
438
570
|
}
|
|
439
571
|
catch (err) {
|
|
440
572
|
const msg = err instanceof Error ? err.message : String(err);
|
|
@@ -446,9 +578,9 @@ async function handleTask(session, taskId, config, logger, identityPrivateKey) {
|
|
|
446
578
|
}
|
|
447
579
|
logger.info(`Task ${taskId}: accepted, starting work...`);
|
|
448
580
|
// Get task context from task detail first, then messages as supplement
|
|
449
|
-
const detail = await getTaskDetail(
|
|
581
|
+
const detail = await getTaskDetail(holder, taskId);
|
|
450
582
|
const taskContext = detail?.context ?? '';
|
|
451
|
-
const messages = await getTaskMessages(
|
|
583
|
+
const messages = await getTaskMessages(holder, taskId);
|
|
452
584
|
const messageContext = messages.map((m) => m.content).join('\n');
|
|
453
585
|
const context = taskContext || messageContext || 'Task assigned. No additional context provided.';
|
|
454
586
|
// Compose the system prompt with the BOUND CONTRACT + REFUSAL PROTOCOL
|
|
@@ -467,14 +599,14 @@ async function handleTask(session, taskId, config, logger, identityPrivateKey) {
|
|
|
467
599
|
: await runAgentLoop(createProvider(config.provider, config.model, config.providerApiKey), loadTools(config.allowedTools, config.workingDirectory), context, { systemPrompt: scopedSystemPrompt, taskId });
|
|
468
600
|
if (result.timedOut === true) {
|
|
469
601
|
logger.error(`Task ${taskId}: agent timed out or errored, notifying buyer.`);
|
|
470
|
-
await sendTaskMessage(
|
|
602
|
+
await sendTaskMessage(holder, taskId, 'info', 'The seller agent encountered a timeout processing this task. Please allow more time or simplify the request. The task remains assigned.');
|
|
471
603
|
saveState({ taskId, messages: [], claudeCodeContext: context, originalContext: context });
|
|
472
604
|
return;
|
|
473
605
|
}
|
|
474
606
|
if (result.paused) {
|
|
475
607
|
logger.info(`Task ${taskId}: paused, asking buyer: "${result.pendingQuestion?.slice(0, 100)}..."`);
|
|
476
608
|
saveState({ taskId, messages: [], claudeCodeContext: context, pendingQuestion: result.pendingQuestion });
|
|
477
|
-
await sendTaskMessage(
|
|
609
|
+
await sendTaskMessage(holder, taskId, 'question', result.pendingQuestion ?? 'Could you clarify?');
|
|
478
610
|
return;
|
|
479
611
|
}
|
|
480
612
|
// Output-side truncation: count produced units, clamp to contract, log a
|
|
@@ -485,21 +617,57 @@ async function handleTask(session, taskId, config, logger, identityPrivateKey) {
|
|
|
485
617
|
if (truncated.events.length > 0) {
|
|
486
618
|
for (const ev of truncated.events) {
|
|
487
619
|
logger.info(`Task ${taskId}: scope truncation — item="${ev.itemName}" produced=${ev.producedQuantity} allowed=${ev.allowedQuantity}`);
|
|
488
|
-
await sendTaskMessage(
|
|
620
|
+
await sendTaskMessage(holder, taskId, 'info', `[scope-guard] Your brief requested more "${ev.itemName}" than the listing delivers. The seller agent produced ${ev.producedQuantity} but the contract allows only ${ev.allowedQuantity}. The delivery has been trimmed to the contracted quantity. To get more, hire again with a listing that covers the extra volume.`);
|
|
489
621
|
}
|
|
490
622
|
}
|
|
491
623
|
logger.info(`Task ${taskId}: delivering result (${finalOutput.length} chars)`);
|
|
492
624
|
const signature = identityPrivateKey !== undefined
|
|
493
625
|
? signDelivery({ taskId, content: finalOutput, privateKey: identityPrivateKey })
|
|
494
626
|
: undefined;
|
|
495
|
-
|
|
627
|
+
// Persist the produced bytes to disk BEFORE calling deliverTask. If the
|
|
628
|
+
// deliver fails (401, network blip, broker mid-restart), the reconcile
|
|
629
|
+
// loop will re-enter handleTask, find this file, and retry the exact
|
|
630
|
+
// same bytes — no codex re-spawn, no regeneration cost. Incident 2026-04-14.
|
|
631
|
+
savePendingDelivery({
|
|
632
|
+
taskId,
|
|
633
|
+
output: finalOutput,
|
|
634
|
+
...(signature !== undefined ? { signature } : {}),
|
|
635
|
+
producedAt: new Date().toISOString(),
|
|
636
|
+
attempts: 0
|
|
637
|
+
});
|
|
638
|
+
try {
|
|
639
|
+
await deliverTask(holder, taskId, finalOutput, signature);
|
|
640
|
+
}
|
|
641
|
+
catch (err) {
|
|
642
|
+
incrementPendingDeliveryAttempts(taskId);
|
|
643
|
+
logger.error(`Task ${taskId}: initial deliver failed (${err instanceof Error ? err.message : String(err)}). Pending delivery file retained for reconcile retry.`);
|
|
644
|
+
throw err;
|
|
645
|
+
}
|
|
646
|
+
deletePendingDelivery(taskId);
|
|
496
647
|
// Preserve state for potential revision — only deleted on terminal events
|
|
497
648
|
saveState({ taskId, messages: [], claudeCodeContext: context, originalContext: context, lastDelivery: finalOutput });
|
|
498
649
|
}
|
|
499
650
|
// ---------------------------------------------------------------------------
|
|
500
651
|
// Revision handler
|
|
501
652
|
// ---------------------------------------------------------------------------
|
|
502
|
-
async function handleRevision(
|
|
653
|
+
async function handleRevision(holder, taskId, feedback, config, logger, identityPrivateKey) {
|
|
654
|
+
// Recovery fast-path: same as handleTask — if a pending revision delivery
|
|
655
|
+
// exists on disk, deliver cached bytes instead of re-running codex.
|
|
656
|
+
const pending = loadPendingDelivery(taskId);
|
|
657
|
+
if (pending !== undefined) {
|
|
658
|
+
logger.info(`Task ${taskId}: pending revision delivery found on disk (${pending.output.length} chars produced at ${pending.producedAt}, attempt ${pending.attempts + 1}), delivering cached bytes directly.`);
|
|
659
|
+
incrementPendingDeliveryAttempts(taskId);
|
|
660
|
+
try {
|
|
661
|
+
await deliverTask(holder, taskId, pending.output, pending.signature);
|
|
662
|
+
deletePendingDelivery(taskId);
|
|
663
|
+
logger.info(`Task ${taskId}: cached revision delivery accepted by broker.`);
|
|
664
|
+
}
|
|
665
|
+
catch (err) {
|
|
666
|
+
logger.error(`Task ${taskId}: cached revision delivery failed (${err instanceof Error ? err.message : String(err)}). Pending file retained for next reconcile cycle.`);
|
|
667
|
+
throw err;
|
|
668
|
+
}
|
|
669
|
+
return;
|
|
670
|
+
}
|
|
503
671
|
const savedState = loadState(taskId);
|
|
504
672
|
// Use original task context (not compounded revision context) to avoid quadratic growth
|
|
505
673
|
const originalContext = savedState?.originalContext ?? savedState?.claudeCodeContext ?? '';
|
|
@@ -519,21 +687,21 @@ async function handleRevision(session, taskId, feedback, config, logger, identit
|
|
|
519
687
|
'Revise your work based on the buyer feedback above. Output the complete updated deliverable.'
|
|
520
688
|
].join('\n');
|
|
521
689
|
// Re-fetch the frozen snapshot so revisions stay bound to the contract.
|
|
522
|
-
const revisionDetail = await getTaskDetail(
|
|
690
|
+
const revisionDetail = await getTaskDetail(holder, taskId);
|
|
523
691
|
const revisionSystemPrompt = composeSystemPrompt(config.systemPrompt, revisionDetail?.deliverablesSnapshot);
|
|
524
692
|
const result = config.provider === 'claude-code'
|
|
525
693
|
? await runClaudeCodeTask(revisionContext, { ...config, systemPrompt: revisionSystemPrompt }, logger)
|
|
526
694
|
: await runAgentLoop(createProvider(config.provider, config.model, config.providerApiKey), loadTools(config.allowedTools, config.workingDirectory), revisionContext, { systemPrompt: revisionSystemPrompt, taskId });
|
|
527
695
|
if (result.timedOut === true) {
|
|
528
696
|
logger.error(`Task ${taskId}: revision timed out, notifying buyer.`);
|
|
529
|
-
await sendTaskMessage(
|
|
697
|
+
await sendTaskMessage(holder, taskId, 'info', 'The seller agent timed out while working on the revision. Please allow more time or simplify the request.');
|
|
530
698
|
saveState({ taskId, messages: [], claudeCodeContext: revisionContext, originalContext });
|
|
531
699
|
return;
|
|
532
700
|
}
|
|
533
701
|
if (result.paused) {
|
|
534
702
|
logger.info(`Task ${taskId}: revision paused, asking buyer...`);
|
|
535
703
|
saveState({ taskId, messages: [], claudeCodeContext: revisionContext, originalContext, pendingQuestion: result.pendingQuestion });
|
|
536
|
-
await sendTaskMessage(
|
|
704
|
+
await sendTaskMessage(holder, taskId, 'question', result.pendingQuestion ?? 'Could you clarify?');
|
|
537
705
|
return;
|
|
538
706
|
}
|
|
539
707
|
const truncatedRevision = truncateToContract(result.output, revisionDetail?.deliverablesSnapshot);
|
|
@@ -541,14 +709,32 @@ async function handleRevision(session, taskId, feedback, config, logger, identit
|
|
|
541
709
|
if (truncatedRevision.events.length > 0) {
|
|
542
710
|
for (const ev of truncatedRevision.events) {
|
|
543
711
|
logger.info(`Task ${taskId}: revision scope truncation — item="${ev.itemName}" produced=${ev.producedQuantity} allowed=${ev.allowedQuantity}`);
|
|
544
|
-
await sendTaskMessage(
|
|
712
|
+
await sendTaskMessage(holder, taskId, 'info', `[scope-guard] Revision trimmed: "${ev.itemName}" produced ${ev.producedQuantity}, contract allows ${ev.allowedQuantity}.`);
|
|
545
713
|
}
|
|
546
714
|
}
|
|
547
715
|
logger.info(`Task ${taskId}: delivering revision (${finalRevisionOutput.length} chars)`);
|
|
548
716
|
const revisionSignature = identityPrivateKey !== undefined
|
|
549
717
|
? signDelivery({ taskId, content: finalRevisionOutput, privateKey: identityPrivateKey })
|
|
550
718
|
: undefined;
|
|
551
|
-
|
|
719
|
+
// Persist produced revision bytes before calling deliverTask. Same
|
|
720
|
+
// rationale as handleTask: a broker restart or 401 after retry should
|
|
721
|
+
// not waste another codex run on regeneration.
|
|
722
|
+
savePendingDelivery({
|
|
723
|
+
taskId,
|
|
724
|
+
output: finalRevisionOutput,
|
|
725
|
+
...(revisionSignature !== undefined ? { signature: revisionSignature } : {}),
|
|
726
|
+
producedAt: new Date().toISOString(),
|
|
727
|
+
attempts: 0
|
|
728
|
+
});
|
|
729
|
+
try {
|
|
730
|
+
await deliverTask(holder, taskId, finalRevisionOutput, revisionSignature);
|
|
731
|
+
}
|
|
732
|
+
catch (err) {
|
|
733
|
+
incrementPendingDeliveryAttempts(taskId);
|
|
734
|
+
logger.error(`Task ${taskId}: initial revision deliver failed (${err instanceof Error ? err.message : String(err)}). Pending delivery file retained for reconcile retry.`);
|
|
735
|
+
throw err;
|
|
736
|
+
}
|
|
737
|
+
deletePendingDelivery(taskId);
|
|
552
738
|
saveState({ taskId, messages: [], claudeCodeContext: revisionContext, originalContext, lastDelivery: finalRevisionOutput });
|
|
553
739
|
}
|
|
554
740
|
// ---------------------------------------------------------------------------
|
|
@@ -564,7 +750,7 @@ function buildResumedContext(priorContext, priorQuestion, buyerAnswer) {
|
|
|
564
750
|
'Continue the task with this new information.'
|
|
565
751
|
].join('\n');
|
|
566
752
|
}
|
|
567
|
-
async function handleBuyerMessage(
|
|
753
|
+
async function handleBuyerMessage(holder, taskId, config, logger, identityPrivateKey) {
|
|
568
754
|
const savedState = loadState(taskId);
|
|
569
755
|
if (savedState === undefined) {
|
|
570
756
|
logger.info(`Task ${taskId}: no saved state found, ignoring message.`);
|
|
@@ -574,10 +760,10 @@ async function handleBuyerMessage(session, taskId, config, logger, identityPriva
|
|
|
574
760
|
// Guide them to use zyndo_request_revision for the formal revision flow.
|
|
575
761
|
if (savedState.lastDelivery !== undefined && savedState.pendingQuestion === undefined) {
|
|
576
762
|
logger.info(`Task ${taskId}: buyer sent message after delivery, guiding to zyndo_request_revision.`);
|
|
577
|
-
await sendTaskMessage(
|
|
763
|
+
await sendTaskMessage(holder, taskId, 'info', 'I received your message. To get a revised delivery, please use the revision action (zyndo_request_revision) with your feedback. I will then rework and re-deliver.');
|
|
578
764
|
return;
|
|
579
765
|
}
|
|
580
|
-
const messages = await getTaskMessages(
|
|
766
|
+
const messages = await getTaskMessages(holder, taskId);
|
|
581
767
|
const lastMessage = messages[messages.length - 1];
|
|
582
768
|
if (lastMessage === undefined)
|
|
583
769
|
return;
|
|
@@ -596,7 +782,7 @@ async function handleBuyerMessage(session, taskId, config, logger, identityPriva
|
|
|
596
782
|
}
|
|
597
783
|
if (result.timedOut === true) {
|
|
598
784
|
logger.error(`Task ${taskId}: message-resume timed out, notifying buyer.`);
|
|
599
|
-
await sendTaskMessage(
|
|
785
|
+
await sendTaskMessage(holder, taskId, 'info', 'The seller agent timed out while processing your response. Please allow more time or simplify the request.');
|
|
600
786
|
return;
|
|
601
787
|
}
|
|
602
788
|
if (result.paused) {
|
|
@@ -605,14 +791,30 @@ async function handleBuyerMessage(session, taskId, config, logger, identityPriva
|
|
|
605
791
|
? buildResumedContext(savedState.claudeCodeContext ?? '', savedState.pendingQuestion, lastMessage.content)
|
|
606
792
|
: savedState.claudeCodeContext ?? '';
|
|
607
793
|
saveState({ taskId, messages: [], claudeCodeContext: updatedContext, pendingQuestion: result.pendingQuestion });
|
|
608
|
-
await sendTaskMessage(
|
|
794
|
+
await sendTaskMessage(holder, taskId, 'question', result.pendingQuestion ?? 'Could you clarify?');
|
|
609
795
|
return;
|
|
610
796
|
}
|
|
611
797
|
logger.info(`Task ${taskId}: delivering result (${result.output.length} chars)`);
|
|
612
798
|
const resumedSignature = identityPrivateKey !== undefined
|
|
613
799
|
? signDelivery({ taskId, content: result.output, privateKey: identityPrivateKey })
|
|
614
800
|
: undefined;
|
|
615
|
-
|
|
801
|
+
// Persist before deliver, same pattern as handleTask / handleRevision.
|
|
802
|
+
savePendingDelivery({
|
|
803
|
+
taskId,
|
|
804
|
+
output: result.output,
|
|
805
|
+
...(resumedSignature !== undefined ? { signature: resumedSignature } : {}),
|
|
806
|
+
producedAt: new Date().toISOString(),
|
|
807
|
+
attempts: 0
|
|
808
|
+
});
|
|
809
|
+
try {
|
|
810
|
+
await deliverTask(holder, taskId, result.output, resumedSignature);
|
|
811
|
+
}
|
|
812
|
+
catch (err) {
|
|
813
|
+
incrementPendingDeliveryAttempts(taskId);
|
|
814
|
+
logger.error(`Task ${taskId}: initial resumed deliver failed (${err instanceof Error ? err.message : String(err)}). Pending delivery file retained.`);
|
|
815
|
+
throw err;
|
|
816
|
+
}
|
|
817
|
+
deletePendingDelivery(taskId);
|
|
616
818
|
const resumedContext = config.provider === 'claude-code'
|
|
617
819
|
? buildResumedContext(savedState.claudeCodeContext ?? '', savedState.pendingQuestion, lastMessage.content)
|
|
618
820
|
: savedState.claudeCodeContext ?? '';
|
package/dist/state.d.ts
CHANGED
|
@@ -21,3 +21,14 @@ export type PersistedSession = Readonly<{
|
|
|
21
21
|
export declare function saveSession(agentId: string, reconnectToken: string): void;
|
|
22
22
|
export declare function loadSession(): PersistedSession | undefined;
|
|
23
23
|
export declare function clearSession(): void;
|
|
24
|
+
/**
 * Record of a produced deliverable that is written to disk before the
 * delivery call is made, so the same bytes can be re-sent after a failure.
 */
export type PendingDelivery = Readonly<{
    /** Identifier of the task the output belongs to. */
    taskId: string;
    /** The deliverable content exactly as it will be sent. */
    output: string;
    /** Delivery signature; absent when no identity private key was available. */
    signature?: string;
    /** ISO timestamp (from `Date#toISOString`) taken when the output was persisted. */
    producedAt: string;
    /** Delivery attempt counter; starts at 0 and is bumped via `incrementPendingDeliveryAttempts`. */
    attempts: number;
}>;
|
|
31
|
+
/**
 * Persist a pending delivery as a JSON file keyed by `payload.taskId`,
 * creating the pending-deliveries directory first if it does not exist.
 *
 * @param payload - The delivery record to write.
 */
export declare function savePendingDelivery(payload: PendingDelivery): void;
|
|
32
|
+
/**
 * Load the pending delivery stored for a task.
 *
 * @param taskId - Task whose pending delivery file should be read.
 * @returns The stored record, or `undefined` when no file exists, the file
 *   cannot be read or parsed, or it lacks a string `taskId` / `output`.
 */
export declare function loadPendingDelivery(taskId: string): PendingDelivery | undefined;
|
|
33
|
+
/**
 * Remove the pending delivery file for a task; does nothing when no file exists.
 *
 * @param taskId - Task whose pending delivery file should be removed.
 */
export declare function deletePendingDelivery(taskId: string): void;
|
|
34
|
+
/**
 * Add one to the `attempts` counter of a task's stored pending delivery and
 * write the record back. Does nothing when no pending delivery can be loaded.
 *
 * @param taskId - Task whose attempt counter should be incremented.
 */
export declare function incrementPendingDeliveryAttempts(taskId: string): void;
|
package/dist/state.js
CHANGED
|
@@ -84,3 +84,38 @@ export function clearSession() {
|
|
|
84
84
|
unlinkSync(getSessionFile());
|
|
85
85
|
}
|
|
86
86
|
}
|
|
87
|
+
// Directory that holds one JSON file per pending delivery. Resolved once, at
// module load, as the 'pending-deliveries' child of getBaseDir().
const PENDING_DELIVERY_DIR = resolve(getBaseDir(), 'pending-deliveries');
|
|
88
|
+
/**
 * Build the path of the pending-delivery file for a task.
 *
 * @param taskId - Task identifier; passed through sanitizeTaskId before use in the file name.
 * @returns Path of `task-<sanitized id>.json` inside PENDING_DELIVERY_DIR.
 */
function pendingDeliveryFile(taskId) {
    const fileName = `task-${sanitizeTaskId(taskId)}.json`;
    return resolve(PENDING_DELIVERY_DIR, fileName);
}
|
|
91
|
+
/**
 * Write a pending delivery record to disk as pretty-printed JSON.
 *
 * The pending-deliveries directory is created (recursively) before the write,
 * and the target file is derived from `payload.taskId`.
 *
 * @param payload - The delivery record to persist.
 */
export function savePendingDelivery(payload) {
    mkdirSync(PENDING_DELIVERY_DIR, { recursive: true });
    const target = pendingDeliveryFile(payload.taskId);
    const serialized = JSON.stringify(payload, null, 2);
    writeFileSync(target, serialized, 'utf-8');
}
|
|
95
|
+
/**
 * Read the pending delivery persisted for `taskId`, if any.
 *
 * Returns `undefined` when no file exists, when the file cannot be read or
 * parsed, or when the parsed value is not an object carrying a string
 * `taskId` and `output`. A stored `attempts` value that is not a
 * non-negative finite number is normalised to 0 so callers can safely
 * compute `attempts + 1`.
 *
 * @param taskId - Task whose pending delivery file should be loaded.
 * @returns The stored delivery record, or `undefined` if none is usable.
 */
export function loadPendingDelivery(taskId) {
    const path = pendingDeliveryFile(taskId);
    if (!existsSync(path))
        return undefined;
    try {
        const parsed = JSON.parse(readFileSync(path, 'utf-8'));
        // JSON.parse may legally yield null or a primitive; reject those
        // explicitly instead of relying on a caught TypeError.
        if (parsed === null || typeof parsed !== 'object')
            return undefined;
        if (typeof parsed.taskId !== 'string' || typeof parsed.output !== 'string')
            return undefined;
        // A missing counter would make `attempts + 1` NaN (serialised back as
        // null) and a string counter would concatenate instead of adding, so
        // fall back to 0 rather than discarding the produced output.
        const storedAttempts = parsed.attempts;
        const attempts = typeof storedAttempts === 'number' && Number.isFinite(storedAttempts) && storedAttempts >= 0
            ? storedAttempts
            : 0;
        return { ...parsed, attempts };
    }
    catch {
        // Unreadable or malformed file: treat as "no pending delivery".
        return undefined;
    }
}
|
|
110
|
+
/**
 * Remove the pending-delivery file for a task.
 *
 * @param taskId - Task whose pending delivery file should be removed.
 */
export function deletePendingDelivery(taskId) {
    const target = pendingDeliveryFile(taskId);
    // Nothing on disk for this task: nothing to remove.
    if (!existsSync(target))
        return;
    unlinkSync(target);
}
|
|
116
|
+
/**
 * Bump the `attempts` counter of the pending delivery stored for a task and
 * write the updated record back to disk.
 *
 * @param taskId - Task whose attempt counter should be incremented.
 */
export function incrementPendingDeliveryAttempts(taskId) {
    const current = loadPendingDelivery(taskId);
    // Only rewrite when a usable record was loaded; otherwise leave disk untouched.
    if (current !== undefined) {
        const bumped = { ...current, attempts: current.attempts + 1 };
        savePendingDelivery(bumped);
    }
}
|