polygram 0.8.0 → 0.9.0-rc.2
- package/.claude-plugin/plugin.json +1 -1
- package/lib/{agent-loader.js → agents/loader.js} +6 -8
- package/lib/{approvals.js → approvals/store.js} +28 -5
- package/lib/{approval-ui.js → approvals/ui.js} +1 -17
- package/lib/config.js +121 -0
- package/lib/{error-classify.js → error/classify.js} +25 -34
- package/lib/handlers/abort.js +89 -0
- package/lib/handlers/approvals.js +361 -0
- package/lib/handlers/autosteer.js +94 -0
- package/lib/handlers/config-callback.js +118 -0
- package/lib/handlers/config-ui.js +104 -0
- package/lib/handlers/dispatcher.js +263 -0
- package/lib/handlers/download.js +182 -0
- package/lib/handlers/extract-attachments.js +97 -0
- package/lib/handlers/ipc-send.js +80 -0
- package/lib/handlers/poll.js +140 -0
- package/lib/handlers/record-inbound.js +88 -0
- package/lib/handlers/slash-commands.js +319 -0
- package/lib/handlers/voice.js +107 -0
- package/lib/pm-interface.js +27 -29
- package/lib/sdk/build-options.js +177 -0
- package/lib/sdk/callbacks.js +213 -0
- package/lib/{process-manager-sdk.js → sdk/process-manager.js} +19 -31
- package/lib/{telegram.js → telegram/api.js} +2 -2
- package/lib/{telegram-prompt.js → telegram/display-hint.js} +0 -14
- package/lib/{stream-reply.js → telegram/streamer.js} +4 -4
- package/package.json +2 -3
- package/polygram.js +347 -2581
- package/scripts/doctor.js +1 -1
- package/scripts/ipc-smoke.js +1 -10
- package/bin/approval-hook.js +0 -113
- package/lib/approval-waiters.js +0 -201
- package/lib/pm-router.js +0 -201
- package/lib/process-manager.js +0 -806
- package/lib/{auto-resume.js → db/auto-resume.js} +0 -0
- package/lib/{inbox.js → db/inbox.js} +0 -0
- package/lib/{pairings.js → db/pairings.js} +0 -0
- package/lib/{replay-window.js → db/replay-window.js} +0 -0
- package/lib/{sent-cache.js → db/sent-cache.js} +0 -0
- package/lib/{sessions.js → db/sessions.js} +0 -0
- package/lib/{net-errors.js → error/net.js} +0 -0
- package/lib/{ipc-client.js → ipc/client.js} +0 -0
- package/lib/{ipc-file-validator.js → ipc/file-validator.js} +0 -0
- package/lib/{ipc-server.js → ipc/server.js} +0 -0
- package/lib/{telegram-chunk.js → telegram/chunk.js} +0 -0
- package/lib/{deliver.js → telegram/deliver.js} +0 -0
- package/lib/{telegram-format.js → telegram/format.js} +0 -0
- package/lib/{parse-response.js → telegram/parse.js} +0 -0
- package/lib/{status-reactions.js → telegram/reactions.js} +0 -0
- package/lib/{typing-indicator.js → telegram/typing.js} +0 -0
- package/lib/{voice.js → telegram/voice.js} +0 -0
package/lib/process-manager.js
DELETED

@@ -1,806 +0,0 @@
-/**
- * LRU-bounded warm process pool with FIFO pending queue per process.
- *
- * Each `entry` owns ONE claude subprocess. Messages sent via `send()` are
- * appended to `entry.pendingQueue` and their prompt is written to the
- * subprocess stdin. Claude processes stdin in FIFO order and emits one
- * `result` event per turn. Each result resolves the oldest pending
- * (queue head).
- *
- * Timers (idle + wall-clock) are only armed for the HEAD of the queue —
- * the turn Claude is currently working on. When the head is shifted,
- * the next pending becomes head and its timers arm fresh. This avoids
- * the footgun of "pending #2's timer started ticking when its stdin
- * was written, but Claude spent 5 minutes on pending #1 first → #2
- * times out before Claude sees it".
- *
- * Timer fire rejects ONLY that pending (policy: don't kill the whole
- * subprocess, other in-flight work is probably fine). If the subprocess
- * is truly stuck, its head pending will time out repeatedly.
- *
- * The `onStreamChunk` and `onToolUse` callbacks pass the live `entry` so
- * callers can inspect `entry.pendingQueue[0]` to route output to the
- * correct turn's streamer / reactor / source message.
- *
- * All I/O (spawn, db) is injected for testability.
- */
-
-const { createInterface } = require('readline');
-const { isTransientHttpError } = require('./error-classify');
-
-const DEFAULT_CAP = 10;
-const DEFAULT_KILL_TIMEOUT_MS = 3000;
-// 0.7.7: transient HTTP retry. When Anthropic returns a 5xx (or 429
-// rate-limit) and the turn produced ZERO assistant messages so far,
-// pm sleeps and retries the user message ONCE before surfacing the
-// error to the user. Matches OpenClaw's
-// pi-embedded-Vt2x_Jl3.js:39210-39216 — "single retry, then surface".
-// Idempotency-protected: we only retry if no assistant content has
-// streamed (otherwise re-sending would replay tools that already ran).
-const DEFAULT_TRANSIENT_RETRY_DELAY_MS = 2500;
-const MAX_TRANSIENT_RETRIES = 1;
-// 0.7.6 (item H): hard cap on per-session pending queue depth.
-// Pre-fix, a chat with rapid-fire user messages (or a stuck Claude that
-// stops emitting `result`) could grow pendingQueue unbounded — each
-// pending holds a streamer + reactor + timers, so a runaway client
-// could exhaust memory or burn API quota for ack reactions on every
-// dropped message. 50 is generous (a normal turn never queues more
-// than a handful) but safely bounded.
-const DEFAULT_QUEUE_CAP = 50;
-
-/**
- * Pull user-visible text from a stream-json `assistant` event.
- * See header for colon-normalisation / tool_use-filter rationale.
- */
-function extractAssistantText(event) {
-  const blocks = event?.message?.content;
-  if (!Array.isArray(blocks)) return '';
-  const parts = [];
-  for (const b of blocks) {
-    if (!b) continue;
-    if (b.type === 'text' && typeof b.text === 'string') {
-      parts.push(b.text);
-    }
-  }
-  return parts.join('\n\n').trim().replace(/([^:]):\s*$/, '$1…');
-}
-
-// 0.7.6 (item F): sum the four canonical usage counters across a Map of
-// per-message usage objects. Each map value is the LAST-SEEN usage for
-// that message id (Anthropic emits cumulative totals within a message);
-// summing across map values gives the turn-wide totals.
-//
-// Defensive against missing fields — older claude versions may not
-// always emit cache_*_input_tokens.
-function sumUsage(usageByMessage) {
-  const out = {
-    input_tokens: 0,
-    output_tokens: 0,
-    cache_creation_input_tokens: 0,
-    cache_read_input_tokens: 0,
-  };
-  for (const u of usageByMessage.values()) {
-    if (!u) continue;
-    if (Number.isFinite(u.input_tokens)) out.input_tokens += u.input_tokens;
-    if (Number.isFinite(u.output_tokens)) out.output_tokens += u.output_tokens;
-    if (Number.isFinite(u.cache_creation_input_tokens)) {
-      out.cache_creation_input_tokens += u.cache_creation_input_tokens;
-    }
-    if (Number.isFinite(u.cache_read_input_tokens)) {
-      out.cache_read_input_tokens += u.cache_read_input_tokens;
-    }
-  }
-  return out;
-}
-
-/**
- * Stream-json CLI-backed ProcessManager. Implements the canonical
- * Pm interface (`lib/pm-interface.js`). Optional methods exposed:
- * `requestRespawn` — drain queue and respawn process on next send
- * (kept for parity with rc.6+ feature-detection at the router; SDK
- * pm uses `applyFlagSettings` + `setModel` for the same UX).
- *
- * Optional methods NOT implemented (SDK pm has these): `steer`,
- * `setModel`, `applyFlagSettings`, `setPermissionMode`,
- * `drainQueue`, `interrupt`, `resetSession`.
- *
- * @implements {import('./pm-interface.js').Pm}
- */
-class ProcessManager {
-  constructor({
-    cap = DEFAULT_CAP,
-    queueCap = DEFAULT_QUEUE_CAP,
-    spawnFn,
-    db = null,
-    logger = console,
-    killTimeoutMs = DEFAULT_KILL_TIMEOUT_MS,
-    onInit = null, // (sessionKey, event, entry) → void
-    onResult = null, // (sessionKey, event, entry, pending) → void
-    onClose = null, // (sessionKey, code, entry) → void
-    onStreamChunk = null, // (sessionKey, partialText, entry) → void — routes to pendingQueue[0]
-    onToolUse = null, // (sessionKey, toolName, entry) → void — routes to pendingQueue[0]
-    onAssistantMessageStart = null, // (sessionKey, entry) → void — fires when a NEW top-level assistant message begins (after a previous one ended). Used by polygram.js to call streamer.forceNewMessage() so each assistant message gets its own bubble.
-    onRespawn = null, // (sessionKey, reason, entry) → void — fires after graceful drain-and-kill
-    onQueueDrop = null, // 0.7.6: (sessionKey, droppedPending, entry) → void — fired when a pending is dropped because pendingQueue exceeded queueCap. Polygram uses this to surface a warning on the dropped message.
-  } = {}) {
-    if (!spawnFn) throw new Error('spawnFn required');
-    this.cap = cap;
-    this.queueCap = queueCap;
-    this.spawnFn = spawnFn;
-    this.db = db;
-    this.logger = logger;
-    this.killTimeoutMs = killTimeoutMs;
-    this.onInit = onInit;
-    this.onResult = onResult;
-    this.onClose = onClose;
-    this.onStreamChunk = onStreamChunk;
-    this.onToolUse = onToolUse;
-    this.onAssistantMessageStart = onAssistantMessageStart;
-    this.onRespawn = onRespawn;
-    this.onQueueDrop = onQueueDrop;
-    this.procs = new Map();
-  }
-
-  has(sessionKey) {
-    return this.procs.has(sessionKey);
-  }
-
-  get(sessionKey) {
-    return this.procs.get(sessionKey);
-  }
-
-  size() {
-    return this.procs.size;
-  }
-
-  keys() {
-    return Array.from(this.procs.keys());
-  }
-
-  async getOrSpawn(sessionKey, spawnContext) {
-    const existing = this.procs.get(sessionKey);
-    if (existing && !existing.closed) {
-      existing.lastUsedTs = Date.now();
-      return existing;
-    }
-    if (this.procs.size >= this.cap) {
-      const evicted = await this.evictLRU();
-      if (!evicted) {
-        // All sessions are in-flight — wait for one to drain, then retry.
-        // Waiters are held in `this._lruWaiters` FIFO and signalled when any
-        // pending queue empties (see _maybeSignalLruWaiter).
-        await this._awaitLruSlot();
-        // After waking, try the whole path again — the evictLRU may now
-        // succeed, or an existing session may have been spawned for this key.
-        return this.getOrSpawn(sessionKey, spawnContext);
-      }
-    }
-    return this._spawn(sessionKey, spawnContext);
-  }
-
-  // Hold a promise pair per waiter. _maybeSignalLruWaiter shifts the oldest
-  // waiter when a slot might have freed up. Each waiter has its own timer
-  // that rejects with 'LRU wait timeout' if no slot appears in time.
-  _awaitLruSlot({ timeoutMs = 5 * 60_000 } = {}) {
-    if (!this._lruWaiters) this._lruWaiters = [];
-    return new Promise((resolve, reject) => {
-      const waiter = { resolve, reject };
-      const timer = setTimeout(() => {
-        const idx = this._lruWaiters.indexOf(waiter);
-        if (idx !== -1) this._lruWaiters.splice(idx, 1);
-        this._logEvent('lru-wait-timeout', { cap: this.cap, queued_waiters: this._lruWaiters.length });
-        reject(new Error(`LRU wait timeout after ${timeoutMs / 1000}s`));
-      }, timeoutMs);
-      waiter.timer = timer;
-      this._lruWaiters.push(waiter);
-      this._logEvent('lru-wait', { cap: this.cap, queued_waiters: this._lruWaiters.length });
-    });
-  }
-
-  _maybeSignalLruWaiter() {
-    if (!this._lruWaiters || this._lruWaiters.length === 0) return;
-    // Only signal if there's actually capacity now (a session went idle
-    // or closed). Otherwise keep waiters sleeping for the next chance.
-    let hasIdle = false;
-    for (const v of this.procs.values()) {
-      if (!v.inFlight) { hasIdle = true; break; }
-    }
-    if (!hasIdle && this.procs.size >= this.cap) return;
-    const w = this._lruWaiters.shift();
-    clearTimeout(w.timer);
-    w.resolve();
-  }
-
-  async evictLRU() {
-    let victim = null;
-    for (const [k, v] of this.procs) {
-      if (v.inFlight) continue;
-      if (!victim || v.lastUsedTs < victim.entry.lastUsedTs) {
-        victim = { key: k, entry: v };
-      }
-    }
-    if (!victim) {
-      this._logEvent('lru-full', { cap: this.cap });
-      return false;
-    }
-    this._logEvent('evict', { session_key: victim.key, chat_id: victim.entry.chatId });
-    await this.kill(victim.key);
-    return true;
-  }
-
-  /**
-   * Request a graceful respawn (e.g. because /model or /effort changed).
-   * If the queue is empty, kill now; otherwise mark the entry so it kills
-   * itself when the last pending resolves. Next send() respawns fresh
-   * with whatever config spawnFn reads at that moment.
-   *
-   * onRespawn fires with `wasDrained=true` ONLY when we waited for an
-   * in-flight turn to finish before swapping. The immediate-kill case
-   * (queue empty at request time) calls onRespawn with `wasDrained=false`
-   * so callers can decide whether to post a user-visible confirmation
-   * (which is redundant noise when the user wasn't waiting on a turn).
-   */
-  requestRespawn(sessionKey, reason = 'config-change') {
-    const entry = this.procs.get(sessionKey);
-    if (!entry || entry.closed) return { killed: false, queued: 0 };
-    entry.needsRespawn = reason;
-    this._logEvent('respawn-requested', {
-      session_key: sessionKey,
-      chat_id: entry.chatId,
-      reason,
-      queued: entry.pendingQueue.length,
-    });
-    if (entry.pendingQueue.length === 0) {
-      // Queue empty — kill immediately, fire onRespawn after close.
-      this._killAndNotifyRespawn(sessionKey, reason, false).catch(() => {});
-      return { killed: true, queued: 0 };
-    }
-    return { killed: false, queued: entry.pendingQueue.length };
-  }
-
-  async _killAndNotifyRespawn(sessionKey, reason, wasDrained) {
-    const entry = this.procs.get(sessionKey);
-    await this.kill(sessionKey);
-    if (this.onRespawn && entry) {
-      try { this.onRespawn(sessionKey, reason, entry, wasDrained); }
-      catch (err) { this.logger.error(`[pm] onRespawn: ${err.message}`); }
-    }
-  }
-
-  async kill(sessionKey) {
-    const entry = this.procs.get(sessionKey);
-    if (!entry) return;
-    this.procs.delete(sessionKey);
-    try { entry.proc.kill('SIGTERM'); } catch {}
-    await new Promise((resolve) => {
-      if (entry.closed) return resolve();
-      const timer = setTimeout(() => {
-        try { entry.proc.kill('SIGKILL'); } catch {}
-        resolve();
-      }, this.killTimeoutMs);
-      entry.proc.once('close', () => { clearTimeout(timer); resolve(); });
-    });
-    // Reject all pendings in the queue (if any survived the 'close' handler).
-    while (entry.pendingQueue.length > 0) {
-      const p = entry.pendingQueue.shift();
-      p.clearTimers?.();
-      p.reject(new Error('Process killed'));
-    }
-  }
-
-  async killChat(chatId) {
-    const prefix = String(chatId);
-    const targets = [];
-    for (const key of this.procs.keys()) {
-      if (key === prefix || key.startsWith(prefix + ':')) targets.push(key);
-    }
-    for (const key of targets) await this.kill(key);
-  }
-
-  async shutdown() {
-    // rc.38: mark "we're shutting down" so the proc.on('close') handler
-    // suppresses the misleading `resume-fail` event for signal-driven
-    // exits (SIGHUP from tmux pty close, SIGTERM from our own kill,
-    // SIGKILL from the kill-timeout escalator). Pre-rc.38 every deploy
-    // logged a `resume-fail` for every CLI-pm chat AND cleared the
-    // saved session_id, forcing a fresh resume on the next user turn
-    // — slower first turn, fresh context — for no real reason.
-    this._shuttingDown = true;
-    const keys = Array.from(this.procs.keys());
-    for (const key of keys) await this.kill(key);
-  }
-
-  _spawn(sessionKey, ctx = {}) {
-    const proc = this.spawnFn(sessionKey, ctx);
-    const rl = createInterface({ input: proc.stdout });
-    const entry = {
-      sessionKey,
-      proc,
-      rl,
-      pendingQueue: [],
-      lastUsedTs: Date.now(),
-      inFlight: false,
-      closed: false,
-      needsRespawn: null,
-      sessionId: ctx.existingSessionId || null,
-      chatId: ctx.chatId || null,
-      threadId: ctx.threadId || null,
-      label: ctx.label || sessionKey,
-    };
-
-    rl.on('line', (line) => {
-      let event;
-      try { event = JSON.parse(line); }
-      catch { this.logger.error(`[${entry.label}] non-JSON: ${line.slice(0, 200)}`); return; }
-
-      // Fix A: ANY stream-json event counts as Claude activity. Reset the
-      // idle timer on the HEAD pending (the turn Claude is working on),
-      // regardless of event type. Subagent runs emit `user`-type
-      // tool_result events between the parent's assistant events — those
-      // previously did NOT reset the timer, causing false timeouts during
-      // long subagent work.
-      const head = entry.pendingQueue[0];
-      if (head) head.resetIdleTimer?.();
-
-      if (event.type === 'system' && event.subtype === 'init') {
-        entry.sessionId = event.session_id;
-        if (this.onInit) this.onInit(sessionKey, event, entry);
-      }
-
-      if (event.type === 'assistant' && head) {
-        // 0.7.0 (Phase F): detect message_id transitions to split bubbles
-        // per top-level assistant message. Each Anthropic stream-json
-        // 'assistant' event carries event.message.id; the same id across
-        // events means cumulative updates to the same message, a new
-        // id means a new message (typically after a tool-result cycle).
-        const messageId = event.message?.id;
-        const added = extractAssistantText(event);
-        // 0.7.4 (item B): first sign Claude is doing real work on this
-        // pending. Fire onFirstStream ONCE, regardless of whether the
-        // assistant message has text or only tool_use blocks (some turns
-        // emit tool_use first with no preamble).
-        const hasAssistantContent = !!added || (Array.isArray(event.message?.content)
-          && event.message.content.some((b) => b?.type === 'tool_use'));
-        if (hasAssistantContent) {
-          head.fireFirstStream?.();
-          // 0.7.7: any assistant content (text OR tool_use) disqualifies
-          // the turn from transient-retry — re-sending the user prompt
-          // after this point would replay tools that already executed.
-          head.firstAssistantSeen = true;
-        }
-        // 0.7.6 (item F): accumulate usage + counters for turn telemetry.
-        // The `result` event carries total_cost_usd + duration_ms but NOT
-        // a usage breakdown; usage lives on each assistant.message.usage.
-        // Anthropic emits cumulative totals per assistant message id
-        // (so within a single message the last usage seen wins; across
-        // distinct messages they sum).
-        const usage = event.message?.usage;
-        if (usage) {
-          if (messageId != null && head.lastUsageMessageId === messageId) {
-            // same message, replace running totals for this message
-            head.usageByMessage.set(messageId, usage);
-          } else {
-            head.lastUsageMessageId = messageId;
-            head.usageByMessage.set(messageId, usage);
-          }
-        }
-        if (Array.isArray(event.message?.content)) {
-          for (const b of event.message.content) {
-            if (b?.type === 'tool_use') head.toolUseCount++;
-          }
-        }
-        if (added) {
-          // Pre-0.7.0 we did `streamText = streamText + '\n\n' + added`,
-          // which DUPLICATED text on every update because `added` is
-          // the cumulative full text-so-far of the current assistant
-          // message (not a delta). 0.7.0 REPLACES instead — the new
-          // text is already cumulative — and uses messageId boundaries
-          // to fire onAssistantMessageStart for each new top-level
-          // assistant message. The streamer responds by force-creating
-          // a fresh bubble, so each assistant message gets its own.
-          const isNewMessage = head.lastAssistantMessageId != null
-            && messageId != null
-            && head.lastAssistantMessageId !== messageId
-            && head.streamText
-            && head.streamText.length > 0;
-          if (isNewMessage && this.onAssistantMessageStart) {
-            try { this.onAssistantMessageStart(sessionKey, entry); }
-            catch (err) { this.logger.error(`[${entry.label}] onAssistantMessageStart: ${err.message}`); }
-          }
-          if (messageId != null) head.lastAssistantMessageId = messageId;
-          head.streamText = added;
-          if (this.onStreamChunk) {
-            try { this.onStreamChunk(sessionKey, head.streamText, entry); }
-            catch (err) { this.logger.error(`[${entry.label}] onStreamChunk: ${err.message}`); }
-          }
-        }
-        if (this.onToolUse) {
-          const blocks = event.message?.content;
-          if (Array.isArray(blocks)) {
-            for (const b of blocks) {
-              if (b?.type === 'tool_use' && b.name) {
-                try { this.onToolUse(sessionKey, b.name, entry); }
-                catch (err) { this.logger.error(`[${entry.label}] onToolUse: ${err.message}`); }
-              }
-            }
-          }
-        }
-      }
-
-      if (event.type === 'result' && head) {
-        // 0.7.7: transient HTTP retry. If Anthropic returned a
-        // retryable error AND the turn produced ZERO assistant
-        // content yet AND we haven't already retried, sleep and
-        // re-write the prompt instead of resolving the pending.
-        // Idempotency: firstAssistantSeen guards against replaying
-        // tools that already ran.
-        const errSignal = event.error || event.subtype;
-        const isError = event.subtype !== 'success';
-        const shouldTransientRetry = isError
-          && !head.firstAssistantSeen
-          && head.transientRetries < MAX_TRANSIENT_RETRIES
-          && head.prompt != null
-          && isTransientHttpError({ message: errSignal, subtype: event.subtype });
-        if (shouldTransientRetry) {
-          head.transientRetries++;
-          this._logEvent('transient-retry', {
-            session_key: sessionKey,
-            chat_id: entry.chatId,
-            attempt: head.transientRetries,
-            subtype: event.subtype,
-            error: typeof errSignal === 'string' ? errSignal.slice(0, 200) : null,
-          });
-          // Reset accumulators so the retried turn's metrics aren't
-          // contaminated by the failed-turn's totals (usage on a
-          // failed turn IS billed but we surface it as a separate
-          // event-log entry rather than mixing into turn_metrics).
-          head.usageByMessage = new Map();
-          head.lastUsageMessageId = null;
-          head.toolUseCount = 0;
-          head.streamText = '';
-          head.lastAssistantMessageId = null;
-          // Re-arm idle timer (the old one is still ticking from the
-          // previous activate; resetIdleTimer just re-arms).
-          head.resetIdleTimer?.();
-          // Sleep then re-write. Keep the pending in-place; the next
-          // 'result' event resolves it normally (or hits the same
-          // retry path if MAX_TRANSIENT_RETRIES hadn't been
-          // exhausted, which after the increment above it has).
-          setTimeout(() => {
-            // Edge case: pending was killed/aborted during the
-            // retry sleep — process exited, queue drained, etc.
-            // Skip the re-write if pendingQueue no longer holds us.
-            if (entry.pendingQueue[0] !== head || entry.closed) return;
-            try {
-              entry.proc.stdin.write(JSON.stringify({
-                type: 'user',
-                message: { role: 'user', content: head.prompt },
-              }) + '\n');
-            } catch (err) {
-              // stdin write failed — fall back to surfacing the
-              // error. Mark as not-retried-anymore so we don't loop.
-              this.logger.error(`[${entry.label}] transient-retry stdin write failed: ${err.message}`);
-              entry.pendingQueue.shift();
-              head.clearTimers();
-              head.reject(err);
-            }
-          }, DEFAULT_TRANSIENT_RETRY_DELAY_MS);
-          return; // don't shift / resolve; wait for next result
-        }
-
-        entry.pendingQueue.shift();
-        head.clearTimers();
-        if (this.onResult) this.onResult(sessionKey, event, entry, head);
-        // 0.7.6 (item F): sum usage across distinct assistant messages
-        // (each message id seen got its last-known usage stored; sum the
-        // map values). Yields a single-row metric summary the caller
-        // can persist via db.insertTurnMetric().
-        const usageTotals = sumUsage(head.usageByMessage);
-        head.resolve({
-          text: event.result || '',
-          sessionId: event.session_id,
-          cost: event.total_cost_usd,
-          duration: event.duration_ms,
-          error: event.subtype === 'success' ? null : (event.error || event.subtype),
-          metrics: {
-            inputTokens: usageTotals.input_tokens,
-            outputTokens: usageTotals.output_tokens,
-            cacheCreationTokens: usageTotals.cache_creation_input_tokens,
-            cacheReadTokens: usageTotals.cache_read_input_tokens,
-            numAssistantMessages: head.usageByMessage.size,
-            numToolUses: head.toolUseCount,
-            resultSubtype: event.subtype || null,
-          },
-        });
-        // Activate next head or settle idle state.
-        if (entry.pendingQueue.length > 0) {
-          entry.pendingQueue[0].activate();
-        } else {
-          entry.inFlight = false;
-          // An entry just went idle → an LRU waiter might be able to run now.
-          this._maybeSignalLruWaiter();
-          // Graceful drain-and-respawn: if caller asked for a respawn
-          // (e.g. /model change) and we just emptied the queue, kill now
-          // and fire onRespawn so the caller can post confirmation.
-          if (entry.needsRespawn) {
-            const reason = entry.needsRespawn;
-            entry.needsRespawn = null;
-            this._logEvent('respawn-draining', {
-              session_key: sessionKey,
-              chat_id: entry.chatId,
-              reason,
-            });
-            // wasDrained=true: this path runs after the queue emptied
-            // naturally (an in-flight turn finished), so the user was
-            // waiting and the confirmation message is meaningful.
-            this._killAndNotifyRespawn(sessionKey, reason, true).catch(() => {});
-          }
-        }
-      }
-    });
-
-    proc.on('close', (code) => {
-      entry.closed = true;
-      entry.inFlight = false;
-      while (entry.pendingQueue.length > 0) {
-        const p = entry.pendingQueue.shift();
-        p.clearTimers?.();
-        p.reject(new Error(`Process exited (code ${code})`));
-      }
-      this.procs.delete(sessionKey);
-      // A slot freed up → maybe an LRU waiter can run now.
-      this._maybeSignalLruWaiter();
-      // rc.38: only fire `resume-fail` for UNEXPECTED non-zero exits.
-      // Signal-driven exits during planned shutdown (SIGHUP from tmux
-      // pty close on `tmux kill-session`, SIGTERM from our own kill(),
-      // SIGKILL from the kill-timeout escalator) are NOT resume
-      // failures — the saved session_id is still valid, we'd just be
-      // clearing it for nothing and logging misleading noise on every
-      // deploy. The real signal we care about is "the CLI rejected a
-      // stale or corrupt resume id at startup with a non-zero exit
-      // while polygram is healthy."
-      const isPlannedShutdown = this._shuttingDown
-        || code === null // killed without an exit code
-        || code === 129 // SIGHUP (tmux pty close on deploy kickstart)
-        || code === 143 // SIGTERM (our own kill())
-        || code === 137; // SIGKILL (kill-timeout escalation)
-      if (code !== 0 && ctx.existingSessionId && this.db?.clearSessionId
-        && !isPlannedShutdown) {
-        this._logEvent('resume-fail', { session_key: sessionKey, session_id: ctx.existingSessionId, code });
-        try { this.db.clearSessionId(sessionKey); } catch (err) {
-          this.logger.error(`[${entry.label}] clearSessionId failed: ${err.message}`);
-        }
-      }
-      if (this.onClose) this.onClose(sessionKey, code, entry);
-    });
-
-    // rc.38: stdin error listener. Async EIO writes (the kernel reports
-    // them after the subprocess pipe closed during shutdown) had no
-    // listener pre-rc.38 → bubbled to the global uncaughtException
-    // handler → emitted misleading `uncaught-exception: write EIO`
-    // events on every deploy. Listening swallows that path; runtime
-    // stdin errors (rare; usually a real problem) still log here.
-    proc.stdin?.on?.('error', (err) => {
-      this.logger.error(`[${entry.label}] stdin error: ${err.message}`);
-    });
-
-    proc.on('error', (err) => {
-      this.logger.error(`[${entry.label}] proc error: ${err.message}`);
-      entry.closed = true;
-      entry.inFlight = false;
-      while (entry.pendingQueue.length > 0) {
-        const p = entry.pendingQueue.shift();
-        p.clearTimers?.();
-        p.reject(err);
-      }
-      this.procs.delete(sessionKey);
-    });
-
-    this.procs.set(sessionKey, entry);
-    return entry;
-  }
-
-  /**
-   * Append a turn to the queue. The returned promise resolves when Claude
-   * emits a `result` event for this turn (they emerge in stdin-write
-   * order). The underlying stdin write happens synchronously inside this
-   * call — the caller should have already serialised writes across
-   * sessions via an external lock if order matters.
-   *
-   * Options:
-   *   timeoutMs — idle timer between Claude events (default 10min)
-   *   maxTurnMs — wall-clock ceiling from "activate" time (default 30min)
-   *   context — opaque object stored on the pending (polygram puts
-   *     streamer, reactor, sourceMsgId here for its own use)
-   */
-  send(sessionKey, prompt, {
-    timeoutMs = 600_000,
-    maxTurnMs = 30 * 60_000,
-    context = {},
-  } = {}) {
-    return new Promise((resolve, reject) => {
-      const entry = this.procs.get(sessionKey);
-      if (!entry || entry.closed) return reject(new Error('No process for session'));
-      if (!entry.proc.stdin || entry.proc.stdin.destroyed || !entry.proc.stdin.writable) {
-        return reject(new Error('Process stdin not writable'));
-      }
-      // If this entry is awaiting respawn, refuse new sends — the caller
-      // should wait for the respawn to complete (which happens when the
-      // current queue drains).
-      if (entry.needsRespawn) {
-        return reject(new Error(`Session awaiting respawn (${entry.needsRespawn})`));
-      }
-
-      entry.lastUsedTs = Date.now();
-
-      let idleTimer = null;
-      let maxTimer = null;
-      let activated = false;
-
-      const clearTimers = () => {
-        if (idleTimer) { clearTimeout(idleTimer); idleTimer = null; }
-        if (maxTimer) { clearTimeout(maxTimer); maxTimer = null; }
-      };
-
-      const pending = {
-        resolve: (r) => { clearTimers(); resolve(r); },
-        reject: (e) => { clearTimers(); reject(e); },
-        clearTimers,
-        startedAt: null,
-        streamText: '',
-        context,
-        idleTimer: null,
-        maxTimer: null,
-        activated: false,
-        // 0.7.6 (item F): per-turn telemetry accumulators. usageByMessage
-        // collects each assistant message's last-seen usage; we sum
-        // across messages at result time (each id is summed once, not
-        // per stream chunk, since usage in stream-json is cumulative
-        // *within* a message — last-seen-per-message wins).
-        usageByMessage: new Map(),
-        lastUsageMessageId: null,
-        toolUseCount: 0,
-        // 0.7.4 (item B): set true when the first stream event (assistant
-        // text or tool_use) arrives for this pending. Fires
-        // `context.onFirstStream` once. Used by polygram to flip the
-        // status reaction QUEUED → THINKING when Claude actually starts
-        // producing output, not when the pending becomes queue head
-        // (which can be ~hundreds of ms before the first token).
-        firstStreamFired: false,
-        // 0.7.7: transient-retry support. We hold the prompt so we can
-        // re-write it on transient 5xx/429 if zero assistant content
-        // streamed yet. firstAssistantSeen flips on first assistant
-        // event with non-empty content OR tool_use blocks — once true,
-        // retry is no longer idempotent (we'd replay executed tools)
-        // and pm surfaces the error instead.
-        prompt,
-        transientRetries: 0,
-        firstAssistantSeen: false,
-      };
-
-      pending.fireFirstStream = () => {
-        if (pending.firstStreamFired) return;
-        pending.firstStreamFired = true;
-        try { context?.onFirstStream?.(); }
-        catch (err) { this.logger.error(`[${entry.label}] onFirstStream: ${err.message}`); }
-      };
-
-      const fireTimeout = (reason) => {
-        // Only act if we're still the head; if we've been shifted/killed
-        // already, this is a stale callback.
-        if (entry.pendingQueue[0] !== pending) return;
-        this._logEvent('turn-timeout', {
-          session_key: sessionKey,
-          chat_id: entry.chatId,
-          reason,
-        });
-        // Remove from queue, reject. Per Q1 policy: don't kill the
-        // subprocess — later pendings might still be fine.
-        entry.pendingQueue.shift();
-        pending.reject(new Error(reason));
-        // Activate next head if any, else idle.
-        if (entry.pendingQueue.length > 0) {
-          entry.pendingQueue[0].activate();
-        } else {
-          entry.inFlight = false;
-        }
-      };
-
-      const armIdle = () => setTimeout(
-        () => fireTimeout(`Timeout: ${timeoutMs / 1000}s idle with no Claude activity`),
-        timeoutMs,
-      );
-
-      pending.activate = () => {
-        if (activated) return;
-        activated = true;
-        pending.activated = true;
-        pending.startedAt = Date.now();
-        idleTimer = armIdle();
-        pending.idleTimer = idleTimer;
-        maxTimer = setTimeout(
-          () => fireTimeout(`Turn exceeded ${maxTurnMs / 1000}s wall-clock ceiling`),
-          maxTurnMs,
-        );
-        pending.maxTimer = maxTimer;
-        // Give callers a hook so they can transition user-visible state
-        // (e.g. status reaction "👀 queued" → "🤔 thinking") the moment
-        // Claude actually starts this pending, not the moment it arrived.
-        try { context?.onActivate?.(); }
-        catch (err) { this.logger.error(`[${entry.label}] onActivate: ${err.message}`); }
-      };
-
-      pending.resetIdleTimer = () => {
-        if (!activated) return;
-        if (idleTimer) clearTimeout(idleTimer);
-        idleTimer = armIdle();
-        pending.idleTimer = idleTimer;
-      };
-
-      // 0.7.6 (item H): enforce per-session queue cap. Drop the OLDEST
-      // non-active pending (index 1 — index 0 is the in-flight head and
-      // killing it mid-turn would corrupt Claude's state). The dropped
-      // pending's promise rejects so its handler (polygram.js) can
-      // surface a "couldn't keep up — message dropped" warning to the
-      // user. We drop AFTER pushing the new pending so the cap means
-      // "at most queueCap pendings live", not "refuse to enqueue past N".
-      // Refusing the new write would lose the most recent message —
-      // usually the one the user actually cares about — whereas
-      // dropping the oldest preserves recency at the cost of a stale
-      // queued turn that the user has likely moved past anyway.
-      entry.pendingQueue.push(pending);
-      entry.inFlight = true;
-      while (entry.pendingQueue.length > this.queueCap) {
-        // Splice at index 1 to leave the active head intact.
-        const dropped = entry.pendingQueue.splice(1, 1)[0];
-        if (!dropped) break;
-        dropped.clearTimers?.();
-        const dropErr = new Error(
-          `queue overflow: dropped (queue cap ${this.queueCap})`,
-        );
-        dropErr.code = 'QUEUE_OVERFLOW';
-        this._logEvent('queue-overflow-drop', {
-          session_key: sessionKey,
-          chat_id: entry.chatId,
-          queue_len: entry.pendingQueue.length,
-          source_msg_id: dropped.context?.sourceMsgId ?? null,
-        });
-        if (this.onQueueDrop) {
-          try { this.onQueueDrop(sessionKey, dropped, entry); }
-          catch (err) { this.logger.error(`[${entry.label}] onQueueDrop: ${err.message}`); }
-        }
-        dropped.reject(dropErr);
-      }
-
-      // If we're the only pending, activate immediately. Otherwise wait
-      // until the preceding pending is shifted out.
-      if (entry.pendingQueue.length === 1) pending.activate();
-
-      try {
-        entry.proc.stdin.write(JSON.stringify({
-          type: 'user',
-          message: { role: 'user', content: prompt },
-        }) + '\n');
-      } catch (err) {
-        const idx = entry.pendingQueue.indexOf(pending);
-        if (idx !== -1) entry.pendingQueue.splice(idx, 1);
-        if (entry.pendingQueue.length === 0) entry.inFlight = false;
-        pending.reject(err);
-      }
-    });
-  }
-
-  _logEvent(kind, detail) {
-    if (!this.db?.logEvent) return;
-    try { this.db.logEvent(kind, detail); }
-    catch (err) { this.logger.error(`[pm] logEvent ${kind} failed: ${err.message}`); }
-  }
-}
-
-module.exports = {
-  ProcessManager,
-  DEFAULT_CAP,
-  DEFAULT_QUEUE_CAP,
-  extractAssistantText,
-  sumUsage,
-};
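
For orientation, a minimal sketch of how this pool was driven, reconstructed from the constructor and send() contracts in the listing above. The spawn('claude', ...) arguments, the chatId value, and the streamer shape are illustrative assumptions, not polygram's actual wiring (the replacement lives behind the SDK pm in lib/sdk/process-manager.js); the option names, call shapes, and resolve/reject contracts come from the deleted file itself.

// Sketch under the assumptions above; the real CLI flags are elided, not known.
const { spawn } = require('child_process');
const { ProcessManager } = require('./lib/process-manager');

const pm = new ProcessManager({
  cap: 10,       // warm-pool bound (DEFAULT_CAP)
  queueCap: 50,  // per-session pending bound (DEFAULT_QUEUE_CAP)
  // spawnFn must return a subprocess that speaks stream-json over stdio.
  spawnFn: (sessionKey, ctx) => spawn('claude', [/* flags elided */], {
    stdio: ['pipe', 'pipe', 'inherit'],
  }),
  onStreamChunk: (sessionKey, partialText, entry) => {
    // Route output to the turn Claude is working on: the queue head.
    // `streamer` and its `update` method are assumed caller-side objects.
    entry.pendingQueue[0]?.context?.streamer?.update(partialText);
  },
});

async function runTurn(sessionKey, prompt, streamer) {
  await pm.getOrSpawn(sessionKey, { chatId: 123, label: sessionKey }); // chatId is a placeholder
  // Resolves on this turn's `result` event; rejects on idle or wall-clock
  // timeout, queue overflow (err.code === 'QUEUE_OVERFLOW'), or process exit.
  const { text, metrics, error } = await pm.send(sessionKey, prompt, {
    timeoutMs: 600_000,
    maxTurnMs: 30 * 60_000,
    context: { streamer, onActivate: () => {}, onFirstStream: () => {} },
  });
  if (error) throw new Error(error);
  return { text, metrics };
}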