polygram 0.9.0 → 0.10.0-rc.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +1 -1
- package/lib/db.js +14 -3
- package/lib/handlers/slash-commands.js +22 -12
- package/lib/model-costs.js +60 -0
- package/lib/process/factory.js +102 -0
- package/lib/process/process.js +193 -0
- package/lib/process/sdk-process.js +880 -0
- package/lib/process/tmux-process.js +1022 -0
- package/lib/process-manager.js +391 -0
- package/lib/sdk/callbacks.js +13 -5
- package/lib/tmux/log-tail.js +324 -0
- package/lib/tmux/orphan-sweep.js +79 -0
- package/lib/tmux/poll-scheduler.js +110 -0
- package/lib/tmux/session-log-parser.js +173 -0
- package/lib/tmux/tmux-runner.js +303 -0
- package/lib/tmux/tui-tool-input.js +62 -0
- package/migrations/011-pm-backend.sql +17 -0
- package/package.json +1 -1
- package/polygram.js +122 -33
- package/lib/sdk/process-manager.js +0 -1178
|
@@ -1,1178 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* ProcessManager — `@anthropic-ai/claude-agent-sdk` Query objects.
|
|
3
|
-
*
|
|
4
|
-
* The canonical pm impl post-0.9.0. Pre-0.9.0 polygram ran a dual-pm
|
|
5
|
-
* router (CLI subprocess pm + this SDK pm) behind env flags; the CLI
|
|
6
|
-
* variant was deleted with the rest of the migration safety net once
|
|
7
|
-
* both bots had soaked on SDK pm. See `lib/pm-interface.js` for the
|
|
8
|
-
* canonical contract and `docs/0.8.0-sdk-migration-plan.md` for the
|
|
9
|
-
* migration history.
|
|
10
|
-
*
|
|
11
|
-
* Architecture:
|
|
12
|
-
* - One Query per active sessionKey, held for the chat lifetime.
|
|
13
|
-
* - inputController is the writable end of an
|
|
14
|
-
* AsyncIterable<SDKUserMessage>; pm.send() pushes user messages
|
|
15
|
-
* onto it; the SDK's streamInput() consumes from the other end.
|
|
16
|
-
* - iteratePromise is the for-await loop over the Query's
|
|
17
|
-
* AsyncGenerator output. Wrapped in try/catch (D7 commitment).
|
|
18
|
-
* - pendingQueue maps N user messages → N SDKResultMessage events
|
|
19
|
-
* in FIFO order.
|
|
20
|
-
* - LRU eviction across the procs Map (cap = DEFAULT_CAP) via
|
|
21
|
-
* Query.close().
|
|
22
|
-
*
|
|
23
|
-
* Decisions encoded (v4 plan):
|
|
24
|
-
* D1 streaming: subscribe to SDKAssistantMessage (cumulative)
|
|
25
|
-
* D2 long-lived Query per chat
|
|
26
|
-
* D3 /effort via applyFlagSettings (no respawn)
|
|
27
|
-
* D5 Options.env SHADOW — buildSdkOptions enumerates everything
|
|
28
|
-
* D6 Query.close() is fast — 100ms shutdown timeout safe
|
|
29
|
-
* D7 killChat Promise.allSettled with 5s per-Query timeout
|
|
30
|
-
* D8 pm.drainQueue(errCode) owns drain logic
|
|
31
|
-
* D11 stdinLock dropped — SDK preserves FIFO at Query level
|
|
32
|
-
*/
|
|
33
|
-
|
|
34
|
-
'use strict';
|
|
35
|
-
|
|
36
|
-
const { query } = require('@anthropic-ai/claude-agent-sdk');
|
|
37
|
-
const { isTransientHttpError } = require('../error/classify');
|
|
38
|
-
|
|
39
|
-
// Pool sizing: maximum live sessions and maximum buffered user messages
// per session (defaults for the `cap` / `queueCap` constructor options).
const DEFAULT_CAP = 10;
const DEFAULT_QUEUE_CAP = 50;

// How long a parked getOrSpawn() waits for a free slot (5 minutes).
const DEFAULT_LRU_WAIT_MS = 5 * 60_000;

// Per-Query close ceiling (D7).
const DEFAULT_QUERY_CLOSE_TIMEOUT_MS = 5_000;

// Transient-error retry: delay before re-pushing the prompt, and how many
// times a single pending may be retried.
const DEFAULT_TRANSIENT_RETRY_DELAY_MS = 2_500;
const MAX_TRANSIENT_RETRIES = 1;

// Idle / wall-clock limits applied per pending send; the SDK has no
// built-in equivalent. The idle timer is reset on the activity events
// accepted by `_isActivityEvent` (H13 mitigation): assistant,
// partial_assistant, stream_event, tool_progress and user. It is NOT
// reset on api-retry or compact_boundary events.
const DEFAULT_IDLE_MS = 10 * 60_000;
const DEFAULT_MAX_TURN_MS = 30 * 60_000;
|
|
50
|
-
|
|
51
|
-
// ─── Helpers ────────────────────────────────────────────────────────
|
|
52
|
-
|
|
53
|
-
/**
 * Collect the user-visible text carried by an assistant event.
 *
 * Reads `event.message.content`, keeps only `{ type: 'text' }` blocks whose
 * `text` is a string, joins them with a blank line and trims the result.
 * A single trailing colon is replaced with an ellipsis, so
 * "Listing dependencies:" becomes "Listing dependencies…" and a trailing
 * assistant message does not read as half-formed.
 *
 * @param {object} [event] - Assistant event; may be nullish or malformed.
 * @returns {string} The combined text, or '' when there is no content array.
 */
function extractAssistantText(event) {
  const content = event?.message?.content;
  if (!Array.isArray(content)) {
    return '';
  }

  const combined = content
    .filter((block) => block && block.type === 'text' && typeof block.text === 'string')
    .map((block) => block.text)
    .join('\n\n')
    .trim();

  // Only a lone trailing colon is rewritten; "::" is left untouched.
  return combined.replace(/([^:]):\s*$/, '$1…');
}
|
|
74
|
-
|
|
75
|
-
/**
 * Total the snake_case token counters across per-message usage records.
 *
 * Only finite numeric values are added; missing records and non-numeric
 * fields are ignored.
 *
 * @param {Map<*, object>} usageByMessage - Usage objects keyed by assistant
 *   message id (only `.values()` is consumed).
 * @returns {{input_tokens: number, output_tokens: number,
 *   cache_creation_input_tokens: number, cache_read_input_tokens: number}}
 */
function sumUsage(usageByMessage) {
  const counters = [
    'input_tokens',
    'output_tokens',
    'cache_creation_input_tokens',
    'cache_read_input_tokens',
  ];
  const totals = Object.fromEntries(counters.map((name) => [name, 0]));

  for (const usage of usageByMessage.values()) {
    if (!usage) continue;
    for (const name of counters) {
      const amount = usage[name];
      if (Number.isFinite(amount)) {
        totals[name] += amount;
      }
    }
  }
  return totals;
}
|
|
100
|
-
|
|
101
|
-
/**
 * Build the writable end of an async-iterable message stream.
 *
 * The pm pushes user messages in through `push()`; the consumer (the SDK's
 * `query({ prompt })`) pulls them out of `iter` with `for await`.
 *
 * The buffer is bounded by `queueCap`: when a push makes it exceed the cap,
 * the OLDEST buffered (not yet consumed) messages are discarded and handed
 * to the callback registered through `onDrop()`.
 *
 * @param {object} [options]
 * @param {number} [options.queueCap=DEFAULT_QUEUE_CAP] - Max buffered messages.
 * @returns {{iter: object, push: function(*): void, close: function(): void,
 *   onDrop: function(function(*): void): void, size: number}}
 *   `push` throws an Error with `code: 'INPUT_CLOSED'` once the controller
 *   is closed; `size` is the current buffer length.
 */
function makeInputController({ queueCap = DEFAULT_QUEUE_CAP } = {}) {
  const buffered = [];       // messages waiting for the consumer
  const parkedReaders = [];  // resolvers of next() calls waiting for a message
  let isClosed = false;
  let dropListener = null;   // optional (droppedMessage) → void

  const endOfStream = () => ({ value: undefined, done: true });

  // Mark the stream closed and release every parked reader with `done`.
  const shutDown = () => {
    isClosed = true;
    while (parkedReaders.length > 0) {
      const release = parkedReaders.shift();
      release(endOfStream());
    }
  };

  const iter = {
    [Symbol.asyncIterator]() {
      return iter;
    },
    next() {
      // Buffered messages are still delivered after close.
      if (buffered.length > 0) {
        return Promise.resolve({ value: buffered.shift(), done: false });
      }
      if (isClosed) {
        return Promise.resolve(endOfStream());
      }
      return new Promise((resolve) => {
        parkedReaders.push(resolve);
      });
    },
    async return() {
      shutDown();
      return endOfStream();
    },
  };

  function push(msg) {
    if (isClosed) {
      const closedError = new Error('input controller closed');
      closedError.code = 'INPUT_CLOSED';
      throw closedError;
    }
    // A parked reader gets the message directly; nothing is buffered.
    if (parkedReaders.length > 0) {
      const deliver = parkedReaders.shift();
      deliver({ value: msg, done: false });
      return;
    }
    buffered.push(msg);
    while (buffered.length > queueCap) {
      const dropped = buffered.shift();
      if (!dropListener) continue;
      try {
        dropListener(dropped);
      } catch {
        /* swallow; pm logs separately */
      }
    }
  }

  function close() {
    if (!isClosed) {
      shutDown();
    }
  }

  function onDrop(cb) {
    dropListener = cb;
  }

  return {
    iter,
    push,
    close,
    onDrop,
    get size() {
      return buffered.length;
    },
  };
}
|
|
163
|
-
|
|
164
|
-
// ─── ProcessManager ────────────────────────────────────────────────
|
|
165
|
-
|
|
166
|
-
/**
|
|
167
|
-
* @anthropic-ai/claude-agent-sdk-backed ProcessManager. The canonical
|
|
168
|
-
* pm impl post-0.9.0. Implements the Pm interface
|
|
169
|
-
* (`lib/pm-interface.js`); optional methods exposed: `steer`,
|
|
170
|
-
* `setModel`, `applyFlagSettings`, `setPermissionMode`, `drainQueue`,
|
|
171
|
-
* `interrupt`, `resetSession`.
|
|
172
|
-
*
|
|
173
|
-
* @implements {import('../pm-interface.js').Pm}
|
|
174
|
-
*/
|
|
175
|
-
class ProcessManagerSdk {
|
|
176
|
-
constructor({
|
|
177
|
-
cap = DEFAULT_CAP,
|
|
178
|
-
queueCap = DEFAULT_QUEUE_CAP,
|
|
179
|
-
spawnFn, // (sessionKey, ctx) → SdkOptions OR { query, inputController }
|
|
180
|
-
db = null,
|
|
181
|
-
logger = console,
|
|
182
|
-
onInit = null,
|
|
183
|
-
onResult = null,
|
|
184
|
-
onClose = null,
|
|
185
|
-
onStreamChunk = null,
|
|
186
|
-
onToolUse = null,
|
|
187
|
-
onAssistantMessageStart = null,
|
|
188
|
-
// rc.47: fires when an SDK assistant message arrives with NO head
|
|
189
|
-
// pending in entry.pendingQueue — i.e. an autonomous turn (typical
|
|
190
|
-
// ScheduleWakeup case where the agent self-fires without a
|
|
191
|
-
// corresponding pm.send). Polygram wires this to a Telegram-send
|
|
192
|
-
// function that derives chat_id (always) and thread_id (when
|
|
193
|
-
// isolateTopics) from the sessionKey via getChatIdFromKey /
|
|
194
|
-
// getThreadIdFromKey, then forwards the text to the right chat/
|
|
195
|
-
// topic. Pre-rc.47 these messages were silently dropped at the
|
|
196
|
-
// `&& head` gate in _handleEvent. Subagent messages
|
|
197
|
-
// (parent_tool_use_id != null) are still filtered upstream.
|
|
198
|
-
onAutonomousAssistantMessage = null,
|
|
199
|
-
onCompactBoundary = null,
|
|
200
|
-
onQueueDrop = null,
|
|
201
|
-
onThinking = null,
|
|
202
|
-
queryCloseTimeoutMs = DEFAULT_QUERY_CLOSE_TIMEOUT_MS,
|
|
203
|
-
} = {}) {
|
|
204
|
-
if (!spawnFn) throw new Error('spawnFn required');
|
|
205
|
-
this.cap = cap;
|
|
206
|
-
this.queueCap = queueCap;
|
|
207
|
-
this.spawnFn = spawnFn;
|
|
208
|
-
this.db = db;
|
|
209
|
-
this.logger = logger;
|
|
210
|
-
this.queryCloseTimeoutMs = queryCloseTimeoutMs;
|
|
211
|
-
this.onInit = onInit;
|
|
212
|
-
this.onResult = onResult;
|
|
213
|
-
this.onClose = onClose;
|
|
214
|
-
this.onStreamChunk = onStreamChunk;
|
|
215
|
-
this.onToolUse = onToolUse;
|
|
216
|
-
this.onAssistantMessageStart = onAssistantMessageStart;
|
|
217
|
-
this.onAutonomousAssistantMessage = onAutonomousAssistantMessage;
|
|
218
|
-
this.onCompactBoundary = onCompactBoundary;
|
|
219
|
-
this.onQueueDrop = onQueueDrop;
|
|
220
|
-
this.onThinking = onThinking;
|
|
221
|
-
this.procs = new Map(); // sessionKey → entry
|
|
222
|
-
this._lruWaiters = []; // [{ resolve, reject, timer }]
|
|
223
|
-
}
|
|
224
|
-
|
|
225
|
-
has(sessionKey) { return this.procs.has(sessionKey); }
|
|
226
|
-
get(sessionKey) { return this.procs.get(sessionKey); }
|
|
227
|
-
get size() { return this.procs.size; }
|
|
228
|
-
keys() { return [...this.procs.keys()]; }
|
|
229
|
-
|
|
230
|
-
// ─── Spawn / pool ────────────────────────────────────────────────
|
|
231
|
-
|
|
232
|
-
async getOrSpawn(sessionKey, spawnContext) {
|
|
233
|
-
if (this._shuttingDown) {
|
|
234
|
-
throw new Error('shutdown');
|
|
235
|
-
}
|
|
236
|
-
const existing = this.procs.get(sessionKey);
|
|
237
|
-
if (existing && !existing.closed) return existing;
|
|
238
|
-
|
|
239
|
-
if (this.procs.size >= this.cap) {
|
|
240
|
-
const evicted = this._evictLRU();
|
|
241
|
-
if (!evicted) {
|
|
242
|
-
// All entries in-flight — park.
|
|
243
|
-
await this._awaitLruSlot();
|
|
244
|
-
if (this._shuttingDown) throw new Error('shutdown');
|
|
245
|
-
return this.getOrSpawn(sessionKey, spawnContext);
|
|
246
|
-
}
|
|
247
|
-
}
|
|
248
|
-
|
|
249
|
-
return this._spawnEntry(sessionKey, spawnContext);
|
|
250
|
-
}
|
|
251
|
-
|
|
252
|
-
_evictLRU() {
|
|
253
|
-
let oldest = null;
|
|
254
|
-
let oldestKey = null;
|
|
255
|
-
for (const [k, v] of this.procs.entries()) {
|
|
256
|
-
if (v.inFlight) continue;
|
|
257
|
-
if (!oldest || v.lastUsedTs < oldest.lastUsedTs) {
|
|
258
|
-
oldest = v;
|
|
259
|
-
oldestKey = k;
|
|
260
|
-
}
|
|
261
|
-
}
|
|
262
|
-
if (!oldest) {
|
|
263
|
-
this._logEvent('lru-full', { active: this.procs.size, cap: this.cap });
|
|
264
|
-
return false;
|
|
265
|
-
}
|
|
266
|
-
this._logEvent('evict', { session_key: oldestKey });
|
|
267
|
-
// Async tear-down with timeout (D6/D7).
|
|
268
|
-
this._closeEntry(oldest, 'evict').catch(() => {});
|
|
269
|
-
this.procs.delete(oldestKey);
|
|
270
|
-
return true;
|
|
271
|
-
}
|
|
272
|
-
|
|
273
|
-
async _awaitLruSlot() {
|
|
274
|
-
return new Promise((resolve, reject) => {
|
|
275
|
-
const timer = setTimeout(() => {
|
|
276
|
-
const idx = this._lruWaiters.findIndex((w) => w.resolve === resolve);
|
|
277
|
-
if (idx !== -1) this._lruWaiters.splice(idx, 1);
|
|
278
|
-
this._logEvent('lru-wait-timeout', { wait_ms: DEFAULT_LRU_WAIT_MS });
|
|
279
|
-
reject(new Error(`lru wait timed out after ${DEFAULT_LRU_WAIT_MS}ms`));
|
|
280
|
-
}, DEFAULT_LRU_WAIT_MS);
|
|
281
|
-
this._lruWaiters.push({ resolve, reject, timer });
|
|
282
|
-
this._logEvent('lru-wait', { active: this.procs.size, cap: this.cap });
|
|
283
|
-
});
|
|
284
|
-
}
|
|
285
|
-
|
|
286
|
-
_maybeSignalLruWaiter() {
|
|
287
|
-
const w = this._lruWaiters.shift();
|
|
288
|
-
if (w) { clearTimeout(w.timer); w.resolve(); }
|
|
289
|
-
}
|
|
290
|
-
|
|
291
|
-
_spawnEntry(sessionKey, spawnContext) {
|
|
292
|
-
const spawnResult = this.spawnFn(sessionKey, spawnContext);
|
|
293
|
-
// spawnFn may return either SdkOptions (production) or
|
|
294
|
-
// { query, inputController } (test fakeQuery shortcut).
|
|
295
|
-
let entryQuery, inputController;
|
|
296
|
-
if (spawnResult && typeof spawnResult.next === 'function') {
|
|
297
|
-
// It's already a Query instance (test path).
|
|
298
|
-
entryQuery = spawnResult;
|
|
299
|
-
inputController = makeInputController({ queueCap: this.queueCap });
|
|
300
|
-
// Test path: feed pushed messages back via streamInput.
|
|
301
|
-
// (fakeQuery's streamInput consumes the iter we hand it.)
|
|
302
|
-
entryQuery.streamInput?.(inputController.iter).catch(() => {});
|
|
303
|
-
} else if (spawnResult && spawnResult.query && spawnResult.inputController) {
|
|
304
|
-
// Pre-built (test convenience).
|
|
305
|
-
entryQuery = spawnResult.query;
|
|
306
|
-
inputController = spawnResult.inputController;
|
|
307
|
-
} else {
|
|
308
|
-
// Production: spawnFn returned SdkOptions.
|
|
309
|
-
inputController = makeInputController({ queueCap: this.queueCap });
|
|
310
|
-
entryQuery = query({
|
|
311
|
-
prompt: inputController.iter,
|
|
312
|
-
options: spawnResult || {},
|
|
313
|
-
});
|
|
314
|
-
}
|
|
315
|
-
|
|
316
|
-
const entry = {
|
|
317
|
-
sessionKey,
|
|
318
|
-
chatId: spawnContext?.chatId ?? null,
|
|
319
|
-
label: spawnContext?.label ?? sessionKey,
|
|
320
|
-
query: entryQuery,
|
|
321
|
-
inputController,
|
|
322
|
-
pendingQueue: [],
|
|
323
|
-
sessionId: null,
|
|
324
|
-
closed: false,
|
|
325
|
-
inFlight: false,
|
|
326
|
-
lastUsedTs: Date.now(),
|
|
327
|
-
iteratePromise: null,
|
|
328
|
-
};
|
|
329
|
-
|
|
330
|
-
inputController.onDrop((dropped) => {
|
|
331
|
-
// Bound by queueCap; oldest non-active pending was the one
|
|
332
|
-
// associated with this dropped message (head pending = active,
|
|
333
|
-
// its msg was already consumed by SDK; the message we're
|
|
334
|
-
// dropping is from a later pending).
|
|
335
|
-
this._handleQueueDrop(entry, dropped);
|
|
336
|
-
});
|
|
337
|
-
|
|
338
|
-
entry.iteratePromise = this._runIteration(entry).catch((err) => {
|
|
339
|
-
this.logger.error?.(`[${entry.label}] iteration crashed: ${err?.message || err}`);
|
|
340
|
-
this._failAllPendings(entry, err);
|
|
341
|
-
});
|
|
342
|
-
|
|
343
|
-
this.procs.set(sessionKey, entry);
|
|
344
|
-
return entry;
|
|
345
|
-
}
|
|
346
|
-
|
|
347
|
-
// ─── Iteration loop ──────────────────────────────────────────────
|
|
348
|
-
|
|
349
|
-
async _runIteration(entry) {
|
|
350
|
-
try {
|
|
351
|
-
for await (const msg of entry.query) {
|
|
352
|
-
await this._handleEvent(entry, msg);
|
|
353
|
-
if (entry.closed) break;
|
|
354
|
-
}
|
|
355
|
-
} catch (err) {
|
|
356
|
-
// SDK threw (AbortError, network, etc). Reject all pendings
|
|
357
|
-
// with the error; emit onClose; clean up.
|
|
358
|
-
this._failAllPendings(entry, err);
|
|
359
|
-
if (this.onClose) {
|
|
360
|
-
try { this.onClose(entry.sessionKey, err.code === 'AbortError' ? 0 : 1, entry); }
|
|
361
|
-
catch (e) { this.logger.error?.(`[${entry.label}] onClose: ${e.message}`); }
|
|
362
|
-
}
|
|
363
|
-
} finally {
|
|
364
|
-
entry.closed = true;
|
|
365
|
-
entry.inFlight = false;
|
|
366
|
-
this.procs.delete(entry.sessionKey);
|
|
367
|
-
this._maybeSignalLruWaiter();
|
|
368
|
-
}
|
|
369
|
-
}
|
|
370
|
-
|
|
371
|
-
async _handleEvent(entry, msg) {
|
|
372
|
-
const head = entry.pendingQueue[0];
|
|
373
|
-
|
|
374
|
-
// Reset idle timer on activity events (H13 allowlist).
|
|
375
|
-
if (head && this._isActivityEvent(msg)) {
|
|
376
|
-
head.resetIdleTimer?.();
|
|
377
|
-
}
|
|
378
|
-
|
|
379
|
-
if (msg.type === 'system' && msg.subtype === 'init') {
|
|
380
|
-
entry.sessionId = msg.session_id || null;
|
|
381
|
-
if (this.onInit) {
|
|
382
|
-
try { this.onInit(entry.sessionKey, msg, entry); }
|
|
383
|
-
catch (err) { this.logger.error?.(`[${entry.label}] onInit: ${err.message}`); }
|
|
384
|
-
}
|
|
385
|
-
return;
|
|
386
|
-
}
|
|
387
|
-
|
|
388
|
-
// rc.29: stream_event with content_block_start of type='thinking'.
|
|
389
|
-
// Fires DURING extended-thinking phase, BEFORE any text or tool_use
|
|
390
|
-
// content appears. Without this, polygram's reactor stays at QUEUED
|
|
391
|
-
// (👀) for the full thinking duration (10+ s with effort=high),
|
|
392
|
-
// then transitions THINKING → CODING in <1s when the model finally
|
|
393
|
-
// emits text/tool. UX target: 👀 → 🤔 transition fires within
|
|
394
|
-
// 100-500ms of pm.send, matching Claude Code CLI's "Thinking..."
|
|
395
|
-
// spinner timing.
|
|
396
|
-
//
|
|
397
|
-
// Requires `includePartialMessages: true` in SdkOptions; without
|
|
398
|
-
// it, this branch is unreachable (we never receive stream_event).
|
|
399
|
-
if (msg.type === 'stream_event' && head && !head.thinkingFired) {
|
|
400
|
-
const ev = msg.event;
|
|
401
|
-
const isThinkingStart = ev?.type === 'content_block_start'
|
|
402
|
-
&& ev?.content_block?.type === 'thinking';
|
|
403
|
-
if (isThinkingStart) {
|
|
404
|
-
head.thinkingFired = true;
|
|
405
|
-
if (this.onThinking) {
|
|
406
|
-
try { this.onThinking(entry.sessionKey, entry); }
|
|
407
|
-
catch (err) { this.logger.error?.(`[${entry.label}] onThinking: ${err.message}`); }
|
|
408
|
-
}
|
|
409
|
-
}
|
|
410
|
-
return;
|
|
411
|
-
}
|
|
412
|
-
|
|
413
|
-
if (msg.type === 'system' && msg.subtype === 'compact_boundary') {
|
|
414
|
-
// D6 / §5: surface compaction boundary to caller. Sequencing
|
|
415
|
-
// guarantee — we await this callback before processing the
|
|
416
|
-
// next event so a fresh assistant message after boundary
|
|
417
|
-
// routes to a new bubble cleanly.
|
|
418
|
-
if (this.onCompactBoundary) {
|
|
419
|
-
try { await this.onCompactBoundary(entry.sessionKey, msg, entry); }
|
|
420
|
-
catch (err) { this.logger.error?.(`[${entry.label}] onCompactBoundary: ${err.message}`); }
|
|
421
|
-
}
|
|
422
|
-
this._logEvent('compact-boundary', {
|
|
423
|
-
session_key: entry.sessionKey,
|
|
424
|
-
trigger: msg.compact_metadata?.trigger ?? null,
|
|
425
|
-
pre_tokens: msg.compact_metadata?.pre_tokens ?? null,
|
|
426
|
-
post_tokens: msg.compact_metadata?.post_tokens ?? null,
|
|
427
|
-
});
|
|
428
|
-
return;
|
|
429
|
-
}
|
|
430
|
-
|
|
431
|
-
if (msg.type === 'assistant' && !head) {
|
|
432
|
-
// rc.47: autonomous assistant message — no user-initiated
|
|
433
|
-
// pm.send is in flight. Typical cause: ScheduleWakeup fired,
|
|
434
|
-
// the agent emitted a self-driven response. Pre-rc.47 these
|
|
435
|
-
// were silently dropped by the `&& head` gate. Now we route
|
|
436
|
-
// them via onAutonomousAssistantMessage so polygram can
|
|
437
|
-
// forward the text to the right Telegram chat/topic.
|
|
438
|
-
if (msg.parent_tool_use_id != null) return;
|
|
439
|
-
const text = extractAssistantText(msg);
|
|
440
|
-
if (!text) return;
|
|
441
|
-
if (this.onAutonomousAssistantMessage) {
|
|
442
|
-
try { this.onAutonomousAssistantMessage(entry.sessionKey, msg, entry); }
|
|
443
|
-
catch (err) { this.logger.error?.(`[${entry.label}] onAutonomousAssistantMessage: ${err.message}`); }
|
|
444
|
-
}
|
|
445
|
-
return;
|
|
446
|
-
}
|
|
447
|
-
|
|
448
|
-
if (msg.type === 'assistant' && head) {
|
|
449
|
-
// Subagent filter (Phase 1 step 7): top-level only.
|
|
450
|
-
if (msg.parent_tool_use_id != null) return;
|
|
451
|
-
|
|
452
|
-
const messageId = msg.message?.id;
|
|
453
|
-
const added = extractAssistantText(msg);
|
|
454
|
-
const hasToolUse = Array.isArray(msg.message?.content)
|
|
455
|
-
&& msg.message.content.some((b) => b?.type === 'tool_use');
|
|
456
|
-
|
|
457
|
-
// First-stream fires when ANY assistant content arrives (text or tool_use).
|
|
458
|
-
if (added || hasToolUse) {
|
|
459
|
-
head.fireFirstStream?.();
|
|
460
|
-
head.firstAssistantSeen = true;
|
|
461
|
-
}
|
|
462
|
-
|
|
463
|
-
// Per-message-id usage (sum across at result time).
|
|
464
|
-
if (messageId != null && msg.message?.usage) {
|
|
465
|
-
head.usageByMessage.set(messageId, msg.message.usage);
|
|
466
|
-
}
|
|
467
|
-
|
|
468
|
-
// Tool-use accounting + onToolUse callback fan-out.
|
|
469
|
-
if (hasToolUse) {
|
|
470
|
-
for (const b of msg.message.content) {
|
|
471
|
-
if (b?.type === 'tool_use') {
|
|
472
|
-
head.toolUseCount++;
|
|
473
|
-
if (this.onToolUse && b.name) {
|
|
474
|
-
try { this.onToolUse(entry.sessionKey, b.name, entry); }
|
|
475
|
-
catch (err) { this.logger.error?.(`[${entry.label}] onToolUse: ${err.message}`); }
|
|
476
|
-
}
|
|
477
|
-
}
|
|
478
|
-
}
|
|
479
|
-
}
|
|
480
|
-
|
|
481
|
-
// rc.45: multi-segment same-bubble streaming.
|
|
482
|
-
//
|
|
483
|
-
// Pre-rc.45: every message-id transition fired
|
|
484
|
-
// onAssistantMessageStart (= forceNewMessage on the streamer),
|
|
485
|
-
// producing a fresh bubble per SDK assistant message even
|
|
486
|
-
// though the user only sent one input. Tool-heavy turns
|
|
487
|
-
// showed 2-6 bubbles per logical user-input cycle.
|
|
488
|
-
//
|
|
489
|
-
// rc.45: only fire onAssistantMessageStart when the user
|
|
490
|
-
// STEERED (injectUserMessage set pendingSteerCausesNewBubble).
|
|
491
|
-
// Otherwise, accumulate the prior segment's text into
|
|
492
|
-
// priorMessagesText and append the new segment to it — same
|
|
493
|
-
// bubble grows naturally. Same-message-id events (cumulative
|
|
494
|
-
// streaming within a single SDKAssistantMessage) still
|
|
495
|
-
// REPLACE the segment text; the carry-over only kicks in on
|
|
496
|
-
// message-id TRANSITIONS.
|
|
497
|
-
if (added) {
|
|
498
|
-
const isNewMessage = head.lastAssistantMessageId != null
|
|
499
|
-
&& messageId != null
|
|
500
|
-
&& head.lastAssistantMessageId !== messageId
|
|
501
|
-
&& head.streamText
|
|
502
|
-
&& head.streamText.length > 0;
|
|
503
|
-
if (isNewMessage) {
|
|
504
|
-
if (head.pendingSteerCausesNewBubble) {
|
|
505
|
-
// Steered: fire onAssistantMessageStart so the streamer
|
|
506
|
-
// forceNewMessage's. Reset the prior carry-over so the
|
|
507
|
-
// new bubble starts clean.
|
|
508
|
-
if (this.onAssistantMessageStart) {
|
|
509
|
-
try { await this.onAssistantMessageStart(entry.sessionKey, entry); }
|
|
510
|
-
catch (err) { this.logger.error?.(`[${entry.label}] onAssistantMessageStart: ${err.message}`); }
|
|
511
|
-
}
|
|
512
|
-
head.priorMessagesText = '';
|
|
513
|
-
head.pendingSteerCausesNewBubble = false;
|
|
514
|
-
} else {
|
|
515
|
-
// No steer: roll the just-finished segment's full text
|
|
516
|
-
// into priorMessagesText so the new segment appends to it.
|
|
517
|
-
head.priorMessagesText = head.streamText;
|
|
518
|
-
}
|
|
519
|
-
}
|
|
520
|
-
if (messageId != null) head.lastAssistantMessageId = messageId;
|
|
521
|
-
// Compose visible bubble text: carry-over (prior segments in
|
|
522
|
-
// this bubble) + the current segment's cumulative text.
|
|
523
|
-
head.streamText = head.priorMessagesText
|
|
524
|
-
? head.priorMessagesText + '\n\n' + added
|
|
525
|
-
: added;
|
|
526
|
-
if (this.onStreamChunk) {
|
|
527
|
-
try { this.onStreamChunk(entry.sessionKey, head.streamText, entry); }
|
|
528
|
-
catch (err) { this.logger.error?.(`[${entry.label}] onStreamChunk: ${err.message}`); }
|
|
529
|
-
}
|
|
530
|
-
}
|
|
531
|
-
return;
|
|
532
|
-
}
|
|
533
|
-
|
|
534
|
-
if (msg.type === 'result' && head) {
|
|
535
|
-
// Transient retry (D11 / 0.7.6 H): retry once if the turn
|
|
536
|
-
// hit a 5xx/429 BEFORE producing any assistant content.
|
|
537
|
-
const errSignal = msg.error || msg.subtype;
|
|
538
|
-
const isError = msg.subtype !== 'success';
|
|
539
|
-
const shouldRetry = isError
|
|
540
|
-
&& !head.firstAssistantSeen
|
|
541
|
-
&& head.transientRetries < MAX_TRANSIENT_RETRIES
|
|
542
|
-
&& head.prompt != null
|
|
543
|
-
&& isTransientHttpError({ message: errSignal, subtype: msg.subtype });
|
|
544
|
-
if (shouldRetry) {
|
|
545
|
-
head.transientRetries++;
|
|
546
|
-
this._logEvent('transient-retry', {
|
|
547
|
-
session_key: entry.sessionKey,
|
|
548
|
-
chat_id: entry.chatId,
|
|
549
|
-
attempt: head.transientRetries,
|
|
550
|
-
subtype: msg.subtype,
|
|
551
|
-
error: typeof errSignal === 'string' ? errSignal.slice(0, 200) : null,
|
|
552
|
-
});
|
|
553
|
-
// Reset accumulators; arm idle timer; sleep then re-push.
|
|
554
|
-
head.usageByMessage = new Map();
|
|
555
|
-
head.toolUseCount = 0;
|
|
556
|
-
head.streamText = '';
|
|
557
|
-
head.lastAssistantMessageId = null;
|
|
558
|
-
head.resetIdleTimer?.();
|
|
559
|
-
setTimeout(() => {
|
|
560
|
-
if (entry.pendingQueue[0] !== head || entry.closed) return;
|
|
561
|
-
try {
|
|
562
|
-
entry.inputController.push({
|
|
563
|
-
type: 'user',
|
|
564
|
-
message: { role: 'user', content: head.prompt },
|
|
565
|
-
parent_tool_use_id: null,
|
|
566
|
-
});
|
|
567
|
-
} catch (err) {
|
|
568
|
-
entry.pendingQueue.shift();
|
|
569
|
-
head.clearTimers();
|
|
570
|
-
head.reject(err);
|
|
571
|
-
}
|
|
572
|
-
}, DEFAULT_TRANSIENT_RETRY_DELAY_MS);
|
|
573
|
-
return;
|
|
574
|
-
}
|
|
575
|
-
|
|
576
|
-
// Normal resolution.
|
|
577
|
-
entry.pendingQueue.shift();
|
|
578
|
-
head.clearTimers();
|
|
579
|
-
if (this.onResult) {
|
|
580
|
-
try { this.onResult(entry.sessionKey, msg, entry, head); }
|
|
581
|
-
catch (err) { this.logger.error?.(`[${entry.label}] onResult: ${err.message}`); }
|
|
582
|
-
}
|
|
583
|
-
const usageTotals = sumUsage(head.usageByMessage);
|
|
584
|
-
head.resolve({
|
|
585
|
-
text: msg.result || '',
|
|
586
|
-
sessionId: msg.session_id,
|
|
587
|
-
cost: msg.total_cost_usd,
|
|
588
|
-
duration: msg.duration_ms,
|
|
589
|
-
error: msg.subtype === 'success' ? null : (msg.error || msg.subtype),
|
|
590
|
-
metrics: {
|
|
591
|
-
inputTokens: usageTotals.input_tokens,
|
|
592
|
-
outputTokens: usageTotals.output_tokens,
|
|
593
|
-
cacheCreationTokens: usageTotals.cache_creation_input_tokens,
|
|
594
|
-
cacheReadTokens: usageTotals.cache_read_input_tokens,
|
|
595
|
-
numAssistantMessages: head.usageByMessage.size,
|
|
596
|
-
numToolUses: head.toolUseCount,
|
|
597
|
-
resultSubtype: msg.subtype || null,
|
|
598
|
-
},
|
|
599
|
-
});
|
|
600
|
-
|
|
601
|
-
// Activate next head or settle idle.
|
|
602
|
-
if (entry.pendingQueue.length > 0) {
|
|
603
|
-
entry.pendingQueue[0].activate();
|
|
604
|
-
} else {
|
|
605
|
-
entry.inFlight = false;
|
|
606
|
-
this._maybeSignalLruWaiter();
|
|
607
|
-
}
|
|
608
|
-
return;
|
|
609
|
-
}
|
|
610
|
-
}
|
|
611
|
-
|
|
612
|
-
_isActivityEvent(msg) {
|
|
613
|
-
if (!msg?.type) return false;
|
|
614
|
-
if (msg.type === 'assistant') return true;
|
|
615
|
-
if (msg.type === 'partial_assistant') return true;
|
|
616
|
-
if (msg.type === 'stream_event') return true;
|
|
617
|
-
if (msg.type === 'tool_progress') return true;
|
|
618
|
-
if (msg.type === 'user') return true; // tool_result bridge events
|
|
619
|
-
return false;
|
|
620
|
-
}
|
|
621
|
-
|
|
622
|
-
// ─── Send ────────────────────────────────────────────────────────
|
|
623
|
-
|
|
624
|
-
send(sessionKey, prompt, {
|
|
625
|
-
timeoutMs = DEFAULT_IDLE_MS,
|
|
626
|
-
maxTurnMs = DEFAULT_MAX_TURN_MS,
|
|
627
|
-
context = {},
|
|
628
|
-
} = {}) {
|
|
629
|
-
return new Promise((resolve, reject) => {
|
|
630
|
-
const entry = this.procs.get(sessionKey);
|
|
631
|
-
if (!entry || entry.closed) {
|
|
632
|
-
return reject(new Error('No process for session'));
|
|
633
|
-
}
|
|
634
|
-
|
|
635
|
-
entry.lastUsedTs = Date.now();
|
|
636
|
-
|
|
637
|
-
let idleTimer = null;
|
|
638
|
-
let maxTimer = null;
|
|
639
|
-
let visibilityTimer = null;
|
|
640
|
-
let activated = false;
|
|
641
|
-
|
|
642
|
-
// rc.58: visibility heartbeat for long silent tool runs.
|
|
643
|
-
//
|
|
644
|
-
// The SDK can stay silent for minutes when the agent is running
|
|
645
|
-
// a single long Bash/Playwright tool (gcloud auth + Chrome
|
|
646
|
-
// OAuth login dance, multi-step Playwright session, etc.).
|
|
647
|
-
// During the silence:
|
|
648
|
-
// - polygram receives no onChunk, no onToolUse, no onResult
|
|
649
|
-
// - reactor's stall (30s) and freeze (180s) timers fire IF
|
|
650
|
-
// the reactor was in a STALL_PROMOTABLE state at last
|
|
651
|
-
// heartbeat — but if it was just stopped or in AUTOSTEERED
|
|
652
|
-
// terminal state, no visible signal fires
|
|
653
|
-
// - user sees "stuck" with no emoji/typing change for minutes
|
|
654
|
-
//
|
|
655
|
-
// This timer pings the head pending's reactor every 30s while
|
|
656
|
-
// the pending is in flight. It does NOT change the reactor's
|
|
657
|
-
// visible state (heartbeat is idempotent — re-arms timers
|
|
658
|
-
// without flushing) — but it ensures the stall/freeze cycle
|
|
659
|
-
// keeps producing visible promotions even if SDK silence
|
|
660
|
-
// would otherwise let the reactor go fully quiet.
|
|
661
|
-
//
|
|
662
|
-
// Discovery: 2026-05-05 09:58:38 → 10:10:36 in Shumabit@UMI —
|
|
663
|
-
// 12-min silent gap during a Playwright/Chrome OAuth dance,
|
|
664
|
-
// ZERO reactor-state events logged for that chat in that
|
|
665
|
-
// window. User reported "no typing, no reactions". Adding
|
|
666
|
-
// this heartbeat fixes the silent-window class of UX gap
|
|
667
|
-
// without changing agent behavior.
|
|
668
|
-
const VISIBILITY_HEARTBEAT_MS = 30 * 1000;
|
|
669
|
-
const armVisibilityTimer = () => {
|
|
670
|
-
if (visibilityTimer) clearInterval(visibilityTimer);
|
|
671
|
-
visibilityTimer = setInterval(() => {
|
|
672
|
-
if (!entry.pendingQueue.includes(pending)) {
|
|
673
|
-
// Pending no longer in queue → resolved/rejected/dropped.
|
|
674
|
-
// Defensive: clear ourselves even though clearTimers()
|
|
675
|
-
// SHOULD have been called.
|
|
676
|
-
if (visibilityTimer) { clearInterval(visibilityTimer); visibilityTimer = null; }
|
|
677
|
-
return;
|
|
678
|
-
}
|
|
679
|
-
const r = pending.context?.reactor;
|
|
680
|
-
if (r && typeof r.heartbeat === 'function') {
|
|
681
|
-
try { r.heartbeat(); } catch { /* defensive */ }
|
|
682
|
-
}
|
|
683
|
-
}, VISIBILITY_HEARTBEAT_MS);
|
|
684
|
-
visibilityTimer.unref?.();
|
|
685
|
-
};
|
|
686
|
-
|
|
687
|
-
const clearTimers = () => {
|
|
688
|
-
if (idleTimer) { clearTimeout(idleTimer); idleTimer = null; }
|
|
689
|
-
if (maxTimer) { clearTimeout(maxTimer); maxTimer = null; }
|
|
690
|
-
if (visibilityTimer) { clearInterval(visibilityTimer); visibilityTimer = null; }
|
|
691
|
-
};
|
|
692
|
-
|
|
693
|
-
const pending = {
|
|
694
|
-
resolve: (r) => { clearTimers(); resolve(r); },
|
|
695
|
-
reject: (e) => { clearTimers(); reject(e); },
|
|
696
|
-
clearTimers,
|
|
697
|
-
startedAt: null,
|
|
698
|
-
streamText: '',
|
|
699
|
-
context,
|
|
700
|
-
idleTimer: null,
|
|
701
|
-
maxTimer: null,
|
|
702
|
-
activated: false,
|
|
703
|
-
usageByMessage: new Map(),
|
|
704
|
-
lastUsageMessageId: null,
|
|
705
|
-
toolUseCount: 0,
|
|
706
|
-
firstStreamFired: false,
|
|
707
|
-
prompt,
|
|
708
|
-
transientRetries: 0,
|
|
709
|
-
firstAssistantSeen: false,
|
|
710
|
-
thinkingFired: false, // rc.29: extended-thinking → reactor THINKING
|
|
711
|
-
// rc.45: multi-segment same-bubble streaming. priorMessagesText
|
|
712
|
-
// accumulates the full text of completed assistant-message
|
|
713
|
-
// segments in the SAME bubble. On message-id transition WITHOUT
|
|
714
|
-
// a steer, the just-finished segment rolls into priorMessagesText
|
|
715
|
-
// and the new segment's text appends to it (one bubble grows).
|
|
716
|
-
// On message-id transition WITH a steer, priorMessagesText
|
|
717
|
-
// resets and a new bubble starts. pendingSteerCausesNewBubble is
|
|
718
|
-
// set by injectUserMessage; consumed + cleared on the next
|
|
719
|
-
// message-id transition.
|
|
720
|
-
priorMessagesText: '',
|
|
721
|
-
pendingSteerCausesNewBubble: false,
|
|
722
|
-
};
|
|
723
|
-
|
|
724
|
-
pending.fireFirstStream = () => {
|
|
725
|
-
if (pending.firstStreamFired) return;
|
|
726
|
-
pending.firstStreamFired = true;
|
|
727
|
-
try { context?.onFirstStream?.(); }
|
|
728
|
-
catch (err) { this.logger.error?.(`[${entry.label}] onFirstStream: ${err.message}`); }
|
|
729
|
-
};
|
|
730
|
-
|
|
731
|
-
const fireTimeout = (reason) => {
|
|
732
|
-
if (entry.pendingQueue[0] !== pending) return;
|
|
733
|
-
this._logEvent('turn-timeout', {
|
|
734
|
-
session_key: sessionKey,
|
|
735
|
-
chat_id: entry.chatId,
|
|
736
|
-
reason,
|
|
737
|
-
});
|
|
738
|
-
entry.pendingQueue.shift();
|
|
739
|
-
// On idle/wall-clock fire: cancel SDK side first.
|
|
740
|
-
entry.query.interrupt?.().catch(() => {});
|
|
741
|
-
pending.reject(new Error(reason));
|
|
742
|
-
if (entry.pendingQueue.length > 0) {
|
|
743
|
-
entry.pendingQueue[0].activate();
|
|
744
|
-
} else {
|
|
745
|
-
entry.inFlight = false;
|
|
746
|
-
}
|
|
747
|
-
};
|
|
748
|
-
|
|
749
|
-
const armIdle = () => setTimeout(
|
|
750
|
-
() => fireTimeout(`Timeout: ${timeoutMs / 1000}s idle with no Claude activity`),
|
|
751
|
-
timeoutMs,
|
|
752
|
-
);
|
|
753
|
-
|
|
754
|
-
pending.activate = () => {
|
|
755
|
-
if (activated) return;
|
|
756
|
-
activated = true;
|
|
757
|
-
pending.activated = true;
|
|
758
|
-
pending.startedAt = Date.now();
|
|
759
|
-
idleTimer = armIdle();
|
|
760
|
-
pending.idleTimer = idleTimer;
|
|
761
|
-
maxTimer = setTimeout(
|
|
762
|
-
() => fireTimeout(`Turn exceeded ${maxTurnMs / 1000}s wall-clock ceiling`),
|
|
763
|
-
maxTurnMs,
|
|
764
|
-
);
|
|
765
|
-
pending.maxTimer = maxTimer;
|
|
766
|
-
// rc.58: arm the visibility heartbeat at activation.
|
|
767
|
-
armVisibilityTimer();
|
|
768
|
-
try { context?.onActivate?.(); }
|
|
769
|
-
catch (err) { this.logger.error?.(`[${entry.label}] onActivate: ${err.message}`); }
|
|
770
|
-
};
|
|
771
|
-
|
|
772
|
-
pending.resetIdleTimer = () => {
|
|
773
|
-
if (!activated) return;
|
|
774
|
-
if (idleTimer) clearTimeout(idleTimer);
|
|
775
|
-
idleTimer = armIdle();
|
|
776
|
-
pending.idleTimer = idleTimer;
|
|
777
|
-
};
|
|
778
|
-
|
|
779
|
-
// Push into queue, enforce queueCap.
|
|
780
|
-
entry.pendingQueue.push(pending);
|
|
781
|
-
entry.inFlight = true;
|
|
782
|
-
while (entry.pendingQueue.length > this.queueCap) {
|
|
783
|
-
const dropped = entry.pendingQueue.splice(1, 1)[0];
|
|
784
|
-
if (!dropped) break;
|
|
785
|
-
dropped.clearTimers?.();
|
|
786
|
-
const dropErr = Object.assign(
|
|
787
|
-
new Error(`queue overflow: dropped (queue cap ${this.queueCap})`),
|
|
788
|
-
{ code: 'QUEUE_OVERFLOW' },
|
|
789
|
-
);
|
|
790
|
-
this._logEvent('queue-overflow-drop', {
|
|
791
|
-
session_key: sessionKey,
|
|
792
|
-
chat_id: entry.chatId,
|
|
793
|
-
queue_len: entry.pendingQueue.length,
|
|
794
|
-
source_msg_id: dropped.context?.sourceMsgId ?? null,
|
|
795
|
-
});
|
|
796
|
-
if (this.onQueueDrop) {
|
|
797
|
-
try { this.onQueueDrop(sessionKey, dropped, entry); }
|
|
798
|
-
catch (err) { this.logger.error?.(`[${entry.label}] onQueueDrop: ${err.message}`); }
|
|
799
|
-
}
|
|
800
|
-
dropped.reject(dropErr);
|
|
801
|
-
}
|
|
802
|
-
|
|
803
|
-
if (entry.pendingQueue.length === 1) pending.activate();
|
|
804
|
-
|
|
805
|
-
try {
|
|
806
|
-
entry.inputController.push({
|
|
807
|
-
type: 'user',
|
|
808
|
-
message: { role: 'user', content: prompt },
|
|
809
|
-
parent_tool_use_id: null,
|
|
810
|
-
});
|
|
811
|
-
} catch (err) {
|
|
812
|
-
const idx = entry.pendingQueue.indexOf(pending);
|
|
813
|
-
if (idx !== -1) entry.pendingQueue.splice(idx, 1);
|
|
814
|
-
if (entry.pendingQueue.length === 0) entry.inFlight = false;
|
|
815
|
-
pending.reject(err);
|
|
816
|
-
}
|
|
817
|
-
});
|
|
818
|
-
}
|
|
819
|
-
|
|
820
|
-
// ─── Per-session control surface ─────────────────────────────────
|
|
821
|
-
|
|
822
|
-
/**
|
|
823
|
-
* Cancel the in-flight turn. Other queued pendings are NOT
|
|
824
|
-
* auto-rejected (use drainQueue for that). Polygram's /stop
|
|
825
|
-
* handler typically calls interrupt() then drainQueue().
|
|
826
|
-
*/
|
|
827
|
-
async interrupt(sessionKey) {
|
|
828
|
-
const entry = this.procs.get(sessionKey);
|
|
829
|
-
if (!entry || entry.closed) return false;
|
|
830
|
-
try { await entry.query.interrupt?.(); }
|
|
831
|
-
catch (err) {
|
|
832
|
-
this.logger.error?.(`[${entry.label}] interrupt: ${err.message}`);
|
|
833
|
-
return false;
|
|
834
|
-
}
|
|
835
|
-
this._logEvent('interrupt-applied', { session_key: sessionKey });
|
|
836
|
-
return true;
|
|
837
|
-
}
|
|
838
|
-
|
|
839
|
-
/**
|
|
840
|
-
* Reject every pending (head + queued) with a typed
|
|
841
|
-
* `Error('drained:' + errCode)`. Encapsulates the drain inside
|
|
842
|
-
* pm so polygram doesn't poke at pendingQueue (D8 / seam H).
|
|
843
|
-
*/
|
|
844
|
-
drainQueue(sessionKey, errCode = 'INTERRUPTED') {
|
|
845
|
-
const entry = this.procs.get(sessionKey);
|
|
846
|
-
if (!entry) return 0;
|
|
847
|
-
let count = 0;
|
|
848
|
-
while (entry.pendingQueue.length > 0) {
|
|
849
|
-
const p = entry.pendingQueue.shift();
|
|
850
|
-
p.clearTimers?.();
|
|
851
|
-
const err = Object.assign(new Error(`drained:${errCode}`), { code: errCode });
|
|
852
|
-
try { p.reject(err); } catch { /* swallow */ }
|
|
853
|
-
count++;
|
|
854
|
-
}
|
|
855
|
-
entry.inFlight = false;
|
|
856
|
-
this._logEvent('drain-queue', { session_key: sessionKey, code: errCode, count });
|
|
857
|
-
return count;
|
|
858
|
-
}
|
|
859
|
-
|
|
860
|
-
async setModel(sessionKey, model) {
|
|
861
|
-
const entry = this.procs.get(sessionKey);
|
|
862
|
-
if (!entry || entry.closed) return false;
|
|
863
|
-
try { await entry.query.setModel?.(model); return true; }
|
|
864
|
-
catch (err) {
|
|
865
|
-
this.logger.error?.(`[${entry.label}] setModel: ${err.message}`);
|
|
866
|
-
return false;
|
|
867
|
-
}
|
|
868
|
-
}
|
|
869
|
-
|
|
870
|
-
async setPermissionMode(sessionKey, mode) {
|
|
871
|
-
const entry = this.procs.get(sessionKey);
|
|
872
|
-
if (!entry || entry.closed) return false;
|
|
873
|
-
try { await entry.query.setPermissionMode?.(mode); return true; }
|
|
874
|
-
catch (err) {
|
|
875
|
-
this.logger.error?.(`[${entry.label}] setPermissionMode: ${err.message}`);
|
|
876
|
-
return false;
|
|
877
|
-
}
|
|
878
|
-
}
|
|
879
|
-
|
|
880
|
-
async applyFlagSettings(sessionKey, settings) {
|
|
881
|
-
const entry = this.procs.get(sessionKey);
|
|
882
|
-
if (!entry || entry.closed) return false;
|
|
883
|
-
try { await entry.query.applyFlagSettings?.(settings); return true; }
|
|
884
|
-
catch (err) {
|
|
885
|
-
this.logger.error?.(`[${entry.label}] applyFlagSettings: ${err.message}`);
|
|
886
|
-
return false;
|
|
887
|
-
}
|
|
888
|
-
}
|
|
889
|
-
|
|
890
|
-
/**
|
|
891
|
-
* 0.8.0 Phase 2 step 1 — mid-turn steer. Pushes a user message
|
|
892
|
-
* onto the inputController with priority: 'now' so the SDK
|
|
893
|
-
* processes it ahead of any queued normal-priority messages.
|
|
894
|
-
*
|
|
895
|
-
* Phase 0 gate 6 was DEFER — exact "skip remaining sibling
|
|
896
|
-
* tool_uses" semantic must be verified live. If the SDK doesn't
|
|
897
|
-
* skip siblings on priority:'now', polygram-side `/steer` falls
|
|
898
|
-
* back to interrupt() + push (slightly different UX but still
|
|
899
|
-
* works — the in-flight tool batch finishes, then the steer
|
|
900
|
-
* message is the next user turn).
|
|
901
|
-
*
|
|
902
|
-
* shouldQuery: true (default) → steer triggers an immediate
|
|
903
|
-
* response. shouldQuery: false → "append context, don't trigger"
|
|
904
|
-
* — useful when steer is informational only.
|
|
905
|
-
*
|
|
906
|
-
* Returns true if push succeeded; false if session not found or
|
|
907
|
-
* input controller closed.
|
|
908
|
-
*/
|
|
909
|
-
steer(sessionKey, text, { shouldQuery = false } = {}) {
|
|
910
|
-
const entry = this.procs.get(sessionKey);
|
|
911
|
-
if (!entry || entry.closed) return false;
|
|
912
|
-
try {
|
|
913
|
-
// 0.8.0-rc.7 (per v4 plan §0 row 9 + Phase 2 step 1's original
|
|
914
|
-
// shape): push with `shouldQuery: false` so the SDK appends to
|
|
915
|
-
// the transcript without trying to terminate the in-flight turn.
|
|
916
|
-
// The previous default `shouldQuery: true` triggered the CLI
|
|
917
|
-
// binary's `m87` gate (transcript well-formedness check) which
|
|
918
|
-
// emitted `result.subtype = error_during_execution` whenever a
|
|
919
|
-
// plain-text user message arrived while the assistant was mid-
|
|
920
|
-
// tool-use. With shouldQuery=false the message merges into the
|
|
921
|
-
// next natural user turn — the in-flight tools complete first,
|
|
922
|
-
// then the assistant sees the steered context.
|
|
923
|
-
//
|
|
924
|
-
// parent_tool_use_id is required by SDKUserMessage type
|
|
925
|
-
// (sdk.d.ts:3479-3498). The SDK runtime checks `!== null` in
|
|
926
|
-
// multiple places; omitting it falls through to wrong handling
|
|
927
|
-
// branches. The SDK's own `mz.send()` and `pz` replay set it
|
|
928
|
-
// to null explicitly.
|
|
929
|
-
entry.inputController.push({
|
|
930
|
-
type: 'user',
|
|
931
|
-
message: { role: 'user', content: text },
|
|
932
|
-
parent_tool_use_id: null,
|
|
933
|
-
priority: 'now',
|
|
934
|
-
shouldQuery,
|
|
935
|
-
});
|
|
936
|
-
this._logEvent('steer', {
|
|
937
|
-
session_key: sessionKey,
|
|
938
|
-
chat_id: entry.chatId,
|
|
939
|
-
should_query: shouldQuery,
|
|
940
|
-
text_len: text?.length ?? 0,
|
|
941
|
-
});
|
|
942
|
-
return true;
|
|
943
|
-
} catch (err) {
|
|
944
|
-
this.logger.error?.(`[${entry.label}] steer: ${err.message}`);
|
|
945
|
-
return false;
|
|
946
|
-
}
|
|
947
|
-
}
|
|
948
|
-
|
|
949
|
-
/**
|
|
950
|
-
* 0.8.0-rc.42 — native autosteer / queue. Push a user message
|
|
951
|
-
* directly onto the SDK's input controller. The SDK manages
|
|
952
|
-
* absorption / queueing per the `priority` hint:
|
|
953
|
-
* - 'now': abort current turn (terminal_reason='aborted_streaming')
|
|
954
|
-
* and start a fresh turn for this message (verified U7
|
|
955
|
-
* spike 2026-05-01).
|
|
956
|
-
* - 'next': absorb into current turn at next natural pause
|
|
957
|
-
* (between tool calls / after subagent return / etc.)
|
|
958
|
-
* — same UX as the deleted autosteer-buffer + PostToolBatch
|
|
959
|
-
* flow, but the SDK manages the queue. ONE result event
|
|
960
|
-
* for the whole chain.
|
|
961
|
-
* - 'later': queue for after current turn ends. SEPARATE result
|
|
962
|
-
* event per absorbed message. Clean per-msg lifecycle.
|
|
963
|
-
* - undefined: same as 'next'.
|
|
964
|
-
*
|
|
965
|
-
* Returns true on push success, false if no entry / closed.
|
|
966
|
-
*
|
|
967
|
-
* NOTE: this does NOT push a polygram pending into pendingQueue.
|
|
968
|
-
* The message bypasses pm's per-pending bookkeeping (cost-row,
|
|
969
|
-
* idle-timer, wall-clock cap) — those still attach to the
|
|
970
|
-
* trigger pending of the in-flight turn. For 'later' priority,
|
|
971
|
-
* the SDK will fire its own SDKResultMessage for the followup;
|
|
972
|
-
* polygram's onResult only sees one of these per active pending.
|
|
973
|
-
* Callers wanting per-msg accounting must use pm.send() instead.
|
|
974
|
-
*/
|
|
975
|
-
injectUserMessage(sessionKey, { content, priority = 'next', shouldQuery, parent_tool_use_id = null } = {}) {
|
|
976
|
-
const entry = this.procs.get(sessionKey);
|
|
977
|
-
if (!entry || entry.closed) return false;
|
|
978
|
-
if (typeof content !== 'string' || !content) {
|
|
979
|
-
throw new TypeError('injectUserMessage: content (string) required');
|
|
980
|
-
}
|
|
981
|
-
try {
|
|
982
|
-
const msg = {
|
|
983
|
-
type: 'user',
|
|
984
|
-
message: { role: 'user', content },
|
|
985
|
-
parent_tool_use_id,
|
|
986
|
-
};
|
|
987
|
-
if (priority !== undefined) msg.priority = priority;
|
|
988
|
-
if (shouldQuery !== undefined) msg.shouldQuery = shouldQuery;
|
|
989
|
-
entry.inputController.push(msg);
|
|
990
|
-
// rc.45: signal the streamer to start a new bubble at the next
|
|
991
|
-
// assistant-message-id transition. Without this flag,
|
|
992
|
-
// _handleEvent would APPEND the post-steer assistant text into
|
|
993
|
-
// the same bubble as the pre-steer text, hiding the user's
|
|
994
|
-
// intervention. Only set when there's a head pending — if the
|
|
995
|
-
// session is idle, the next pm.send will start a fresh bubble
|
|
996
|
-
// anyway.
|
|
997
|
-
const head = entry.pendingQueue?.[0];
|
|
998
|
-
if (head) head.pendingSteerCausesNewBubble = true;
|
|
999
|
-
this._logEvent('inject-user-message', {
|
|
1000
|
-
session_key: sessionKey,
|
|
1001
|
-
chat_id: entry.chatId,
|
|
1002
|
-
priority: priority ?? null,
|
|
1003
|
-
should_query: shouldQuery ?? null,
|
|
1004
|
-
text_len: content.length,
|
|
1005
|
-
});
|
|
1006
|
-
return true;
|
|
1007
|
-
} catch (err) {
|
|
1008
|
-
this.logger.error?.(`[${entry.label}] injectUserMessage: ${err.message}`);
|
|
1009
|
-
return false;
|
|
1010
|
-
}
|
|
1011
|
-
}
|
|
1012
|
-
|
|
1013
|
-
/**
|
|
1014
|
-
* Forcibly reset a session: drain pendings, close Query, clear
|
|
1015
|
-
* sessionId in DB. Per v4 plan §6.5.2.
|
|
1016
|
-
*/
|
|
1017
|
-
async resetSession(sessionKey, { reason = 'user-requested' } = {}) {
|
|
1018
|
-
const entry = this.procs.get(sessionKey);
|
|
1019
|
-
if (!entry) return { closed: false, drainedPendings: 0 };
|
|
1020
|
-
const drainedPendings = this.drainQueue(sessionKey, 'RESET_SESSION');
|
|
1021
|
-
const closed = await this._closeEntry(entry, reason);
|
|
1022
|
-
if (this.db?.clearSessionId) {
|
|
1023
|
-
try { this.db.clearSessionId(sessionKey); }
|
|
1024
|
-
catch (err) { this.logger.error?.(`[${entry.label}] clearSessionId: ${err.message}`); }
|
|
1025
|
-
}
|
|
1026
|
-
this.procs.delete(sessionKey);
|
|
1027
|
-
this._maybeSignalLruWaiter();
|
|
1028
|
-
this._logEvent('session-reset', {
|
|
1029
|
-
session_key: sessionKey, reason, drained_pendings: drainedPendings, closed,
|
|
1030
|
-
});
|
|
1031
|
-
return { closed, drainedPendings };
|
|
1032
|
-
}
|
|
1033
|
-
|
|
1034
|
-
// ─── Kill / close ────────────────────────────────────────────────
|
|
1035
|
-
|
|
1036
|
-
async kill(sessionKey) {
|
|
1037
|
-
const entry = this.procs.get(sessionKey);
|
|
1038
|
-
if (!entry) return;
|
|
1039
|
-
this.drainQueue(sessionKey, 'KILLED');
|
|
1040
|
-
await this._closeEntry(entry, 'kill');
|
|
1041
|
-
this.procs.delete(sessionKey);
|
|
1042
|
-
this._maybeSignalLruWaiter();
|
|
1043
|
-
}
|
|
1044
|
-
|
|
1045
|
-
/**
|
|
1046
|
-
* Tear down every Query whose sessionKey starts with the given
|
|
1047
|
-
* chatId prefix. Used on Telegram chat→supergroup migration.
|
|
1048
|
-
* Promise.allSettled per D7 — one slow close doesn't block others.
|
|
1049
|
-
*/
|
|
1050
|
-
async killChat(chatId) {
|
|
1051
|
-
const prefix = String(chatId);
|
|
1052
|
-
const matching = [];
|
|
1053
|
-
for (const [key, entry] of this.procs.entries()) {
|
|
1054
|
-
if (key === prefix || key.startsWith(`${prefix}:`)) {
|
|
1055
|
-
matching.push({ key, entry });
|
|
1056
|
-
}
|
|
1057
|
-
}
|
|
1058
|
-
const results = await Promise.allSettled(matching.map(async ({ key, entry }) => {
|
|
1059
|
-
this.drainQueue(key, 'KILLCHAT');
|
|
1060
|
-
await this._closeEntry(entry, 'killChat');
|
|
1061
|
-
this.procs.delete(key);
|
|
1062
|
-
}));
|
|
1063
|
-
this._maybeSignalLruWaiter();
|
|
1064
|
-
return results.map((r, i) => ({
|
|
1065
|
-
sessionKey: matching[i].key,
|
|
1066
|
-
status: r.status,
|
|
1067
|
-
error: r.reason?.message,
|
|
1068
|
-
}));
|
|
1069
|
-
}
|
|
1070
|
-
|
|
1071
|
-
/**
|
|
1072
|
-
* Race Query.close() against a timeout. Returns `true` if close
|
|
1073
|
-
* resolved cleanly; `false` if it timed out (entry still gets
|
|
1074
|
-
* removed from procs by caller). Per D7.
|
|
1075
|
-
*/
|
|
1076
|
-
async _closeEntry(entry, reason) {
|
|
1077
|
-
if (entry.closed) return true;
|
|
1078
|
-
entry.closed = true;
|
|
1079
|
-
// Close the input controller so SDK's streamInput consumer
|
|
1080
|
-
// exits cleanly.
|
|
1081
|
-
try { entry.inputController.close(); }
|
|
1082
|
-
catch { /* swallow */ }
|
|
1083
|
-
let timedOut = false;
|
|
1084
|
-
const closeP = (async () => {
|
|
1085
|
-
try { await entry.query.close?.(); }
|
|
1086
|
-
catch (err) {
|
|
1087
|
-
this.logger.error?.(`[${entry.label}] query.close: ${err.message}`);
|
|
1088
|
-
}
|
|
1089
|
-
})();
|
|
1090
|
-
const timerP = new Promise((resolve) => setTimeout(() => {
|
|
1091
|
-
timedOut = true;
|
|
1092
|
-
resolve();
|
|
1093
|
-
}, this.queryCloseTimeoutMs));
|
|
1094
|
-
await Promise.race([closeP, timerP]);
|
|
1095
|
-
if (timedOut) {
|
|
1096
|
-
this._logEvent('evict-close-timeout', {
|
|
1097
|
-
session_key: entry.sessionKey, reason, timeout_ms: this.queryCloseTimeoutMs,
|
|
1098
|
-
});
|
|
1099
|
-
}
|
|
1100
|
-
return !timedOut;
|
|
1101
|
-
}
|
|
1102
|
-
|
|
1103
|
-
async shutdown() {
|
|
1104
|
-
// Set flag FIRST so any LRU-waiter unparked by _closeEntry's
|
|
1105
|
-
// iteration-finally doesn't recurse into a fresh spawn (which
|
|
1106
|
-
// would leave an orphaned entry after `procs.clear()` below).
|
|
1107
|
-
// Reject parked waiters immediately so their getOrSpawn callers
|
|
1108
|
-
// unwind cleanly rather than racing the shutdown.
|
|
1109
|
-
this._shuttingDown = true;
|
|
1110
|
-
while (this._lruWaiters.length) {
|
|
1111
|
-
const w = this._lruWaiters.shift();
|
|
1112
|
-
clearTimeout(w.timer);
|
|
1113
|
-
w.reject(new Error('shutdown'));
|
|
1114
|
-
}
|
|
1115
|
-
const entries = [...this.procs.values()];
|
|
1116
|
-
await Promise.allSettled(entries.map((e) => {
|
|
1117
|
-
this.drainQueue(e.sessionKey, 'SHUTDOWN');
|
|
1118
|
-
return this._closeEntry(e, 'shutdown');
|
|
1119
|
-
}));
|
|
1120
|
-
this.procs.clear();
|
|
1121
|
-
}
|
|
1122
|
-
|
|
1123
|
-
// ─── Helpers ────────────────────────────────────────────────────
|
|
1124
|
-
|
|
1125
|
-
_failAllPendings(entry, err) {
|
|
1126
|
-
while (entry.pendingQueue.length > 0) {
|
|
1127
|
-
const p = entry.pendingQueue.shift();
|
|
1128
|
-
p.clearTimers?.();
|
|
1129
|
-
try { p.reject(err); } catch { /* swallow */ }
|
|
1130
|
-
}
|
|
1131
|
-
entry.inFlight = false;
|
|
1132
|
-
}
|
|
1133
|
-
|
|
1134
|
-
_handleQueueDrop(entry, droppedMsg) {
|
|
1135
|
-
// The dropped message was a queued user message that hadn't yet
|
|
1136
|
-
// been consumed by the SDK. Find the corresponding pending and
|
|
1137
|
-
// reject it. (Pendings and pushed messages are 1:1 in order.)
|
|
1138
|
-
// We dropped from the FRONT of the input queue (oldest), which
|
|
1139
|
-
// corresponds to pendingQueue[1] (head=in-flight is index 0).
|
|
1140
|
-
if (entry.pendingQueue.length < 2) return;
|
|
1141
|
-
const dropped = entry.pendingQueue.splice(1, 1)[0];
|
|
1142
|
-
if (!dropped) return;
|
|
1143
|
-
dropped.clearTimers?.();
|
|
1144
|
-
const err = Object.assign(
|
|
1145
|
-
new Error(`queue overflow: dropped (queue cap ${this.queueCap})`),
|
|
1146
|
-
{ code: 'QUEUE_OVERFLOW' },
|
|
1147
|
-
);
|
|
1148
|
-
this._logEvent('queue-overflow-drop', {
|
|
1149
|
-
session_key: entry.sessionKey,
|
|
1150
|
-
chat_id: entry.chatId,
|
|
1151
|
-
queue_len: entry.pendingQueue.length,
|
|
1152
|
-
source_msg_id: dropped.context?.sourceMsgId ?? null,
|
|
1153
|
-
});
|
|
1154
|
-
if (this.onQueueDrop) {
|
|
1155
|
-
try { this.onQueueDrop(entry.sessionKey, dropped, entry); }
|
|
1156
|
-
catch (err2) { this.logger.error?.(`[${entry.label}] onQueueDrop: ${err2.message}`); }
|
|
1157
|
-
}
|
|
1158
|
-
dropped.reject(err);
|
|
1159
|
-
}
|
|
1160
|
-
|
|
1161
|
-
_logEvent(kind, detail) {
|
|
1162
|
-
if (!this.db?.logEvent) return;
|
|
1163
|
-
try { this.db.logEvent(kind, detail); }
|
|
1164
|
-
catch (err) { this.logger.error?.(`[pm-sdk] logEvent ${kind} failed: ${err.message}`); }
|
|
1165
|
-
}
|
|
1166
|
-
}
|
|
1167
|
-
|
|
1168
|
-
module.exports = {
|
|
1169
|
-
ProcessManagerSdk,
|
|
1170
|
-
DEFAULT_CAP,
|
|
1171
|
-
DEFAULT_QUEUE_CAP,
|
|
1172
|
-
DEFAULT_QUERY_CLOSE_TIMEOUT_MS,
|
|
1173
|
-
DEFAULT_TRANSIENT_RETRY_DELAY_MS,
|
|
1174
|
-
MAX_TRANSIENT_RETRIES,
|
|
1175
|
-
extractAssistantText,
|
|
1176
|
-
sumUsage,
|
|
1177
|
-
makeInputController,
|
|
1178
|
-
};
|