polygram 0.7.9 → 0.8.0-rc.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,940 @@
1
+ /**
2
+ * SDK-backed ProcessManager — `@anthropic-ai/claude-agent-sdk` Query
3
+ * objects in place of `child_process.spawn('claude', ...)` and
4
+ * stream-json line parsing.
5
+ *
6
+ * Public API matches `lib/process-manager.js` (the CLI version) so
7
+ * polygram.js can swap implementations via env flag (POLYGRAM_USE_SDK=1).
8
+ * Phase 4 deletes the CLI version after Phase 5 soak proves the SDK
9
+ * version stable.
10
+ *
11
+ * Per v4 plan §6.5.7 (buildSdkOptions), §6.6 (ship-breaker
12
+ * mitigations), Phase 0 spike findings (docs/0.8.0-phase0-findings.md).
13
+ *
14
+ * Architecture:
15
+ * - One Query per active sessionKey, held for the chat lifetime
16
+ * (Phase 0 gate 1 PASS — long-lived input AsyncIterable works).
17
+ * - inputController is the writable end of an
18
+ * AsyncIterable<SDKUserMessage>; pm.send() pushes user messages
19
+ * onto it; the SDK's streamInput() consumes from the other end.
20
+ * - iteratePromise is the for-await loop over the Query's
21
+ * AsyncGenerator output. Wrapped in try/catch (D7 commitment).
22
+ * - pendingQueue maps N user messages → N SDKResultMessage events
23
+ * in FIFO order (same as CLI version's stream-json model).
24
+ * - LRU eviction across the procs Map (cap = DEFAULT_CAP) — same
25
+ * behaviour as CLI version, with Query.close() instead of
26
+ * proc.kill().
27
+ *
28
+ * Decisions encoded:
29
+ * D1 streaming: subscribe to SDKAssistantMessage (cumulative)
30
+ * D2 long-lived Query per chat
31
+ * D3 /effort via applyFlagSettings — DELETE requestRespawn
32
+ * D5 Options.env SHADOW — buildSdkOptions enumerates everything
33
+ * D6 Query.close() is fast — 100ms shutdown timeout safe
34
+ * D7 killChat Promise.allSettled with 5s per-Query timeout
35
+ * D8 pm.drainQueue(errCode) owns drain logic
36
+ * D11 stdinLock dropped — SDK preserves FIFO at Query level
37
+ */
38
+
39
+ 'use strict';
40
+
41
+ const { query } = require('@anthropic-ai/claude-agent-sdk');
42
+ const { isTransientHttpError } = require('./error-classify');
43
+
44
// Pool sizing: default `cap` (entries in the procs Map) and default
// per-session `queueCap`.
const DEFAULT_CAP = 10;
const DEFAULT_QUEUE_CAP = 50;

// How long a parked getOrSpawn() waits for an LRU slot (5 minutes).
const DEFAULT_LRU_WAIT_MS = 5 * 60_000;

// Ceiling for a single Query.close() before we stop waiting on it (D7).
const DEFAULT_QUERY_CLOSE_TIMEOUT_MS = 5_000;

// Transient-error retry policy: delay before the re-push, and how many
// re-pushes a single pending may consume.
const DEFAULT_TRANSIENT_RETRY_DELAY_MS = 2_500;
const MAX_TRANSIENT_RETRIES = 1;

// Per-pending idle and wall-clock limits; the SDK has no built-in
// equivalent. The idle timer is re-armed only by the activity-event
// allowlist (H13 mitigation) — not by api-retry or compact_boundary.
const DEFAULT_IDLE_MS = 10 * 60_000;
const DEFAULT_MAX_TURN_MS = 30 * 60_000;
55
+
56
+ // ─── Helpers ────────────────────────────────────────────────────────
57
+
58
/**
 * Collect the user-visible text carried by an assistant event.
 *
 * Reads `event.message.content`, keeps only `{ type: 'text' }` blocks
 * whose `text` is a string, and joins them with a blank line between
 * blocks. Anything that is not an array of blocks yields ''.
 *
 * A single trailing colon (optionally followed by whitespace) is turned
 * into an ellipsis, so "Listing dependencies:" reads as
 * "Listing dependencies…" rather than as a half-finished sentence.
 *
 * @param {object} event  Assistant event; may be null/undefined.
 * @returns {string} Trimmed, normalised text ('' when there is none).
 */
function extractAssistantText(event) {
  const content = event?.message?.content;
  if (!Array.isArray(content)) {
    return '';
  }

  const joined = content
    .filter((block) => Boolean(block) && block.type === 'text' && typeof block.text === 'string')
    .map((block) => block.text)
    .join('\n\n')
    .trim();

  return joined.replace(/([^:]):\s*$/, '$1…');
}
79
+
80
/**
 * Total the snake_case token counters over every usage record in the map.
 *
 * Only the four counters below are summed; a counter that is missing or
 * not a finite number contributes nothing, and falsy records are skipped.
 *
 * @param {Map<*, object>} usageByMessage  Usage records keyed by
 *   assistant message id (anything exposing `.values()` works).
 * @returns {{input_tokens: number, output_tokens: number,
 *   cache_creation_input_tokens: number, cache_read_input_tokens: number}}
 */
function sumUsage(usageByMessage) {
  const counters = [
    'input_tokens',
    'output_tokens',
    'cache_creation_input_tokens',
    'cache_read_input_tokens',
  ];
  const totals = {};
  for (const name of counters) {
    totals[name] = 0;
  }

  for (const usage of usageByMessage.values()) {
    if (!usage) continue;
    for (const name of counters) {
      const value = usage[name];
      if (Number.isFinite(value)) {
        totals[name] += value;
      }
    }
  }
  return totals;
}
105
+
106
/**
 * Build the writable end of an async-iterable message stream.
 *
 * The returned `iter` is handed to a consumer that reads it with
 * `for await` / `next()`; the producer side calls `push(msg)`.
 *
 * Delivery rules:
 *  - A message pushed while a `next()` call is parked goes straight to
 *    that caller; otherwise it is buffered.
 *  - The buffer is bounded by `queueCap`. When a push takes it over the
 *    cap, the OLDEST buffered message is removed and passed to the
 *    callback registered through `onDrop` (callback errors are ignored
 *    so one bad listener cannot break the producer).
 *  - `close()` (producer side) stops further pushes but lets the
 *    consumer drain whatever is still buffered before seeing `done`.
 *  - `iter.return()` (consumer side) means the consumer is finished:
 *    the buffer is discarded so every later `next()` reports `done`,
 *    as the iteration protocol expects of a finished iterator.
 *
 * @param {object} [opts]
 * @param {number} [opts.queueCap=DEFAULT_QUEUE_CAP]  Max buffered messages.
 * @returns {{iter: AsyncIterableIterator<*>, push: function(*): void,
 *   close: function(): void, onDrop: function(function(*): void): void,
 *   size: number}}  `size` is a live getter for the buffer length.
 */
function makeInputController({ queueCap = DEFAULT_QUEUE_CAP } = {}) {
  const queue = [];        // buffered messages not yet handed to the consumer
  const waiters = [];      // resolvers of parked next() calls
  let closed = false;
  let dropCallback = null; // optional (oldestMessage) → void

  const doneResult = () => ({ value: undefined, done: true });

  // Mark the stream finished and release every parked next() caller.
  function finish() {
    closed = true;
    while (waiters.length) waiters.shift()(doneResult());
  }

  const iter = {
    [Symbol.asyncIterator]() { return iter; },
    next() {
      if (queue.length) {
        return Promise.resolve({ value: queue.shift(), done: false });
      }
      if (closed) {
        return Promise.resolve(doneResult());
      }
      return new Promise((resolve) => waiters.push(resolve));
    },
    async return() {
      // The consumer has stopped reading: nothing buffered can ever be
      // delivered, so drop it rather than yielding from a finished iterator.
      queue.length = 0;
      finish();
      return doneResult();
    },
  };

  /**
   * Hand a message to the consumer (or buffer it).
   * @throws {Error} with `code: 'INPUT_CLOSED'` once the stream is closed.
   */
  function push(msg) {
    if (closed) {
      throw Object.assign(new Error('input controller closed'),
        { code: 'INPUT_CLOSED' });
    }
    if (waiters.length) {
      waiters.shift()({ value: msg, done: false });
      return;
    }
    queue.push(msg);
    while (queue.length > queueCap) {
      const dropped = queue.shift();
      if (dropCallback) {
        try { dropCallback(dropped); }
        catch { /* best-effort notification; the owner logs separately */ }
      }
    }
  }

  /** Producer-side close; idempotent. Buffered messages stay readable. */
  function close() {
    if (closed) return;
    finish();
  }

  /** Register (or replace) the overflow listener. */
  function onDrop(cb) { dropCallback = cb; }

  return { iter, push, close, onDrop, get size() { return queue.length; } };
}
168
+
169
+ // ─── ProcessManager ────────────────────────────────────────────────
170
+
171
/**
 * Pool of long-lived SDK Query objects, one per sessionKey.
 *
 * Each pool entry owns a Query, the input controller feeding it, and a
 * FIFO `pendingQueue` of in-flight `send()` calls. `_runIteration`
 * consumes the Query's output and settles the head pending when a
 * `result` message arrives. The pool is capped at `cap` entries; when
 * full, the least-recently-used idle entry is evicted, and if every
 * entry is busy the caller parks until a slot frees up.
 */
class ProcessManagerSdk {
  /**
   * @param {object} opts
   * @param {number} [opts.cap]  Max pool entries before eviction.
   * @param {number} [opts.queueCap]  Max pendings per session.
   * @param {function} opts.spawnFn  `(sessionKey, ctx)` → SDK options, a
   *   Query instance, or `{ query, inputController }`. Required.
   * @param {object|null} [opts.db]  Optional; `logEvent(kind, detail)`
   *   and `clearSessionId(sessionKey)` are used when present.
   * @param {object} [opts.logger]  Only `error` is used.
   * @param {function|null} [opts.onInit] `(sessionKey, msg, entry)`
   * @param {function|null} [opts.onResult] `(sessionKey, msg, entry, pending)`
   * @param {function|null} [opts.onClose] `(sessionKey, code, entry)`;
   *   invoked when the Query iteration throws.
   * @param {function|null} [opts.onStreamChunk] `(sessionKey, text, entry)`
   * @param {function|null} [opts.onToolUse] `(sessionKey, toolName, entry)`
   * @param {function|null} [opts.onAssistantMessageStart] `(sessionKey, entry)`; awaited.
   * @param {function|null} [opts.onCompactBoundary] `(sessionKey, msg, entry)`; awaited.
   * @param {function|null} [opts.onQueueDrop] `(sessionKey, pending, entry)`
   * @param {number} [opts.queryCloseTimeoutMs]  Ceiling for one Query.close().
   * @param {number} [opts.lruWaitMs]  How long a parked getOrSpawn() waits.
   * @throws {Error} when `spawnFn` is missing.
   */
  constructor({
    cap = DEFAULT_CAP,
    queueCap = DEFAULT_QUEUE_CAP,
    spawnFn,   // (sessionKey, ctx) → SdkOptions OR { query, inputController }
    db = null,
    logger = console,
    onInit = null,
    onResult = null,
    onClose = null,
    onStreamChunk = null,
    onToolUse = null,
    onAssistantMessageStart = null,
    onCompactBoundary = null,
    onQueueDrop = null,
    queryCloseTimeoutMs = DEFAULT_QUERY_CLOSE_TIMEOUT_MS,
    lruWaitMs = DEFAULT_LRU_WAIT_MS,
  } = {}) {
    if (!spawnFn) throw new Error('spawnFn required');
    this.cap = cap;
    this.queueCap = queueCap;
    this.spawnFn = spawnFn;
    this.db = db;
    this.logger = logger;
    this.queryCloseTimeoutMs = queryCloseTimeoutMs;
    this.lruWaitMs = lruWaitMs;
    this.onInit = onInit;
    this.onResult = onResult;
    this.onClose = onClose;
    this.onStreamChunk = onStreamChunk;
    this.onToolUse = onToolUse;
    this.onAssistantMessageStart = onAssistantMessageStart;
    this.onCompactBoundary = onCompactBoundary;
    this.onQueueDrop = onQueueDrop;
    this.procs = new Map();   // sessionKey → entry
    this._lruWaiters = [];    // [{ resolve, reject, timer }]
  }

  has(sessionKey) { return this.procs.has(sessionKey); }
  get(sessionKey) { return this.procs.get(sessionKey); }
  get size() { return this.procs.size; }
  keys() { return [...this.procs.keys()]; }

  // ─── Spawn / pool ────────────────────────────────────────────────

  /**
   * Return the live entry for `sessionKey`, spawning one if needed.
   * When the pool is full an idle entry is evicted; if none is idle the
   * call parks until a slot is signalled (or the LRU wait times out).
   *
   * @returns {Promise<object>} the pool entry.
   * @throws {Error} on LRU wait timeout or shutdown while parked.
   */
  async getOrSpawn(sessionKey, spawnContext) {
    const existing = this.procs.get(sessionKey);
    if (existing && !existing.closed) return existing;

    // A closed entry still parked under this key is about to be
    // replaced; drop it now so it neither counts toward the cap nor
    // forces the eviction of an unrelated idle session.
    if (existing) this.procs.delete(sessionKey);

    if (this.procs.size >= this.cap) {
      const evicted = this._evictLRU();
      if (!evicted) {
        // All entries in-flight — park, then re-evaluate from scratch.
        await this._awaitLruSlot();
        return this.getOrSpawn(sessionKey, spawnContext);
      }
    }

    return this._spawnEntry(sessionKey, spawnContext);
  }

  /**
   * Evict the idle entry with the oldest `lastUsedTs`.
   * @returns {boolean} false when every entry is in-flight.
   */
  _evictLRU() {
    let oldest = null;
    let oldestKey = null;
    for (const [key, candidate] of this.procs.entries()) {
      if (candidate.inFlight) continue;
      if (!oldest || candidate.lastUsedTs < oldest.lastUsedTs) {
        oldest = candidate;
        oldestKey = key;
      }
    }
    if (!oldest) {
      this._logEvent('lru-full', { active: this.procs.size, cap: this.cap });
      return false;
    }
    this._logEvent('evict', { session_key: oldestKey });
    // Tear-down runs in the background (bounded by the close timeout);
    // the slot is released immediately.
    this._closeEntry(oldest, 'evict').catch(() => {});
    this.procs.delete(oldestKey);
    return true;
  }

  /** Park until `_maybeSignalLruWaiter` fires; rejects after `lruWaitMs`. */
  async _awaitLruSlot() {
    const waitMs = this.lruWaitMs;
    return new Promise((resolve, reject) => {
      const timer = setTimeout(() => {
        const idx = this._lruWaiters.findIndex((w) => w.resolve === resolve);
        if (idx !== -1) this._lruWaiters.splice(idx, 1);
        this._logEvent('lru-wait-timeout', { wait_ms: waitMs });
        reject(new Error(`lru wait timed out after ${waitMs}ms`));
      }, waitMs);
      this._lruWaiters.push({ resolve, reject, timer });
      this._logEvent('lru-wait', { active: this.procs.size, cap: this.cap });
    });
  }

  /** Wake the longest-parked getOrSpawn() caller, if any. */
  _maybeSignalLruWaiter() {
    const waiter = this._lruWaiters.shift();
    if (waiter) { clearTimeout(waiter.timer); waiter.resolve(); }
  }

  /** Reject every parked getOrSpawn() caller with `err`. */
  _rejectLruWaiters(err) {
    while (this._lruWaiters.length) {
      const waiter = this._lruWaiters.shift();
      clearTimeout(waiter.timer);
      waiter.reject(err);
    }
  }

  /**
   * Remove `entry` from the pool only if it is still the entry stored
   * under its key. Tear-down paths await before removing, so by then a
   * replacement may already live under the same key; deleting by key
   * alone would silently drop that replacement.
   * @returns {boolean} true when the entry was removed.
   */
  _removeEntry(entry) {
    if (this.procs.get(entry.sessionKey) !== entry) return false;
    this.procs.delete(entry.sessionKey);
    return true;
  }

  /** Build an entry from spawnFn's result, start its iteration, store it. */
  _spawnEntry(sessionKey, spawnContext) {
    const spawnResult = this.spawnFn(sessionKey, spawnContext);
    // spawnFn may return either SdkOptions (production) or a ready-made
    // Query / { query, inputController } (test shortcuts).
    let entryQuery;
    let inputController;
    if (spawnResult && typeof spawnResult.next === 'function') {
      // Already a Query instance: feed it through streamInput.
      entryQuery = spawnResult;
      inputController = makeInputController({ queueCap: this.queueCap });
      if (typeof entryQuery.streamInput === 'function') {
        // Promise.resolve tolerates a streamInput that returns nothing.
        Promise.resolve(entryQuery.streamInput(inputController.iter)).catch(() => {});
      }
    } else if (spawnResult && spawnResult.query && spawnResult.inputController) {
      // Pre-built pair.
      entryQuery = spawnResult.query;
      inputController = spawnResult.inputController;
    } else {
      // Production: spawnFn returned SdkOptions.
      inputController = makeInputController({ queueCap: this.queueCap });
      entryQuery = query({
        prompt: inputController.iter,
        options: spawnResult || {},
      });
    }

    const entry = {
      sessionKey,
      chatId: spawnContext?.chatId ?? null,
      label: spawnContext?.label ?? sessionKey,
      query: entryQuery,
      inputController,
      pendingQueue: [],
      sessionId: null,
      closed: false,
      inFlight: false,
      lastUsedTs: Date.now(),
      iteratePromise: null,
      needsRespawn: null,
    };

    // The controller dropped its oldest buffered message; reject the
    // pending assumed to correspond to it (see _handleQueueDrop).
    inputController.onDrop((dropped) => {
      this._handleQueueDrop(entry, dropped);
    });

    entry.iteratePromise = this._runIteration(entry).catch((err) => {
      this.logger.error?.(`[${entry.label}] iteration crashed: ${err?.message || err}`);
      this._failAllPendings(entry, err);
    });

    this.procs.set(sessionKey, entry);
    return entry;
  }

  // ─── Iteration loop ──────────────────────────────────────────────

  /**
   * Consume the entry's Query until it ends, throws, or the entry is
   * closed. Always leaves the entry closed, idle, with no unsettled
   * pendings, and out of the pool.
   */
  async _runIteration(entry) {
    try {
      for await (const msg of entry.query) {
        await this._handleEvent(entry, msg);
        if (entry.closed) break;
      }
    } catch (err) {
      // SDK threw (abort, network, etc). Reject all pendings with the
      // error and report the close.
      this._failAllPendings(entry, err);
      if (this.onClose) {
        // Abort errors identify themselves through `name` (DOMException
        // also carries code 'ABORT_ERR'); the legacy `code` check is kept.
        const aborted = err?.name === 'AbortError'
          || err?.code === 'ABORT_ERR'
          || err?.code === 'AbortError';
        try { this.onClose(entry.sessionKey, aborted ? 0 : 1, entry); }
        catch (e) { this.logger.error?.(`[${entry.label}] onClose: ${e?.message}`); }
      }
    } finally {
      entry.closed = true;
      // The stream is gone, so nothing can settle what is still queued:
      // reject it now instead of leaving callers to wait for a timeout.
      if (entry.pendingQueue.length > 0) {
        this._failAllPendings(entry, Object.assign(
          new Error('query ended before producing a result'),
          { code: 'QUERY_ENDED' },
        ));
      }
      try { entry.inputController.close(); }
      catch { /* already closed, or a controller without close() */ }
      entry.inFlight = false;
      this._removeEntry(entry);
      this._maybeSignalLruWaiter();
    }
  }

  /** Route one Query output message. */
  async _handleEvent(entry, msg) {
    const head = entry.pendingQueue[0];

    // Re-arm the head's idle timer on activity events (H13 allowlist).
    if (head && this._isActivityEvent(msg)) {
      head.resetIdleTimer?.();
    }

    if (msg.type === 'system' && msg.subtype === 'init') {
      entry.sessionId = msg.session_id || null;
      if (this.onInit) {
        try { this.onInit(entry.sessionKey, msg, entry); }
        catch (err) { this.logger.error?.(`[${entry.label}] onInit: ${err?.message}`); }
      }
      return;
    }

    if (msg.type === 'system' && msg.subtype === 'compact_boundary') {
      // The callback is awaited before the next event is processed so a
      // fresh assistant message after the boundary starts cleanly.
      if (this.onCompactBoundary) {
        try { await this.onCompactBoundary(entry.sessionKey, msg, entry); }
        catch (err) { this.logger.error?.(`[${entry.label}] onCompactBoundary: ${err?.message}`); }
      }
      this._logEvent('compact-boundary', {
        session_key: entry.sessionKey,
        trigger: msg.compact_metadata?.trigger ?? null,
        pre_tokens: msg.compact_metadata?.pre_tokens ?? null,
        post_tokens: msg.compact_metadata?.post_tokens ?? null,
      });
      return;
    }

    if (msg.type === 'assistant' && head) {
      await this._handleAssistant(entry, head, msg);
      return;
    }

    if (msg.type === 'result' && head) {
      this._handleResult(entry, head, msg);
    }
  }

  /** Account for one top-level assistant message against the head pending. */
  async _handleAssistant(entry, head, msg) {
    // Subagent filter: top-level messages only.
    if (msg.parent_tool_use_id != null) return;

    const content = msg.message?.content;
    const messageId = msg.message?.id;
    const added = extractAssistantText(msg);
    const toolUses = Array.isArray(content)
      ? content.filter((b) => b?.type === 'tool_use')
      : [];

    // First-stream fires when ANY assistant content arrives (text or tool_use).
    if (added || toolUses.length > 0) {
      head.fireFirstStream?.();
      head.firstAssistantSeen = true;
    }

    // Per-message-id usage; summed when the result arrives.
    if (messageId != null && msg.message?.usage) {
      head.usageByMessage.set(messageId, msg.message.usage);
    }

    for (const block of toolUses) {
      head.toolUseCount++;
      if (this.onToolUse && block.name) {
        try { this.onToolUse(entry.sessionKey, block.name, entry); }
        catch (err) { this.logger.error?.(`[${entry.label}] onToolUse: ${err?.message}`); }
      }
    }

    if (!added) return;

    // Order matters: announce the message-id transition first, then
    // advance lastAssistantMessageId, then emit the new content.
    const isNewMessage = head.lastAssistantMessageId != null
      && messageId != null
      && head.lastAssistantMessageId !== messageId
      && head.streamText
      && head.streamText.length > 0;
    if (isNewMessage && this.onAssistantMessageStart) {
      try { await this.onAssistantMessageStart(entry.sessionKey, entry); }
      catch (err) { this.logger.error?.(`[${entry.label}] onAssistantMessageStart: ${err?.message}`); }
    }
    if (messageId != null) head.lastAssistantMessageId = messageId;
    head.streamText = added;
    if (this.onStreamChunk) {
      try { this.onStreamChunk(entry.sessionKey, head.streamText, entry); }
      catch (err) { this.logger.error?.(`[${entry.label}] onStreamChunk: ${err?.message}`); }
    }
  }

  /** Settle the head pending from a result message, or schedule its retry. */
  _handleResult(entry, head, msg) {
    // Transient retry: re-push once if the turn failed with a transient
    // HTTP error BEFORE producing any assistant content.
    const errSignal = msg.error || msg.subtype;
    const isError = msg.subtype !== 'success';
    const shouldRetry = isError
      && !head.firstAssistantSeen
      && head.transientRetries < MAX_TRANSIENT_RETRIES
      && head.prompt != null
      && isTransientHttpError({ message: errSignal, subtype: msg.subtype });
    if (shouldRetry) {
      head.transientRetries++;
      this._logEvent('transient-retry', {
        session_key: entry.sessionKey,
        chat_id: entry.chatId,
        attempt: head.transientRetries,
        subtype: msg.subtype,
        error: typeof errSignal === 'string' ? errSignal.slice(0, 200) : null,
      });
      // Reset accumulators; re-arm idle timer; sleep, then re-push.
      head.usageByMessage = new Map();
      head.toolUseCount = 0;
      head.streamText = '';
      head.lastAssistantMessageId = null;
      head.resetIdleTimer?.();
      setTimeout(() => {
        if (entry.pendingQueue[0] !== head || entry.closed) return;
        try {
          entry.inputController.push({
            type: 'user',
            message: { role: 'user', content: head.prompt },
          });
        } catch (err) {
          entry.pendingQueue.shift();
          head.clearTimers();
          head.reject(err);
          // The head is gone; don't strand whatever was queued behind it.
          this._advanceQueue(entry);
        }
      }, DEFAULT_TRANSIENT_RETRY_DELAY_MS);
      return;
    }

    // Normal resolution.
    entry.pendingQueue.shift();
    head.clearTimers();
    if (this.onResult) {
      try { this.onResult(entry.sessionKey, msg, entry, head); }
      catch (err) { this.logger.error?.(`[${entry.label}] onResult: ${err?.message}`); }
    }
    const usageTotals = sumUsage(head.usageByMessage);
    head.resolve({
      text: msg.result || '',
      sessionId: msg.session_id,
      cost: msg.total_cost_usd,
      duration: msg.duration_ms,
      error: msg.subtype === 'success' ? null : (msg.error || msg.subtype),
      metrics: {
        inputTokens: usageTotals.input_tokens,
        outputTokens: usageTotals.output_tokens,
        cacheCreationTokens: usageTotals.cache_creation_input_tokens,
        cacheReadTokens: usageTotals.cache_read_input_tokens,
        numAssistantMessages: head.usageByMessage.size,
        numToolUses: head.toolUseCount,
        resultSubtype: msg.subtype || null,
      },
    });

    this._advanceQueue(entry);
  }

  /**
   * After the head pending left the queue: start the next one, or mark
   * the entry idle and let a parked getOrSpawn() caller try again.
   */
  _advanceQueue(entry) {
    if (entry.pendingQueue.length > 0) {
      entry.pendingQueue[0].activate();
      return;
    }
    entry.inFlight = false;
    this._maybeSignalLruWaiter();
  }

  /** True for message types that count as turn activity for the idle timer. */
  _isActivityEvent(msg) {
    if (!msg?.type) return false;
    if (msg.type === 'assistant') return true;
    if (msg.type === 'partial_assistant') return true;
    if (msg.type === 'stream_event') return true;
    if (msg.type === 'tool_progress') return true;
    if (msg.type === 'user') return true;   // tool_result bridge events
    return false;
  }

  // ─── Send ────────────────────────────────────────────────────────

  /**
   * Queue a user prompt on an existing session.
   *
   * The pending is activated (timers armed) when it reaches the head of
   * the session's queue. It resolves from the matching `result` message
   * and rejects on idle/wall-clock timeout, queue overflow, drain, or
   * stream failure.
   *
   * @param {string} sessionKey
   * @param {*} prompt  Used as the user message `content`.
   * @param {object} [opts]
   * @param {number} [opts.timeoutMs]  Idle limit, re-armed on activity events.
   * @param {number} [opts.maxTurnMs]  Wall-clock limit for the turn.
   * @param {object} [opts.context]  Caller data; `onFirstStream`,
   *   `onActivate` and `sourceMsgId` are read when present.
   * @returns {Promise<{text, sessionId, cost, duration, error, metrics}>}
   */
  send(sessionKey, prompt, {
    timeoutMs = DEFAULT_IDLE_MS,
    maxTurnMs = DEFAULT_MAX_TURN_MS,
    context = {},
  } = {}) {
    return new Promise((resolve, reject) => {
      const entry = this.procs.get(sessionKey);
      if (!entry || entry.closed) {
        return reject(new Error('No process for session'));
      }
      if (entry.needsRespawn) {
        return reject(new Error(`Session awaiting respawn (${entry.needsRespawn})`));
      }

      entry.lastUsedTs = Date.now();

      let idleTimer = null;
      let maxTimer = null;
      let activated = false;

      const clearTimers = () => {
        if (idleTimer) { clearTimeout(idleTimer); idleTimer = null; }
        if (maxTimer) { clearTimeout(maxTimer); maxTimer = null; }
      };

      const pending = {
        resolve: (r) => { clearTimers(); resolve(r); },
        reject: (e) => { clearTimers(); reject(e); },
        clearTimers,
        startedAt: null,
        streamText: '',
        context,
        idleTimer: null,
        maxTimer: null,
        activated: false,
        usageByMessage: new Map(),
        lastUsageMessageId: null,
        toolUseCount: 0,
        firstStreamFired: false,
        prompt,
        transientRetries: 0,
        firstAssistantSeen: false,
      };

      pending.fireFirstStream = () => {
        if (pending.firstStreamFired) return;
        pending.firstStreamFired = true;
        try { context?.onFirstStream?.(); }
        catch (err) { this.logger.error?.(`[${entry.label}] onFirstStream: ${err?.message}`); }
      };

      const fireTimeout = (reason) => {
        // Only the active (head) pending may time out.
        if (entry.pendingQueue[0] !== pending) return;
        this._logEvent('turn-timeout', {
          session_key: sessionKey,
          chat_id: entry.chatId,
          reason,
        });
        entry.pendingQueue.shift();
        // Cancel the SDK side first. Best-effort: a failing or
        // non-promise interrupt() must not stop the rejection below.
        try { Promise.resolve(entry.query.interrupt?.()).catch(() => {}); }
        catch { /* interrupt threw synchronously; nothing more to do */ }
        pending.reject(new Error(reason));
        this._advanceQueue(entry);
      };

      const armIdle = () => setTimeout(
        () => fireTimeout(`Timeout: ${timeoutMs / 1000}s idle with no Claude activity`),
        timeoutMs,
      );

      pending.activate = () => {
        if (activated) return;
        activated = true;
        pending.activated = true;
        pending.startedAt = Date.now();
        idleTimer = armIdle();
        pending.idleTimer = idleTimer;
        maxTimer = setTimeout(
          () => fireTimeout(`Turn exceeded ${maxTurnMs / 1000}s wall-clock ceiling`),
          maxTurnMs,
        );
        pending.maxTimer = maxTimer;
        try { context?.onActivate?.(); }
        catch (err) { this.logger.error?.(`[${entry.label}] onActivate: ${err?.message}`); }
      };

      pending.resetIdleTimer = () => {
        if (!activated) return;
        if (idleTimer) clearTimeout(idleTimer);
        idleTimer = armIdle();
        pending.idleTimer = idleTimer;
      };

      // Push into the queue, then enforce queueCap by dropping the
      // oldest non-active pending (index 0 is the in-flight head).
      // NOTE(review): the user message of a dropped pending may already
      // have been handed to the SDK — confirm result/pending pairing
      // stays aligned after an overflow drop.
      entry.pendingQueue.push(pending);
      entry.inFlight = true;
      while (entry.pendingQueue.length > this.queueCap) {
        const dropped = entry.pendingQueue.splice(1, 1)[0];
        if (!dropped) break;
        this._rejectDropped(entry, dropped);
      }

      // With a very small cap the pending just queued can itself be the
      // overflow victim; it is already rejected, so its prompt must not
      // reach the SDK.
      if (!entry.pendingQueue.includes(pending)) return undefined;

      if (entry.pendingQueue.length === 1) pending.activate();

      try {
        entry.inputController.push({
          type: 'user',
          message: { role: 'user', content: prompt },
        });
      } catch (err) {
        const idx = entry.pendingQueue.indexOf(pending);
        if (idx !== -1) entry.pendingQueue.splice(idx, 1);
        if (entry.pendingQueue.length === 0) entry.inFlight = false;
        pending.reject(err);
      }
      return undefined;
    });
  }

  // ─── Per-session control surface ─────────────────────────────────

  /**
   * Cancel the in-flight turn. Queued pendings are NOT rejected here;
   * use drainQueue() for that.
   * @returns {Promise<boolean>} false when the session is missing/closed
   *   or the SDK call throws.
   */
  async interrupt(sessionKey) {
    const entry = this.procs.get(sessionKey);
    if (!entry || entry.closed) return false;
    try { await entry.query.interrupt?.(); }
    catch (err) {
      this.logger.error?.(`[${entry.label}] interrupt: ${err?.message}`);
      return false;
    }
    this._logEvent('interrupt-applied', { session_key: sessionKey });
    return true;
  }

  /**
   * Reject every pending (head + queued) with `Error('drained:' +
   * errCode)` carrying `code: errCode`, and mark the entry idle.
   * @returns {number} how many pendings were rejected.
   */
  drainQueue(sessionKey, errCode = 'INTERRUPTED') {
    const entry = this.procs.get(sessionKey);
    if (!entry) return 0;
    let count = 0;
    while (entry.pendingQueue.length > 0) {
      const p = entry.pendingQueue.shift();
      p.clearTimers?.();
      const err = Object.assign(new Error(`drained:${errCode}`), { code: errCode });
      try { p.reject(err); } catch { /* a throwing reject must not stop the drain */ }
      count++;
    }
    entry.inFlight = false;
    this._logEvent('drain-queue', { session_key: sessionKey, code: errCode, count });
    return count;
  }

  /**
   * Shared body of the Query control calls: invoke `entry.query[method]`
   * (when the Query provides it) and report success as a boolean.
   */
  async _callQueryControl(sessionKey, method, arg) {
    const entry = this.procs.get(sessionKey);
    if (!entry || entry.closed) return false;
    try {
      await entry.query[method]?.(arg);
      return true;
    } catch (err) {
      this.logger.error?.(`[${entry.label}] ${method}: ${err?.message}`);
      return false;
    }
  }

  /** @returns {Promise<boolean>} false if the session is missing/closed or the call throws. */
  async setModel(sessionKey, model) {
    return this._callQueryControl(sessionKey, 'setModel', model);
  }

  /** @returns {Promise<boolean>} false if the session is missing/closed or the call throws. */
  async setPermissionMode(sessionKey, mode) {
    return this._callQueryControl(sessionKey, 'setPermissionMode', mode);
  }

  /** @returns {Promise<boolean>} false if the session is missing/closed or the call throws. */
  async applyFlagSettings(sessionKey, settings) {
    return this._callQueryControl(sessionKey, 'applyFlagSettings', settings);
  }

  /**
   * Mid-turn steer: push a user message tagged `priority: 'now'` onto
   * the session's input. No pending is created for it.
   *
   * `shouldQuery` is forwarded on the message as-is (intended meaning:
   * true → trigger a response, false → append context only).
   * NOTE(review): how the SDK treats `priority` / `shouldQuery`, and
   * whether a steer yields its own result message, is not visible
   * here — verify against the SDK.
   *
   * @returns {boolean} true if the push succeeded; false if the session
   *   is missing/closed or the input controller rejected the push.
   */
  steer(sessionKey, text, { shouldQuery = true } = {}) {
    const entry = this.procs.get(sessionKey);
    if (!entry || entry.closed) return false;
    try {
      entry.inputController.push({
        type: 'user',
        message: { role: 'user', content: text },
        priority: 'now',
        shouldQuery,
      });
      this._logEvent('steer', {
        session_key: sessionKey,
        chat_id: entry.chatId,
        should_query: shouldQuery,
        text_len: text?.length ?? 0,
      });
      return true;
    } catch (err) {
      this.logger.error?.(`[${entry.label}] steer: ${err?.message}`);
      return false;
    }
  }

  /**
   * Forcibly reset a session: drain pendings, close the Query, clear
   * the stored session id (when `db.clearSessionId` exists), and drop
   * the entry from the pool.
   * @returns {Promise<{closed: boolean, drainedPendings: number}>}
   */
  async resetSession(sessionKey, { reason = 'user-requested' } = {}) {
    const entry = this.procs.get(sessionKey);
    if (!entry) return { closed: false, drainedPendings: 0 };
    const drainedPendings = this.drainQueue(sessionKey, 'RESET_SESSION');
    const closed = await this._closeEntry(entry, reason);
    if (this.db?.clearSessionId) {
      try { this.db.clearSessionId(sessionKey); }
      catch (err) { this.logger.error?.(`[${entry.label}] clearSessionId: ${err?.message}`); }
    }
    this._removeEntry(entry);
    this._maybeSignalLruWaiter();
    this._logEvent('session-reset', {
      session_key: sessionKey, reason, drained_pendings: drainedPendings, closed,
    });
    return { closed, drainedPendings };
  }

  // ─── Kill / close ────────────────────────────────────────────────

  /** Drain and close one session, then release its pool slot. */
  async kill(sessionKey) {
    const entry = this.procs.get(sessionKey);
    if (!entry) return;
    this.drainQueue(sessionKey, 'KILLED');
    await this._closeEntry(entry, 'kill');
    this._removeEntry(entry);
    this._maybeSignalLruWaiter();
  }

  /**
   * Tear down every session whose key equals `chatId` or starts with
   * `chatId + ':'`. Closes run concurrently (Promise.allSettled) so one
   * slow close does not hold up the others.
   * @returns {Promise<Array<{sessionKey: string, status: string, error: (string|undefined)}>>}
   */
  async killChat(chatId) {
    const prefix = String(chatId);
    const matching = [];
    for (const [key, entry] of this.procs.entries()) {
      if (key === prefix || key.startsWith(`${prefix}:`)) {
        matching.push({ key, entry });
      }
    }
    const results = await Promise.allSettled(matching.map(async ({ key, entry }) => {
      this.drainQueue(key, 'KILLCHAT');
      await this._closeEntry(entry, 'killChat');
      this._removeEntry(entry);
    }));
    this._maybeSignalLruWaiter();
    return results.map((r, i) => ({
      sessionKey: matching[i].key,
      status: r.status,
      error: r.reason?.message,
    }));
  }

  /**
   * Mark the entry closed, close its input controller, and race
   * Query.close() against `queryCloseTimeoutMs`.
   * @returns {Promise<boolean>} true if close settled in time (or the
   *   entry was already closed); false on timeout. The caller removes
   *   the entry from the pool either way.
   */
  async _closeEntry(entry, reason) {
    if (entry.closed) return true;
    entry.closed = true;
    // Close the input side so the SDK's input consumer can exit.
    try { entry.inputController.close(); }
    catch { /* already closed, or a controller without close() */ }

    let timedOut = false;
    let timer = null;
    const closeP = (async () => {
      try { await entry.query.close?.(); }
      catch (err) {
        this.logger.error?.(`[${entry.label}] query.close: ${err?.message}`);
      }
    })();
    const timerP = new Promise((resolve) => {
      timer = setTimeout(() => {
        timedOut = true;
        resolve();
      }, this.queryCloseTimeoutMs);
    });
    try {
      await Promise.race([closeP, timerP]);
    } finally {
      // Without this the timer keeps the event loop alive for the full
      // timeout after every fast close.
      clearTimeout(timer);
    }
    if (timedOut) {
      this._logEvent('evict-close-timeout', {
        session_key: entry.sessionKey, reason, timeout_ms: this.queryCloseTimeoutMs,
      });
    }
    return !timedOut;
  }

  /** Drain and close every session and reject parked getOrSpawn() callers. */
  async shutdown() {
    const shutdownError = () => new Error('shutdown');
    // Reject parked callers first: closing entries signals LRU waiters,
    // and a waiter woken mid-shutdown would spawn a fresh Query.
    this._rejectLruWaiters(shutdownError());
    const entries = [...this.procs.values()];
    await Promise.allSettled(entries.map((e) => {
      this.drainQueue(e.sessionKey, 'SHUTDOWN');
      return this._closeEntry(e, 'shutdown');
    }));
    this.procs.clear();
    // Anything that parked while the closes were in flight.
    this._rejectLruWaiters(shutdownError());
  }

  // ─── Helpers ────────────────────────────────────────────────────

  /** Reject every pending on the entry with `err` and mark it idle. */
  _failAllPendings(entry, err) {
    while (entry.pendingQueue.length > 0) {
      const p = entry.pendingQueue.shift();
      p.clearTimers?.();
      try { p.reject(err); } catch { /* keep rejecting the rest */ }
    }
    entry.inFlight = false;
  }

  /**
   * Input-controller overflow hook. The controller drops its OLDEST
   * buffered message; pendings and pushed messages are assumed to be
   * 1:1 in order with index 0 in flight, so the victim is taken to be
   * pendingQueue[1]. `droppedMsg` itself is not inspected.
   */
  _handleQueueDrop(entry, droppedMsg) {
    if (entry.pendingQueue.length < 2) return;
    const dropped = entry.pendingQueue.splice(1, 1)[0];
    if (!dropped) return;
    this._rejectDropped(entry, dropped);
  }

  /**
   * Reject a pending already removed from the queue because of
   * overflow: log it, notify onQueueDrop, then reject with
   * `code: 'QUEUE_OVERFLOW'`.
   */
  _rejectDropped(entry, dropped) {
    dropped.clearTimers?.();
    const err = Object.assign(
      new Error(`queue overflow: dropped (queue cap ${this.queueCap})`),
      { code: 'QUEUE_OVERFLOW' },
    );
    this._logEvent('queue-overflow-drop', {
      session_key: entry.sessionKey,
      chat_id: entry.chatId,
      queue_len: entry.pendingQueue.length,
      source_msg_id: dropped.context?.sourceMsgId ?? null,
    });
    if (this.onQueueDrop) {
      try { this.onQueueDrop(entry.sessionKey, dropped, entry); }
      catch (cbErr) { this.logger.error?.(`[${entry.label}] onQueueDrop: ${cbErr?.message}`); }
    }
    dropped.reject(err);
  }

  /** Forward an event to `db.logEvent` when available; never throws. */
  _logEvent(kind, detail) {
    if (!this.db?.logEvent) return;
    try { this.db.logEvent(kind, detail); }
    catch (err) { this.logger.error?.(`[pm-sdk] logEvent ${kind} failed: ${err?.message}`); }
  }
}
929
+
930
+ module.exports = {
931
+ ProcessManagerSdk,
932
+ DEFAULT_CAP,
933
+ DEFAULT_QUEUE_CAP,
934
+ DEFAULT_QUERY_CLOSE_TIMEOUT_MS,
935
+ DEFAULT_TRANSIENT_RETRY_DELAY_MS,
936
+ MAX_TRANSIENT_RETRIES,
937
+ extractAssistantText,
938
+ sumUsage,
939
+ makeInputController,
940
+ };