polygram 0.9.0 → 0.10.0-rc.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1022 @@
1
+ /**
2
+ * TmuxProcess — tmux backend for the Process abstraction.
3
+ *
4
+ * One claude TUI hosted inside a `tmux` session, with capture-pane based
5
+ * lifecycle detection. Phase 2 MVP — covers required + easy-optional
6
+ * methods. The §4.B `--debug-file` structured-event channel is wired in
7
+ * Phase 3 (G7+G9 gates).
8
+ *
9
+ * Cost weight: 3 (per F-spike-2 — tmux RSS ≈10× SDK pm; weighted LRU
10
+ * budget=10 means ~3 tmux chats OR 1 tmux + 7 SDK chats co-exist).
11
+ *
12
+ * Spike findings driving this code:
13
+ * F-spike-1 — `--permission-mode acceptEdits` mirrors SDK pm default;
14
+ * no in-chat approval UI in Phase 2
15
+ * F-spike-3 — `\n` inside paste-buffer splits into multiple Enters;
16
+ * TmuxRunner.pasteText() encodes as MULTILINE_SEPARATOR
17
+ * F-spike-4 — `bypassPermissions` mode needs `--dangerously-skip-permissions`
18
+ * companion (matches SDK's allowDangerouslySkipPermissions:true)
19
+ * G5b — control-char sanitization (TmuxRunner does it; we also
20
+ * sanitize on inject path for the "no live turn" early-out)
21
+ * G6 / G6b — `? for shortcuts` / `accept edits on` = READY;
22
+ * `esc to interrupt` = STREAMING. Pair drives completion detect.
23
+ *
24
+ * R-audit findings applied:
25
+ * R1-F1 — drainQueue/injectUserMessage/steer NEVER throw
26
+ * R2-F1 — control chars stripped before any send
27
+ * R2-F7 — _spawning sentinel + _killing flag prevent races
28
+ * R2-F8 — start() vs attach() distinct; spawn errors fail loud
29
+ * R3-F4 — getContextUsage throws NotImplementedYetError, not silently ok
30
+ *
31
+ * @see docs/0.10.0-process-manager-abstraction-plan.md §12.3
32
+ * @see docs/0.10.0-phase0-spike-findings.md
33
+ */
34
+
35
+ 'use strict';
36
+
37
+ const crypto = require('crypto');
38
+ const { Process, UnsupportedOperationError } = require('./process');
39
+ const { LogTail } = require('../tmux/log-tail');
40
+ const { sessionLogPath, pipeToParser } = require('../tmux/session-log-parser');
41
+ const { computeCostUsd } = require('../model-costs');
42
+ const { getTopicConfig } = require('../session-key');
43
+
44
// Context window per model. All Claude 4.x models are 200k. If
// Anthropic ships a model with a different window, promote this to
// a lookup table again. Single constant for now — no per-model
// branching needed.
const DEFAULT_CONTEXT_WINDOW = 200_000;

// ─── TUI lifecycle indicators (locked by spike G6/G6b) ───────────────

// READY hints: claude TUI shows "? for shortcuts" when idle and ready
// for the next prompt. Under `--permission-mode acceptEdits` (our
// default), the bottom-of-pane indicator can also read "accept edits
// on" instead; treat either as ready.
const READY_HINTS_RE = /\?\s+for shortcuts|accept edits on/;
// STREAMING hint: "esc to interrupt" is drawn while claude is actively
// producing output; its presence vetoes a READY match in the same capture.
const STREAMING_HINT_RE = /esc to interrupt/;

// TUI approval-prompt indicators. When a chat is spawned WITHOUT
// --permission-mode acceptEdits, claude pauses on risky tools and
// draws a prompt like:
//
//   ⏺ Bash(rm foo.txt)
//   ⎿ Do you want to do this?
//      1. Yes
//      2. Yes, allow always for similar commands
//      3. No, and tell Claude what to do differently
//
// SECURITY (audit H1 fix): require BOTH the question text AND a
// following numbered menu line ("1. ...") so a malicious assistant
// message text like "Do you want to proceed?" can't trigger a fake
// approval card by itself. The menu is part of the TUI's pause
// state; the assistant can't render it without actually being paused.
const APPROVAL_PROMPT_RE = /Do you want to (?:proceed|do this|continue)\??[\s\S]{0,400}?(?:^|\n)\s*1\.\s+/im;
// Pull the tool name + raw arg snippet from the line preceding the
// approval prompt. Capture-pane preserves the ⏺ marker.
const TOOL_INVOCATION_RE = /⏺\s+([A-Za-z_]\w*)\s*\((.*?)\)\s*$/m;

// ─── Defaults — overridable per construction for tests ───────────────

const DEFAULT_READY_TIMEOUT_MS = 30_000; // _waitForReady budget after spawn
const DEFAULT_TURN_TIMEOUT_MS = 5 * 60_000; // hard cap on a single send() turn
const DEFAULT_POLL_MS = 250; // capture-pane poll cadence
const DEFAULT_QUIESCE_MS = 500; // require READY for this long before declaring done
85
+
86
+ class TmuxProcess extends Process {
87
  /**
   * @param {object} opts
   * @param {string} opts.sessionKey
   * @param {string|null} opts.chatId
   * @param {string|null} opts.threadId
   * @param {string} [opts.label]
   * @param {object} opts.runner — TmuxRunner instance
   * @param {string} opts.botName — for session naming + log path
   * @param {object} [opts.logger=console]
   * @param {Function} [opts.sleepFn] — test seam for polling
   * @param {Function} [opts.nowFn] — test seam for timeouts
   * @param {number} [opts.readyTimeoutMs]
   * @param {number} [opts.turnTimeoutMs]
   * @param {number} [opts.pollMs]
   * @param {number} [opts.quiesceMs]
   * @param {number} [opts.lateGraceMs=1500] — extra wait for a late JSONL
   *   'result' event after capture-pane already declared the turn done
   * @param {number} [opts.queueCap=50] — max sends queued while a turn is
   *   in flight (P0.1 parity: SDK backend enforces the same cap)
   * @param {object|null} [opts.pollScheduler=null] — optional shared poll
   *   scheduler (O1); when set, polling loops share one setInterval via
   *   acquire()/waitTick()/release() instead of per-instance setTimeout
   */
  constructor({
    sessionKey, chatId, threadId, label,
    runner, botName, logger = console,
    sleepFn, nowFn,
    readyTimeoutMs = DEFAULT_READY_TIMEOUT_MS,
    turnTimeoutMs = DEFAULT_TURN_TIMEOUT_MS,
    pollMs = DEFAULT_POLL_MS,
    quiesceMs = DEFAULT_QUIESCE_MS,
    lateGraceMs = 1500,
    queueCap = 50, // P0.1 parity: SDK enforces queueCap=50 too
    pollScheduler = null, // O1 optimization: shared cross-process tick
  } = {}) {
    super({ sessionKey, chatId, threadId, label });
    if (!runner) throw new TypeError('TmuxProcess: runner required');
    if (!botName) throw new TypeError('TmuxProcess: botName required');
    this.backend = 'tmux';
    this.runner = runner;
    this.botName = botName;
    this.logger = logger;

    // Deterministic names derived from bot + chat + thread identity.
    this.tmuxName = runner.sessionName(botName, this.chatId, this.threadId);
    this.debugLogPath = runner.debugLogPath(botName, this.chatId, this.threadId);

    // Race guards (R2-F7)
    this._spawning = null;
    this._killing = false;

    // Test seams
    this._sleep = sleepFn || ((ms) => new Promise((r) => setTimeout(r, ms)));
    this._now = nowFn || (() => Date.now());

    // Tunables
    this.readyTimeoutMs = readyTimeoutMs;
    this.turnTimeoutMs = turnTimeoutMs;
    this.pollMs = pollMs;
    this.quiesceMs = quiesceMs;
    this.lateGraceMs = lateGraceMs;
    this.queueCap = queueCap;
    // Optional shared poll scheduler. When provided, the polling
    // loops register/release lifetimes and use scheduler.waitTick()
    // instead of per-instance setTimeout — N processes share one
    // setInterval. When null, falls back to per-instance setTimeout.
    this.pollScheduler = pollScheduler;

    // Latest usage snapshot from JSONL assistant messages. Updated by
    // _handleSessionEvent on every 'usage' event; consumed by
    // getContextUsage() so polygram's post-turn auto-hint works on
    // the tmux backend just like SDK.
    this._lastUsage = null;
  }
153
+
154
  // Weighted-LRU cost (F-spike-2): a tmux-hosted chat uses ~10× the RSS
  // of an SDK chat, so it consumes 3 units of the budget=10 LRU.
  get cost() { return 3; }
155
+
156
+ // ─── Lifecycle ───────────────────────────────────────────────────
157
+
158
+ /**
159
+ * Cold-spawn the claude TUI inside a new tmux session.
160
+ *
161
+ * Accepts the standard ProcessManager spawnContext shape (same as
162
+ * SdkProcess.start), pulling model/effort/cwd from chatConfig.
163
+ *
164
+ * @param {object} ctx
165
+ * @param {string|null} [ctx.existingSessionId] — for --resume
166
+ * @param {object} [ctx.chatConfig={}] — supplies model, effort, cwd, agent, permissionMode
167
+ * @param {string} [ctx.model] — override (rare; e.g. tests)
168
+ * @param {string} [ctx.effort] — override
169
+ * @param {string} [ctx.cwd] — override
170
+ * @param {object} [ctx.envExtras={}]
171
+ */
172
+ async start(ctx = {}) {
173
+ if (this._killing) {
174
+ throw Object.assign(new Error('TmuxProcess in killing state'), { code: 'TMUX_KILLING' });
175
+ }
176
+ if (this._spawning) {
177
+ // Concurrent start() call — wait on the in-flight spawn.
178
+ await this._spawning;
179
+ return;
180
+ }
181
+
182
+ this._spawning = (async () => {
183
+ const chatConfig = ctx.chatConfig || {};
184
+ // Topic-level config overrides chat-level (mirrors SDK's
185
+ // buildSdkOptions). Without this, a chat with per-topic
186
+ // `agent`/`cwd`/`model`/`effort` overrides would silently spawn
187
+ // claude with chat-level defaults — production bug surfaced in
188
+ // 0.10.0-rc.1: Music topic's music-curation agent + rekordbox
189
+ // cwd were ignored; TUI spawned with the chat-level shumabit
190
+ // agent and didn't signal ready in 30s.
191
+ const topicConfig = getTopicConfig(chatConfig, ctx.threadId);
192
+ const model = ctx.model || topicConfig.model || chatConfig.model;
193
+ const effort = ctx.effort || topicConfig.effort || chatConfig.effort;
194
+ const cwd = ctx.cwd || topicConfig.cwd || chatConfig.cwd;
195
+ const agent = topicConfig.agent || chatConfig.agent;
196
+ const permissionMode = topicConfig.permissionMode || chatConfig.permissionMode || 'acceptEdits';
197
+
198
+ // Pre-allocate the sessionId via --session-id flag (v9 finding).
199
+ // claude accepts a valid UUID and uses it as THE session ID for the
200
+ // run; on --resume we pass the existing one. Either way we KNOW
201
+ // the sessionId at spawn time, no parsing required.
202
+ this.claudeSessionId = ctx.existingSessionId || crypto.randomUUID();
203
+
204
+ const args = [];
205
+ if (ctx.existingSessionId) {
206
+ args.push('--resume', ctx.existingSessionId);
207
+ } else {
208
+ args.push('--session-id', this.claudeSessionId);
209
+ }
210
+ if (model) args.push('--model', model);
211
+ if (effort) args.push('--effort', effort);
212
+ args.push('--permission-mode', permissionMode);
213
+ if (permissionMode === 'bypassPermissions') {
214
+ // F-spike-4: TUI rejects bypassPermissions without companion flag.
215
+ args.push('--dangerously-skip-permissions');
216
+ }
217
+ args.push('--debug-file', this.debugLogPath);
218
+ if (agent) args.push('--agent', agent);
219
+
220
+ // R2-F8: spawn errors must fail loud, not silent-catch.
221
+ await this.runner.spawn({
222
+ name: this.tmuxName,
223
+ cwd,
224
+ command: 'claude',
225
+ args,
226
+ envExtras: ctx.envExtras || {},
227
+ });
228
+
229
+ // v9: tail the per-session JSONL file (the REAL structured-event
230
+ // channel — v9 probe showed --debug-file emits only infra noise).
231
+ // Path is deterministic once we have cwd + sessionId. The file
232
+ // may not exist for ~100ms after spawn; LogTail tolerates ENOENT.
233
+ this._cwd = cwd;
234
+ this._armSessionLogTail({ resuming: Boolean(ctx.existingSessionId) });
235
+
236
+ // G6 — block until TUI is responsive.
237
+ await this._waitForReady();
238
+ this.emit('init', {
239
+ session_id: this.claudeSessionId,
240
+ label: this.label,
241
+ backend: 'tmux',
242
+ tmux_name: this.tmuxName,
243
+ });
244
+ })();
245
+
246
+ try {
247
+ await this._spawning;
248
+ } finally {
249
+ this._spawning = null;
250
+ }
251
+ }
252
+
253
+
254
+ // ─── send ─────────────────────────────────────────────────────────
255
+
256
+ /**
257
+ * Submit a turn. Resolves with PmSendResult on completion.
258
+ *
259
+ * The MVP detects completion via capture-pane diffing:
260
+ * 1. paste prompt + Enter
261
+ * 2. wait for STREAMING indicator OR up to readyTimeout (some short
262
+ * turns finish before we even see the streaming hint — that's OK,
263
+ * step 3 catches them via quiescence)
264
+ * 3. poll until READY persists for `quiesceMs`
265
+ * 4. extract assistant text from final capture
266
+ *
267
+ * Errors normalize to PmSendResult.error rather than throwing — matches
268
+ * SdkProcess contract.
269
+ *
270
+ * @param {string} prompt
271
+ * @param {object} [opts]
272
+ * @param {number} [opts.timeoutMs] — overrides turnTimeoutMs
273
+ * @param {string} [opts.context] — ignored (SDK-only, future use)
274
+ */
275
+ async send(prompt, opts = {}) {
276
+ if (this.closed) {
277
+ // Match SdkProcess contract: send() on closed Process REJECTS
278
+ // rather than returning an error result. Callers (polygram
279
+ // dispatch) already wrap pm.send in try/catch for this case.
280
+ // Runtime turn errors (paste fail, timeout) still surface as
281
+ // an error-shaped PmSendResult — that's the other path below.
282
+ throw Object.assign(new Error('No process for session'), { code: 'PROCESS_CLOSED' });
283
+ }
284
+ // P0.1 fix: enforce queueCap (parity with SDK). Without this a
285
+ // misbehaving caller could grow pendingQueue unbounded.
286
+ if (this.inFlight && this.pendingQueue.length >= this.queueCap) {
287
+ throw Object.assign(
288
+ new Error(`queue overflow: queueCap ${this.queueCap}`),
289
+ { code: 'QUEUE_OVERFLOW' },
290
+ );
291
+ }
292
+ if (this.inFlight) {
293
+ // For Phase 2 MVP we serialize: queue the prompt locally and
294
+ // await the in-flight turn. Include `context` so polygram's
295
+ // streamer/reactor lookups via pendingQueue[N].context work
296
+ // when this pending becomes the head.
297
+ return new Promise((resolve, reject) => {
298
+ this.pendingQueue.push({
299
+ prompt, opts,
300
+ context: opts.context || {},
301
+ resolve, reject,
302
+ });
303
+ });
304
+ }
305
+
306
+ this.inFlight = true;
307
+ const turnTimeoutMs = opts.timeoutMs || this.turnTimeoutMs;
308
+ const startedAt = this._now();
309
+
310
+ // P0.1 fix: push a HEAD pending with the caller's `context` so
311
+ // polygram's onStreamChunk / onToolUse / onAssistantMessageStart
312
+ // callbacks (which read entry.pendingQueue[0].context.streamer
313
+ // and entry.pendingQueue[0].context.reactor) work for tmux too.
314
+ // Without this, live bubble updates and reactor heartbeats
315
+ // silently no-op on tmux. Shape mirrors SdkProcess pendings.
316
+ const headPending = {
317
+ prompt, opts,
318
+ context: opts.context || {},
319
+ streamText: '',
320
+ };
321
+ this.pendingQueue.unshift(headPending);
322
+
323
+ // v9: prime turn-scoped event collection. Assistant chunks and
324
+ // tool-uses arriving via the JSONL tail accumulate into _turnState;
325
+ // the 'result' event resolves the turn.
326
+ this._turnState = {
327
+ text: '',
328
+ toolUses: 0,
329
+ resolveResult: null,
330
+ resultEvent: null,
331
+ pendingSteerCausesNewBubble: false,
332
+ };
333
+ const turnResultP = new Promise((resolve) => {
334
+ this._turnState.resolveResult = resolve;
335
+ });
336
+
337
+ try {
338
+ // R2-F1: sanitization happens inside runner.pasteText; we also
339
+ // log when chars get stripped.
340
+ const result = await this.runner.pasteText(this.tmuxName, prompt);
341
+ if (result.stripped > 0) {
342
+ this.logger.warn?.(
343
+ `[${this.label}] stripped ${result.stripped} control chars from prompt`,
344
+ );
345
+ this.emit('prompt-sanitized', { stripped: result.stripped, source: 'send' });
346
+ }
347
+ await this.runner.sendControl(this.tmuxName, 'Enter');
348
+
349
+ // Race: JSONL result event vs capture-pane quiescence fallback
350
+ // vs hard timeout. JSONL is the primary signal (carries structured
351
+ // text); capture-pane wins for old claude versions or if JSONL
352
+ // file write lags behind UI quiescence.
353
+ const captureAtStart = await this.runner.captureWide(this.tmuxName);
354
+ const captureCompleteP = this._awaitTurnComplete({
355
+ captureAtStart, timeoutMs: turnTimeoutMs,
356
+ });
357
+
358
+ // Whichever resolves first wins.
359
+ let resolvedVia = 'jsonl';
360
+ const winner = await Promise.race([
361
+ turnResultP.then((ev) => ({ kind: 'jsonl', ev })),
362
+ captureCompleteP.then((cap) => ({ kind: 'capture', cap })),
363
+ ]);
364
+
365
+ let text;
366
+ let resultSubtype = 'success';
367
+ let stopReason = null;
368
+ if (winner.kind === 'jsonl') {
369
+ text = this._turnState.text || winner.ev.text || '';
370
+ resultSubtype = winner.ev.subtype || 'success';
371
+ stopReason = winner.ev.stopReason || null;
372
+ // Update sessionId from the result if claude assigned a fresh one
373
+ if (winner.ev.sessionId) this.claudeSessionId = winner.ev.sessionId;
374
+ } else {
375
+ // Capture-pane won, but for short turns claude may flush JSONL
376
+ // AFTER the TUI shows ready. Wait briefly for the structured
377
+ // event so we can use its (clean) text over capture-pane diff.
378
+ //
379
+ // OPTIMIZATION: if JSONL has ALREADY delivered assistant text
380
+ // by the time capture-pane resolves, we already have the
381
+ // structured text — skip the late-grace wait entirely. Saves
382
+ // ~1.5s on every short reply where the JSONL streamed in
383
+ // during the turn.
384
+ if (this._turnState.text) {
385
+ resolvedVia = 'jsonl-streamed';
386
+ text = this._turnState.text;
387
+ } else {
388
+ const lateGraceMs = this.lateGraceMs ?? 1500;
389
+ const late = await Promise.race([
390
+ turnResultP.then((ev) => ({ kind: 'jsonl-late', ev })),
391
+ new Promise((r) => setTimeout(() => r({ kind: 'no-jsonl' }), lateGraceMs)),
392
+ ]);
393
+ if (late.kind === 'jsonl-late') {
394
+ resolvedVia = 'jsonl-late';
395
+ text = this._turnState.text || late.ev.text || '';
396
+ resultSubtype = late.ev.subtype || 'success';
397
+ stopReason = late.ev.stopReason || null;
398
+ if (late.ev.sessionId) this.claudeSessionId = late.ev.sessionId;
399
+ } else {
400
+ resolvedVia = 'capture-pane';
401
+ text = this._turnState.text || this._extractTurnReply(captureAtStart, winner.cap);
402
+ }
403
+ }
404
+ }
405
+
406
+ const duration = this._now() - startedAt;
407
+ this.emit('result', { subtype: resultSubtype, resolvedVia }, { streamText: text, stopReason });
408
+
409
+ // Token + cost telemetry from the latest JSONL usage snapshot.
410
+ // claude doesn't write cost into JSONL; we compute from token
411
+ // counts × `lib/model-costs.js` rate table. The result populates
412
+ // turn_metrics so cost dashboards work the same as SDK.
413
+ const u = this._lastUsage;
414
+ const cost = u ? computeCostUsd(u, u.model) : null;
415
+
416
+ const pmResult = {
417
+ text,
418
+ sessionId: this.claudeSessionId,
419
+ cost,
420
+ duration,
421
+ error: null,
422
+ metrics: {
423
+ inputTokens: u?.inputTokens ?? null,
424
+ outputTokens: u?.outputTokens ?? null,
425
+ cacheCreationTokens: u?.cacheCreationTokens ?? null,
426
+ cacheReadTokens: u?.cacheReadTokens ?? null,
427
+ numAssistantMessages: 1,
428
+ numToolUses: this._turnState.toolUses,
429
+ resultSubtype,
430
+ stopReason,
431
+ resolvedVia,
432
+ },
433
+ };
434
+ this._completeTurn();
435
+ return pmResult;
436
+ } catch (err) {
437
+ this._completeTurn();
438
+ return this._errorResult(err.code || 'tmux_send_error', err.message || String(err));
439
+ }
440
+ }
441
+
442
+ _completeTurn() {
443
+ this.inFlight = false;
444
+ // Shift the HEAD pending (just-completed turn). After this, the
445
+ // queue contains only items queued while inFlight (each carrying
446
+ // their own resolve/reject pair). If any, re-enter send() on the
447
+ // next one — send() will push its own fresh head pending.
448
+ this.pendingQueue.shift();
449
+ const next = this.pendingQueue.shift();
450
+ if (next && next.resolve) {
451
+ this.send(next.prompt, next.opts).then(next.resolve, next.reject);
452
+ } else {
453
+ this.emit('idle');
454
+ }
455
+ }
456
+
457
+ _errorResult(code, message) {
458
+ return {
459
+ text: '',
460
+ sessionId: this.claudeSessionId,
461
+ cost: null,
462
+ duration: 0,
463
+ error: message,
464
+ metrics: {
465
+ inputTokens: null, outputTokens: null,
466
+ cacheCreationTokens: null, cacheReadTokens: null,
467
+ numAssistantMessages: 0, numToolUses: 0,
468
+ resultSubtype: code,
469
+ },
470
+ };
471
+ }
472
+
473
+ // ─── session-log tail (§4.B JSONL path — primary event channel) ──
474
+
475
+ /**
476
+ * Open a tail on `~/.claude/projects/<cwd-encoded>/<sessionId>.jsonl`
477
+ * and forward parsed events to Process listeners.
478
+ *
479
+ * Events forwarded:
480
+ * - assistant-chunk → emit 'stream-chunk' (matches SdkProcess shape)
481
+ * - tool-use → emit 'tool-use'
482
+ * - result → resolve current turn's _turnState.resolveResult
483
+ * - last-prompt → fallback turn-complete signal
484
+ */
485
+ _armSessionLogTail({ resuming = false } = {}) {
486
+ if (this._sessionLogTail) return; // idempotent
487
+ if (!this._cwd) {
488
+ this.logger.warn?.(`[${this.label}] _armSessionLogTail: no cwd available, skipping`);
489
+ return;
490
+ }
491
+ const logPath = sessionLogPath(this._cwd, this.claudeSessionId);
492
+ // skipExisting: on --resume the JSONL already has historic turns;
493
+ // we must NOT replay them or the first new send() would prematurely
494
+ // resolve on a historic 'result' event.
495
+ // OPTIMIZATION O2: prefer fs.watch over 50ms polling — drops the
496
+ // steady-state IO from 20 stat+open/sec per chat to ~zero. Falls
497
+ // back to polling automatically if fs.watch fails (sandboxed env,
498
+ // unsupported FS). The slow safety-net poll inside LogTail catches
499
+ // any missed watch events.
500
+ const tail = new LogTail({
501
+ path: logPath, intervalMs: 50, skipExisting: resuming,
502
+ useWatch: 'auto',
503
+ logger: this.logger,
504
+ });
505
+ pipeToParser(tail);
506
+ tail.on('event', (ev) => this._handleSessionEvent(ev));
507
+ tail.on('error', (err) => {
508
+ this.logger.warn?.(`[${this.label}] session-log-tail error: ${err.message}`);
509
+ });
510
+ tail.start();
511
+ this._sessionLogTail = tail;
512
+ this._sessionLogPath = logPath;
513
+ }
514
+
515
+ _handleSessionEvent(ev) {
516
+ if (ev.type === 'assistant-chunk') {
517
+ if (this._turnState) {
518
+ // If a mid-turn steer just happened, the NEXT assistant message
519
+ // should start a fresh Telegram bubble — otherwise the post-steer
520
+ // reply visually appends to the pre-steer text bubble, making the
521
+ // user's follow-up look unanswered. Mirror SdkProcess's logic:
522
+ // emit 'assistant-message-start', reset the accumulator, clear
523
+ // the flag. Subsequent chunks within THIS new assistant message
524
+ // continue to accumulate in the fresh bubble.
525
+ if (this._turnState.pendingSteerCausesNewBubble) {
526
+ this._turnState.pendingSteerCausesNewBubble = false;
527
+ this._turnState.text = '';
528
+ this.emit('assistant-message-start');
529
+ }
530
+ // In-flight turn: accumulate text + forward as stream-chunk so
531
+ // pm consumers can render incremental output.
532
+ this._turnState.text = this._turnState.text
533
+ ? `${this._turnState.text}\n\n${ev.text}`
534
+ : ev.text;
535
+ this.emit('stream-chunk', this._turnState.text);
536
+ } else {
537
+ // No turn in flight — this is an autonomous assistant message
538
+ // (claude self-initiated; typically ScheduleWakeup firing).
539
+ // Mirror SdkProcess.onAutonomousAssistantMessage routing so
540
+ // pm consumers receive these the same way regardless of backend.
541
+ this.emit('autonomous-assistant-message', {
542
+ text: ev.text,
543
+ sessionId: this.claudeSessionId,
544
+ backend: 'tmux',
545
+ });
546
+ }
547
+ } else if (ev.type === 'tool-use') {
548
+ if (this._turnState) this._turnState.toolUses++;
549
+ this.emit('tool-use', ev.name);
550
+ } else if (ev.type === 'usage') {
551
+ // Token-usage snapshot from JSONL. Cache for getContextUsage().
552
+ // Each assistant message carries the cumulative usage; latest
553
+ // wins. Model name comes from the assistant message itself
554
+ // (e.g. "claude-haiku-4-5-20251001") so we don't need a
555
+ // chatConfig.model lookup.
556
+ //
557
+ // Compact-boundary detection: if cumulative tokens DROP between
558
+ // consecutive usage snapshots, claude auto-compacted. Emit a
559
+ // compact-boundary event mirroring SdkProcess's so polygram can
560
+ // mark the boundary in the chat exactly the same way for both
561
+ // backends.
562
+ // Use the same "full context size" formula as getContextUsage —
563
+ // input (incl. cache reads/writes) + output. Apples-to-apples
564
+ // comparison across turns; compaction shows up as a clear drop.
565
+ const prevTotal = this._lastUsage
566
+ ? ((this._lastUsage.inputTokens || 0)
567
+ + (this._lastUsage.cacheReadTokens || 0)
568
+ + (this._lastUsage.cacheCreationTokens || 0)
569
+ + (this._lastUsage.outputTokens || 0))
570
+ : 0;
571
+ const newTotal = (ev.inputTokens || 0)
572
+ + (ev.cacheReadTokens || 0)
573
+ + (ev.cacheCreationTokens || 0)
574
+ + (ev.outputTokens || 0);
575
+ if (prevTotal > 0 && newTotal < prevTotal * 0.7) {
576
+ // Tokens dropped by more than 30% — strong compaction signal.
577
+ // (Cache eviction without compaction never drops this much.)
578
+ this.emit('compact-boundary', {
579
+ trigger: 'auto',
580
+ pre_tokens: prevTotal,
581
+ post_tokens: newTotal,
582
+ backend: 'tmux',
583
+ });
584
+ }
585
+ this._lastUsage = ev;
586
+ } else if (ev.type === 'result') {
587
+ if (this._turnState && this._turnState.resolveResult) {
588
+ this._turnState.resultEvent = ev;
589
+ this._turnState.resolveResult(ev);
590
+ }
591
+ // If no turn in flight, the result event simply marks the end of
592
+ // an autonomous message segment — already handled by the
593
+ // assistant-chunk branch above.
594
+ } else if (ev.type === 'last-prompt') {
595
+ // Fallback complete signal. If 'result' didn't fire (rare; some
596
+ // claude versions may write last-prompt instead of stop_reason),
597
+ // synthesize a success result.
598
+ if (this._turnState && this._turnState.resolveResult && !this._turnState.resultEvent) {
599
+ const synthetic = {
600
+ type: 'result',
601
+ subtype: 'success',
602
+ text: this._turnState.text,
603
+ stopReason: 'last-prompt',
604
+ sessionId: this.claudeSessionId,
605
+ };
606
+ this._turnState.resultEvent = synthetic;
607
+ this._turnState.resolveResult(synthetic);
608
+ }
609
+ }
610
+ }
611
+
612
+ // ─── completion detection (§4.A capture-pane diff path — fallback) ──
613
+
614
+ /**
615
+ * Wait for the next poll tick. When a shared PollScheduler is wired,
616
+ * N concurrent TmuxProcess instances share ONE setInterval rather
617
+ * than spawning N independent setTimeout chains. Falls back to a
618
+ * per-instance setTimeout when no scheduler is provided (test path).
619
+ */
620
+ _waitForNextTick() {
621
+ if (this.pollScheduler) return this.pollScheduler.waitTick();
622
+ return this._sleep(this.pollMs);
623
+ }
624
+
625
+ async _waitForReady() {
626
+ const deadline = this._now() + this.readyTimeoutMs;
627
+ if (this.pollScheduler) this.pollScheduler.acquire();
628
+ try {
629
+ while (this._now() < deadline) {
630
+ // OPTIMIZATION: ready hint lives in the bottom ~5 lines of the
631
+ // pane. Polling 1000 lines each tick is wasteful — cap at 80
632
+ // for a ~12× cheaper tmux subprocess.
633
+ const buf = await this.runner.captureWide(this.tmuxName, { lines: 80 });
634
+ if (READY_HINTS_RE.test(buf)) return;
635
+ await this._waitForNextTick();
636
+ }
637
+ } finally {
638
+ if (this.pollScheduler) this.pollScheduler.release();
639
+ }
640
+ throw Object.assign(new Error('TmuxProcess: TUI did not signal ready'), {
641
+ code: 'TMUX_READY_TIMEOUT',
642
+ tmuxName: this.tmuxName,
643
+ });
644
+ }
645
+
646
+ /**
647
+ * Poll capture-pane until READY hint has been visible for at least
648
+ * `quiesceMs` continuously. Returns the final capture.
649
+ *
650
+ * OPTIMIZATION: polling uses a smaller `lines: 200` window (enough
651
+ * to cover the approval-prompt's tool-invocation line + menu + ready
652
+ * hint at the bottom). For the FINAL capture used to extract reply
653
+ * text, we fall back to the default 1000-line wide capture.
654
+ */
655
+ async _awaitTurnComplete({ timeoutMs }) {
656
+ const deadline = this._now() + timeoutMs;
657
+ let firstReadyAt = null;
658
+ let lastBuf = '';
659
+ let prevBufLen = -1;
660
+ let cachedReady = false;
661
+ let cachedStreaming = false;
662
+ if (this.pollScheduler) this.pollScheduler.acquire();
663
+ try {
664
+ while (this._now() < deadline) {
665
+ lastBuf = await this.runner.captureWide(this.tmuxName, { lines: 200 });
666
+
667
+ // OPTIMIZATION: skip the three regex tests when the capture
668
+ // buffer is identical (by length) to the previous tick. claude
669
+ // TUI is usually quiescent between events, so most polls see no
670
+ // change — running 3 regexes over a 200-line buffer each tick
671
+ // is wasted CPU. Length-compare is a probabilistic check
672
+ // (collisions theoretically possible) but in practice the
673
+ // bottom of the pane shifts even a few bytes whenever claude
674
+ // emits anything observable.
675
+ const bufLenChanged = lastBuf.length !== prevBufLen;
676
+ if (bufLenChanged) {
677
+ prevBufLen = lastBuf.length;
678
+ cachedReady = READY_HINTS_RE.test(lastBuf);
679
+ cachedStreaming = STREAMING_HINT_RE.test(lastBuf);
680
+ // Approval-prompt detection ONLY runs on changed captures.
681
+ // It's the heaviest regex (`[\s\S]{0,400}?` non-greedy) so
682
+ // worth skipping on quiescent ticks.
683
+ if (APPROVAL_PROMPT_RE.test(lastBuf)) {
684
+ await this._handleApprovalPrompt(lastBuf);
685
+ firstReadyAt = null; // approval pause resets ready clock
686
+ await this._waitForNextTick();
687
+ continue;
688
+ }
689
+ }
690
+
691
+ const isReady = cachedReady;
692
+ const isStreaming = cachedStreaming;
693
+ if (isReady && !isStreaming) {
694
+ if (firstReadyAt == null) firstReadyAt = this._now();
695
+ if (this._now() - firstReadyAt >= this.quiesceMs) return lastBuf;
696
+ } else {
697
+ firstReadyAt = null;
698
+ }
699
+ await this._waitForNextTick();
700
+ }
701
+ throw Object.assign(new Error('TmuxProcess: turn did not complete in time'), {
702
+ code: 'TMUX_TURN_TIMEOUT',
703
+ tmuxName: this.tmuxName,
704
+ });
705
+ } finally {
706
+ if (this.pollScheduler) this.pollScheduler.release();
707
+ }
708
+ }
709
+
710
+ /**
711
+ * Surface an in-pane approval prompt to consumers. Emits a single
712
+ * `approval-required` event per prompt instance — dedup tracked via
713
+ * `_pendingApprovalId`. The event payload includes a `respond()`
714
+ * callback the consumer invokes with 'allow' | 'deny' | string
715
+ * (free-form feedback for the "no, tell claude what to do" path).
716
+ *
717
+ * Until respond() is called, subsequent captures showing the same
718
+ * prompt are no-ops — the TUI stays paused, we stay parked.
719
+ */
720
+ async _handleApprovalPrompt(captureBuf) {
721
+ if (this._pendingApprovalId) return; // already surfaced
722
+ // Parse tool name + input from the line preceding the prompt.
723
+ // capture-pane joins wrapped lines (-J) so the regex sees the
724
+ // single ⏺ line.
725
+ const match = captureBuf.match(TOOL_INVOCATION_RE);
726
+ const toolName = match ? match[1] : 'unknown';
727
+ const toolInput = match ? match[2] : '';
728
+ const id = `approval-${this.tmuxName}-${this._now()}`;
729
+ this._pendingApprovalId = id;
730
+
731
+ this.emit('approval-required', {
732
+ id,
733
+ toolName,
734
+ toolInput,
735
+ sessionId: this.claudeSessionId,
736
+ backend: 'tmux',
737
+ respond: (decision, message) => this.respondToApproval(id, decision, message),
738
+ });
739
+ }
740
+
741
+ /**
742
+ * Send the approval decision back to the TUI.
743
+ *
744
+ * @param {string} id — must match the most recent approval
745
+ * @param {string} decision — 'allow' | 'deny' (or 'always-allow')
746
+ * @param {string} [message] — used when decision === 'deny' for the
747
+ * "no, and tell Claude what to do differently" path
748
+ */
749
+ async respondToApproval(id, decision, message) {
750
+ if (this._pendingApprovalId !== id) {
751
+ // Stale or duplicate — ignore. Real TUI has moved past this prompt.
752
+ return false;
753
+ }
754
+ const choice = decision === 'allow' ? '1'
755
+ : decision === 'always-allow' ? '2'
756
+ : '3';
757
+ try {
758
+ // SECURITY (audit H2 fix): always paste the menu choice ALONE
759
+ // first + Enter, then paste the feedback message as a separate
760
+ // step. Pre-P0.6 we did `3 ${message}` on one line — if the
761
+ // feedback string happened to start with a digit, claude's
762
+ // menu parser could misinterpret. Splitting eliminates the
763
+ // ambiguity entirely.
764
+ await this.runner.pasteText(this.tmuxName, choice);
765
+ await this.runner.sendControl(this.tmuxName, 'Enter');
766
+ if (choice === '3' && message) {
767
+ // claude TUI prompts for the "tell Claude what to do
768
+ // differently" follow-up; paste the message + Enter.
769
+ await this.runner.pasteText(this.tmuxName, message);
770
+ await this.runner.sendControl(this.tmuxName, 'Enter');
771
+ }
772
+ this._pendingApprovalId = null;
773
+ return true;
774
+ } catch (err) {
775
+ this.emit('approval-fail', { id, err: err.message });
776
+ return false;
777
+ }
778
+ }
779
+
780
+ /**
781
+ * Best-effort: text between the start-of-turn snapshot and the
782
+ * post-completion snapshot. The capture-pane diff strategy is
783
+ * intentionally crude in MVP — Phase 3 will switch to --debug-file
784
+ * for structured assistant-message extraction.
785
+ */
786
+ _extractTurnReply(captureAtStart, captureAtEnd) {
787
+ if (!captureAtEnd) return '';
788
+ if (captureAtStart && captureAtEnd.startsWith(captureAtStart)) {
789
+ return captureAtEnd.slice(captureAtStart.length).trim();
790
+ }
791
+ // Fallback: return whatever's after the user's last prompt marker.
792
+ return captureAtEnd.trim();
793
+ }
794
+
795
+ // ─── interrupts / control ────────────────────────────────────────
796
+
797
+ // Return-value parity with SdkProcess: these return boolean
798
+ // (true on success, false on closed/no-op/error) so pm.* wrappers
799
+ // and callers can branch uniformly across backends.
800
+
801
+ async interrupt() {
802
+ if (this.closed) return false;
803
+ try { await this.runner.sendControl(this.tmuxName, 'C-c'); }
804
+ catch (err) {
805
+ this.logger.error?.(`[${this.label}] interrupt: ${err.message}`);
806
+ return false;
807
+ }
808
+ this.emit('interrupt-applied', { backend: 'tmux' });
809
+ return true;
810
+ }
811
+
812
+ async setModel(model) {
813
+ if (this.closed || !model) return false;
814
+ try {
815
+ // Slash commands go through pasteText so embedded multibyte
816
+ // chars in arg are safe. (Model names are ASCII, but uniform.)
817
+ await this.runner.pasteText(this.tmuxName, `/model ${model}`);
818
+ await this.runner.sendControl(this.tmuxName, 'Enter');
819
+ return true;
820
+ } catch (err) {
821
+ this.logger.error?.(`[${this.label}] setModel: ${err.message}`);
822
+ return false;
823
+ }
824
+ }
825
+
826
+ async applyFlagSettings(settings = {}) {
827
+ if (this.closed) return false;
828
+ if (!settings.effortLevel) return false;
829
+ try {
830
+ await this.runner.pasteText(this.tmuxName, `/effort ${settings.effortLevel}`);
831
+ await this.runner.sendControl(this.tmuxName, 'Enter');
832
+ return true;
833
+ } catch (err) {
834
+ this.logger.error?.(`[${this.label}] applyFlagSettings: ${err.message}`);
835
+ return false;
836
+ }
837
+ }
838
+
839
+ async setPermissionMode(mode) {
840
+ if (this.closed || !mode) return false;
841
+ try {
842
+ await this.runner.pasteText(this.tmuxName, `/permission-mode ${mode}`);
843
+ await this.runner.sendControl(this.tmuxName, 'Enter');
844
+ return true;
845
+ } catch (err) {
846
+ this.logger.error?.(`[${this.label}] setPermissionMode: ${err.message}`);
847
+ return false;
848
+ }
849
+ }
850
+
851
+ /**
852
+ * Fire-and-forget user-message paste. Used by polygram's slash-command
853
+ * paths (/compact). Unlike injectUserMessage (mid-turn fold only),
854
+ * this works regardless of inFlight state — the TUI either folds
855
+ * (if mid-stream) or starts a new turn (if idle). Fire-and-forget.
856
+ */
857
+ fireUserMessage(text) {
858
+ if (this.closed) return false;
859
+ if (typeof text !== 'string' || !text) return false;
860
+ const safe = text.replace(/[\x00-\x08\x0b-\x1f\x7f]/g, '');
861
+ if (!safe) return false;
862
+ Promise.resolve()
863
+ .then(() => this.runner.pasteText(this.tmuxName, safe))
864
+ .then(() => this.runner.sendControl(this.tmuxName, 'Enter'))
865
+ .catch((err) => {
866
+ this.logger.error?.(`[${this.label}] fireUserMessage: ${err.message}`);
867
+ });
868
+ return true;
869
+ }
870
+
871
+ async resetSession() {
872
+ // Drain locally-queued pendings before /new fires.
873
+ const drained = this.drainQueue('RESET_SESSION');
874
+ await this.runner.pasteText(this.tmuxName, '/new');
875
+ await this.runner.sendControl(this.tmuxName, 'Enter');
876
+ this.claudeSessionId = null;
877
+ return { closed: false, drainedPendings: drained };
878
+ }
879
+
880
  async getContextUsage() {
    // Compute from the latest assistant-message usage snapshot in the
    // session JSONL. Returns the same shape SdkProcess does so polygram's
    // formatContextReply + maybeContextFullHint helpers work identically
    // for both backends.
    //
    // Notes:
    // - totalTokens = input + cache_read + cache_creation + output
    //   (input + both cache buckets is what the SDK reports as
    //   "context window in use"; output is added on top — see the
    //   forward-looking rationale below the _lastUsage guard)
    // - maxTokens defaults to 200k (all Claude 4.x models). If a
    //   future model has a different window, add the lookup here.
    // - claude TUI auto-compacts around 85% of the window; surface
    //   that so the chat hint "I'll auto-compact when needed" stays
    //   accurate.
    if (this.closed) {
      // Parity with SdkProcess: after the Process is killed, treat
      // the snapshot as unavailable rather than returning stale cached
      // data. Polygram's /context handler maps this to "send a message
      // first" on both backends.
      throw new UnsupportedOperationError('getContextUsage', this.backend);
    }
    if (!this._lastUsage) {
      // No turn has completed yet — no usage snapshot available.
      throw new UnsupportedOperationError('getContextUsage', this.backend);
    }
    const u = this._lastUsage;
    // Each assistant message's `usage` block is cumulative for THIS
    // turn — claude's API always receives the full conversation
    // history every turn (cache just affects pricing, not context
    // size). So input + cache_read + cache_creation = full prompt
    // size that just landed at claude.
    //
    // PLUS output_tokens: claude's just-emitted reply IS now part of
    // the conversation. Next turn will see (this turn's input) +
    // (this turn's output) as its input. The "70% full" warning is
    // about predicting the next compaction trigger, so include the
    // output to be accurate forward-looking.
    const totalTokens = (u.inputTokens || 0)
      + (u.cacheReadTokens || 0)
      + (u.cacheCreationTokens || 0)
      + (u.outputTokens || 0);
    const maxTokens = DEFAULT_CONTEXT_WINDOW;
    // Guard division: percentage is 0 when the window constant is 0.
    const percentage = maxTokens > 0 ? (totalTokens / maxTokens) * 100 : 0;
    return {
      percentage,
      totalTokens,
      maxTokens,
      model: u.model,
      isAutoCompactEnabled: true,
      autoCompactThreshold: 85,
    };
  }
932
+
933
+ // ─── HOT-PATH sync — must NOT throw (R1-F1) ──────────────────────
934
+
935
+ /**
936
+ * Reject all local pendings with the supplied code. Returns count.
937
+ * No-throw contract — autosteer's call site has no try/catch.
938
+ */
939
+ drainQueue(code = 'INTERRUPTED') {
940
+ const drained = this.pendingQueue.length;
941
+ if (drained === 0) return 0;
942
+ const err = Object.assign(new Error(`drained:${code}`), { code });
943
+ while (this.pendingQueue.length > 0) {
944
+ const p = this.pendingQueue.shift();
945
+ // Head pending (currently-running turn) has no resolve/reject —
946
+ // it returns directly via send()'s promise chain. Skip rejection
947
+ // for those; the send() flow handles errors via _errorResult.
948
+ if (p && typeof p.reject === 'function') {
949
+ try { p.reject(err); } catch (e) {
950
+ this.logger.error?.(`[${this.label}] drainQueue reject: ${e.message}`);
951
+ }
952
+ }
953
+ }
954
+ this.emit('queue-drop', drained);
955
+ return drained;
956
+ }
957
+
958
+ /**
959
+ * Inject text into the in-flight turn. Fire-and-forget paste; errors
960
+ * surface via 'inject-fail' event, never as a thrown exception.
961
+ *
962
+ * @returns {boolean} false if no live turn (caller falls through to
963
+ * pm.send queue path) OR if content sanitized to empty.
964
+ */
965
+ injectUserMessage({ content, priority = 'next', shouldQuery } = {}) {
966
+ if (!this.inFlight || this.closed) return false;
967
+ // Mirror R2-F1: sanitize even though pasteText also sanitizes.
968
+ // We need to detect empty-after-sanitize here so caller can fall
969
+ // through (pasteText would happily send the empty string).
970
+ const safe = String(content || '').replace(/[\x00-\x08\x0b-\x1f\x7f]/g, '');
971
+ if (!safe) return false;
972
+
973
+ Promise.resolve()
974
+ .then(() => this.runner.pasteText(this.tmuxName, safe))
975
+ .then(() => this.runner.sendControl(this.tmuxName, 'Enter'))
976
+ .catch((err) => this.emit('inject-fail', { err: err.message }));
977
+
978
+ // Tell the next assistant-chunk to open a fresh Telegram bubble
979
+ // so the post-steer reply visually follows the user's mid-turn
980
+ // message instead of appending to the pre-steer bubble. Mirrors
981
+ // SdkProcess's pendingSteerCausesNewBubble flag.
982
+ if (this._turnState) {
983
+ this._turnState.pendingSteerCausesNewBubble = true;
984
+ }
985
+
986
+ this.emit('inject-user-message', { text_len: safe.length, priority, shouldQuery });
987
+ return true;
988
+ }
989
+
990
+ /**
991
+ * Steer — semantically same as inject for tmux backend (TUI has no
992
+ * priority='now' channel; the bracketed-paste-aware buffer folds at
993
+ * the next pause regardless). Returns boolean.
994
+ */
995
+ steer(text, opts = {}) {
996
+ return this.injectUserMessage({ content: text, priority: 'now', ...opts });
997
+ }
998
+
999
+ // ─── teardown ────────────────────────────────────────────────────
1000
+
1001
+ async kill(reason = 'kill') {
1002
+ if (this._killing) return;
1003
+ this._killing = true;
1004
+ this.closed = true;
1005
+ this.drainQueue('KILLED');
1006
+ if (this._sessionLogTail) {
1007
+ try { this._sessionLogTail.close(); } catch { /* swallow */ }
1008
+ this._sessionLogTail = null;
1009
+ }
1010
+ await this.runner.killSession(this.tmuxName);
1011
+ // P1.3 close-event parity: emit integer code first (matches SDK
1012
+ // shape `0`/`1`). Optional second arg carries tmux-specific
1013
+ // metadata for consumers that want it. Polygram's onClose only
1014
+ // reads the code today; the second arg is informational.
1015
+ this.emit('close', 0, { reason, backend: 'tmux' });
1016
+ this.emit('idle'); // pm signals LRU waiter
1017
+ }
1018
+ }
1019
+
1020
// Named export keeps parity with the other backend modules' shape.
module.exports = { TmuxProcess };