npm - polygram - Versions diffs - 0.12.0-rc.1 → 0.12.0-rc.10 - Mend

polygram 0.12.0-rc.1 → 0.12.0-rc.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

package/config.example.json +3 -1
package/lib/attachments.js +46 -2
package/lib/handlers/abort.js +38 -1
package/lib/ipc/file-validator.js +8 -1
package/lib/process/channels-tool-dispatcher.js +20 -2
package/lib/process/cli-process.js +274 -52
package/lib/process/factory.js +0 -5
package/lib/process-manager.js +13 -0
package/lib/sdk/callbacks.js +110 -5
package/lib/telegram/api.js +9 -0
package/lib/telegram/input-file.js +76 -0
package/lib/telegram/reactions.js +5 -0
package/lib/tmux/log-tail.js +11 -1
package/lib/tmux/startup-gate.js +65 -1
package/package.json +1 -1
package/polygram.js +34 -12

package/config.example.json CHANGED Viewed

@@ -4,6 +4,7 @@
   "bots": {
     "admin-bot": {
       "token": "REPLACE_WITH_BOT_TOKEN_FROM_BOTFATHER",
+      "_comment_apiRoot": "Optional. Point grammy at a self-hosted Telegram Bot API server (e.g. 'http://localhost:8082' from a local `telegram-bot-api --local` process) to raise file send/receive limits from cloud's 50MB-out / 20MB-in to 2GB both ways. Omit for cloud Telegram (default, unchanged). The server is a separate localhost-only companion daemon — see docs/0.12.0-file-send.md.",
       "allowConfigCommands": true,
       "_comment_adminChatId": "Required when allowConfigCommands is true for pairing commands (/pair-code, /pairings, /unpair) to work. These grant cross-chat trust and are gated to the admin chat only.",
       "adminChatId": "123456789",
@@ -70,7 +71,8 @@
       "model": "opus",
       "effort": "medium",
       "cwd": "/Users/you/admin-agent",
-      "timeout": 600
+      "timeout": 600,
+      "_comment_maxFileBytes": "OPTIONAL per-chat (or per-topic; topic wins) file-size cap in BYTES. There is NO fixed default — the default is backend-derived: cloud Telegram = 50MB send / 20MB receive; with a local Bot API server (bot.apiRoot set) = 2GB both ways. This key only LOWERS that ceiling for this chat (Telegram rejects anything above the backend limit regardless); omit it to use the full backend default. To set one, add e.g. \"maxFileBytes\": 104857600 (=100MB) — only meaningful when apiRoot is set, since cloud already clamps to 50/20MB."
     },
     "-1000000000001": {

package/lib/attachments.js CHANGED Viewed

@@ -22,8 +22,48 @@
  *     extension — the fallback only kicks in when MIME is unhelpful.
  */
-const MAX_FILE_BYTES = 10 * 1024 * 1024;
-const MAX_TOTAL_BYTES = 20 * 1024 * 1024;
+// Inbound (user → bot) per-file cap. Telegram's cloud Bot API hard-caps
+// bot file DOWNLOADS (getFile) at 20 MB, so 20 MB is the real ceiling on
+// cloud — raised from 10 MB so users can send larger tracks/docs. With a
+// self-hosted Bot API server (config.bot.apiRoot) the Telegram limit rises
+// to 2 GB; resolveFileCaps() raises the default accordingly.
+const MAX_FILE_BYTES = 20 * 1024 * 1024;
+const MAX_TOTAL_BYTES = 50 * 1024 * 1024;
+// ─── Backend-derived file-size caps (cloud vs local Bot API server) ──
+//
+// These are the HARD ceilings Telegram itself enforces — a per-chat
+// override can lower them but never exceed them (Telegram rejects beyond
+// regardless). NOT "adaptive": there is no intermediate tier. Cloud is a
+// flat 20 in / 50 out; a local `telegram-bot-api --local` server is a flat
+// 2 GB both ways.
+const CLOUD_MAX_IN_BYTES  = 20 * 1024 * 1024;          // getFile download limit
+const CLOUD_MAX_OUT_BYTES = 50 * 1024 * 1024;          // sendDocument upload limit
+const LOCAL_MAX_BYTES     = 2000 * 1024 * 1024;        // --local server, both ways
+/**
+ * Resolve the effective per-file size caps (in bytes) for a chat/topic.
+ *
+ * Without an override the caps are the backend defaults: LOCAL_MAX_BYTES in
+ * both directions when a local Bot API server is in use, otherwise
+ * CLOUD_MAX_IN_BYTES inbound / CLOUD_MAX_OUT_BYTES outbound. A valid override
+ * can only lower a cap: each direction is Math.min(override, that
+ * direction's backend limit), so on cloud the two directions may still end
+ * up different (e.g. a 30 MB override yields 20 MB in / 30 MB out).
+ *
+ * @param {object} [opts]
+ * @param {boolean} [opts.localApi=false] — true when config.bot.apiRoot is
+ *   set (a local Bot API server is in use → LOCAL_MAX_BYTES ceiling).
+ * @param {?number} [opts.override=null] — per-chat/topic maxFileBytes (bytes).
+ *   Resolved by the caller (topic first, then chat, else null/undefined).
+ *   Anything that is not a number greater than 0 (null, undefined, strings,
+ *   0, negatives, NaN) is ignored and the backend defaults apply.
+ * @returns {{ inBytes:number, outBytes:number, ceiling:number, localApi:boolean }}
+ *   inBytes/outBytes are the effective caps; ceiling is LOCAL_MAX_BYTES when
+ *   localApi is true and CLOUD_MAX_OUT_BYTES otherwise; localApi echoes the
+ *   input flag.
+ */
+function resolveFileCaps({ localApi = false, override = null } = {}) {
+  const ceiling = localApi ? LOCAL_MAX_BYTES : null;
+  const defIn  = localApi ? LOCAL_MAX_BYTES : CLOUD_MAX_IN_BYTES;
+  const defOut = localApi ? LOCAL_MAX_BYTES : CLOUD_MAX_OUT_BYTES;
+  // A positive numeric override is applied to BOTH directions, each clamped
+  // to its own backend limit (local: the shared ceiling; cloud: the
+  // per-direction default), so it can never push past Telegram's own limit.
+  const ovr = (typeof override === 'number' && override > 0) ? override : null;
+  const inBytes  = ovr ? (localApi ? Math.min(ovr, ceiling) : Math.min(ovr, CLOUD_MAX_IN_BYTES))  : defIn;
+  const outBytes = ovr ? (localApi ? Math.min(ovr, ceiling) : Math.min(ovr, CLOUD_MAX_OUT_BYTES)) : defOut;
+  return { inBytes, outBytes, ceiling: ceiling ?? CLOUD_MAX_OUT_BYTES, localApi };
+}
 const MIME_ALLOW = [
   /^image\//, /^audio\//, /^video\//,
   /^application\/pdf$/, /^text\/plain$/,
@@ -109,8 +149,12 @@ function filterAttachments(attachments, opts = {}) {
 module.exports = {
   filterAttachments,
+  resolveFileCaps,
   MAX_FILE_BYTES,
   MAX_TOTAL_BYTES,
+  CLOUD_MAX_IN_BYTES,
+  CLOUD_MAX_OUT_BYTES,
+  LOCAL_MAX_BYTES,
   MIME_ALLOW,
   EXTENSION_ALLOW,
   FALLBACK_MIMES,

package/lib/handlers/abort.js CHANGED Viewed

@@ -42,13 +42,37 @@ function createHandleAbort({
     const threadId = msg.message_thread_id?.toString();
     const sessionKey = getSessionKey(chatId, threadId, chatConfig);
     const proc = pm.has(sessionKey) ? pm.get(sessionKey) : null;
-    const hadActive = !!proc?.inFlight;
+    let hadActive = !!proc?.inFlight;
     // Mark BEFORE killing: the 'close' event fires almost immediately
     // after interrupt, and the surrounding handleMessage's catch
     // needs to see the flag to skip the generic error-reply.
     if (hadActive) markSessionAborted(sessionKey);
+    // "Stop" incident (shumorobot Music, 2026-05-31 13:08): on the
+    // CliProcess/channels backend a turn resolves on the quiet-window
+    // after claude's last reply tool call (inFlight → false), but claude
+    // can still be working (subagent, long Bash). Keying the ack on
+    // inFlight alone made "Stop" say "Nothing to stop" while a subagent
+    // download churned. probeBusyState() reads the TUI "esc to interrupt"
+    // hint — the truthful signal — so detection, the abort mark, and the
+    // ack all agree. The probe result is logged below (forensics) so the
+    // heuristic can be refined against real states later. Channels analog
+    // of the (deleted) tmux hasBackgroundShell branch; typeof-guarded so
+    // it's a no-op on backends without it.
+    let busyProbe = null;
+    if (!hadActive && proc && typeof proc.probeBusyState === 'function') {
+      try {
+        busyProbe = await proc.probeBusyState();
+        if (busyProbe?.busy) {
+          hadActive = true;
+          markSessionAborted(sessionKey);
+        }
+      } catch (err) {
+        logger.error?.(`[${botName}] busy-probe failed: ${err.message}`);
+      }
+    }
     // Bug 1 (incident 2026-05-18): "Stop" was turn-scoped — it only
     // looked at an in-flight TURN. But the agent can leave a DETACHED
     // background shell running (a `run_in_background:true` Bash) that
@@ -87,6 +111,19 @@ function createHandleAbort({
       chat_id: chatId, user_id: msg.from?.id || null,
       had_active: hadActive,
       killed_background_shell: killedBackgroundShell,
+      // "Stop" incident forensics: the raw busy-probe signals at decision
+      // time. Lets us query, across real aborts, where the esc-hint /
+      // inFlight / pending-turn signals agreed vs diverged and refine the
+      // heuristic later. null when no probe ran (turn was already inFlight,
+      // or the backend has no probeBusyState).
+      busy_probe: busyProbe ? {
+        busy: busyProbe.busy,
+        streaming: busyProbe.streaming,
+        in_flight: busyProbe.inFlight,
+        pending_turns: busyProbe.pendingTurns,
+        captured: busyProbe.captured,
+        pane_tail: busyProbe.paneTail,
+      } : null,
       trigger: cleanText.slice(0, 40),
     });

package/lib/ipc/file-validator.js CHANGED Viewed

@@ -50,7 +50,14 @@ function validateIpcFileParam(method, params = {}) {
   const fileParam = FILE_PARAM_BY_METHOD[method];
   if (!fileParam) return null;
   const val = params[fileParam];
-  if (typeof val !== 'string') return null;       // envelope/Buffer/etc — pass through
+  // { source: '/abs/path' } envelope — now coerced to a grammy InputFile in
+  // tg() (coerceFileParams). Only an empty source string is rejected here (the
+  // path itself is not checked); other objects (Buffer / stream / InputFile) pass through.
+  if (val && typeof val === 'object' && typeof val.source === 'string') {
+    if (val.source.length === 0) return `polygram IPC: ${fileParam}.source is empty`;
+    return null;
+  }
+  if (typeof val !== 'string') return null;       // Buffer/InputFile/etc — pass through
   if (val.length === 0) return `polygram IPC: ${fileParam} is empty`;
   const looksUrl = /^(https?|ftp):\/\//i.test(val);

package/lib/process/channels-tool-dispatcher.js CHANGED Viewed

@@ -125,7 +125,7 @@ function createChannelsToolDispatcher({
     || require('../telegram/process-agent-reply').processAndDeliverAgentText;
   return async function channelsToolDispatcher(call) {
-    const { sessionKey, chatId, threadId, toolName, text, files, sourceMsgId } = call;
+    const { sessionKey, chatId, threadId, toolName, text, files, sourceMsgId, maxOutboundFileBytes } = call;
     if (toolName !== 'reply') {
       // 0.11.0 Phase 1 ships `reply` only — react and edit_message are
@@ -196,6 +196,21 @@ function createChannelsToolDispatcher({
             failedAttachments.push({ path: filePath, error: check.error });
             continue;
           }
+          // Backend/chat-derived upload cap. Reject oversize BEFORE upload with
+          // a clear error (vs Telegram's cryptic 413/"file is too big") so
+          // claude can convert/compress and retry. maxOutboundFileBytes is
+          // undefined for non-channels callers → no cap (Telegram still gates).
+          if (typeof maxOutboundFileBytes === 'number' && maxOutboundFileBytes > 0) {
+            let size = 0;
+            try { size = fs.statSync(check.resolved).size; } catch {}
+            if (size > maxOutboundFileBytes) {
+              const mb = (n) => (n / (1024 * 1024)).toFixed(1);
+              const err = `file too large to send: ${mb(size)}MB > ${mb(maxOutboundFileBytes)}MB limit`;
+              logger.warn?.(`[channels-tool-dispatcher] ${err} (${check.resolved})`);
+              failedAttachments.push({ path: filePath, error: err });
+              continue;
+            }
+          }
           try {
             const ext = path.extname(check.resolved).toLowerCase();
             const isImage = ['.jpg', '.jpeg', '.png', '.gif', '.webp'].includes(ext);
@@ -203,7 +218,10 @@ function createChannelsToolDispatcher({
             const fieldName = isImage ? 'photo' : 'document';
             const params = {
               chat_id: chatId,
-              [fieldName]: { source: check.resolved },
+              // { source } envelope → grammy InputFile in tg()'s coerceFileParams.
+              // Pre-fix this bare object reached grammy unrecognized and every
+              // upload 400'd with "Wrong port number" (file-send never worked).
+              [fieldName]: { source: check.resolved, filename: path.basename(check.resolved) },
             };
             if (threadId) params.message_thread_id = threadId;
             await send(bot, method, params, { source: 'channels-tool-dispatcher', sessionKey });

package/lib/process/cli-process.js CHANGED Viewed

@@ -48,6 +48,11 @@ const { Process, UnsupportedOperationError } = require('./process');
 const { ChannelsBridgeServer } = require('./channels-bridge-server');
 const { writeHookFiles, removeHookFiles } = require('./hook-settings');
 const { createHookTail } = require('./hook-event-tail');
+// File-send staging: reuse the dispatcher's allowlist root so the dir we
+// create exactly matches the realpath the validator accepts (no /tmp vs
+// /private/tmp drift — one of the original Music-topic failures).
+const { DEFAULT_ATTACHMENT_BASE } = require('./channels-tool-dispatcher');
+const { resolveFileCaps } = require('../attachments');
 const { runStartupGate } = require('../tmux/startup-gate');
 const { POLYGRAM_DISPLAY_HINT } = require('../telegram/display-hint');
@@ -251,6 +256,10 @@ class CliProcess extends Process {
     // pending turn(s): turn_id → { resolve, reject, replies: [], quietTimer, hardTimer, startedAt }
     this.pendingTurns = new Map();
+    // File-send outbound cap (bot → user). Safe cloud default; overwritten in
+    // _spawnTmuxClaude with the backend/chat-resolved value before any turn.
+    this.maxOutboundFileBytes = resolveFileCaps({ localApi: false }).outBytes;
     // P1 security (review #8): track resolved permission request_ids so a
     // double-fire of respond() can't write a second perm_verdict for the same
     // request. TmuxProcess gates on _pendingApprovalId; this is the channels
@@ -297,6 +306,23 @@ class CliProcess extends Process {
     // permit files under the agent's workspace.
     this.sessionCwd = opts.cwd || null;
+    // File-send staging dir (2026-06 file-send feature). The dispatcher
+    // allowlist always permits <DEFAULT_ATTACHMENT_BASE>/<sessionKey>/, but
+    // nothing ever CREATED it — so claude's reply(files) attempts at
+    // /tmp/polygram-attachments failed (dir absent / realpath mismatch) and
+    // it flailed across other paths. Create it here and surface it to the
+    // prompt so claude has one blessed, always-allowed place to stage a file
+    // before sending. realpathSync so the stored path matches what the
+    // validator resolves (the /tmp ↔ /private/tmp fix).
+    try {
+      const dir = path.join(DEFAULT_ATTACHMENT_BASE, String(this.sessionKey));
+      fs.mkdirSync(dir, { recursive: true, mode: 0o700 });
+      this.attachmentStagingDir = fs.realpathSync(dir);
+    } catch (err) {
+      this.attachmentStagingDir = null;
+      this.logger.warn?.(`[${this.label}] channels: staging dir create failed: ${err.message}`);
+    }
     // Opaque random token for socket filename — do NOT leak sessionKey to /tmp.
     const socketToken = crypto.randomBytes(16).toString('hex');
     this.sockPath = path.join(os.tmpdir(), `polygram-${socketToken}.sock`);
@@ -416,28 +442,7 @@ class CliProcess extends Process {
     this.bridgeServer.on('bridge-message', msg => this._handleBridgeMessage(msg));
-    this.bridgeServer.on('bridge-disconnected', () => {
-      this.bridgeReady = false;
-      this.mcpReady = false;
-      if (!this.closed) {
-        this.logger.warn?.(`[${this.label}] channels: bridge disconnected unexpectedly`);
-        // P1 #5: drain pendingTurns immediately so hardTimers don't run 10min.
-        for (const [, pending] of this.pendingTurns) {
-          if (pending.quietTimer) clearTimeout(pending.quietTimer);
-          if (pending.hardTimer) clearTimeout(pending.hardTimer);
-          if (pending.absoluteTimer) clearTimeout(pending.absoluteTimer);
-          if (pending._stopGraceTimer) clearTimeout(pending._stopGraceTimer);
-          const err = new Error('bridge disconnected');
-          err.code = 'BRIDGE_DISCONNECTED';
-          try { pending.reject(err); } catch {}
-        }
-        this.pendingTurns.clear();
-        this.pendingQueue.length = 0;
-        this.inFlight = false;
-        this.emit('bridge-disconnected');
-        this._logEvent('bridge-disconnected', { reason: 'socket-close' });
-      }
-    });
+    this.bridgeServer.on('bridge-disconnected', () => this._handleBridgeDisconnected());
     await this.bridgeServer.listen();
   }
@@ -493,6 +498,18 @@ class CliProcess extends Process {
     const effort = topicConfig?.effort || opts.chatConfig?.effort || opts.effort;
     const resolvedCwd = topicConfig?.cwd || opts.chatConfig?.cwd || opts.cwd;
+    // File-send outbound cap (bot → user). Backend-derived (cloud 50MB vs
+    // local Bot API server 2GB via opts.localApi) with per-topic/chat
+    // maxFileBytes override, clamped to the backend ceiling. Stored for the
+    // dispatcher (live size-check) and the system prompt (so claude states
+    // the right limit). Resolved here so it follows the same topic→chat
+    // precedence as cwd/agent above.
+    const _capOverride = topicConfig?.maxFileBytes ?? opts.chatConfig?.maxFileBytes ?? null;
+    this.maxOutboundFileBytes = resolveFileCaps({
+      localApi: !!opts.localApi,
+      override: _capOverride,
+    }).outBytes;
     // Parity audit P8 + rc.8 fs-guard (2026-05-26 shumorobot Music topic):
     // `--session-id <id>` creates a NEW claude session with that id;
     // `--resume <id>` resumes the EXISTING conversation. Lazy-respawn after
@@ -540,6 +557,9 @@ class CliProcess extends Process {
         );
       }
     }
+    // Finding 0.12-M2: record the resume decision so _armHookTail (run
+    // after spawn) skips the prior session's still-on-disk hook ndjson.
+    this._resumedSession = canResume;
     if (agent)  claudeArgs.push('--agent', agent);
     if (model)  claudeArgs.unshift('--model', model);
     if (effort) claudeArgs.push('--effort', effort);
@@ -616,6 +636,28 @@ class CliProcess extends Process {
       'Internal tool calls (Bash, Edit, Write, Read, etc.) are fine to use',
       'as normal — only the FINAL user-visible message needs to go through',
       'the reply tool.',
+      '',
+      '### Sending FILES (tracks, images, docs) to the user',
+      '',
+      'The `mcp__polygram-bridge__reply` tool takes an optional `files` array of',
+      'absolute paths. This is the ONLY way to send a file. Do NOT use Bash,',
+      'curl, the Telegram Bot API, or polygram-ipc to send files — those fail.',
+      '',
+      ...(this.attachmentStagingDir ? [
+        `To send a file: COPY it into the staging dir \`${this.attachmentStagingDir}\`,`,
+        'then call reply with its absolute path, e.g.:',
+        `  reply(chat_id="<id>", text="Here's the track", files=["${this.attachmentStagingDir}/track.flac"])`,
+        'polygram auto-deletes staged files after the turn — you do not need to clean up.',
+        'You may also send directly from the agent workspace (cwd); other paths are rejected.',
+      ] : [
+        'Copy the file somewhere under your workspace (cwd) and pass its absolute',
+        'path in `files`. Paths outside the workspace are rejected for safety.',
+      ]),
+      '',
+      `Max file size for sending: ${Math.round(this.maxOutboundFileBytes / (1024 * 1024))} MB. ` +
+        'For larger lossless audio, convert to FLAC/MP3 under the limit first, ' +
+        'or tell the user it exceeds the limit. Images go as photos; everything ' +
+        'else as documents.',
     ].join('\n'));
     // Parity audit P6: honor isolateUserConfig — mirrors tmux pattern at
@@ -705,6 +747,20 @@ class CliProcess extends Process {
       ],
       readySignal: /Listening for channel messages from: server:polygram-bridge/i,
       timeoutCode: 'CHANNELS_DIALOG_TIMEOUT',
+      // Progress-aware gate (shumorobot General incident 2026-05-30): a
+      // cold spawn that's mid-download (runtime fetch, "24%" progress bar)
+      // is genuinely working and must NOT be killed by the blind 30s
+      // wall-clock. stallMs fails fast only when the pane is FROZEN; an
+      // actively-changing pane (download bar, dialog nav) keeps resetting
+      // the stall clock and rides out to the ready signal. deadlineMs stays
+      // the absolute backstop. stallMs of zero pane activity = genuinely wedged.
+      // Stall = pane rendered then went static (genuinely wedged). 60s, not
+      // 30s: some topics' TUIs cold-render slowly (Music ~45s, slow MCP
+      // startup) — 30s was too tight and false-aborted them. Blank panes
+      // don't arm the stall timer at all now (see runStartupGate), so this
+      // only bounds a TUI that rendered and then truly hung.
+      stallMs: this.startupGateStallMs ?? 60_000,
+      deadlineMs: this.startupGateDeadlineMs ?? 180_000,
       logger: this.logger,
       label: `${this.label}:startup-gate`,
     });
@@ -849,15 +905,18 @@ class CliProcess extends Process {
     // rate-limit / chat-id-mismatch path. Live shumorobot 2026-05-26 23:44
     // observed 3+ "Called polygram-bridge" entries in the TUI pane with
     // ZERO OUT messages delivered to TG and zero warn-level diagnostics —
-    // need to see args.text / args.chat_id / args.turn_id to know whether
-    // claude is calling reply with empty text, wrong chat_id, or something
-    // else entirely.
-    this.logger.warn?.(
+    // need to see args.chat_id / args.turn_id to know whether claude is
+    // calling reply with empty text, wrong chat_id, or something else.
+    // L13: root-caused — demoted to debug and DROPPED text_head. Logging
+    // the first 80 chars of every reply at warn level leaked private chat
+    // content / file excerpts / secrets into the default log sink,
+    // unconditionally. name/chat_id/turn_id/text_len diagnose dispatch
+    // without exposing message content.
+    this.logger.debug?.(
       `[${this.label}] channels: tool-call name=${msg.name} ` +
       `chat_id=${JSON.stringify(args.chat_id)} ` +
       `turn_id=${JSON.stringify(args.turn_id)} ` +
-      `text_len=${typeof args.text === 'string' ? args.text.length : 'non-string'} ` +
-      `text_head=${JSON.stringify((args.text || '').slice(0, 80))}`,
+      `text_len=${typeof args.text === 'string' ? args.text.length : 'non-string'}`,
     );
     // Review P1 #7: idempotency. If we've already ACK'd this tool_call_id,
@@ -948,6 +1007,7 @@ class CliProcess extends Process {
         text: args.text,
         files: args.files,
         sessionCwd: this.sessionCwd,        // P0 #2: dispatcher uses this to allowlist file roots
+        maxOutboundFileBytes: this.maxOutboundFileBytes, // backend/chat-derived upload cap
       });
     } catch (err) {
       this._writeToBridge({ kind: 'tool_ack', tool_call_id: msg.tool_call_id, ok: false, error: err.message });
@@ -1122,13 +1182,27 @@ class CliProcess extends Process {
       this._finalizeTurn(turnId);
     };
     const onStop = (info) => {
-      // Capture the fallback text; the actual finalize call below will pick
-      // it up via pending._stopHookData.
+      // Finding 0.12-M1: the Stop hook carries NO turn_id, and a single
+      // global 'stop-hook' emission fires EVERY per-turn onStop listener.
+      // When more than one turn is in stop-grace we cannot attribute this
+      // Stop (or its last_assistant_message) to a specific turn — the
+      // pre-fix code let one Stop finalize all grace-pending turns and
+      // cross-attribute one turn's text to another (the exact class the
+      // F#3 reply routing prevents). Mirror that drop-rather-than-
+      // misattribute discipline: only consume the Stop when exactly ONE
+      // turn is in grace; otherwise ignore it and let each turn finalize
+      // on its own grace timer (each keeps its own reply text).
+      let graceCount = 0;
+      for (const p of this.pendingTurns.values()) if (p._stopGracePending) graceCount++;
+      if (graceCount !== 1) return;
       pending._stopHookData = info;
       clearTimeout(pending._stopGraceTimer);
       pending._stopGraceTimer = null;
       finalize();
     };
+    // L5: stash the closure so teardown paths that bypass Process.kill()'s
+    // removeAllListeners (bridge-disconnect drain, resetSession) can off it.
+    pending._onStop = onStop;
     pending._stopGraceTimer = setTimeout(finalize, this.stopGraceMs);
     // unref so a never-fired grace doesn't pin the event loop. In tests
     // where a CliProcess is created, send() is called, then the test
@@ -1195,6 +1269,27 @@ class CliProcess extends Process {
     pending.resolve(result);
     this.emit('result', { subtype: 'success' }, { streamText: text });
     this.emit('idle');
+    // File-send staging auto-purge (by design polygram cleans up; claude is never asked to delete).
+    // Once the LAST turn settles, wipe the staging dir's contents so files
+    // claude copied in to send don't accumulate on disk across turns. Only
+    // when fully idle, so a file staged for a still-pending concurrent turn
+    // isn't yanked mid-send.
+    if (this.pendingTurns.size === 0) this._purgeStagingDir();
+  }
+  /**
+   * Empty the per-session file-send staging dir (keep the dir itself).
+   *
+   * Synchronous and best-effort; never throws:
+   *   - no-op when this.attachmentStagingDir is unset (falsy);
+   *   - returns silently when the directory cannot be listed;
+   *   - removes every entry recursively (files and subdirectories alike),
+   *     ignoring failures on individual entries so the rest are still tried.
+   *
+   * Called when the session goes idle (no pending turns left) and,
+   * per the original note, on kill — NOTE(review): the kill call site is not
+   * visible from here; confirm.
+   *
+   * @returns {void}
+   */
+  _purgeStagingDir() {
+    if (!this.attachmentStagingDir) return;
+    let entries;
+    try { entries = fs.readdirSync(this.attachmentStagingDir); }
+    catch { return; }
+    for (const name of entries) {
+      try { fs.rmSync(path.join(this.attachmentStagingDir, name), { recursive: true, force: true }); }
+      catch { /* best-effort: one entry failing to delete must not stop the rest */ }
+    }
   }
   // ─── public Process API ──────────────────────────────────────────
@@ -1386,6 +1481,63 @@ class CliProcess extends Process {
     this._interruptGraceTimer.unref?.();
   }
+  /**
+   * Is claude actually still working, regardless of the resolved-turn flag?
+   *
+   * "Stop" incident (shumorobot Music, 2026-05-31 13:08): the channels
+   * backend resolves a turn on the quiet-window after claude's last reply
+   * tool call (inFlight → false), but claude can keep working afterwards
+   * (a subagent, a long Bash). The abort handler keyed its ack on inFlight
+   * alone, so "Stop" said "Nothing to stop" one second after the bot said
+   * "On it — downloading…" while a subagent churned.
+   *
+   * The TUI prints "esc to interrupt" (STREAMING_HINT_RE) continuously
+   * whenever claude is busy — capture-pane is the truthful signal, the
+   * channels analog of the (deleted) tmux hasBackgroundShell() probe.
+   *
+   * Returns a STRUCTURED probe (not just a boolean) so the abort path can
+   * log the raw signals — pane tail + flags — to the events DB. That lets
+   * us later characterize which states the heuristic gets right/wrong and
+   * refine it (e.g. add signals beyond the esc-hint) without guessing.
+   *
+   * Result fields:
+   *   - busy / streaming: both are the result of STREAMING_HINT_RE.test(pane).
+   *     busy is derived from the pane hint ONLY; inFlight and pendingTurns
+   *     are reported for forensics but do not feed into it.
+   *   - inFlight / pendingTurns: snapshot of this.inFlight and
+   *     this.pendingTurns.size taken BEFORE the pane capture is awaited.
+   *   - captured: true only when a non-empty pane was obtained.
+   *   - paneTail: the last 200 characters of the captured pane, else null.
+   *     This is raw TUI text — NOTE(review): it can presumably contain
+   *     conversation content, so callers that persist it should treat it
+   *     as sensitive.
+   *
+   * The all-false "base" result is returned without capturing when the
+   * process is closed, has no tmuxSession, or the runner lacks captureWide;
+   * and after capturing when captureWide rejects or yields an empty pane.
+   *
+   * Never throws — a failed capture returns captured:false, busy:false
+   * (the captureWide rejection is caught and logged at warn level).
+   *
+   * NOTE(review): STREAMING_HINT_RE is defined outside this block; if it
+   * carries the g or y flag, .test() is stateful via lastIndex and repeated
+   * probes could alternate results — confirm it is declared without them.
+   *
+   * @returns {Promise<{busy:boolean, streaming:boolean, inFlight:boolean,
+   *   pendingTurns:number, captured:boolean, paneTail:(string|null)}>}
+   */
+  async probeBusyState() {
+    const base = {
+      busy: false, streaming: false,
+      inFlight: this.inFlight, pendingTurns: this.pendingTurns.size,
+      captured: false, paneTail: null,
+    };
+    if (this.closed || !this.tmuxSession || typeof this.runner?.captureWide !== 'function') {
+      return base;
+    }
+    let pane;
+    try {
+      pane = await this.runner.captureWide(this.tmuxSession);
+    } catch (err) {
+      this.logger.warn?.(`[${this.label}] channels: probeBusyState captureWide failed: ${err.message}`);
+      return base;
+    }
+    if (!pane) return base; // empty/absent capture is reported as "not captured"
+    const streaming = STREAMING_HINT_RE.test(pane);
+    return {
+      ...base,
+      busy: streaming,
+      streaming,
+      captured: true,
+      paneTail: pane.slice(-200), // last 200 chars only, for forensics
+    };
+  }
+  /**
+   * Boolean shorthand for probeBusyState().busy, for callers that only need
+   * the yes/no answer and not the forensic fields.
+   *
+   * Runs a full probe (including the pane capture) on every call; nothing is
+   * cached. Resolves false whenever the probe returns its all-false base
+   * result (closed process, no tmux session, failed or empty capture).
+   *
+   * @returns {Promise<boolean>} true when the captured pane shows the
+   *   streaming hint.
+   */
+  async isBusy() {
+    const { busy } = await this.probeBusyState();
+    return busy;
+  }
   async kill(reason = 'kill') {
     if (this.closed) return;
     // Parity P19: re-entry guard for concurrent kill() calls. Mirrors
@@ -1415,17 +1567,18 @@ class CliProcess extends Process {
       this.logger.warn?.(`[${this.label}] _armHookTail: _hookNdjsonPath unset; hooks disabled. Phase 1.2 may have failed.`);
       return;
     }
-    // Fresh spawn: ndjson was just touched by writeHookFiles and is empty,
-    // so `skipExisting: false` (default) is correct. For lazy-respawn on
-    // existingSessionId, we currently re-run writeHookFiles which touches
-    // a NEW file with the same name (overwrite). If we ever switch to
-    // resume-without-touch, set skipExisting: true to avoid replaying
-    // stale events from the prior process — same pattern tmux uses on
-    // --resume per rc.42 #5.
+    // Finding 0.12-M2: writeHookFiles opens the ndjson in APPEND mode
+    // ('a') and never truncates, so on a --resume respawn the prior
+    // session's hook lines are still on disk under the same path. Replaying
+    // them re-drives the turn state machine from stale Stop/PreToolUse
+    // events (a stale Stop can finalize the fresh turn). So skip existing
+    // content when (and only when) this is a resumed session — the same
+    // discipline the JSONL tail uses on --resume. A fresh spawn's ndjson is
+    // empty, so skipExisting:false is correct there.
     this._hookTail = createHookTail({
       path: this._hookNdjsonPath,
       logger: this.logger,
-      skipExisting: false,
+      skipExisting: this._resumedSession === true,
     });
     this._hookTail.on('event', (ev) => {
       try {
@@ -1465,25 +1618,18 @@ class CliProcess extends Process {
     // gates tag-out on median < 2s and p99 < 5s across the events DB.
     if (Number.isFinite(ev.receivedAtMs)) {
       const lagMs = Date.now() - ev.receivedAtMs;
+      // L10: emit ONLY — the onHookLagSample callback owns the DB write
+      // (CALLBACK_TO_EVENT → callbacks.js). Previously this ALSO wrote
+      // directly via this.db.logEvent, double-persisting every sample and
+      // inflating the Phase 1.8 soak-gate row count. Consistent with how
+      // tool-result / subagent-start / subagent-done are handled (emit,
+      // don't double-write).
       this.emit('hook-lag-sample', {
         hookEventName: ev.type,
         lagMs,
         toolName: ev.toolName || null,
         backend: this.backend,
       });
-      // Log to events DB if wired. db is optional (factory injects when
-      // available) — same pattern as the other parity-P1 _logEvent calls.
-      if (this.db?.logEvent) {
-        try {
-          this.db.logEvent('hook-lag-sample', {
-            session_key: this.sessionKey,
-            backend: this.backend,
-            hook_event_name: ev.type,
-            tool_name: ev.toolName || null,
-            lag_ms: lagMs,
-          });
-        } catch {}
-      }
     }
     switch (ev.type) {
@@ -1503,6 +1649,16 @@ class CliProcess extends Process {
           const subagentType = ev.toolInput?.subagent_type
             || ev.toolInput?.agent_type
             || 'general-purpose';
+          // Finding 0.12-M4: SubagentStop carries agent_id/agent_type but
+          // NOT the originating Agent tool_use_id, so without help the
+          // subagent-start/subagent-done rows share no JOIN key (the
+          // documented soak query on $.tool_use_id returns zero rows).
+          // Queue the in-flight Agent tool_use_id together with its subagent
+          // type so the paired SubagentStop below can stamp it onto subagent-done.
+          (this._pendingSubagentStarts ||= []).push({
+            agentType: subagentType,
+            toolUseId: ev.toolUseId,
+          });
           this.emit('subagent-start', {
             agentType: subagentType,
             // PreToolUse for Agent carries no agent_id (set later on
@@ -1541,14 +1697,27 @@ class CliProcess extends Process {
         });
         return;
-      case 'SubagentStop':
+      case 'SubagentStop': {
+        // Finding 0.12-M4: recover the originating Agent tool_use_id so the
+        // subagent-start/subagent-done pair is JOINable. Prefer a match on
+        // agent type (correct for parallel subagents of different types);
+        // fall back to the oldest pending start when types don't line up.
+        let subagentToolUseId = null;
+        const pendingStarts = this._pendingSubagentStarts;
+        if (pendingStarts && pendingStarts.length) {
+          let idx = pendingStarts.findIndex(s => s.agentType === ev.agentType);
+          if (idx < 0) idx = 0;
+          subagentToolUseId = pendingStarts.splice(idx, 1)[0]?.toolUseId ?? null;
+        }
         this.emit('subagent-done', {
           agentType: ev.agentType,
           agentId: ev.agentId,
           durationMs: ev.durationMs,
+          toolUseId: subagentToolUseId,
           backend: this.backend,
         });
         return;
+      }
       case 'Stop':
         // Phase 1.7 (TODO) will use this as the authoritative turn-end
@@ -1665,6 +1834,50 @@ class CliProcess extends Process {
     }
   }
+  /**
+   * Drain on unexpected bridge socket loss (claude crash, bridge crash,
+   * EOF). Extracted from the inline 'bridge-disconnected' handler so the
+   * teardown is testable and consistent with _doKill.
+   *
+   * Findings 0.12-L5 + L6: in addition to clearing the per-turn timers
+   * and rejecting pendings (the original P1 #5 behavior), this now also
+   * (L5) removes each turn's stop-hook listener — this drain does NOT go
+   * through Process.kill()'s blanket removeAllListeners, so a turn torn
+   * down mid-stop-grace would otherwise leak its onStop closure — and
+   * (L6) clears _interruptGraceTimer, matching _doKill (a /stop verdict
+   * landing just before the disconnect would otherwise leave a stray
+   * timer on the dead instance).
+   */
+  _handleBridgeDisconnected() {
+    this.bridgeReady = false;
+    this.mcpReady = false;
+    if (this.closed) return;
+    this.logger.warn?.(`[${this.label}] channels: bridge disconnected unexpectedly`);
+    // L6: clear the interrupt grace timer alongside the rest of the lifecycle.
+    if (this._interruptGraceTimer) {
+      clearTimeout(this._interruptGraceTimer);
+      this._interruptGraceTimer = null;
+    }
+    // P1 #5: drain pendingTurns immediately so hardTimers don't run 10min.
+    for (const [, pending] of this.pendingTurns) {
+      if (pending.quietTimer) clearTimeout(pending.quietTimer);
+      if (pending.hardTimer) clearTimeout(pending.hardTimer);
+      if (pending.absoluteTimer) clearTimeout(pending.absoluteTimer);
+      if (pending._stopGraceTimer) clearTimeout(pending._stopGraceTimer);
+      // L5: remove the per-turn stop-hook listener (this path bypasses
+      // Process.kill()'s removeAllListeners).
+      if (pending._onStop) this.off('stop-hook', pending._onStop);
+      const err = new Error('bridge disconnected');
+      err.code = 'BRIDGE_DISCONNECTED';
+      try { pending.reject(err); } catch {}
+    }
+    this.pendingTurns.clear();
+    this.pendingQueue.length = 0;
+    this.inFlight = false;
+    this.emit('bridge-disconnected');
+    this._logEvent('bridge-disconnected', { reason: 'socket-close' });
+  }
   async _doKill(reason) {
     this.closed = true;
     this.inFlight = false;
@@ -1688,6 +1901,7 @@ class CliProcess extends Process {
       if (pending.hardTimer) clearTimeout(pending.hardTimer);
       if (pending.absoluteTimer) clearTimeout(pending.absoluteTimer);
       if (pending._stopGraceTimer) clearTimeout(pending._stopGraceTimer);
+      if (pending._onStop) this.off('stop-hook', pending._onStop); // L5
       const err = new Error(`session killed: ${reason}`);
       err.code = 'KILLED';
       pending.reject(err);
@@ -1734,6 +1948,12 @@ class CliProcess extends Process {
     if (this.botName && this.claudeSessionId) {
       try { removeHookFiles({ botName: this.botName, sessionId: this.claudeSessionId }); } catch {}
     }
+    // File-send staging: remove the whole per-session dir on kill (purge only
+    // empties it between turns; kill is end-of-life so drop it entirely).
+    if (this.attachmentStagingDir) {
+      try { fs.rmSync(this.attachmentStagingDir, { recursive: true, force: true }); } catch {}
+      this.attachmentStagingDir = null;
+    }
     this.emit('close', 0);
   }
@@ -1876,6 +2096,8 @@ class CliProcess extends Process {
       if (pending.quietTimer) clearTimeout(pending.quietTimer);
       if (pending.hardTimer) clearTimeout(pending.hardTimer);
       if (pending.absoluteTimer) clearTimeout(pending.absoluteTimer);
+      if (pending._stopGraceTimer) clearTimeout(pending._stopGraceTimer); // L5
+      if (pending._onStop) this.off('stop-hook', pending._onStop);        // L5
       const err = new Error(`session reset: ${reason}`);
       err.code = 'RESET';
       try { pending.reject(err); } catch {}

package/lib/process/factory.js CHANGED Viewed

@@ -91,10 +91,6 @@ function _maybeWarnR12Migration({ rawPm, canonical, chatId, threadId, chatCfg, t
  * @param {number} [opts.queryCloseTimeoutMs]
  * @param {object} [opts.tmuxRunner]       — required when ANY chat routes to 'cli'
  * @param {string} [opts.botName]          — required when ANY chat routes to 'cli'
- * @param {object} [opts.pollScheduler]    — DEPRECATED in 0.12 — was used by the
- *   removed tmux backend to share one setInterval across all chats; CliProcess's
- *   per-session pongWatchdog handles its own cadence. Param kept for caller
- *   back-compat; ignored. Will be removed in 0.13.
  * @param {Function} [opts.toolDispatcher] — required when ANY chat routes to 'cli'.
  *   async ({sessionKey, chatId, threadId, toolName, text, files}) => {ok, error?}.
  *   Called when Claude's reply (or react/edit_message) tool fires inside a
@@ -113,7 +109,6 @@ function createProcessFactory({
   queryCloseTimeoutMs,
   tmuxRunner = null,
   botName = null,
-  pollScheduler = null,
   toolDispatcher = null,
   channelsClaudeBin = null,
 } = {}) {

package/lib/process-manager.js CHANGED Viewed

@@ -123,6 +123,19 @@ const CALLBACK_TO_EVENT = {
   // menu auto-dismissed by `_waitForReady`. Surfacing the event so
   // soak can count how often aged-session resumes hit this path.
   onSessionAgePromptDismissed:  'session-age-prompt-dismissed',
+  // 0.12 CliProcess observability — typed hook events from cli-process.js
+  // _handleHookEvent. Each gets its own callback so polygram can persist
+  // structured rows to the events DB for soak-time aggregate queries.
+  //   - hook-lag-sample: Phase 1.8 — per-event lag_ms (target: median<2s, p99<5s)
+  //   - tool-result:     Phase 1.3 — PostToolUse durationMs per tool
+  //   - subagent-start / subagent-done: Phase 1.3 — typed subagent lifecycle
+  //     (we DO get tool-use='Agent' via onToolUse, but agent_type + durationMs
+  //      only fire on these typed events). SDK backend never emits — hooks
+  //     are CliProcess-specific (and were tmux-specific in 0.10–0.11).
+  onHookLagSample:              'hook-lag-sample',
+  onToolResult:                 'tool-result',
+  onSubagentStart:              'subagent-start',
+  onSubagentDone:               'subagent-done',
 };
 class ProcessManager {

package/lib/sdk/callbacks.js CHANGED Viewed

@@ -464,7 +464,10 @@ function createSdkCallbacks({
         const detail = {
           chat_id: getChatIdFromKey(sessionKey),
           session_key: sessionKey,
-          backend: 'tmux',
+          // Finding 0.12-M3: tmux backend was deleted in 0.12; these hook
+          // handlers only ever fire on the CLI driver now — default to 'cli'
+          // (honor an explicit payload.backend if a caller ever sets one).
+          backend: payload?.backend ?? 'cli',
           hook_type:           payload?.type ?? null,
           claude_session_id:   payload?.sessionId ?? null,
           tool_name:           payload?.toolName ?? null,
@@ -555,7 +558,7 @@ function createSdkCallbacks({
         logEvent('turn-timeout', {
           chat_id: getChatIdFromKey(sessionKey),
           session_key: sessionKey,
-          backend: 'tmux',
+          backend: payload?.backend ?? 'cli', // Finding 0.12-M3
           turn_id:             payload?.turnId ?? null,
           reason:              payload?.reason ?? null,
           idle_ms:             payload?.idleMs ?? null,
@@ -578,7 +581,7 @@ function createSdkCallbacks({
         logEvent('hook-tail-error', {
           chat_id: getChatIdFromKey(sessionKey),
           session_key: sessionKey,
-          backend: 'tmux',
+          backend: payload?.backend ?? 'cli', // Finding 0.12-M3 (fires on the CLI hook tail)
           message:           (payload?.message || '').slice(0, 200),
           path:              payload?.path ?? null,
           claude_session_id: payload?.sessionId ?? null,
@@ -596,7 +599,7 @@ function createSdkCallbacks({
         logEvent('stop-hook-resolved', {
           chat_id: getChatIdFromKey(sessionKey),
           session_key: sessionKey,
-          backend: 'tmux',
+          backend: payload?.backend ?? 'cli', // Finding 0.12-M3
           turn_id:           payload?.turnId ?? null,
           claude_session_id: payload?.sessionId ?? null,
         });
@@ -614,7 +617,7 @@ function createSdkCallbacks({
         logEvent('session-age-prompt-dismissed', {
           chat_id: getChatIdFromKey(sessionKey),
           session_key: sessionKey,
-          backend: 'tmux',
+          backend: payload?.backend ?? 'cli', // Finding 0.12-M3
           claude_session_id: payload?.sessionId ?? null,
         });
       } catch (err) {
@@ -622,6 +625,108 @@ function createSdkCallbacks({
       }
     },
+    // 0.12 Phase 1.8 — hook-lag persistence for the soak gate (median<2s,
+    // p99<5s). Each row carries the hookEventName + lagMs so we can:
+    //   SELECT json_extract(detail_json, '$.hook_event_name') AS evt,
+    //          AVG(json_extract(detail_json, '$.lag_ms')) AS avg_lag,
+    //          MAX(json_extract(detail_json, '$.lag_ms')) AS max_lag
+    //   FROM events WHERE kind='hook-lag-sample' AND ts>...
+    //   GROUP BY evt;
+    onHookLagSample: (sessionKey, payload /* , entry */) => {
+      try {
+        logEvent('hook-lag-sample', {
+          chat_id: getChatIdFromKey(sessionKey),
+          session_key: sessionKey,
+          backend: payload?.backend ?? 'cli',
+          hook_event_name: payload?.hookEventName ?? null,
+          lag_ms: payload?.lagMs ?? null,
+          tool_name: payload?.toolName ?? null,
+        });
+      } catch (err) {
+        logger.error?.(`[${botName}] hook-lag-sample handler: ${err.message}`);
+      }
+    },
+    // 0.12 Phase 1.3 — tool-result with durationMs. Pairs with the
+    // existing onToolUse row (which fires on PreToolUse) so the soak can
+    // compute per-tool average + p99 durations:
+    //   SELECT json_extract(detail_json, '$.tool_name') AS tool,
+    //          AVG(json_extract(detail_json, '$.duration_ms')) AS avg_ms,
+    //          MAX(json_extract(detail_json, '$.duration_ms')) AS max_ms
+    //   FROM events WHERE kind='tool-result' GROUP BY tool;
+    // isError captures the rare PostToolUse where the tool itself failed
+    // (vs the tool succeeding but claude deciding to retry).
+    onToolResult: (sessionKey, payload /* , entry */) => {
+      try {
+        logEvent('tool-result', {
+          chat_id: getChatIdFromKey(sessionKey),
+          session_key: sessionKey,
+          backend: payload?.backend ?? 'cli',
+          tool_name: payload?.name ?? null,
+          duration_ms: payload?.durationMs ?? null,
+          agent_id: payload?.agentId ?? null,
+          agent_type: payload?.agentType ?? null,
+          tool_use_id: payload?.toolUseId ?? null,
+          is_error: payload?.isError === true,
+        });
+      } catch (err) {
+        logger.error?.(`[${botName}] tool-result handler: ${err.message}`);
+      }
+    },
+    // 0.12 Phase 1.3 — subagent lifecycle. PreToolUse with name='Agent'
+    // synthesizes 'subagent-start' (no agent_id yet — claude doesn't
+    // hand one out until the inner SubagentStop). 'subagent-done' carries
+    // the agent_id + duration_ms so a soak can correlate the pair:
+    //   SELECT s.detail_json AS start, d.detail_json AS done
+    //   FROM events s JOIN events d
+    //     ON json_extract(s.detail_json, '$.tool_use_id') =
+    //        json_extract(d.detail_json, '$.tool_use_id')
+    //   WHERE s.kind='subagent-start' AND d.kind='subagent-done';
+    onSubagentStart: (sessionKey, payload, entry) => {
+      try {
+        logEvent('subagent-start', {
+          chat_id: getChatIdFromKey(sessionKey),
+          session_key: sessionKey,
+          backend: payload?.backend ?? 'cli',
+          agent_type: payload?.agentType ?? null,
+          tool_use_id: payload?.toolUseId ?? null,
+        });
+        // Findings L9/L14: drive the head reactor into the distinct SUBAGENT
+        // state so a running subagent shows 👾 rather than freezing on the
+        // prior tool's emoji. The plan promised this; previously the handler
+        // only persisted the DB row and never touched the reactor.
+        const r = entry?.pendingQueue?.[0]?.context?.reactor;
+        if (r) r.setState('SUBAGENT');
+      } catch (err) {
+        logger.error?.(`[${botName}] subagent-start handler: ${err.message}`);
+      }
+    },
+    onSubagentDone: (sessionKey, payload, entry) => {
+      try {
+        // L9/L14: heartbeat at subagent end so the cascade/stall clock
+        // resets; the next tool's PreToolUse sets the following state.
+        const r = entry?.pendingQueue?.[0]?.context?.reactor;
+        if (r && typeof r.heartbeat === 'function') r.heartbeat();
+        logEvent('subagent-done', {
+          chat_id: getChatIdFromKey(sessionKey),
+          session_key: sessionKey,
+          backend: payload?.backend ?? 'cli',
+          agent_type: payload?.agentType ?? null,
+          agent_id: payload?.agentId ?? null,
+          duration_ms: payload?.durationMs ?? null,
+          // Finding 0.12-M4: persist the originating Agent tool_use_id so the
+          // documented subagent-start/subagent-done soak JOIN on
+          // $.tool_use_id matches (subagent-done's tool_use_id is recovered
+          // in cli-process.js from the paired Agent PreToolUse).
+          tool_use_id: payload?.toolUseId ?? null,
+        });
+      } catch (err) {
+        logger.error?.(`[${botName}] subagent-done handler: ${err.message}`);
+      }
+    },
     onInjectFail: (sessionKey, payload /* , entry */) => {
       try {
         const msgId = payload?.msgId;

package/lib/telegram/api.js CHANGED Viewed

@@ -28,6 +28,7 @@ const {
   getRetryAfterMs,
 } = require('./format');
 const { isSafeToRetry, redactBotToken } = require('../error/net');
+const { coerceFileParams } = require('./input-file');
 // Topic deletion race: a user can delete a forum topic while a turn is in
 // flight, turning a valid `message_thread_id` into a 404. Telegram's error
@@ -112,6 +113,14 @@ async function send({ bot, method, params, db = null, meta = {}, logger = consol
   const chatId = params.chat_id != null ? String(params.chat_id) : null;
   const threadId = params.message_thread_id != null ? String(params.message_thread_id) : null;
+  // File-upload bug fix (2026-05-31): coerce a `{ source: '/abs/path' }`
+  // file param into a grammy InputFile so local-file uploads actually work.
+  // grammy doesn't recognize the bare envelope → it failed every send with
+  // "Wrong port number". Single choke point: fixes channels reply(files)
+  // AND the IPC send path at once. No-op for non-file methods / file_id /
+  // URL strings / existing InputFile instances.
+  coerceFileParams(method, params);
   // 0.7.4: empty-text short-circuit. Pre-fix, an empty params.text on
   // sendMessage/editMessageText reached Telegram and 400'd with
   // "message text is empty"; the row was marked failed and propagated

package/lib/telegram/input-file.js ADDED Viewed

@@ -0,0 +1,76 @@
+/**
+ * input-file — coerce file-upload params into grammy InputFile instances.
+ *
+ * The bug (2026-05-31, shumorobot Music): callers passed a Telegraf-style
+ * `{ source: '/abs/path' }` envelope as the file param (document/photo/…).
+ * grammy 1.x does NOT recognize that shape — it's not an InputFile, so
+ * grammy serializes it as a plain object and Telegram tries to read it as
+ * a URL/file_id, failing with "invalid file HTTP URL: Wrong port number".
+ * Result: file-send NEVER worked (channels reply(files) AND the IPC path
+ * both produced this exact error). The existing dispatcher test used a fake
+ * `send` and only asserted the METHOD, so it couldn't catch the bad shape.
+ *
+ * grammy uploads a local file only when the param is `new InputFile(path)`.
+ * This helper normalizes, at the single send choke point (tg()), the
+ * `{ source: <abs path> }` envelope → `new InputFile(path)`, leaving every
+ * other shape untouched:
+ *   - string file_id / https URL  → pass through (Telegram resolves)
+ *   - existing InputFile instance → pass through (already correct)
+ *   - Buffer / stream            → pass through (grammy handles)
+ *
+ * Only the explicit `{ source: string }` envelope is transformed — bare
+ * path strings are intentionally NOT coerced (a Telegram file_id is also a
+ * bare string; coercing would break sends-by-id).
+ */
+'use strict';
+const { InputFile } = require('grammy');
+// method → the params field that carries the file.
+const FILE_FIELD_BY_METHOD = {
+  sendPhoto: 'photo',
+  sendDocument: 'document',
+  sendAudio: 'audio',
+  sendVideo: 'video',
+  sendAnimation: 'animation',
+  sendVoice: 'voice',
+  sendVideoNote: 'video_note',
+};
+/**
+ * Return a grammy-uploadable value for a single file param, or the original
+ * value unchanged if it's not the `{ source }` envelope we coerce.
+ */
+function coerceFileValue(val) {
+  if (val && typeof val === 'object' && !(val instanceof InputFile)
+      && typeof val.source === 'string' && val.source.length > 0) {
+    // { source: '/abs/path' } | { source: 'https://…', filename } → InputFile
+    return new InputFile(val.source, val.filename);
+  }
+  return val;
+}
+/**
+ * Mutate `params` in place so its file field (if any) is grammy-uploadable.
+ * No-op for non-file methods and for params with no file field set.
+ *
+ * @param {string} method
+ * @param {object} params
+ * @returns {object} the same params object (for chaining)
+ */
+function coerceFileParams(method, params) {
+  if (!params || typeof params !== 'object') return params;
+  const field = FILE_FIELD_BY_METHOD[method];
+  if (!field) return params;
+  if (params[field] != null) {
+    params[field] = coerceFileValue(params[field]);
+  }
+  return params;
+}
+// Public surface: the in-place params normalizer, the single-value coercion
+// it applies, and the method → file-field lookup table.
+module.exports = {
+  coerceFileParams,
+  coerceFileValue,
+  FILE_FIELD_BY_METHOD,
+};

package/lib/telegram/reactions.js CHANGED Viewed

@@ -55,6 +55,11 @@ const STATES = {
   // mid-turn user message is buffered for the next PostToolBatch
   // injection.
   AUTOSTEERED: { label: 'autosteered', chain: ['✍', '👀']        },
+  // 0.12 (Findings L9/L14): distinct in-progress reaction for a running
+  // subagent (Agent PreToolUse → SubagentStop). Driven by onSubagentStart.
+  // Preferred 👾 (NOT 🤖 — 🤖 is REACTION_INVALID for bots, same class as
+  // the rc.37 🧐 bug); falls back to 🔥 then 🤔, all bot-usable.
+  SUBAGENT:    { label: 'subagent',    chain: ['👾', '🔥', '🤔']  },
   DONE:        { label: 'done',        chain: ['👍']             },
   ERROR:       { label: 'error',       chain: ['🤯', '🤔']       },
   STALL:       { label: 'stall',       chain: ['🥱', '🤔']       },

package/lib/tmux/log-tail.js CHANGED Viewed

@@ -42,6 +42,7 @@
 const EventEmitter = require('events');
 const fs = require('fs');
 const path = require('path');
+const { StringDecoder } = require('string_decoder');
 const DEFAULT_INTERVAL_MS = 100;
 // Slow safety-net poll when fs.watch is active. Catches any events
@@ -91,6 +92,13 @@ class LogTail extends EventEmitter {
     this.fs = fsOverride || fs;
     this._offset = 0;
     this._buf = '';
+    // L8: decode bytes through a StringDecoder so a multibyte UTF-8 char
+    // split across two read chunks (the 64KB DEFAULT_CHUNK_BYTES boundary)
+    // isn't corrupted into U+FFFD. The decoder holds an incomplete trailing
+    // sequence until the continuation bytes arrive on the next read. The
+    // hook ndjson carries large non-ASCII tool payloads, so this is
+    // load-bearing on the CliProcess observability path.
+    this._decoder = new StringDecoder('utf8');
     this._closed = false;
     this._timer = null;
     this._watcher = null;
@@ -260,7 +268,9 @@ class LogTail extends EventEmitter {
         const readSize = Math.min(remaining, buffer.length);
         const { bytesRead } = await fd.read(buffer, 0, readSize, this._offset + totalRead);
         if (bytesRead === 0) break;
-        this._buf += buffer.slice(0, bytesRead).toString('utf8');
+        // L8: StringDecoder.write instead of per-chunk toString('utf8') so a
+        // multibyte char straddling the read boundary survives intact.
+        this._buf += this._decoder.write(buffer.subarray(0, bytesRead));
         totalRead += bytesRead;
       }
       this._offset += totalRead;

package/lib/tmux/startup-gate.js CHANGED Viewed

@@ -17,6 +17,19 @@
  *   - if `readySignal` regex matches the captured pane content, resolve
  *   - if `Date.now()` exceeds the deadline, throw with `err.code = timeoutCode`
  *
+ * Progress-aware (stall) deadline — `stallMs`:
+ *   The blind wall-clock `deadlineMs` can't tell "claude is mid-download
+ *   (24% progress bar, genuinely working)" from "claude is wedged". The
+ *   shumorobot General incident (2026-05-30) killed a cold-spawn that was
+ *   actively downloading the runtime. When `stallMs` is set, the gate
+ *   tracks pane ACTIVITY: any change in captured pane content — or a
+ *   trigger key being sent — resets a stall clock. The gate fails early
+ *   (with `timeoutCode`) only after `stallMs` elapses with NO activity,
+ *   i.e. the pane is frozen. `deadlineMs` remains an absolute backstop so
+ *   a pane that animates forever but never reaches `readySignal` still
+ *   terminates. When `stallMs` is omitted (default), behavior is the pure
+ *   `deadlineMs` wall-clock exactly as before.
+ *
  * Each trigger is one-shot per gate run (tracked by `name` in a Set).
  *
  * Caller supplies:
@@ -40,7 +53,10 @@ const DEFAULT_SETTLE_MS = 500;
  * @param {string} opts.tmuxName              — tmux session name to poll
  * @param {Array<{name:string, regex:RegExp, key:string}>} opts.triggers
  * @param {RegExp} opts.readySignal           — match → resolve
- * @param {number} [opts.deadlineMs=30000]
+ * @param {number} [opts.deadlineMs=30000]    — absolute backstop
+ * @param {number} [opts.stallMs]             — if set, fail after this much
+ *   wall-clock with NO pane activity (progress-aware). Omit for pure
+ *   wall-clock behavior.
  * @param {number} [opts.pollMs=300]
  * @param {number} [opts.settleMs=500]
  * @param {string} [opts.timeoutCode='TUI_STARTUP_TIMEOUT']
@@ -54,6 +70,7 @@ async function runStartupGate({
   triggers = [],
   readySignal,
   deadlineMs = DEFAULT_DEADLINE_MS,
+  stallMs,
   pollMs = DEFAULT_POLL_MS,
   settleMs = DEFAULT_SETTLE_MS,
   timeoutCode = 'TUI_STARTUP_TIMEOUT',
@@ -70,6 +87,7 @@ async function runStartupGate({
   const startedAt = Date.now();
   const deadline = startedAt + deadlineMs;
+  const stallEnabled = Number.isFinite(stallMs) && stallMs > 0;
   const seen = new Set();
   const matchedTriggers = [];
   // rc.4: remember the most recent successful pane snapshot. If the gate
@@ -78,8 +96,37 @@ async function runStartupGate({
   // this, "claude exits code 0 after dev-channels Enter" surfaces as a
   // 30-second `can't find pane` spam with no diagnostic about WHY.
   let lastPane = null;
+  // Progress-aware gate: timestamp of the last observed pane CHANGE (or
+  // trigger send). Only consulted when stallEnabled.
+  let lastActivityAt = startedAt;
+  // Music incident (2026-06-01): the stall timer must NOT arm while the pane
+  // is still BLANK. A blank-and-unchanging pane means claude hasn't started
+  // rendering yet (slow cold-start), NOT that it wedged — the TUI for some
+  // topics takes 30-45s to first-render. Arming the stall timer on a blank
+  // pane killed a legitimate slow spawn at stallMs with a false "wedged".
+  // So the stall clock only runs once the pane has shown non-whitespace
+  // content; before that, only the absolute `deadlineMs` governs.
+  let sawContent = false;
   while (Date.now() < deadline) {
+    // Stall check (progress-aware): the pane RENDERED something and has then
+    // been static for stallMs → genuinely wedged. Gated on sawContent so a
+    // blank cold-start isn't mistaken for a wedge. Fires early so a truly
+    // hung TUI fails fast, while an actively-progressing one (download bar,
+    // dialog navigation) keeps resetting lastActivityAt below.
+    if (stallEnabled && sawContent && Date.now() - lastActivityAt >= stallMs) {
+      const err = new Error(
+        `[${label}] startup gate: pane rendered then went static for ${stallMs}ms for ${tmuxName} ` +
+        `(matched: ${matchedTriggers.length ? matchedTriggers.join(', ') : 'none'}). ` +
+        `Appears wedged. Last pane content:\n` +
+        _formatPaneTail(lastPane),
+      );
+      err.code = timeoutCode;
+      err.lastPane = lastPane;
+      err.matchedTriggers = matchedTriggers;
+      err.reason = 'stall';
+      throw err;
+    }
     let pane;
     try {
       pane = await runner.captureWide(tmuxName);
@@ -107,6 +154,19 @@ async function runStartupGate({
       await new Promise(r => setTimeout(r, settleMs));
       continue;
     }
+    // First non-whitespace content = the TUI has started rendering. Only
+    // from here does the stall timer become meaningful (before this, a blank
+    // pane is cold-start, governed by the absolute deadline). Seed
+    // lastActivityAt at the moment content first appears so the stall window
+    // is measured from "rendered", not from spawn.
+    if (!sawContent && pane && pane.trim().length > 0) {
+      sawContent = true;
+      lastActivityAt = Date.now();
+    }
+    // Progress signal: any change in pane content is activity → reset the
+    // stall clock. A captureWide that returns the SAME bytes is NOT
+    // activity (a frozen download bar at 24% reads identically each poll).
+    if (pane !== lastPane) lastActivityAt = Date.now();
     lastPane = pane;
     // Walk triggers in declaration order — first match (and not yet seen) wins
@@ -122,6 +182,10 @@ async function runStartupGate({
       seen.add(trigger.name);
       matchedTriggers.push(trigger.name);
       matched = true;
+      // Sending a key is activity — navigating the TUI counts as progress
+      // even if the pre-transition pane text was static (e.g. a dialog we
+      // just answered). Reset the stall clock so we don't fail mid-nav.
+      lastActivityAt = Date.now();
       // Settle window so the TUI transitions out of the dialog before next poll
       await new Promise(r => setTimeout(r, settleMs));
       break;

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "polygram",
-  "version": "0.12.0-rc.1",
+  "version": "0.12.0-rc.10",
   "description": "Telegram daemon for Claude Code that preserves the OpenClaw per-chat session model. Migration path for OpenClaw users moving to Claude Code.",
   "main": "lib/ipc/client.js",
   "bin": {

package/polygram.js CHANGED Viewed

@@ -28,7 +28,7 @@ const {
   migrateJsonToDb, getClaudeSessionId, resolveSessionForSpawn,
 } = require('./lib/db/sessions');
 const { buildPrompt } = require('./lib/prompt');
-const { filterAttachments } = require('./lib/attachments');
+const { filterAttachments, resolveFileCaps, MAX_TOTAL_BYTES } = require('./lib/attachments');
 // 0.9.0: SDK ProcessManager is the only pm. CLI pm
 // (lib/process-manager.js) deleted in commit 6.
 // Both implementations expose the same public API (constructor +
@@ -51,7 +51,6 @@ const { extractAssistantText } = require('./lib/process/sdk-process');
 const { createChannelsToolDispatcher } = require('./lib/process/channels-tool-dispatcher');
 const { createTmuxRunner } = require('./lib/tmux/tmux-runner');
 const { sweepTmuxOrphans } = require('./lib/tmux/orphan-sweep');
-const { PollScheduler } = require('./lib/tmux/poll-scheduler');
 // rc.42: autosteer-buffer module deleted. Native SDK priority push
 // (pm.injectUserMessage) replaces the buffer + PostToolBatch detour.
 const { createAutosteeredRefs } = require('./lib/autosteered-refs');
@@ -462,6 +461,10 @@ function buildSpawnContext(sessionKey) {
     threadId: threadId || null,
     label: getSessionLabel(chatConfig, threadId),
     existingSessionId,
+    // File-send outbound cap inputs: localApi (bot-level) so CliProcess can
+    // resolve the per-chat/topic outbound cap (resolveFileCaps) the same way
+    // it resolves cwd/agent. Override itself lives in chatConfig/topic.
+    localApi: !!config.bot?.apiRoot,
   };
 }
@@ -755,7 +758,19 @@ async function handleMessage(sessionKey, chatId, msg, bot) {
   const sessionCtx = !pm.has(sessionKey) ? await readSessionContext(sessionKey, chatConfig.cwd) : '';
   const rawAtts = extractAttachments(msg);
-  const { accepted, rejected } = filterAttachments(rawAtts);
+  // Backend-derived inbound cap with per-topic/chat override. Cloud → 20MB;
+  // a local Bot API server (config.bot.apiRoot) → 2GB; override via
+  // chats[id].maxFileBytes or topics[t].maxFileBytes, clamped to the
+  // backend ceiling. Bytes-valued config; resolveFileCaps does the clamp.
+  const _inTopicCfg = getTopicConfig(chatConfig, threadIdStr || null);
+  const _fileCaps = resolveFileCaps({
+    localApi: !!config.bot?.apiRoot,
+    override: _inTopicCfg.maxFileBytes ?? chatConfig.maxFileBytes ?? null,
+  });
+  const { accepted, rejected } = filterAttachments(rawAtts, {
+    maxFileBytes: _fileCaps.inBytes,
+    maxTotalBytes: Math.max(_fileCaps.inBytes, MAX_TOTAL_BYTES),
+  });
   for (const { att, reason } of rejected) {
     console.log(`[${label}] attachment skipped: ${att.name} (${reason})`);
     logEvent('attachment-skipped', { chat_id: chatId, msg_id: msg.message_id, name: att.name, reason });
@@ -1673,9 +1688,23 @@ function shouldHandle(msg, chatConfig, botUsername) {
 }
 function createBot(token) {
+  // Optional self-hosted Telegram Bot API server. When config.bot.apiRoot is
+  // set (e.g. "http://localhost:8081" from a local `telegram-bot-api`
+  // process), grammy routes all Bot API calls there instead of
+  // api.telegram.org — which lifts file send/receive from cloud's 50 MB-out /
+  // 20 MB-in to 2 GB both ways. Omit it (default) → cloud Telegram, unchanged.
+  // The local server is a separate companion daemon; this is just the knob
+  // that points polygram at it. See docs/0.12.0-file-send.md.
+  const apiRoot = config.bot?.apiRoot;
   const bot = new Bot(token, {
-    client: { timeoutSeconds: 60 },
+    client: {
+      timeoutSeconds: 60,
+      ...(apiRoot ? { apiRoot } : {}),
+    },
   });
+  if (apiRoot) {
+    console.log(`[polygram] using local Telegram Bot API server: ${apiRoot} (2GB file limit)`);
+  }
   let botUsername = '';
   // Cached once @botUsername is known — was recompiling per inbound msg.
   let mentionRe = null;
@@ -2244,19 +2273,13 @@ async function main() {
     const binCheck = verifyPinnedClaudeBin(CLAUDE_CLI_PINNED_VERSION);
     if (binCheck.ok) {
       console.log(
-        `[polygram] tmux backend pinned to claude CLI v${CLAUDE_CLI_PINNED_VERSION}: ${binCheck.path}`,
+        `[polygram] CliProcess pinned to claude CLI v${CLAUDE_CLI_PINNED_VERSION}: ${binCheck.path}`,
       );
       pinnedClaudeBin = binCheck.path;
     } else {
       console.warn(`[polygram] WARNING: ${binCheck.reason}`);
     }
   }
-  // O1 optimization: shared poll-tick scheduler. N TmuxProcess
-  // instances share ONE setInterval instead of spawning N independent
-  // setTimeout chains. Idle when no chats are in flight (zero timers
-  // running). Configurable via config.bot.tmuxPollIntervalMs.
-  const tmuxPollIntervalMs = config.bot?.tmuxPollIntervalMs || 250;
-  const pollScheduler = new PollScheduler({ intervalMs: tmuxPollIntervalMs });
   // 0.11.0: channels backend wiring. Used when a chat opts in via
   // `pm: 'channels'` config. Falls back to SDK gracefully if the pinned
   // claude binary isn't present (see factory.js — channelsClaudeBin
@@ -2282,7 +2305,6 @@ async function main() {
     logger: console,
     tmuxRunner,
     botName: BOT_NAME,
-    pollScheduler,
     // channels backend
     toolDispatcher: channelsToolDispatcher,
     channelsClaudeBin,