npm - polygram - Versions diffs - 0.11.0-rc.9 → 0.12.0-rc.2 - Mend

polygram 0.11.0-rc.9 → 0.12.0-rc.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28)

package/.claude-plugin/plugin.json +1 -1
package/config.example.json +1 -1
package/lib/claude-bin.js +13 -9
package/lib/db/auto-resume.js +7 -0
package/lib/db/sessions.js +14 -6
package/lib/error/classify.js +41 -0
package/lib/handlers/slash-commands.js +22 -2
package/lib/process/channels-bridge-protocol.js +12 -2
package/lib/process/channels-bridge-server.js +67 -7
package/lib/process/channels-bridge.mjs +85 -14
package/lib/process/channels-tool-dispatcher.js +46 -20
package/lib/process/{channels-process.js → cli-process.js} +1008 -86
package/lib/process/factory.js +112 -47
package/lib/process/hook-event-tail.js +1 -1
package/lib/process/hook-settings.js +33 -3
package/lib/process-manager.js +24 -1
package/lib/sdk/callbacks.js +173 -1
package/lib/telegram/process-agent-reply.js +233 -0
package/lib/telegram/reactions.js +9 -0
package/lib/tmux/log-tail.js +1 -1
package/lib/tmux/startup-gate.js +1 -1
package/lib/{tmux/session-log-parser.js → util/claude-session-jsonl.js} +20 -9
package/package.json +3 -3
package/polygram.js +74 -57
package/lib/process/tmux-process.js +0 -3321
package/lib/process/turn-phase.js +0 -150
package/lib/telegram/heartbeat-reactor.js +0 -254
package/lib/tmux/tui-tool-input.js +0 -62

package/.claude-plugin/plugin.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "$schema": "https://anthropic.com/claude-code/plugin.schema.json",
   "name": "polygram",
-  "version": "0.11.0-rc.9",
+  "version": "0.11.0-rc.15",
   "description": "Telegram integration for Claude Code that preserves the OpenClaw per-chat session model. Migration target for OpenClaw users. Multi-bot, multi-chat, per-topic isolation; SQLite transcripts; inline-keyboard approvals. Bundles /polygram:status|logs|pair-code|approvals admin commands plus history (transcript queries) and polygram-send (out-of-turn IPC sends with file-upload validation) skills.",
   "keywords": [
     "telegram",

package/config.example.json CHANGED Viewed

@@ -100,7 +100,7 @@
       "isolateTopics": true,
       "_comment_topics": "rc.48: each topic entry is EITHER a string (legacy: just a label) OR an object with optional fields {name, agent, cwd, model, effort, permissionMode, isolateUserConfig}. Object form lets a topic override chat-level config. Per-topic permissionMode overrides chat-level — typical use: scope one topic to permissionMode:'default' (so settings.json gates apply) while the rest of the chat stays on bypassPermissions. Object form requires isolateTopics: true (each topic gets its own SDK Query); polygram emits a startup warning otherwise.",
       "_comment_isolateUserConfig": "0.10.0, tmux backend only: isolateUserConfig:true spawns the topic's claude TUI cut off from the user-level ~/.claude config — passes --strict-mcp-config (zero MCP servers load) and --setting-sources project,local (drops ~/.claude/settings.json; the spawn cwd's own .claude/settings.json still loads). Use it when a topic's agent would otherwise inherit slow user-global MCP servers whose cold-start (tens of seconds) wedges the TUI before it can accept a prompt. Settable at chat OR topic level (topic wins). Default false.",
-      "_comment_pm": "0.11.0: 'pm' selects the Process backend: 'sdk' (default; per-token Console API; full SDK features), 'tmux' (subscription-priced claude CLI in tmux; JSONL/pane parsing for IO), 'channels' (subscription-priced claude CLI in tmux; structured IO via the official Channels MCP protocol — see docs/0.11.0-channels-driver-plan.md). Settable at bot, chat, OR topic level (topic > chat > bot). Channels requires Pro/Max subscription, Claude Code v2.1.80+, and is in research preview — invokes --dangerously-load-development-channels.",
+      "_comment_pm": "0.12.0: 'pm' selects the Process backend. Two canonical values: 'sdk' (default; per-token Console API billing; full SDK features) and 'cli' (subscription-priced claude CLI in tmux + Channels MCP bridge + hooks ndjson observability — see docs/0.12.0-cli-driver-plan.md). Settable at bot, chat, OR topic level (topic > chat > bot). Aliases preserved for back-compat with 0.10/0.11 configs: 'channels' and 'tmux' both resolve to 'cli' with a once-at-boot deprecation warn. CLI requires Pro/Max subscription, Claude Code v2.1.80+, and uses --dangerously-load-development-channels (research preview flag).",
       "topics": {
         "100": "Customer A",
         "200": {

package/lib/claude-bin.js CHANGED Viewed

@@ -4,16 +4,20 @@ const os = require('os');
 const path = require('path');
 const fs = require('fs');
+// 0.12 Phase 4: moved from lib/process/tmux-process.js into the helper module
+// that consumes it, so the constant survives TmuxProcess deletion. CliProcess
+// + spike scripts + polygram boot all import from here now.
+const CLAUDE_CLI_PINNED_VERSION = '2.1.142';
 /**
- * Resolve + verify the pinned claude CLI binary for the tmux backend.
+ * Resolve + verify the pinned claude CLI binary.
  *
- * Why this exists: the tmux backend reads claude CLI INTERNAL
- * artefacts (JSONL events, queue-operation semantics, TUI banner
- * ASCII, READY hint strings, stop_reason values) — none a stable
- * public contract. polygram pins ONE version
- * (CLAUDE_CLI_PINNED_VERSION in lib/process/tmux-process.js) and
- * must spawn THAT binary, never whatever `claude` on $PATH happens
- * to resolve to.
+ * Why this exists: the tmux + CLI backends read claude CLI internal
+ * artefacts (TUI banner ASCII, READY hint strings, channel notification
+ * registration timing, MCP-init order) — none a stable public contract.
+ * polygram pins ONE version (`CLAUDE_CLI_PINNED_VERSION`) and must
+ * spawn THAT binary, never whatever `claude` on $PATH happens to
+ * resolve to.
  *
  * Before this module the tmux runner spawned the bare string
  * `claude`, resolved through $PATH. The claude CLI installs each
@@ -75,4 +79,4 @@ function verifyPinnedClaudeBin(version) {
   }
 }
-module.exports = { resolvePinnedClaudeBin, verifyPinnedClaudeBin };
+module.exports = { resolvePinnedClaudeBin, verifyPinnedClaudeBin, CLAUDE_CLI_PINNED_VERSION };

package/lib/db/auto-resume.js CHANGED Viewed

@@ -90,6 +90,13 @@ function createAutoResumeTracker({ cooldownMs = DEFAULT_COOLDOWN_MS, now = Date.
  */
 function isAutoResumable({ error, aborted, replay, shuttingDown }) {
   if (aborted || replay || shuttingDown) return false;
+  // Review F#6: channels analog of the tmux 'idle with no Claude activity'
+  // pattern. The bridge socket dropped mid-turn (claude crashed, bridge
+  // process died) — that's a wedge, not a runaway. Same intent as the
+  // regex match below, just expressed via err.code because channels throws
+  // a different message string. TURN_TIMEOUT stays NON-resumable (it's
+  // the channels analog of the wall-clock ceiling — likely a runaway).
+  if (error?.code === 'BRIDGE_DISCONNECTED') return true;
   const msg = String(error?.message || error || '');
   return /idle with no Claude activity/i.test(msg);
 }

package/lib/db/sessions.js CHANGED Viewed

@@ -205,12 +205,20 @@ function resolveSessionForSpawn(db, sessionKey, resolved = {}) {
   // of THAT task; claude responded with music release info, inline,
   // never calling the reply tool. Every turn timed out at 3min.
   //
-  // Rule: any transition TO or FROM channels drops the prior session.
-  // XOR — flips between channels and {sdk,tmux} invalidate; sdk↔tmux
-  // flips remain free.
-  const wasChannels = before.pm_backend === 'channels';
-  const willBeChannels = after.pm_backend === 'channels';
-  if (after.pm_backend != null && wasChannels !== willBeChannels) {
+  // Rule: any transition TO or FROM the channels/CLI backend drops the
+  // prior session. XOR — flips between (channels|cli) and {sdk,tmux}
+  // invalidate; sdk↔tmux flips remain free (rc.32 reasoning).
+  //
+  // 0.12: 'cli' is the canonical name for what was 'channels' in 0.11.
+  // Treat both as the same "channels-class" backend for transition
+  // invalidation purposes — a row persisted with pm_backend='channels'
+  // before 0.12 and a row created today with pm_backend='cli' are
+  // semantically the same in terms of session-context invariants
+  // (bridge MCP server mounted, reply-tool contract enforced).
+  const CHANNELS_CLASS = new Set(['channels', 'cli']);
+  const wasChannelsClass = CHANNELS_CLASS.has(before.pm_backend);
+  const willBeChannelsClass = CHANNELS_CLASS.has(after.pm_backend);
+  if (after.pm_backend != null && wasChannelsClass !== willBeChannelsClass) {
     drifted.push('pm_backend');
   }

package/lib/error/classify.js CHANGED Viewed

@@ -163,6 +163,47 @@ const CODES = {
     isTransient: false,
     autoRecover: null,
   },
+  // Review F#5: channels-specific error codes. Pre-fix these fell through
+  // to the generic 'unknown' kind (errorReplyText: "Hit a snag. Try
+  // resending.") which lies about what happened. Mirrors the rc.46→rc.47
+  // tmuxToolWedge fix where backend-specific codes needed their own kinds.
+  //
+  // BRIDGE_DISCONNECTED: thrown by CliProcess when the mcp-bridge
+  // socket drops mid-turn (claude crashed, bridge process died, etc).
+  // isTransient: true because the daemon retries spawning the backend.
+  BRIDGE_DISCONNECTED: {
+    kind: 'bridgeDisconnected',
+    userMessage: '🔌 Lost the bridge to Claude mid-turn. Retrying — please resend if I don\'t reply in 30s.',
+    isTransient: true,
+    autoRecover: null,
+  },
+  // CHANNELS_HANDSHAKE_TIMEOUT: bridge process never sent session_init
+  // within the handshake window during start(). Usually means the bridge
+  // crashed pre-init or the socket file is stale.
+  CHANNELS_HANDSHAKE_TIMEOUT: {
+    kind: 'channelsHandshakeTimeout',
+    userMessage: '⏳ Couldn\'t start a Claude session — the bridge didn\'t respond in time. Try again in a moment.',
+    isTransient: true,
+    autoRecover: null,
+  },
+  // CHANNELS_DIALOG_TIMEOUT: a permission / usage-limit / context-overflow
+  // dialog opened mid-turn and we couldn't auto-respond within the dialog
+  // window. The turn is dead; user needs to retry.
+  CHANNELS_DIALOG_TIMEOUT: {
+    kind: 'channelsDialogTimeout',
+    userMessage: '🚧 Claude hit a dialog (permission/usage-limit) mid-turn and I couldn\'t auto-respond in time. Please resend.',
+    isTransient: false,
+    autoRecover: null,
+  },
+  // TURN_TIMEOUT: 10-min wall-clock cap on a single channels turn. Mirror
+  // of the tmux wall-clock ceiling — typically a runaway, not a wedge.
+  // Not transient (auto-retry would just runaway again).
+  TURN_TIMEOUT: {
+    kind: 'turnTimeout',
+    userMessage: '⏱ The turn ran past the 10-minute cap. Resend if the answer still matters.',
+    isTransient: false,
+    autoRecover: null,
+  },
 };
 /**

package/lib/handlers/slash-commands.js CHANGED Viewed

@@ -199,7 +199,17 @@ function createSlashCommands({
         }), 'log model change');
         const { anyActive } = await applyConfigChange('model', newModel);
         const ver = (modelVersionsDesc && modelVersionsDesc[newModel]) || newModel;
-        const suffix = anyActive ? ` — I'll switch when I finish` : '';
+        // Review F#10: channels backend can't apply model/effort changes
+        // live — its setModel/applyFlagSettings throw UNSUPPORTED_OPERATION,
+        // pm.setModel returns false → `anyActive` is true → user saw the
+        // misleading "I'll switch when I finish" message. Now we detect
+        // the channels backend explicitly and give an honest answer:
+        // settings are persisted to chatConfig and take effect on the next
+        // /reset or /new (channels lacks an in-place re-init path).
+        const backendName = typeof pm.getBackend === 'function' ? pm.getBackend(sessionKey) : null;
+        const suffix = backendName === 'channels'
+          ? ` — applies on next /reset (channels)`
+          : (anyActive ? ` — I'll switch when I finish` : '');
         await sendReply(`Model → ${newModel} (${ver})${suffix}`);
       } else {
         await sendReply(`Unknown model. Use: opus, sonnet, haiku`);
@@ -219,7 +229,17 @@ function createSlashCommands({
           user: cmdUser, user_id: cmdUserId, source: 'command',
         }), 'log effort change');
         const { anyActive } = await applyConfigChange('effort', newEffort);
-        const suffix = anyActive ? ` — I'll switch when I finish` : '';
+        // Review F#10: channels backend can't apply model/effort changes
+        // live — its setModel/applyFlagSettings throw UNSUPPORTED_OPERATION,
+        // pm.setModel returns false → `anyActive` is true → user saw the
+        // misleading "I'll switch when I finish" message. Now we detect
+        // the channels backend explicitly and give an honest answer:
+        // settings are persisted to chatConfig and take effect on the next
+        // /reset or /new (channels lacks an in-place re-init path).
+        const backendName = typeof pm.getBackend === 'function' ? pm.getBackend(sessionKey) : null;
+        const suffix = backendName === 'channels'
+          ? ` — applies on next /reset (channels)`
+          : (anyActive ? ` — I'll switch when I finish` : '');
         await sendReply(`Effort → ${newEffort}${suffix}`);
       } else {
         await sendReply(`Unknown effort. Use: low, medium, high, xhigh, max`);

package/lib/process/channels-bridge-protocol.js CHANGED Viewed

@@ -2,7 +2,7 @@
  * Bridge ↔ daemon socket protocol — typed schemas.
  *
  * Wire format: newline-delimited JSON over a unix socket per session.
- * Both endpoints (ChannelsProcess and channels-bridge.mjs) speak the same
+ * Both endpoints (CliProcess and channels-bridge.mjs) speak the same
  * message kinds. This module centralizes the shape so both sides safeParse
  * inbound messages with the same constraints — protecting against malformed
  * payloads silently corrupting pending-state Maps.
@@ -10,7 +10,7 @@
  * Adding a new message kind:
  *   1. Define its schema below as `<KindName>MessageSchema`
  *   2. Add it to `AnyDaemonToBridgeMessage` or `AnyBridgeToDaemonMessage`
- *   3. Handle it in the corresponding switch (channels-process.js
+ *   3. Handle it in the corresponding switch (cli-process.js
  *      _onBridgeMsg or channels-bridge.mjs handleDaemonMessage)
  *
  * Validation policy:
@@ -67,12 +67,22 @@ const PongMessageSchema = z.object({
   kind: z.literal('pong'),
 }).passthrough();
+// 0.12 Phase 1.6: bridge tells daemon when claude has finished registering
+// the bridge as an MCP server (claude sent its first ListToolsRequest).
+// Polygram's _waitForBridgeHandshake gates on this in addition to hello,
+// eliminating the cold-spawn race (Finding 0.3.A).
+const McpReadyMessageSchema = z.object({
+  kind: z.literal('mcp-ready'),
+  session: NonEmptyString,
+}).passthrough();
 const AnyBridgeToDaemonMessage = z.discriminatedUnion('kind', [
   HelloSchema,
   SessionInitSchema,
   ToolCallMessageSchema,
   PermRequestMessageSchema,
   PongMessageSchema,
+  McpReadyMessageSchema,
 ]);
 // ─── daemon → bridge ───────────────────────────────────────────────

package/lib/process/channels-bridge-server.js CHANGED Viewed

@@ -2,11 +2,11 @@
  * ChannelsBridgeServer — per-session unix-socket server for the bridge
  * subprocess to connect back to.
  *
- * Extracted from ChannelsProcess (M1 refactor) so the socket lifecycle —
+ * Extracted from CliProcess (M1 refactor) so the socket lifecycle —
  * listen with restrictive umask, accept ONE bridge, hello-handshake auth,
  * line-delimited JSON I/O, schema validation, single-bridge-per-session
  * enforcement, clean teardown — lives in one focused class instead of
- * sprawling across ChannelsProcess.
+ * sprawling across CliProcess.
  *
  * Owns:
  *   - net.Server lifecycle (listen / close)
@@ -17,11 +17,14 @@
  *
  * Does NOT own:
  *   - protocol semantics (tool routing, perm relay, turn lifecycle) — those
- *     stay in ChannelsProcess, which subscribes to the events this class emits
+ *     stay in CliProcess, which subscribes to the events this class emits
  *   - claude/bridge process lifecycle
  *
  * Event surface (EventEmitter):
- *   'bridge-ready'        — handshake complete; safe to send daemon→bridge msgs
+ *   'bridge-ready'        — daemon-side handshake (hello + session_init) complete
+ *   'mcp-ready'           — claude-side MCP-server registration complete (first
+ *                            ListToolsRequest received from claude). 0.12 P1.6
+ *                            cold-spawn race fix — see channels-bridge.mjs.
  *   'bridge-message', msg — every validated bridge→daemon message (post-auth)
  *   'bridge-disconnected' — single-bridge connection closed
  *   'error', err          — socket-level errors (rare; non-fatal)
@@ -29,6 +32,7 @@
 'use strict';
+const crypto = require('node:crypto');
 const EventEmitter = require('node:events');
 const fs = require('node:fs');
 const net = require('node:net');
@@ -162,14 +166,26 @@ class ChannelsBridgeServer extends EventEmitter {
         }
         if (!authenticated) {
-          if (raw.kind === 'hello'
-              && raw.session_key === this.sessionKey
-              && raw.secret === this.sockSecret) {
+          // Review F#7: harden the hello-handshake.
+          //   1. timingSafeEqual for the secret compare so a same-uid
+          //      attacker can't byte-by-byte probe via response-timing.
+          //   2. ROTATE the secret after first successful auth (set to
+          //      null) so a stale POLYGRAM_SOCK_SECRET leaked via
+          //      /proc/<pid>/environ can't replay against this
+          //      CliProcess after the legit bridge disconnects.
+          //      The bridge process is one-shot per spawn anyway (it
+          //      exits on socket close — see channels-bridge.mjs:109),
+          //      so legitimate re-auth within one CliProcess
+          //      instance never happens — only a hijacker would.
+          const verdict = this._verifyHelloAuth(raw);
+          if (verdict.ok) {
             authenticated = true;
             this.authenticated = true;
+            this.sockSecret = null;   // invalidate — single-shot per instance
             try { conn.write(JSON.stringify({ kind: 'hello_ack' }) + '\n'); } catch {}
             continue;
           }
+          this.logger.warn?.(`[${this.label}] hello rejected — reason=${verdict.reason}`);
           try { conn.write(JSON.stringify({ kind: 'hello_reject', reason: 'auth' }) + '\n'); } catch {}
           conn.end();
           this.conn = null;
@@ -193,6 +209,13 @@ class ChannelsBridgeServer extends EventEmitter {
           this.emit('bridge-ready');
           continue;
         }
+        if (parsed.msg.kind === 'mcp-ready') {
+          // 0.12 Phase 1.6: bridge signals that claude has finished
+          // registering it as an MCP server. Polygram gates send() on this
+          // (Finding 0.3.A — cold-spawn race).
+          this.emit('mcp-ready', parsed.msg);
+          continue;
+        }
         this.emit('bridge-message', parsed.msg);
       }
     });
@@ -209,6 +232,43 @@ class ChannelsBridgeServer extends EventEmitter {
       this.logger.warn?.(`[${this.label}] bridge conn error: ${err.message}`);
     });
   }
+  /**
+   * Review F#7: hello-handshake verification, extracted as a pure method so it
+   * can be exercised in isolation. Returns `{ ok: true }` on accept or
+   * `{ ok: false, reason }` on reject. Uses crypto.timingSafeEqual for the
+   * secret compare and refuses if this.sockSecret has already been consumed
+   * (post-auth rotation).
+   *
+   * @param {object} raw — parsed bridge→daemon hello payload
+   * @returns {{ ok: true } | { ok: false, reason: string }}
+   */
+  _verifyHelloAuth(raw) {
+    if (this.sockSecret == null) {
+      return { ok: false, reason: 'secret-consumed' };
+    }
+    if (!raw || raw.kind !== 'hello') {
+      return { ok: false, reason: 'not-hello' };
+    }
+    if (raw.session_key !== this.sessionKey) {
+      return { ok: false, reason: 'wrong-session-key' };
+    }
+    if (typeof raw.secret !== 'string' || raw.secret.length === 0) {
+      return { ok: false, reason: 'no-secret' };
+    }
+    const a = Buffer.from(raw.secret, 'utf8');
+    const b = Buffer.from(this.sockSecret, 'utf8');
+    if (a.length !== b.length) {
+      // timingSafeEqual requires equal-length inputs; length mismatch is a
+      // wrong-secret signal but constant-time compares MUST short-circuit
+      // here (otherwise we'd leak the secret's length).
+      return { ok: false, reason: 'wrong-secret' };
+    }
+    if (!crypto.timingSafeEqual(a, b)) {
+      return { ok: false, reason: 'wrong-secret' };
+    }
+    return { ok: true };
+  }
 }
 module.exports = { ChannelsBridgeServer };

package/lib/process/channels-bridge.mjs CHANGED Viewed

@@ -1,8 +1,8 @@
 #!/usr/bin/env node
-// polygram-bridge — production Channels MCP bridge for ChannelsProcess.
+// polygram-bridge — production Channels MCP bridge for CliProcess.
 //
 // Runs as stdio child of `claude --dangerously-load-development-channels server:polygram-bridge`.
-// Connects back to its parent ChannelsProcess (in the polygram daemon) over a per-session
+// Connects back to its parent CliProcess (in the polygram daemon) over a per-session
 // unix socket whose path + auth secret are passed via env.
 //
 // Owns nothing semantic. Pure proxy:
@@ -23,9 +23,19 @@
 import { Server } from '@modelcontextprotocol/sdk/server/index.js'
 import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'
 import { ListToolsRequestSchema, CallToolRequestSchema } from '@modelcontextprotocol/sdk/types.js'
+// Review F#15: validate daemon→bridge messages with the shared zod schema.
+// Pre-fix handleDaemonMessage operated on raw JSON.parse output — a
+// malformed user_msg (e.g. text=undefined) silently injected the literal
+// string "undefined" into Claude's prompt; a malformed tool_ack with
+// null tool_call_id silently no-op'd and the bridge timed out on
+// awaitToolAck → isError → Claude retry.
+import { parseDaemonToBridgeMessage } from './channels-bridge-protocol.js'
 import { z } from 'zod'
 import { connect } from 'node:net'
 import { randomUUID } from 'node:crypto'
+import { appendFileSync, mkdirSync } from 'node:fs'
+import { join } from 'node:path'
+import { homedir } from 'node:os'
 const SESSION_KEY = process.env.POLYGRAM_SESSION_KEY
 const SOCK        = process.env.POLYGRAM_SOCK
@@ -38,8 +48,23 @@ if (!SESSION_KEY || !SOCK || !SOCK_SECRET) {
   process.exit(2)
 }
-const log = (kind, payload = {}) =>
-  process.stderr.write(`[polygram-bridge] ${JSON.stringify({ t: Date.now(), kind, ...payload })}\n`)
+// rc.11 diagnostic: bridge stderr goes to claude's TUI which is a tiny
+// scrollback. The Music-topic shumorobot live failure leaves no trace of
+// whether user_msg ever reached the bridge or whether the MCP notification
+// dispatched successfully. Mirror every log line to a per-session file so
+// we can definitively pin the failure point.
+const LOG_DIR = join(homedir(), '.polygram', 'bridge-logs')
+try { mkdirSync(LOG_DIR, { recursive: true }) } catch {}
+// Filename: session-key gets sanitized (`:` → `_`) for file safety.
+const LOG_FILE = join(LOG_DIR, `${String(SESSION_KEY).replace(/[^a-zA-Z0-9_-]/g, '_')}.${process.pid}.log`)
+const fileWrite = (line) => { try { appendFileSync(LOG_FILE, line + '\n') } catch {} }
+const log = (kind, payload = {}) => {
+  const line = `[polygram-bridge] ${JSON.stringify({ t: Date.now(), kind, ...payload })}`
+  process.stderr.write(line + '\n')
+  fileWrite(line)
+}
+log('boot', { session_key: SESSION_KEY, log_file: LOG_FILE, pid: process.pid })
 // ─── Stdin EOF → claude crashed; we exit so the daemon notices via socket close ──
 process.stdin.on('end',   () => { log('stdin', { event: 'end'   }); process.exit(0) })
@@ -124,9 +149,23 @@ sock.on('data', chunk => {
     const line = buf.slice(0, nl)
     buf = buf.slice(nl + 1)
     if (!line.trim()) continue
-    let msg
-    try { msg = JSON.parse(line) } catch { log('parse-error', { line: line.slice(0, 200) }); continue }
-    handleDaemonMessage(msg)
+    let raw
+    try { raw = JSON.parse(line) } catch { log('parse-error', { line: line.slice(0, 200) }); continue }
+    // Review F#15: zod-validate before dispatch. Malformed messages drop with
+    // a log instead of silently corrupting downstream state. hello_ack /
+    // hello_reject are skipped here because they're pre-auth and the
+    // discriminated union expects only post-auth shapes — handle them
+    // directly off the raw payload.
+    if (raw.kind === 'hello_ack' || raw.kind === 'hello_reject') {
+      handleDaemonMessage(raw)
+      continue
+    }
+    const parsed = parseDaemonToBridgeMessage(raw)
+    if (!parsed.ok) {
+      log('daemon-msg-schema-invalid', { kind: raw?.kind, error: parsed.error })
+      continue
+    }
+    handleDaemonMessage(parsed.msg)
   }
 })
@@ -142,6 +181,7 @@ function handleDaemonMessage(msg) {
       break
     case 'user_msg':
+      log('user_msg-rx', { text_len: msg.text?.length, turn_id: msg.turn_id, chat_id: msg.chat_id })
       mcp.notification({
         method: 'notifications/claude/channel',
         params: {
@@ -156,7 +196,10 @@ function handleDaemonMessage(msg) {
             turn_id: escapeChannelAttr(msg.turn_id ?? ''),
           },
         },
-      }).catch(e => log('notify-error', { kind: 'user_msg', error: e.message }))
+      }).then(
+        () => log('user_msg-notify-ok', { turn_id: msg.turn_id }),
+        (e) => log('notify-error', { kind: 'user_msg', error: e.message }),
+      )
       break
     case 'perm_verdict':
@@ -203,7 +246,27 @@ const mcp = new Server(
   },
 )
-mcp.setRequestHandler(ListToolsRequestSchema, async () => ({
+// 0.12 Phase 1.6 — MCP-ready signal (cold-spawn race fix, Finding 0.3.A).
+// Claude's MCP client calls ListTools exactly once during server registration
+// (after Initialize, before notifications can be routed). When that first
+// call arrives here, we know claude has the bridge fully registered and
+// will route incoming notifications to our 'claude/channel' capability.
+// We tell the daemon by writing a single {kind:'mcp-ready'} message, and
+// polygram's _waitForBridgeHandshake gates send() on this in addition to
+// the existing daemon-side hello. Before this fix, polygram's handshake
+// resolved when the bridge connected to the daemon socket — BEFORE claude
+// finished MCP registration — and user_msg notifications were silently
+// dropped 33% of the time (probe-cold-spawn.mjs).
+let _mcpReadySent = false
+mcp.setRequestHandler(ListToolsRequestSchema, async () => {
+  if (!_mcpReadySent) {
+    _mcpReadySent = true
+    log('mcp-ready', { trigger: 'first ListToolsRequest' })
+    try { sock.write(JSON.stringify({ kind: 'mcp-ready', session: SESSION_KEY }) + '\n') } catch (err) {
+      log('mcp-ready-write-fail', { error: err.message })
+    }
+  }
+  return {
   tools: [{
     name: 'reply',
     description: 'Send a message back to the originating Telegram chat. ' +
@@ -221,7 +284,8 @@ mcp.setRequestHandler(ListToolsRequestSchema, async () => ({
       required: ['chat_id', 'text'],
     },
   }],
-}))
+  }
+})
 mcp.setRequestHandler(CallToolRequestSchema, async req => {
   if (req.params.name !== 'reply') {
@@ -245,13 +309,20 @@ mcp.setRequestHandler(CallToolRequestSchema, async req => {
 })
 // ─── Permission relay: Claude Code → bridge → daemon → human → verdict back ──
+// Review F#14: only request_id + tool_name are required. description /
+// input_preview MAY be empty (Bash with no args, future tool variants, slim
+// tools that don't carry a preview). Pre-fix any of those four being absent
+// or empty rejected the whole notification — MCP silently dropped the perm
+// request, no approval card surfaced, Claude blocked forever waiting for a
+// verdict that never came. Now those two are optional+defaulted to '' so
+// the perm request always relays.
 const PermissionRequestSchema = z.object({
   method: z.literal('notifications/claude/channel/permission_request'),
   params: z.object({
-    request_id:    z.string(),
-    tool_name:     z.string(),
-    description:   z.string(),
-    input_preview: z.string(),
+    request_id:    z.string().min(1),
+    tool_name:     z.string().min(1),
+    description:   z.string().optional().default(''),
+    input_preview: z.string().optional().default(''),
   }).passthrough(),
 })

package/lib/process/channels-tool-dispatcher.js CHANGED Viewed

@@ -1,15 +1,15 @@
 /**
- * channels-tool-dispatcher — adapter between ChannelsProcess's reply tool
+ * channels-tool-dispatcher — adapter between CliProcess's reply tool
  * callback and polygram's existing Telegram delivery primitives.
  *
- * ChannelsProcess calls `toolDispatcher({sessionKey, chatId, threadId,
+ * CliProcess calls `toolDispatcher({sessionKey, chatId, threadId,
  * toolName, text, files})` whenever Claude invokes the reply tool over
  * the Channels protocol. This module wires that into:
  *   - lib/telegram/chunk.js     for size-aware splitting
  *   - lib/telegram/deliver.js   for the actual sendMessage loop
  *   - bot.api.sendPhoto/Document for file attachments
  *
- * The dispatcher returns `{ok: boolean, error?: string}` — ChannelsProcess
+ * The dispatcher returns `{ok: boolean, error?: string}` — CliProcess
  * relays this to the bridge as tool_ack, which surfaces to Claude as the
  * tool's return value (`'sent'` on ok, error message on failure).
  *
@@ -84,8 +84,12 @@ function validateAttachmentPath(filePath, allowedRoots) {
  * @param {object} deps
  * @param {object} deps.bot                        — grammy Bot instance
  * @param {Function} deps.send                     — tg(bot, method, params, meta) sender wrapper
- * @param {Function} deps.chunkText                — (text, maxLen?) → string[] chunks
- * @param {object} [deps.deliverReplies]           — optional pre-bound deliverReplies; defaults to lib/telegram/deliver.deliverReplies
+ * @param {Function} deps.chunkText                — (text, maxLen?) → string[] chunks (chunkMarkdownText)
+ * @param {Function} deps.deliverReplies           — async ({ bot, send, chatId, threadId, chunks, replyToMessageId, meta, logger }) → { sent, failed }
+ * @param {Function} deps.parseResponse            — Review F#1: text → { text, sticker, stickers[], reaction, reactions[], ... }; required so [sticker:NAME] / [react:EMOJI] don't leak as literal text
+ * @param {Function} deps.sanitizeAssistantReply   — Review F#1: text → { text, replaced, original? }; required so CLI canned strings (`No response requested.`) are intercepted
+ * @param {Function} [deps.processAndDeliverAgentText] — Review F#1: defaults to lib/telegram/process-agent-reply.js helper; DI-overridable for tests
+ * @param {Function} [deps.logEvent]               — (kind, detail) → void; piped into the helper for canned-reply-suppressed forensics
  * @param {object} [deps.logger=console]
  * @param {number} [deps.maxChunkLen=4000]         — TG hard cap is 4096; leave headroom for HTML wrapping
  * @param {string[]} [deps.attachmentAllowlist]    — additional absolute-path roots files may live under (extends defaults)
@@ -96,6 +100,10 @@ function createChannelsToolDispatcher({
   send,
   chunkText,
   deliverReplies,
+  parseResponse,
+  sanitizeAssistantReply,
+  processAndDeliverAgentText,
+  logEvent = null,
   logger = console,
   maxChunkLen = 4000,
   attachmentAllowlist = [],
@@ -104,15 +112,20 @@ function createChannelsToolDispatcher({
   if (typeof send !== 'function') throw new TypeError('channels-tool-dispatcher: send required');
   if (typeof chunkText !== 'function') throw new TypeError('channels-tool-dispatcher: chunkText required');
   if (typeof deliverReplies !== 'function') throw new TypeError('channels-tool-dispatcher: deliverReplies required');
+  if (typeof parseResponse !== 'function') throw new TypeError('channels-tool-dispatcher: parseResponse required (Review F#1)');
+  if (typeof sanitizeAssistantReply !== 'function') throw new TypeError('channels-tool-dispatcher: sanitizeAssistantReply required (Review F#1)');
-  // Review M3: deliverReplies is required now (was optional with lazy
-  // require fallback). The lazy fallback meant two code paths reached the
-  // same destination, which would silently drift if lib/telegram/deliver
-  // renamed its export. Single explicit DI is cleaner.
-  const deliver = deliverReplies;
+  // Review F#1: route reply text through the shared agent-reply pipeline so
+  // parseResponse + sanitizeAssistantReply + chunkMarkdownText + deliverReplies
+  // + inline sticker/reaction handling fire uniformly with SDK/tmux callers.
+  // Pre-fix the dispatcher did raw `chunkText` + `deliver()`, leaking
+  // [sticker:NAME], [react:EMOJI], and `No response requested.` as literal
+  // text into Telegram.
+  const deliverAgent = processAndDeliverAgentText
+    || require('../telegram/process-agent-reply').processAndDeliverAgentText;
   return async function channelsToolDispatcher(call) {
-    const { sessionKey, chatId, threadId, toolName, text, files } = call;
+    const { sessionKey, chatId, threadId, toolName, text, files, sourceMsgId } = call;
     if (toolName !== 'reply') {
       // 0.11.0 Phase 1 ships `reply` only — react and edit_message are
@@ -128,21 +141,34 @@ function createChannelsToolDispatcher({
     }
     try {
-      const chunks = chunkText(text, maxChunkLen);
-      const result = await deliver({
+      // Review F#1: helper does parse → sanitize → chunk → deliver →
+      // inline-sticker → reaction in one place. summary.deliverResult is
+      // null if the post-parse text was empty (solo sticker/reaction).
+      const summary = await deliverAgent({
+        text,
         bot,
-        send,
+        tg: send,
         chatId,
         threadId,
-        chunks,
-        replyToMessageId: null,   // ChannelsProcess doesn't track source-msg per-reply yet
-        meta: { source: 'channels-tool-dispatcher', sessionKey, toolName },
+        replyToMessageId: sourceMsgId || null,
+        applyReactions: sourceMsgId != null,
+        source: 'channels-tool-dispatcher',
+        meta: { sessionKey, toolName },
+        parseResponse,
+        sanitizeAssistantReply,
+        chunkMarkdownText: chunkText,
+        deliverReplies,
+        chunkBudget: maxChunkLen,
+        logEvent,
+        sessionKey,
         logger,
       });
-      if (result.failed?.length > 0) {
-        const failedDetail = result.failed.map(f => f.error?.message || 'unknown').join(', ');
-        return { ok: false, error: `delivered ${result.sent.length} of ${chunks.length} chunks; failed: ${failedDetail}` };
+      const dr = summary.deliverResult;
+      if (dr && dr.failed?.length > 0) {
+        const failedDetail = dr.failed.map(f => f.error?.message || 'unknown').join(', ');
+        const totalChunks = (dr.sent?.length || 0) + dr.failed.length;
+        return { ok: false, error: `delivered ${dr.sent?.length || 0} of ${totalChunks} chunks; failed: ${failedDetail}` };
       }
       // File attachments — sent as separate messages AFTER the text.