polygram 0.11.0-rc.9 → 0.12.0-rc.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "$schema": "https://anthropic.com/claude-code/plugin.schema.json",
3
3
  "name": "polygram",
4
- "version": "0.11.0-rc.9",
4
+ "version": "0.11.0-rc.15",
5
5
  "description": "Telegram integration for Claude Code that preserves the OpenClaw per-chat session model. Migration target for OpenClaw users. Multi-bot, multi-chat, per-topic isolation; SQLite transcripts; inline-keyboard approvals. Bundles /polygram:status|logs|pair-code|approvals admin commands plus history (transcript queries) and polygram-send (out-of-turn IPC sends with file-upload validation) skills.",
6
6
  "keywords": [
7
7
  "telegram",
@@ -4,6 +4,7 @@
4
4
  "bots": {
5
5
  "admin-bot": {
6
6
  "token": "REPLACE_WITH_BOT_TOKEN_FROM_BOTFATHER",
7
+ "_comment_apiRoot": "Optional. Point grammy at a self-hosted Telegram Bot API server (e.g. 'http://localhost:8082' from a local `telegram-bot-api --local` process) to raise file send/receive limits from cloud's 50MB-out / 20MB-in to 2GB both ways. Omit for cloud Telegram (default, unchanged). The server is a separate localhost-only companion daemon — see docs/0.12.0-file-send.md.",
7
8
  "allowConfigCommands": true,
8
9
  "_comment_adminChatId": "Required when allowConfigCommands is true for pairing commands (/pair-code, /pairings, /unpair) to work. These grant cross-chat trust and are gated to the admin chat only.",
9
10
  "adminChatId": "123456789",
@@ -70,7 +71,8 @@
70
71
  "model": "opus",
71
72
  "effort": "medium",
72
73
  "cwd": "/Users/you/admin-agent",
73
- "timeout": 600
74
+ "timeout": 600,
75
+ "_comment_maxFileBytes": "OPTIONAL per-chat (or per-topic; topic wins) file-size cap in BYTES. There is NO fixed default — the default is backend-derived: cloud Telegram = 50MB send / 20MB receive; with a local Bot API server (bot.apiRoot set) = 2GB both ways. This key only LOWERS that ceiling for this chat (Telegram rejects anything above the backend limit regardless); omit it to use the full backend default. To set one, add e.g. \"maxFileBytes\": 104857600 (=100MB) — only meaningful when apiRoot is set, since cloud already clamps to 50/20MB."
74
76
  },
75
77
 
76
78
  "-1000000000001": {
@@ -100,7 +102,7 @@
100
102
  "isolateTopics": true,
101
103
  "_comment_topics": "rc.48: each topic entry is EITHER a string (legacy: just a label) OR an object with optional fields {name, agent, cwd, model, effort, permissionMode, isolateUserConfig}. Object form lets a topic override chat-level config. Per-topic permissionMode overrides chat-level — typical use: scope one topic to permissionMode:'default' (so settings.json gates apply) while the rest of the chat stays on bypassPermissions. Object form requires isolateTopics: true (each topic gets its own SDK Query); polygram emits a startup warning otherwise.",
102
104
  "_comment_isolateUserConfig": "0.10.0, tmux backend only: isolateUserConfig:true spawns the topic's claude TUI cut off from the user-level ~/.claude config — passes --strict-mcp-config (zero MCP servers load) and --setting-sources project,local (drops ~/.claude/settings.json; the spawn cwd's own .claude/settings.json still loads). Use it when a topic's agent would otherwise inherit slow user-global MCP servers whose cold-start (tens of seconds) wedges the TUI before it can accept a prompt. Settable at chat OR topic level (topic wins). Default false.",
103
- "_comment_pm": "0.11.0: 'pm' selects the Process backend: 'sdk' (default; per-token Console API; full SDK features), 'tmux' (subscription-priced claude CLI in tmux; JSONL/pane parsing for IO), 'channels' (subscription-priced claude CLI in tmux; structured IO via the official Channels MCP protocol — see docs/0.11.0-channels-driver-plan.md). Settable at bot, chat, OR topic level (topic > chat > bot). Channels requires Pro/Max subscription, Claude Code v2.1.80+, and is in research preview — invokes --dangerously-load-development-channels.",
105
+ "_comment_pm": "0.12.0: 'pm' selects the Process backend. Two canonical values: 'sdk' (default; per-token Console API billing; full SDK features) and 'cli' (subscription-priced claude CLI in tmux + Channels MCP bridge + hooks ndjson observability — see docs/0.12.0-cli-driver-plan.md). Settable at bot, chat, OR topic level (topic > chat > bot). Aliases preserved for back-compat with 0.10/0.11 configs: 'channels' and 'tmux' both resolve to 'cli' with a once-at-boot deprecation warn. CLI requires Pro/Max subscription, Claude Code v2.1.80+, and uses --dangerously-load-development-channels (research preview flag).",
104
106
  "topics": {
105
107
  "100": "Customer A",
106
108
  "200": {
@@ -22,8 +22,48 @@
22
22
  * extension — the fallback only kicks in when MIME is unhelpful.
23
23
  */
24
24
 
25
- const MAX_FILE_BYTES = 10 * 1024 * 1024;
26
- const MAX_TOTAL_BYTES = 20 * 1024 * 1024;
25
+ // Inbound (user bot) per-file cap. Telegram's cloud Bot API hard-caps
26
+ // bot file DOWNLOADS (getFile) at 20 MB, so 20 MB is the real ceiling on
27
+ // cloud — raised from 10 MB so users can send larger tracks/docs. With a
28
+ // self-hosted Bot API server (config.bot.apiRoot) the Telegram limit rises
29
+ // to 2 GB; resolveFileCaps() raises the default accordingly.
30
+ const MAX_FILE_BYTES = 20 * 1024 * 1024;
31
+ const MAX_TOTAL_BYTES = 50 * 1024 * 1024;
32
+
33
+ // ─── Backend-derived file-size caps (cloud vs local Bot API server) ──
34
+ //
35
+ // These are the HARD ceilings Telegram itself enforces — a per-chat
36
+ // override can lower them but never exceed them (Telegram rejects beyond
37
+ // regardless). NOT "adaptive": there is no intermediate tier. Cloud is a
38
+ // flat 20 in / 50 out; a local `telegram-bot-api --local` server is a flat
39
+ // 2 GB both ways.
40
const CLOUD_MAX_IN_BYTES = 20 * 1024 * 1024; // getFile download limit
const CLOUD_MAX_OUT_BYTES = 50 * 1024 * 1024; // sendDocument upload limit
const LOCAL_MAX_BYTES = 2000 * 1024 * 1024; // --local server, both ways

/**
 * Resolve the effective per-file caps for a chat/topic.
 *
 * Without an override each direction gets its backend ceiling. A valid
 * override applies to BOTH directions and is clamped per direction, so it
 * can lower a cap but never raise it past the backend ceiling.
 *
 * @param {object} [opts]
 * @param {boolean} [opts.localApi=false] — true when config.bot.apiRoot is
 *   set (a local Bot API server is in use → LOCAL_MAX_BYTES both ways).
 * @param {number|null} [opts.override=null] — per-chat/topic maxFileBytes
 *   (bytes), resolved by the caller from topic → chat → undefined. Ignored
 *   unless it is a finite number >= 1; fractional values are floored
 *   because a byte count must be a whole number.
 * @returns {{ inBytes:number, outBytes:number, ceiling:number, localApi:boolean }}
 *   `ceiling` is the outbound backend ceiling (the larger of the two on
 *   cloud); `localApi` is echoed back as passed.
 */
function resolveFileCaps({ localApi = false, override = null } = {}) {
  // Per-direction hard ceilings for the selected backend.
  const inCeiling = localApi ? LOCAL_MAX_BYTES : CLOUD_MAX_IN_BYTES;
  const outCeiling = localApi ? LOCAL_MAX_BYTES : CLOUD_MAX_OUT_BYTES;

  // Anything that is not a usable byte count (non-number, NaN, Infinity,
  // zero, negative, sub-byte) means "no override" → fall back to ceilings.
  const usable = typeof override === 'number'
    && Number.isFinite(override)
    && override >= 1;
  const requested = usable ? Math.floor(override) : null;

  return {
    inBytes: requested === null ? inCeiling : Math.min(requested, inCeiling),
    outBytes: requested === null ? outCeiling : Math.min(requested, outCeiling),
    ceiling: outCeiling,
    localApi,
  };
}
27
67
  const MIME_ALLOW = [
28
68
  /^image\//, /^audio\//, /^video\//,
29
69
  /^application\/pdf$/, /^text\/plain$/,
@@ -109,8 +149,12 @@ function filterAttachments(attachments, opts = {}) {
109
149
 
110
150
  module.exports = {
111
151
  filterAttachments,
152
+ resolveFileCaps,
112
153
  MAX_FILE_BYTES,
113
154
  MAX_TOTAL_BYTES,
155
+ CLOUD_MAX_IN_BYTES,
156
+ CLOUD_MAX_OUT_BYTES,
157
+ LOCAL_MAX_BYTES,
114
158
  MIME_ALLOW,
115
159
  EXTENSION_ALLOW,
116
160
  FALLBACK_MIMES,
package/lib/claude-bin.js CHANGED
@@ -4,16 +4,20 @@ const os = require('os');
4
4
  const path = require('path');
5
5
  const fs = require('fs');
6
6
 
7
+ // 0.12 Phase 4: moved from lib/process/tmux-process.js into the helper module
8
+ // that consumes it, so the constant survives TmuxProcess deletion. CliProcess
9
+ // + spike scripts + polygram boot all import from here now.
10
+ const CLAUDE_CLI_PINNED_VERSION = '2.1.142';
11
+
7
12
  /**
8
- * Resolve + verify the pinned claude CLI binary for the tmux backend.
13
+ * Resolve + verify the pinned claude CLI binary.
9
14
  *
10
- * Why this exists: the tmux backend reads claude CLI INTERNAL
11
- * artefacts (JSONL events, queue-operation semantics, TUI banner
12
- * ASCII, READY hint strings, stop_reason values) — none a stable
13
- * public contract. polygram pins ONE version
14
- * (CLAUDE_CLI_PINNED_VERSION in lib/process/tmux-process.js) and
15
- * must spawn THAT binary, never whatever `claude` on $PATH happens
16
- * to resolve to.
15
+ * Why this exists: the tmux + CLI backends read claude CLI internal
16
+ * artefacts (TUI banner ASCII, READY hint strings, channel notification
17
+ * registration timing, MCP-init order) — none a stable public contract.
18
+ * polygram pins ONE version (`CLAUDE_CLI_PINNED_VERSION`) and must
19
+ * spawn THAT binary, never whatever `claude` on $PATH happens to
20
+ * resolve to.
17
21
  *
18
22
  * Before this module the tmux runner spawned the bare string
19
23
  * `claude`, resolved through $PATH. The claude CLI installs each
@@ -75,4 +79,4 @@ function verifyPinnedClaudeBin(version) {
75
79
  }
76
80
  }
77
81
 
78
- module.exports = { resolvePinnedClaudeBin, verifyPinnedClaudeBin };
82
+ module.exports = { resolvePinnedClaudeBin, verifyPinnedClaudeBin, CLAUDE_CLI_PINNED_VERSION };
@@ -90,6 +90,13 @@ function createAutoResumeTracker({ cooldownMs = DEFAULT_COOLDOWN_MS, now = Date.
90
90
  */
91
91
  function isAutoResumable({ error, aborted, replay, shuttingDown }) {
92
92
  if (aborted || replay || shuttingDown) return false;
93
+ // Review F#6: channels analog of the tmux 'idle with no Claude activity'
94
+ // pattern. The bridge socket dropped mid-turn (claude crashed, bridge
95
+ // process died) — that's a wedge, not a runaway. Same intent as the
96
+ // regex match below, just expressed via err.code because channels throws
97
+ // a different message string. TURN_TIMEOUT stays NON-resumable (it's
98
+ // the channels analog of the wall-clock ceiling — likely a runaway).
99
+ if (error?.code === 'BRIDGE_DISCONNECTED') return true;
93
100
  const msg = String(error?.message || error || '');
94
101
  return /idle with no Claude activity/i.test(msg);
95
102
  }
@@ -205,12 +205,20 @@ function resolveSessionForSpawn(db, sessionKey, resolved = {}) {
205
205
  // of THAT task; claude responded with music release info, inline,
206
206
  // never calling the reply tool. Every turn timed out at 3min.
207
207
  //
208
- // Rule: any transition TO or FROM channels drops the prior session.
209
- // XOR — flips between channels and {sdk,tmux} invalidate; sdk↔tmux
210
- // flips remain free.
211
- const wasChannels = before.pm_backend === 'channels';
212
- const willBeChannels = after.pm_backend === 'channels';
213
- if (after.pm_backend != null && wasChannels !== willBeChannels) {
208
+ // Rule: any transition TO or FROM the channels/CLI backend drops the
209
+ // prior session. XOR — flips between (channels|cli) and {sdk,tmux}
210
+ // invalidate; sdk↔tmux flips remain free (rc.32 reasoning).
211
+ //
212
+ // 0.12: 'cli' is the canonical name for what was 'channels' in 0.11.
213
+ // Treat both as the same "channels-class" backend for transition
214
+ // invalidation purposes — a row persisted with pm_backend='channels'
215
+ // before 0.12 and a row created today with pm_backend='cli' are
216
+ // semantically the same in terms of session-context invariants
217
+ // (bridge MCP server mounted, reply-tool contract enforced).
218
+ const CHANNELS_CLASS = new Set(['channels', 'cli']);
219
+ const wasChannelsClass = CHANNELS_CLASS.has(before.pm_backend);
220
+ const willBeChannelsClass = CHANNELS_CLASS.has(after.pm_backend);
221
+ if (after.pm_backend != null && wasChannelsClass !== willBeChannelsClass) {
214
222
  drifted.push('pm_backend');
215
223
  }
216
224
 
@@ -163,6 +163,47 @@ const CODES = {
163
163
  isTransient: false,
164
164
  autoRecover: null,
165
165
  },
166
+ // Review F#5: channels-specific error codes. Pre-fix these fell through
167
+ // to the generic 'unknown' kind (errorReplyText: "Hit a snag. Try
168
+ // resending.") which lies about what happened. Mirrors the rc.46→rc.47
169
+ // tmuxToolWedge fix where backend-specific codes needed their own kinds.
170
+ //
171
+ // BRIDGE_DISCONNECTED: thrown by CliProcess when the mcp-bridge
172
+ // socket drops mid-turn (claude crashed, bridge process died, etc).
173
+ // isTransient: true because the daemon retries spawning the backend.
174
+ BRIDGE_DISCONNECTED: {
175
+ kind: 'bridgeDisconnected',
176
+ userMessage: '🔌 Lost the bridge to Claude mid-turn. Retrying — please resend if I don\'t reply in 30s.',
177
+ isTransient: true,
178
+ autoRecover: null,
179
+ },
180
+ // CHANNELS_HANDSHAKE_TIMEOUT: bridge process never sent session_init
181
+ // within the handshake window during start(). Usually means the bridge
182
+ // crashed pre-init or the socket file is stale.
183
+ CHANNELS_HANDSHAKE_TIMEOUT: {
184
+ kind: 'channelsHandshakeTimeout',
185
+ userMessage: '⏳ Couldn\'t start a Claude session — the bridge didn\'t respond in time. Try again in a moment.',
186
+ isTransient: true,
187
+ autoRecover: null,
188
+ },
189
+ // CHANNELS_DIALOG_TIMEOUT: a permission / usage-limit / context-overflow
190
+ // dialog opened mid-turn and we couldn't auto-respond within the dialog
191
+ // window. The turn is dead; user needs to retry.
192
+ CHANNELS_DIALOG_TIMEOUT: {
193
+ kind: 'channelsDialogTimeout',
194
+ userMessage: '🚧 Claude hit a dialog (permission/usage-limit) mid-turn and I couldn\'t auto-respond in time. Please resend.',
195
+ isTransient: false,
196
+ autoRecover: null,
197
+ },
198
+ // TURN_TIMEOUT: 10-min wall-clock cap on a single channels turn. Mirror
199
+ // of the tmux wall-clock ceiling — typically a runaway, not a wedge.
200
+ // Not transient (auto-retry would just runaway again).
201
+ TURN_TIMEOUT: {
202
+ kind: 'turnTimeout',
203
+ userMessage: '⏱ The turn ran past the 10-minute cap. Resend if the answer still matters.',
204
+ isTransient: false,
205
+ autoRecover: null,
206
+ },
166
207
  };
167
208
 
168
209
  /**
@@ -42,13 +42,37 @@ function createHandleAbort({
42
42
  const threadId = msg.message_thread_id?.toString();
43
43
  const sessionKey = getSessionKey(chatId, threadId, chatConfig);
44
44
  const proc = pm.has(sessionKey) ? pm.get(sessionKey) : null;
45
- const hadActive = !!proc?.inFlight;
45
+ let hadActive = !!proc?.inFlight;
46
46
 
47
47
  // Mark BEFORE killing: the 'close' event fires almost immediately
48
48
  // after interrupt, and the surrounding handleMessage's catch
49
49
  // needs to see the flag to skip the generic error-reply.
50
50
  if (hadActive) markSessionAborted(sessionKey);
51
51
 
52
+ // "Stop" incident (shumorobot Music, 2026-05-31 13:08): on the
53
+ // CliProcess/channels backend a turn resolves on the quiet-window
54
+ // after claude's last reply tool call (inFlight → false), but claude
55
+ // can still be working (subagent, long Bash). Keying the ack on
56
+ // inFlight alone made "Stop" say "Nothing to stop" while a subagent
57
+ // download churned. probeBusyState() reads the TUI "esc to interrupt"
58
+ // hint — the truthful signal — so detection, the abort mark, and the
59
+ // ack all agree. The probe result is logged below (forensics) so the
60
+ // heuristic can be refined against real states later. Channels analog
61
+ // of the (deleted) tmux hasBackgroundShell branch; typeof-guarded so
62
+ // it's a no-op on backends without it.
63
+ let busyProbe = null;
64
+ if (!hadActive && proc && typeof proc.probeBusyState === 'function') {
65
+ try {
66
+ busyProbe = await proc.probeBusyState();
67
+ if (busyProbe?.busy) {
68
+ hadActive = true;
69
+ markSessionAborted(sessionKey);
70
+ }
71
+ } catch (err) {
72
+ logger.error?.(`[${botName}] busy-probe failed: ${err.message}`);
73
+ }
74
+ }
75
+
52
76
  // Bug 1 (incident 2026-05-18): "Stop" was turn-scoped — it only
53
77
  // looked at an in-flight TURN. But the agent can leave a DETACHED
54
78
  // background shell running (a `run_in_background:true` Bash) that
@@ -87,6 +111,19 @@ function createHandleAbort({
87
111
  chat_id: chatId, user_id: msg.from?.id || null,
88
112
  had_active: hadActive,
89
113
  killed_background_shell: killedBackgroundShell,
114
+ // "Stop" incident forensics: the raw busy-probe signals at decision
115
+ // time. Lets us query, across real aborts, where the esc-hint /
116
+ // inFlight / pending-turn signals agreed vs diverged and refine the
117
+ // heuristic later. null when no probe ran (turn was already inFlight,
118
+ // or the backend has no probeBusyState).
119
+ busy_probe: busyProbe ? {
120
+ busy: busyProbe.busy,
121
+ streaming: busyProbe.streaming,
122
+ in_flight: busyProbe.inFlight,
123
+ pending_turns: busyProbe.pendingTurns,
124
+ captured: busyProbe.captured,
125
+ pane_tail: busyProbe.paneTail,
126
+ } : null,
90
127
  trigger: cleanText.slice(0, 40),
91
128
  });
92
129
 
@@ -199,7 +199,17 @@ function createSlashCommands({
199
199
  }), 'log model change');
200
200
  const { anyActive } = await applyConfigChange('model', newModel);
201
201
  const ver = (modelVersionsDesc && modelVersionsDesc[newModel]) || newModel;
202
- const suffix = anyActive ? ` — I'll switch when I finish` : '';
202
+ // Review F#10: channels backend can't apply model/effort changes
203
+ // live — its setModel/applyFlagSettings throw UNSUPPORTED_OPERATION,
204
+ // pm.setModel returns false → `anyActive` is true → user saw the
205
+ // misleading "I'll switch when I finish" message. Now we detect
206
+ // the channels backend explicitly and give an honest answer:
207
+ // settings are persisted to chatConfig and take effect on the next
208
+ // /reset or /new (channels lacks an in-place re-init path).
209
+ const backendName = typeof pm.getBackend === 'function' ? pm.getBackend(sessionKey) : null;
210
+ const suffix = backendName === 'channels'
211
+ ? ` — applies on next /reset (channels)`
212
+ : (anyActive ? ` — I'll switch when I finish` : '');
203
213
  await sendReply(`Model → ${newModel} (${ver})${suffix}`);
204
214
  } else {
205
215
  await sendReply(`Unknown model. Use: opus, sonnet, haiku`);
@@ -219,7 +229,17 @@ function createSlashCommands({
219
229
  user: cmdUser, user_id: cmdUserId, source: 'command',
220
230
  }), 'log effort change');
221
231
  const { anyActive } = await applyConfigChange('effort', newEffort);
222
- const suffix = anyActive ? ` — I'll switch when I finish` : '';
232
+ // Review F#10: channels backend can't apply model/effort changes
233
+ // live — its setModel/applyFlagSettings throw UNSUPPORTED_OPERATION,
234
+ // pm.setModel returns false → `anyActive` is true → user saw the
235
+ // misleading "I'll switch when I finish" message. Now we detect
236
+ // the channels backend explicitly and give an honest answer:
237
+ // settings are persisted to chatConfig and take effect on the next
238
+ // /reset or /new (channels lacks an in-place re-init path).
239
+ const backendName = typeof pm.getBackend === 'function' ? pm.getBackend(sessionKey) : null;
240
+ const suffix = backendName === 'channels'
241
+ ? ` — applies on next /reset (channels)`
242
+ : (anyActive ? ` — I'll switch when I finish` : '');
223
243
  await sendReply(`Effort → ${newEffort}${suffix}`);
224
244
  } else {
225
245
  await sendReply(`Unknown effort. Use: low, medium, high, xhigh, max`);
@@ -50,7 +50,14 @@ function validateIpcFileParam(method, params = {}) {
50
50
  const fileParam = FILE_PARAM_BY_METHOD[method];
51
51
  if (!fileParam) return null;
52
52
  const val = params[fileParam];
53
- if (typeof val !== 'string') return null; // envelope/Buffer/etcpass through
53
+ // { source: '/abs/path' } envelope — now coerced to a grammy InputFile in
54
+ // tg() (coerceFileParams). Validate it has a usable absolute source, else
55
+ // pass through (Buffer / stream / InputFile shapes).
56
+ if (val && typeof val === 'object' && typeof val.source === 'string') {
57
+ if (val.source.length === 0) return `polygram IPC: ${fileParam}.source is empty`;
58
+ return null;
59
+ }
60
+ if (typeof val !== 'string') return null; // Buffer/InputFile/etc — pass through
54
61
  if (val.length === 0) return `polygram IPC: ${fileParam} is empty`;
55
62
 
56
63
  const looksUrl = /^(https?|ftp):\/\//i.test(val);
@@ -2,7 +2,7 @@
2
2
  * Bridge ↔ daemon socket protocol — typed schemas.
3
3
  *
4
4
  * Wire format: newline-delimited JSON over a unix socket per session.
5
- * Both endpoints (ChannelsProcess and channels-bridge.mjs) speak the same
5
+ * Both endpoints (CliProcess and channels-bridge.mjs) speak the same
6
6
  * message kinds. This module centralizes the shape so both sides safeParse
7
7
  * inbound messages with the same constraints — protecting against malformed
8
8
  * payloads silently corrupting pending-state Maps.
@@ -10,7 +10,7 @@
10
10
  * Adding a new message kind:
11
11
  * 1. Define its schema below as `<KindName>MessageSchema`
12
12
  * 2. Add it to `AnyDaemonToBridgeMessage` or `AnyBridgeToDaemonMessage`
13
- * 3. Handle it in the corresponding switch (channels-process.js
13
+ * 3. Handle it in the corresponding switch (cli-process.js
14
14
  * _onBridgeMsg or channels-bridge.mjs handleDaemonMessage)
15
15
  *
16
16
  * Validation policy:
@@ -67,12 +67,22 @@ const PongMessageSchema = z.object({
67
67
  kind: z.literal('pong'),
68
68
  }).passthrough();
69
69
 
70
+ // 0.12 Phase 1.6: bridge tells daemon when claude has finished registering
71
+ // the bridge as an MCP server (claude sent its first ListToolsRequest).
72
+ // Polygram's _waitForBridgeHandshake gates on this in addition to hello,
73
+ // eliminating the cold-spawn race (Finding 0.3.A).
74
+ const McpReadyMessageSchema = z.object({
75
+ kind: z.literal('mcp-ready'),
76
+ session: NonEmptyString,
77
+ }).passthrough();
78
+
70
79
  const AnyBridgeToDaemonMessage = z.discriminatedUnion('kind', [
71
80
  HelloSchema,
72
81
  SessionInitSchema,
73
82
  ToolCallMessageSchema,
74
83
  PermRequestMessageSchema,
75
84
  PongMessageSchema,
85
+ McpReadyMessageSchema,
76
86
  ]);
77
87
 
78
88
  // ─── daemon → bridge ───────────────────────────────────────────────
@@ -2,11 +2,11 @@
2
2
  * ChannelsBridgeServer — per-session unix-socket server for the bridge
3
3
  * subprocess to connect back to.
4
4
  *
5
- * Extracted from ChannelsProcess (M1 refactor) so the socket lifecycle —
5
+ * Extracted from CliProcess (M1 refactor) so the socket lifecycle —
6
6
  * listen with restrictive umask, accept ONE bridge, hello-handshake auth,
7
7
  * line-delimited JSON I/O, schema validation, single-bridge-per-session
8
8
  * enforcement, clean teardown — lives in one focused class instead of
9
- * sprawling across ChannelsProcess.
9
+ * sprawling across CliProcess.
10
10
  *
11
11
  * Owns:
12
12
  * - net.Server lifecycle (listen / close)
@@ -17,11 +17,14 @@
17
17
  *
18
18
  * Does NOT own:
19
19
  * - protocol semantics (tool routing, perm relay, turn lifecycle) — those
20
- * stay in ChannelsProcess, which subscribes to the events this class emits
20
+ * stay in CliProcess, which subscribes to the events this class emits
21
21
  * - claude/bridge process lifecycle
22
22
  *
23
23
  * Event surface (EventEmitter):
24
- * 'bridge-ready' — handshake complete; safe to send daemon→bridge msgs
24
+ * 'bridge-ready' — daemon-side handshake (hello + session_init) complete
25
+ * 'mcp-ready' — claude-side MCP-server registration complete (first
26
+ * ListToolsRequest received from claude). 0.12 P1.6
27
+ * cold-spawn race fix — see channels-bridge.mjs.
25
28
  * 'bridge-message', msg — every validated bridge→daemon message (post-auth)
26
29
  * 'bridge-disconnected' — single-bridge connection closed
27
30
  * 'error', err — socket-level errors (rare; non-fatal)
@@ -29,6 +32,7 @@
29
32
 
30
33
  'use strict';
31
34
 
35
+ const crypto = require('node:crypto');
32
36
  const EventEmitter = require('node:events');
33
37
  const fs = require('node:fs');
34
38
  const net = require('node:net');
@@ -162,14 +166,26 @@ class ChannelsBridgeServer extends EventEmitter {
162
166
  }
163
167
 
164
168
  if (!authenticated) {
165
- if (raw.kind === 'hello'
166
- && raw.session_key === this.sessionKey
167
- && raw.secret === this.sockSecret) {
169
+ // Review F#7: harden the hello-handshake.
170
+ // 1. timingSafeEqual for the secret compare so a same-uid
171
+ // attacker can't byte-by-byte probe via response-timing.
172
+ // 2. ROTATE the secret after first successful auth (set to
173
+ // null) so a stale POLYGRAM_SOCK_SECRET leaked via
174
+ // /proc/<pid>/environ can't replay against this
175
+ // CliProcess after the legit bridge disconnects.
176
+ // The bridge process is one-shot per spawn anyway (it
177
+ // exits on socket close — see channels-bridge.mjs:109),
178
+ // so legitimate re-auth within one CliProcess
179
+ // instance never happens — only a hijacker would.
180
+ const verdict = this._verifyHelloAuth(raw);
181
+ if (verdict.ok) {
168
182
  authenticated = true;
169
183
  this.authenticated = true;
184
+ this.sockSecret = null; // invalidate — single-shot per instance
170
185
  try { conn.write(JSON.stringify({ kind: 'hello_ack' }) + '\n'); } catch {}
171
186
  continue;
172
187
  }
188
+ this.logger.warn?.(`[${this.label}] hello rejected — reason=${verdict.reason}`);
173
189
  try { conn.write(JSON.stringify({ kind: 'hello_reject', reason: 'auth' }) + '\n'); } catch {}
174
190
  conn.end();
175
191
  this.conn = null;
@@ -193,6 +209,13 @@ class ChannelsBridgeServer extends EventEmitter {
193
209
  this.emit('bridge-ready');
194
210
  continue;
195
211
  }
212
+ if (parsed.msg.kind === 'mcp-ready') {
213
+ // 0.12 Phase 1.6: bridge signals that claude has finished
214
+ // registering it as an MCP server. Polygram gates send() on this
215
+ // (Finding 0.3.A — cold-spawn race).
216
+ this.emit('mcp-ready', parsed.msg);
217
+ continue;
218
+ }
196
219
  this.emit('bridge-message', parsed.msg);
197
220
  }
198
221
  });
@@ -209,6 +232,43 @@ class ChannelsBridgeServer extends EventEmitter {
209
232
  this.logger.warn?.(`[${this.label}] bridge conn error: ${err.message}`);
210
233
  });
211
234
  }
235
+
236
+ /**
237
+ * Review F#7: hello-handshake verification, extracted as a pure method so it
238
+ * can be exercised in isolation. Returns `{ ok: true }` on accept or
239
+ * `{ ok: false, reason }` on reject. Uses crypto.timingSafeEqual for the
240
+ * secret compare and refuses if this.sockSecret has already been consumed
241
+ * (post-auth rotation).
242
+ *
243
+ * @param {object} raw — parsed bridge→daemon hello payload
244
+ * @returns {{ ok: true } | { ok: false, reason: string }}
245
+ */
246
+ _verifyHelloAuth(raw) {
247
+ if (this.sockSecret == null) {
248
+ return { ok: false, reason: 'secret-consumed' };
249
+ }
250
+ if (!raw || raw.kind !== 'hello') {
251
+ return { ok: false, reason: 'not-hello' };
252
+ }
253
+ if (raw.session_key !== this.sessionKey) {
254
+ return { ok: false, reason: 'wrong-session-key' };
255
+ }
256
+ if (typeof raw.secret !== 'string' || raw.secret.length === 0) {
257
+ return { ok: false, reason: 'no-secret' };
258
+ }
259
+ const a = Buffer.from(raw.secret, 'utf8');
260
+ const b = Buffer.from(this.sockSecret, 'utf8');
261
+ if (a.length !== b.length) {
262
+ // timingSafeEqual requires equal-length inputs; length mismatch is a
263
+ // wrong-secret signal but constant-time compares MUST short-circuit
264
+ // here (otherwise we'd leak the secret's length).
265
+ return { ok: false, reason: 'wrong-secret' };
266
+ }
267
+ if (!crypto.timingSafeEqual(a, b)) {
268
+ return { ok: false, reason: 'wrong-secret' };
269
+ }
270
+ return { ok: true };
271
+ }
212
272
  }
213
273
 
214
274
  module.exports = { ChannelsBridgeServer };