polygram 0.12.0-rc.1 → 0.12.0-rc.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config.example.json +3 -1
- package/lib/attachments.js +46 -2
- package/lib/handlers/abort.js +38 -1
- package/lib/ipc/file-validator.js +8 -1
- package/lib/process/channels-tool-dispatcher.js +20 -2
- package/lib/process/cli-process.js +274 -52
- package/lib/process/factory.js +0 -5
- package/lib/process-manager.js +13 -0
- package/lib/sdk/callbacks.js +110 -5
- package/lib/telegram/api.js +9 -0
- package/lib/telegram/input-file.js +76 -0
- package/lib/telegram/reactions.js +5 -0
- package/lib/tmux/log-tail.js +11 -1
- package/lib/tmux/startup-gate.js +65 -1
- package/package.json +1 -1
- package/polygram.js +34 -12
package/config.example.json
CHANGED
|
@@ -4,6 +4,7 @@
|
|
|
4
4
|
"bots": {
|
|
5
5
|
"admin-bot": {
|
|
6
6
|
"token": "REPLACE_WITH_BOT_TOKEN_FROM_BOTFATHER",
|
|
7
|
+
"_comment_apiRoot": "Optional. Point grammy at a self-hosted Telegram Bot API server (e.g. 'http://localhost:8082' from a local `telegram-bot-api --local` process) to raise file send/receive limits from cloud's 50MB-out / 20MB-in to 2GB both ways. Omit for cloud Telegram (default, unchanged). The server is a separate localhost-only companion daemon — see docs/0.12.0-file-send.md.",
|
|
7
8
|
"allowConfigCommands": true,
|
|
8
9
|
"_comment_adminChatId": "Required when allowConfigCommands is true for pairing commands (/pair-code, /pairings, /unpair) to work. These grant cross-chat trust and are gated to the admin chat only.",
|
|
9
10
|
"adminChatId": "123456789",
|
|
@@ -70,7 +71,8 @@
|
|
|
70
71
|
"model": "opus",
|
|
71
72
|
"effort": "medium",
|
|
72
73
|
"cwd": "/Users/you/admin-agent",
|
|
73
|
-
"timeout": 600
|
|
74
|
+
"timeout": 600,
|
|
75
|
+
"_comment_maxFileBytes": "OPTIONAL per-chat (or per-topic; topic wins) file-size cap in BYTES. There is NO fixed default — the default is backend-derived: cloud Telegram = 50MB send / 20MB receive; with a local Bot API server (bot.apiRoot set) = 2GB both ways. This key only LOWERS that ceiling for this chat (Telegram rejects anything above the backend limit regardless); omit it to use the full backend default. To set one, add e.g. \"maxFileBytes\": 104857600 (=100MB) — only meaningful when apiRoot is set, since cloud already clamps to 50/20MB."
|
|
74
76
|
},
|
|
75
77
|
|
|
76
78
|
"-1000000000001": {
|
package/lib/attachments.js
CHANGED
|
@@ -22,8 +22,48 @@
|
|
|
22
22
|
* extension — the fallback only kicks in when MIME is unhelpful.
|
|
23
23
|
*/
|
|
24
24
|
|
|
25
|
-
|
|
26
|
-
|
|
25
|
+
// Inbound (user → bot) per-file cap. Telegram's cloud Bot API hard-caps
|
|
26
|
+
// bot file DOWNLOADS (getFile) at 20 MB, so 20 MB is the real ceiling on
|
|
27
|
+
// cloud — raised from 10 MB so users can send larger tracks/docs. With a
|
|
28
|
+
// self-hosted Bot API server (config.bot.apiRoot) the Telegram limit rises
|
|
29
|
+
// to 2 GB; resolveFileCaps() raises the default accordingly.
|
|
30
|
+
const MAX_FILE_BYTES = 20 * 1024 * 1024;
|
|
31
|
+
const MAX_TOTAL_BYTES = 50 * 1024 * 1024;
|
|
32
|
+
|
|
33
|
+
// ─── Backend-derived file-size caps (cloud vs local Bot API server) ──
|
|
34
|
+
//
|
|
35
|
+
// These are the HARD ceilings Telegram itself enforces — a per-chat
|
|
36
|
+
// override can lower them but never exceed them (Telegram rejects beyond
|
|
37
|
+
// regardless). NOT "adaptive": there is no intermediate tier. Cloud is a
|
|
38
|
+
// flat 20 in / 50 out; a local `telegram-bot-api --local` server is a flat
|
|
39
|
+
// 2 GB both ways.
|
|
40
|
+
// Hard per-file limits enforced by Telegram itself, in bytes.
const CLOUD_MAX_IN_BYTES = 20 * 1024 * 1024;   // getFile download limit
const CLOUD_MAX_OUT_BYTES = 50 * 1024 * 1024;  // sendDocument upload limit
const LOCAL_MAX_BYTES = 2000 * 1024 * 1024;    // --local server, both ways

/**
 * Resolve the effective per-file caps for a chat/topic.
 *
 * Without an override the caps are the backend limits: 20 MB in / 50 MB out
 * on cloud, LOCAL_MAX_BYTES both ways with a local Bot API server. A valid
 * override sets BOTH directions to the same value, clamped per direction to
 * the backend limit, so it can only lower a cap and never raise it.
 *
 * @param {object} [opts]
 * @param {boolean} [opts.localApi=false] — true when config.bot.apiRoot is set
 *   (a local Bot API server is in use → LOCAL_MAX_BYTES ceiling).
 * @param {?number} [opts.override=null] — per-chat/topic maxFileBytes (bytes).
 *   Resolved by the caller from topic → chat → undefined. Fractional values
 *   are floored to whole bytes; anything that is not a finite number of at
 *   least one byte (null, NaN, Infinity, <= 0, strings) means "no override".
 * @returns {{ inBytes:number, outBytes:number, ceiling:number, localApi:boolean }}
 *   `ceiling` is the backend's outbound limit (the larger of the two
 *   directions on cloud); `localApi` is echoed back as passed.
 */
function resolveFileCaps({ localApi = false, override = null } = {}) {
  const maxIn = localApi ? LOCAL_MAX_BYTES : CLOUD_MAX_IN_BYTES;
  const maxOut = localApi ? LOCAL_MAX_BYTES : CLOUD_MAX_OUT_BYTES;

  // Number.isFinite does not coerce, so strings/null/undefined fall out here
  // exactly like the non-number case. Byte counts are integers, hence floor.
  const requested = Number.isFinite(override) ? Math.floor(override) : 0;
  const hasOverride = requested >= 1;

  return {
    inBytes: hasOverride ? Math.min(requested, maxIn) : maxIn,
    outBytes: hasOverride ? Math.min(requested, maxOut) : maxOut,
    ceiling: maxOut,
    localApi,
  };
}
|
|
27
67
|
const MIME_ALLOW = [
|
|
28
68
|
/^image\//, /^audio\//, /^video\//,
|
|
29
69
|
/^application\/pdf$/, /^text\/plain$/,
|
|
@@ -109,8 +149,12 @@ function filterAttachments(attachments, opts = {}) {
|
|
|
109
149
|
|
|
110
150
|
module.exports = {
|
|
111
151
|
filterAttachments,
|
|
152
|
+
resolveFileCaps,
|
|
112
153
|
MAX_FILE_BYTES,
|
|
113
154
|
MAX_TOTAL_BYTES,
|
|
155
|
+
CLOUD_MAX_IN_BYTES,
|
|
156
|
+
CLOUD_MAX_OUT_BYTES,
|
|
157
|
+
LOCAL_MAX_BYTES,
|
|
114
158
|
MIME_ALLOW,
|
|
115
159
|
EXTENSION_ALLOW,
|
|
116
160
|
FALLBACK_MIMES,
|
package/lib/handlers/abort.js
CHANGED
|
@@ -42,13 +42,37 @@ function createHandleAbort({
|
|
|
42
42
|
const threadId = msg.message_thread_id?.toString();
|
|
43
43
|
const sessionKey = getSessionKey(chatId, threadId, chatConfig);
|
|
44
44
|
const proc = pm.has(sessionKey) ? pm.get(sessionKey) : null;
|
|
45
|
-
|
|
45
|
+
let hadActive = !!proc?.inFlight;
|
|
46
46
|
|
|
47
47
|
// Mark BEFORE killing: the 'close' event fires almost immediately
|
|
48
48
|
// after interrupt, and the surrounding handleMessage's catch
|
|
49
49
|
// needs to see the flag to skip the generic error-reply.
|
|
50
50
|
if (hadActive) markSessionAborted(sessionKey);
|
|
51
51
|
|
|
52
|
+
// "Stop" incident (shumorobot Music, 2026-05-31 13:08): on the
|
|
53
|
+
// CliProcess/channels backend a turn resolves on the quiet-window
|
|
54
|
+
// after claude's last reply tool call (inFlight → false), but claude
|
|
55
|
+
// can still be working (subagent, long Bash). Keying the ack on
|
|
56
|
+
// inFlight alone made "Stop" say "Nothing to stop" while a subagent
|
|
57
|
+
// download churned. probeBusyState() reads the TUI "esc to interrupt"
|
|
58
|
+
// hint — the truthful signal — so detection, the abort mark, and the
|
|
59
|
+
// ack all agree. The probe result is logged below (forensics) so the
|
|
60
|
+
// heuristic can be refined against real states later. Channels analog
|
|
61
|
+
// of the (deleted) tmux hasBackgroundShell branch; typeof-guarded so
|
|
62
|
+
// it's a no-op on backends without it.
|
|
63
|
+
let busyProbe = null;
|
|
64
|
+
if (!hadActive && proc && typeof proc.probeBusyState === 'function') {
|
|
65
|
+
try {
|
|
66
|
+
busyProbe = await proc.probeBusyState();
|
|
67
|
+
if (busyProbe?.busy) {
|
|
68
|
+
hadActive = true;
|
|
69
|
+
markSessionAborted(sessionKey);
|
|
70
|
+
}
|
|
71
|
+
} catch (err) {
|
|
72
|
+
logger.error?.(`[${botName}] busy-probe failed: ${err.message}`);
|
|
73
|
+
}
|
|
74
|
+
}
|
|
75
|
+
|
|
52
76
|
// Bug 1 (incident 2026-05-18): "Stop" was turn-scoped — it only
|
|
53
77
|
// looked at an in-flight TURN. But the agent can leave a DETACHED
|
|
54
78
|
// background shell running (a `run_in_background:true` Bash) that
|
|
@@ -87,6 +111,19 @@ function createHandleAbort({
|
|
|
87
111
|
chat_id: chatId, user_id: msg.from?.id || null,
|
|
88
112
|
had_active: hadActive,
|
|
89
113
|
killed_background_shell: killedBackgroundShell,
|
|
114
|
+
// "Stop" incident forensics: the raw busy-probe signals at decision
|
|
115
|
+
// time. Lets us query, across real aborts, where the esc-hint /
|
|
116
|
+
// inFlight / pending-turn signals agreed vs diverged and refine the
|
|
117
|
+
// heuristic later. null when no probe ran (turn was already inFlight,
|
|
118
|
+
// or the backend has no probeBusyState).
|
|
119
|
+
busy_probe: busyProbe ? {
|
|
120
|
+
busy: busyProbe.busy,
|
|
121
|
+
streaming: busyProbe.streaming,
|
|
122
|
+
in_flight: busyProbe.inFlight,
|
|
123
|
+
pending_turns: busyProbe.pendingTurns,
|
|
124
|
+
captured: busyProbe.captured,
|
|
125
|
+
pane_tail: busyProbe.paneTail,
|
|
126
|
+
} : null,
|
|
90
127
|
trigger: cleanText.slice(0, 40),
|
|
91
128
|
});
|
|
92
129
|
|
|
@@ -50,7 +50,14 @@ function validateIpcFileParam(method, params = {}) {
|
|
|
50
50
|
const fileParam = FILE_PARAM_BY_METHOD[method];
|
|
51
51
|
if (!fileParam) return null;
|
|
52
52
|
const val = params[fileParam];
|
|
53
|
-
|
|
53
|
+
// { source: '/abs/path' } envelope — now coerced to a grammy InputFile in
|
|
54
|
+
// tg() (coerceFileParams). Validate it has a usable absolute source, else
|
|
55
|
+
// pass through (Buffer / stream / InputFile shapes).
|
|
56
|
+
if (val && typeof val === 'object' && typeof val.source === 'string') {
|
|
57
|
+
if (val.source.length === 0) return `polygram IPC: ${fileParam}.source is empty`;
|
|
58
|
+
return null;
|
|
59
|
+
}
|
|
60
|
+
if (typeof val !== 'string') return null; // Buffer/InputFile/etc — pass through
|
|
54
61
|
if (val.length === 0) return `polygram IPC: ${fileParam} is empty`;
|
|
55
62
|
|
|
56
63
|
const looksUrl = /^(https?|ftp):\/\//i.test(val);
|
|
@@ -125,7 +125,7 @@ function createChannelsToolDispatcher({
|
|
|
125
125
|
|| require('../telegram/process-agent-reply').processAndDeliverAgentText;
|
|
126
126
|
|
|
127
127
|
return async function channelsToolDispatcher(call) {
|
|
128
|
-
const { sessionKey, chatId, threadId, toolName, text, files, sourceMsgId } = call;
|
|
128
|
+
const { sessionKey, chatId, threadId, toolName, text, files, sourceMsgId, maxOutboundFileBytes } = call;
|
|
129
129
|
|
|
130
130
|
if (toolName !== 'reply') {
|
|
131
131
|
// 0.11.0 Phase 1 ships `reply` only — react and edit_message are
|
|
@@ -196,6 +196,21 @@ function createChannelsToolDispatcher({
|
|
|
196
196
|
failedAttachments.push({ path: filePath, error: check.error });
|
|
197
197
|
continue;
|
|
198
198
|
}
|
|
199
|
+
// Backend/chat-derived upload cap. Reject oversize BEFORE upload with
|
|
200
|
+
// a clear error (vs Telegram's cryptic 413/"file is too big") so
|
|
201
|
+
// claude can convert/compress and retry. maxOutboundFileBytes is
|
|
202
|
+
// undefined for non-channels callers → no cap (Telegram still gates).
|
|
203
|
+
if (typeof maxOutboundFileBytes === 'number' && maxOutboundFileBytes > 0) {
|
|
204
|
+
let size = 0;
|
|
205
|
+
try { size = fs.statSync(check.resolved).size; } catch {}
|
|
206
|
+
if (size > maxOutboundFileBytes) {
|
|
207
|
+
const mb = (n) => (n / (1024 * 1024)).toFixed(1);
|
|
208
|
+
const err = `file too large to send: ${mb(size)}MB > ${mb(maxOutboundFileBytes)}MB limit`;
|
|
209
|
+
logger.warn?.(`[channels-tool-dispatcher] ${err} (${check.resolved})`);
|
|
210
|
+
failedAttachments.push({ path: filePath, error: err });
|
|
211
|
+
continue;
|
|
212
|
+
}
|
|
213
|
+
}
|
|
199
214
|
try {
|
|
200
215
|
const ext = path.extname(check.resolved).toLowerCase();
|
|
201
216
|
const isImage = ['.jpg', '.jpeg', '.png', '.gif', '.webp'].includes(ext);
|
|
@@ -203,7 +218,10 @@ function createChannelsToolDispatcher({
|
|
|
203
218
|
const fieldName = isImage ? 'photo' : 'document';
|
|
204
219
|
const params = {
|
|
205
220
|
chat_id: chatId,
|
|
206
|
-
|
|
221
|
+
// { source } envelope → grammy InputFile in tg()'s coerceFileParams.
|
|
222
|
+
// Pre-fix this bare object reached grammy unrecognized and every
|
|
223
|
+
// upload 400'd with "Wrong port number" (file-send never worked).
|
|
224
|
+
[fieldName]: { source: check.resolved, filename: path.basename(check.resolved) },
|
|
207
225
|
};
|
|
208
226
|
if (threadId) params.message_thread_id = threadId;
|
|
209
227
|
await send(bot, method, params, { source: 'channels-tool-dispatcher', sessionKey });
|
|
@@ -48,6 +48,11 @@ const { Process, UnsupportedOperationError } = require('./process');
|
|
|
48
48
|
const { ChannelsBridgeServer } = require('./channels-bridge-server');
|
|
49
49
|
const { writeHookFiles, removeHookFiles } = require('./hook-settings');
|
|
50
50
|
const { createHookTail } = require('./hook-event-tail');
|
|
51
|
+
// File-send staging: reuse the dispatcher's allowlist root so the dir we
|
|
52
|
+
// create exactly matches the realpath the validator accepts (no /tmp vs
|
|
53
|
+
// /private/tmp drift — one of the original Music-topic failures).
|
|
54
|
+
const { DEFAULT_ATTACHMENT_BASE } = require('./channels-tool-dispatcher');
|
|
55
|
+
const { resolveFileCaps } = require('../attachments');
|
|
51
56
|
const { runStartupGate } = require('../tmux/startup-gate');
|
|
52
57
|
const { POLYGRAM_DISPLAY_HINT } = require('../telegram/display-hint');
|
|
53
58
|
|
|
@@ -251,6 +256,10 @@ class CliProcess extends Process {
|
|
|
251
256
|
// pending turn(s): turn_id → { resolve, reject, replies: [], quietTimer, hardTimer, startedAt }
|
|
252
257
|
this.pendingTurns = new Map();
|
|
253
258
|
|
|
259
|
+
// File-send outbound cap (bot → user). Safe cloud default; overwritten in
|
|
260
|
+
// _spawnTmuxClaude with the backend/chat-resolved value before any turn.
|
|
261
|
+
this.maxOutboundFileBytes = resolveFileCaps({ localApi: false }).outBytes;
|
|
262
|
+
|
|
254
263
|
// P1 security (review #8): track resolved permission request_ids so a
|
|
255
264
|
// double-fire of respond() can't write a second perm_verdict for the same
|
|
256
265
|
// request. TmuxProcess gates on _pendingApprovalId; this is the channels
|
|
@@ -297,6 +306,23 @@ class CliProcess extends Process {
|
|
|
297
306
|
// permit files under the agent's workspace.
|
|
298
307
|
this.sessionCwd = opts.cwd || null;
|
|
299
308
|
|
|
309
|
+
// File-send staging dir (2026-06 file-send feature). The dispatcher
|
|
310
|
+
// allowlist always permits <DEFAULT_ATTACHMENT_BASE>/<sessionKey>/, but
|
|
311
|
+
// nothing ever CREATED it — so claude's reply(files) attempts at
|
|
312
|
+
// /tmp/polygram-attachments failed (dir absent / realpath mismatch) and
|
|
313
|
+
// it flailed across other paths. Create it here and surface it to the
|
|
314
|
+
// prompt so claude has one blessed, always-allowed place to stage a file
|
|
315
|
+
// before sending. realpathSync so the stored path matches what the
|
|
316
|
+
// validator resolves (the /tmp ↔ /private/tmp fix).
|
|
317
|
+
try {
|
|
318
|
+
const dir = path.join(DEFAULT_ATTACHMENT_BASE, String(this.sessionKey));
|
|
319
|
+
fs.mkdirSync(dir, { recursive: true, mode: 0o700 });
|
|
320
|
+
this.attachmentStagingDir = fs.realpathSync(dir);
|
|
321
|
+
} catch (err) {
|
|
322
|
+
this.attachmentStagingDir = null;
|
|
323
|
+
this.logger.warn?.(`[${this.label}] channels: staging dir create failed: ${err.message}`);
|
|
324
|
+
}
|
|
325
|
+
|
|
300
326
|
// Opaque random token for socket filename — do NOT leak sessionKey to /tmp.
|
|
301
327
|
const socketToken = crypto.randomBytes(16).toString('hex');
|
|
302
328
|
this.sockPath = path.join(os.tmpdir(), `polygram-${socketToken}.sock`);
|
|
@@ -416,28 +442,7 @@ class CliProcess extends Process {
|
|
|
416
442
|
|
|
417
443
|
this.bridgeServer.on('bridge-message', msg => this._handleBridgeMessage(msg));
|
|
418
444
|
|
|
419
|
-
this.bridgeServer.on('bridge-disconnected', () =>
|
|
420
|
-
this.bridgeReady = false;
|
|
421
|
-
this.mcpReady = false;
|
|
422
|
-
if (!this.closed) {
|
|
423
|
-
this.logger.warn?.(`[${this.label}] channels: bridge disconnected unexpectedly`);
|
|
424
|
-
// P1 #5: drain pendingTurns immediately so hardTimers don't run 10min.
|
|
425
|
-
for (const [, pending] of this.pendingTurns) {
|
|
426
|
-
if (pending.quietTimer) clearTimeout(pending.quietTimer);
|
|
427
|
-
if (pending.hardTimer) clearTimeout(pending.hardTimer);
|
|
428
|
-
if (pending.absoluteTimer) clearTimeout(pending.absoluteTimer);
|
|
429
|
-
if (pending._stopGraceTimer) clearTimeout(pending._stopGraceTimer);
|
|
430
|
-
const err = new Error('bridge disconnected');
|
|
431
|
-
err.code = 'BRIDGE_DISCONNECTED';
|
|
432
|
-
try { pending.reject(err); } catch {}
|
|
433
|
-
}
|
|
434
|
-
this.pendingTurns.clear();
|
|
435
|
-
this.pendingQueue.length = 0;
|
|
436
|
-
this.inFlight = false;
|
|
437
|
-
this.emit('bridge-disconnected');
|
|
438
|
-
this._logEvent('bridge-disconnected', { reason: 'socket-close' });
|
|
439
|
-
}
|
|
440
|
-
});
|
|
445
|
+
this.bridgeServer.on('bridge-disconnected', () => this._handleBridgeDisconnected());
|
|
441
446
|
|
|
442
447
|
await this.bridgeServer.listen();
|
|
443
448
|
}
|
|
@@ -493,6 +498,18 @@ class CliProcess extends Process {
|
|
|
493
498
|
const effort = topicConfig?.effort || opts.chatConfig?.effort || opts.effort;
|
|
494
499
|
const resolvedCwd = topicConfig?.cwd || opts.chatConfig?.cwd || opts.cwd;
|
|
495
500
|
|
|
501
|
+
// File-send outbound cap (bot → user). Backend-derived (cloud 50MB vs
|
|
502
|
+
// local Bot API server 2GB via opts.localApi) with per-topic/chat
|
|
503
|
+
// maxFileBytes override, clamped to the backend ceiling. Stored for the
|
|
504
|
+
// dispatcher (live size-check) and the system prompt (so claude states
|
|
505
|
+
// the right limit). Resolved here so it follows the same topic→chat
|
|
506
|
+
// precedence as cwd/agent above.
|
|
507
|
+
const _capOverride = topicConfig?.maxFileBytes ?? opts.chatConfig?.maxFileBytes ?? null;
|
|
508
|
+
this.maxOutboundFileBytes = resolveFileCaps({
|
|
509
|
+
localApi: !!opts.localApi,
|
|
510
|
+
override: _capOverride,
|
|
511
|
+
}).outBytes;
|
|
512
|
+
|
|
496
513
|
// Parity audit P8 + rc.8 fs-guard (2026-05-26 shumorobot Music topic):
|
|
497
514
|
// `--session-id <id>` creates a NEW claude session with that id;
|
|
498
515
|
// `--resume <id>` resumes the EXISTING conversation. Lazy-respawn after
|
|
@@ -540,6 +557,9 @@ class CliProcess extends Process {
|
|
|
540
557
|
);
|
|
541
558
|
}
|
|
542
559
|
}
|
|
560
|
+
// Finding 0.12-M2: record the resume decision so _armHookTail (run
|
|
561
|
+
// after spawn) skips the prior session's still-on-disk hook ndjson.
|
|
562
|
+
this._resumedSession = canResume;
|
|
543
563
|
if (agent) claudeArgs.push('--agent', agent);
|
|
544
564
|
if (model) claudeArgs.unshift('--model', model);
|
|
545
565
|
if (effort) claudeArgs.push('--effort', effort);
|
|
@@ -616,6 +636,28 @@ class CliProcess extends Process {
|
|
|
616
636
|
'Internal tool calls (Bash, Edit, Write, Read, etc.) are fine to use',
|
|
617
637
|
'as normal — only the FINAL user-visible message needs to go through',
|
|
618
638
|
'the reply tool.',
|
|
639
|
+
'',
|
|
640
|
+
'### Sending FILES (tracks, images, docs) to the user',
|
|
641
|
+
'',
|
|
642
|
+
'The `mcp__polygram-bridge__reply` tool takes an optional `files` array of',
|
|
643
|
+
'absolute paths. This is the ONLY way to send a file. Do NOT use Bash,',
|
|
644
|
+
'curl, the Telegram Bot API, or polygram-ipc to send files — those fail.',
|
|
645
|
+
'',
|
|
646
|
+
...(this.attachmentStagingDir ? [
|
|
647
|
+
`To send a file: COPY it into the staging dir \`${this.attachmentStagingDir}\`,`,
|
|
648
|
+
'then call reply with its absolute path, e.g.:',
|
|
649
|
+
` reply(chat_id="<id>", text="Here's the track", files=["${this.attachmentStagingDir}/track.flac"])`,
|
|
650
|
+
'polygram auto-deletes staged files after the turn — you do not need to clean up.',
|
|
651
|
+
'You may also send directly from the agent workspace (cwd); other paths are rejected.',
|
|
652
|
+
] : [
|
|
653
|
+
'Copy the file somewhere under your workspace (cwd) and pass its absolute',
|
|
654
|
+
'path in `files`. Paths outside the workspace are rejected for safety.',
|
|
655
|
+
]),
|
|
656
|
+
'',
|
|
657
|
+
`Max file size for sending: ${Math.round(this.maxOutboundFileBytes / (1024 * 1024))} MB. ` +
|
|
658
|
+
'For larger lossless audio, convert to FLAC/MP3 under the limit first, ' +
|
|
659
|
+
'or tell the user it exceeds the limit. Images go as photos; everything ' +
|
|
660
|
+
'else as documents.',
|
|
619
661
|
].join('\n'));
|
|
620
662
|
|
|
621
663
|
// Parity audit P6: honor isolateUserConfig — mirrors tmux pattern at
|
|
@@ -705,6 +747,20 @@ class CliProcess extends Process {
|
|
|
705
747
|
],
|
|
706
748
|
readySignal: /Listening for channel messages from: server:polygram-bridge/i,
|
|
707
749
|
timeoutCode: 'CHANNELS_DIALOG_TIMEOUT',
|
|
750
|
+
// Progress-aware gate (shumorobot General incident 2026-05-30): a
|
|
751
|
+
// cold spawn that's mid-download (runtime fetch, "24%" progress bar)
|
|
752
|
+
// is genuinely working and must NOT be killed by the blind 30s
|
|
753
|
+
// wall-clock. stallMs fails fast only when the pane is FROZEN; an
|
|
754
|
+
// actively-changing pane (download bar, dialog nav) keeps resetting
|
|
755
|
+
// the stall clock and rides out to the ready signal. deadlineMs stays
|
|
756
|
+
// the absolute backstop. A full stallMs of zero pane activity = genuinely wedged.
|
|
757
|
+
// Stall = pane rendered then went static (genuinely wedged). 60s, not
|
|
758
|
+
// 30s: some topics' TUIs cold-render slowly (Music ~45s, slow MCP
|
|
759
|
+
// startup) — 30s was too tight and false-aborted them. Blank panes
|
|
760
|
+
// don't arm the stall timer at all now (see runStartupGate), so this
|
|
761
|
+
// only bounds a TUI that rendered and then truly hung.
|
|
762
|
+
stallMs: this.startupGateStallMs ?? 60_000,
|
|
763
|
+
deadlineMs: this.startupGateDeadlineMs ?? 180_000,
|
|
708
764
|
logger: this.logger,
|
|
709
765
|
label: `${this.label}:startup-gate`,
|
|
710
766
|
});
|
|
@@ -849,15 +905,18 @@ class CliProcess extends Process {
|
|
|
849
905
|
// rate-limit / chat-id-mismatch path. Live shumorobot 2026-05-26 23:44
|
|
850
906
|
// observed 3+ "Called polygram-bridge" entries in the TUI pane with
|
|
851
907
|
// ZERO OUT messages delivered to TG and zero warn-level diagnostics —
|
|
852
|
-
// need to see args.
|
|
853
|
-
//
|
|
854
|
-
//
|
|
855
|
-
|
|
908
|
+
// need to see args.chat_id / args.turn_id to know whether claude is
|
|
909
|
+
// calling reply with empty text, wrong chat_id, or something else.
|
|
910
|
+
// L13: root-caused — demoted to debug and DROPPED text_head. Logging
|
|
911
|
+
// the first 80 chars of every reply at warn level leaked private chat
|
|
912
|
+
// content / file excerpts / secrets into the default log sink,
|
|
913
|
+
// unconditionally. name/chat_id/turn_id/text_len diagnose dispatch
|
|
914
|
+
// without exposing message content.
|
|
915
|
+
this.logger.debug?.(
|
|
856
916
|
`[${this.label}] channels: tool-call name=${msg.name} ` +
|
|
857
917
|
`chat_id=${JSON.stringify(args.chat_id)} ` +
|
|
858
918
|
`turn_id=${JSON.stringify(args.turn_id)} ` +
|
|
859
|
-
`text_len=${typeof args.text === 'string' ? args.text.length : 'non-string'}
|
|
860
|
-
`text_head=${JSON.stringify((args.text || '').slice(0, 80))}`,
|
|
919
|
+
`text_len=${typeof args.text === 'string' ? args.text.length : 'non-string'}`,
|
|
861
920
|
);
|
|
862
921
|
|
|
863
922
|
// Review P1 #7: idempotency. If we've already ACK'd this tool_call_id,
|
|
@@ -948,6 +1007,7 @@ class CliProcess extends Process {
|
|
|
948
1007
|
text: args.text,
|
|
949
1008
|
files: args.files,
|
|
950
1009
|
sessionCwd: this.sessionCwd, // P0 #2: dispatcher uses this to allowlist file roots
|
|
1010
|
+
maxOutboundFileBytes: this.maxOutboundFileBytes, // backend/chat-derived upload cap
|
|
951
1011
|
});
|
|
952
1012
|
} catch (err) {
|
|
953
1013
|
this._writeToBridge({ kind: 'tool_ack', tool_call_id: msg.tool_call_id, ok: false, error: err.message });
|
|
@@ -1122,13 +1182,27 @@ class CliProcess extends Process {
|
|
|
1122
1182
|
this._finalizeTurn(turnId);
|
|
1123
1183
|
};
|
|
1124
1184
|
const onStop = (info) => {
|
|
1125
|
-
//
|
|
1126
|
-
//
|
|
1185
|
+
// Finding 0.12-M1: the Stop hook carries NO turn_id, and a single
|
|
1186
|
+
// global 'stop-hook' emission fires EVERY per-turn onStop listener.
|
|
1187
|
+
// When more than one turn is in stop-grace we cannot attribute this
|
|
1188
|
+
// Stop (or its last_assistant_message) to a specific turn — the
|
|
1189
|
+
// pre-fix code let one Stop finalize all grace-pending turns and
|
|
1190
|
+
// cross-attribute one turn's text to another (the exact class the
|
|
1191
|
+
// F#3 reply routing prevents). Mirror that drop-rather-than-
|
|
1192
|
+
// misattribute discipline: only consume the Stop when exactly ONE
|
|
1193
|
+
// turn is in grace; otherwise ignore it and let each turn finalize
|
|
1194
|
+
// on its own grace timer (each keeps its own reply text).
|
|
1195
|
+
let graceCount = 0;
|
|
1196
|
+
for (const p of this.pendingTurns.values()) if (p._stopGracePending) graceCount++;
|
|
1197
|
+
if (graceCount !== 1) return;
|
|
1127
1198
|
pending._stopHookData = info;
|
|
1128
1199
|
clearTimeout(pending._stopGraceTimer);
|
|
1129
1200
|
pending._stopGraceTimer = null;
|
|
1130
1201
|
finalize();
|
|
1131
1202
|
};
|
|
1203
|
+
// L5: stash the closure so teardown paths that bypass Process.kill()'s
|
|
1204
|
+
// removeAllListeners (bridge-disconnect drain, resetSession) can off it.
|
|
1205
|
+
pending._onStop = onStop;
|
|
1132
1206
|
pending._stopGraceTimer = setTimeout(finalize, this.stopGraceMs);
|
|
1133
1207
|
// unref so a never-fired grace doesn't pin the event loop. In tests
|
|
1134
1208
|
// where a CliProcess is created, send() is called, then the test
|
|
@@ -1195,6 +1269,27 @@ class CliProcess extends Process {
|
|
|
1195
1269
|
pending.resolve(result);
|
|
1196
1270
|
this.emit('result', { subtype: 'success' }, { streamText: text });
|
|
1197
1271
|
this.emit('idle');
|
|
1272
|
+
// File-send staging auto-purge (by design: polygram cleans up, claude is never asked to delete).
|
|
1273
|
+
// Once the LAST turn settles, wipe the staging dir's contents so files
|
|
1274
|
+
// claude copied in to send don't accumulate on disk across turns. Only
|
|
1275
|
+
// when fully idle, so a file staged for a still-pending concurrent turn
|
|
1276
|
+
// isn't yanked mid-send.
|
|
1277
|
+
if (this.pendingTurns.size === 0) this._purgeStagingDir();
|
|
1278
|
+
}
|
|
1279
|
+
|
|
1280
|
+
/**
|
|
1281
|
+
* Empty the per-session file-send staging dir (keep the dir itself).
|
|
1282
|
+
* Best-effort; never throws. Called when the session goes idle and on kill.
|
|
1283
|
+
*/
|
|
1284
|
+
_purgeStagingDir() {
|
|
1285
|
+
if (!this.attachmentStagingDir) return;
|
|
1286
|
+
let entries;
|
|
1287
|
+
try { entries = fs.readdirSync(this.attachmentStagingDir); }
|
|
1288
|
+
catch { return; }
|
|
1289
|
+
for (const name of entries) {
|
|
1290
|
+
try { fs.rmSync(path.join(this.attachmentStagingDir, name), { recursive: true, force: true }); }
|
|
1291
|
+
catch { /* best-effort */ }
|
|
1292
|
+
}
|
|
1198
1293
|
}
|
|
1199
1294
|
|
|
1200
1295
|
// ─── public Process API ──────────────────────────────────────────
|
|
@@ -1386,6 +1481,63 @@ class CliProcess extends Process {
|
|
|
1386
1481
|
this._interruptGraceTimer.unref?.();
|
|
1387
1482
|
}
|
|
1388
1483
|
|
|
1484
|
+
/**
|
|
1485
|
+
* Is claude actually still working, regardless of the resolved-turn flag?
|
|
1486
|
+
*
|
|
1487
|
+
* "Stop" incident (shumorobot Music, 2026-05-31 13:08): the channels
|
|
1488
|
+
* backend resolves a turn on the quiet-window after claude's last reply
|
|
1489
|
+
* tool call (inFlight → false), but claude can keep working afterwards
|
|
1490
|
+
* (a subagent, a long Bash). The abort handler keyed its ack on inFlight
|
|
1491
|
+
* alone, so "Stop" said "Nothing to stop" one second after the bot said
|
|
1492
|
+
* "On it — downloading…" while a subagent churned.
|
|
1493
|
+
*
|
|
1494
|
+
* The TUI prints "esc to interrupt" (STREAMING_HINT_RE) continuously
|
|
1495
|
+
* whenever claude is busy — capture-pane is the truthful signal, the
|
|
1496
|
+
* channels analog of the (deleted) tmux hasBackgroundShell() probe.
|
|
1497
|
+
*
|
|
1498
|
+
* Returns a STRUCTURED probe (not just a boolean) so the abort path can
|
|
1499
|
+
* log the raw signals — pane tail + flags — to the events DB. That lets
|
|
1500
|
+
* us later characterize which states the heuristic gets right/wrong and
|
|
1501
|
+
* refine it (e.g. add signals beyond the esc-hint) without guessing.
|
|
1502
|
+
*
|
|
1503
|
+
* Never throws — a failed capture returns captured:false, busy:false.
|
|
1504
|
+
*
|
|
1505
|
+
* @returns {Promise<{busy:boolean, streaming:boolean, inFlight:boolean,
|
|
1506
|
+
* pendingTurns:number, captured:boolean, paneTail:(string|null)}>}
|
|
1507
|
+
*/
|
|
1508
|
+
async probeBusyState() {
|
|
1509
|
+
const base = {
|
|
1510
|
+
busy: false, streaming: false,
|
|
1511
|
+
inFlight: this.inFlight, pendingTurns: this.pendingTurns.size,
|
|
1512
|
+
captured: false, paneTail: null,
|
|
1513
|
+
};
|
|
1514
|
+
if (this.closed || !this.tmuxSession || typeof this.runner?.captureWide !== 'function') {
|
|
1515
|
+
return base;
|
|
1516
|
+
}
|
|
1517
|
+
let pane;
|
|
1518
|
+
try {
|
|
1519
|
+
pane = await this.runner.captureWide(this.tmuxSession);
|
|
1520
|
+
} catch (err) {
|
|
1521
|
+
this.logger.warn?.(`[${this.label}] channels: probeBusyState captureWide failed: ${err.message}`);
|
|
1522
|
+
return base;
|
|
1523
|
+
}
|
|
1524
|
+
if (!pane) return base;
|
|
1525
|
+
const streaming = STREAMING_HINT_RE.test(pane);
|
|
1526
|
+
return {
|
|
1527
|
+
...base,
|
|
1528
|
+
busy: streaming,
|
|
1529
|
+
streaming,
|
|
1530
|
+
captured: true,
|
|
1531
|
+
paneTail: pane.slice(-200),
|
|
1532
|
+
};
|
|
1533
|
+
}
|
|
1534
|
+
|
|
1535
|
+
/** Boolean shorthand for probeBusyState().busy (abort-path convenience). */
|
|
1536
|
+
async isBusy() {
|
|
1537
|
+
const { busy } = await this.probeBusyState();
|
|
1538
|
+
return busy;
|
|
1539
|
+
}
|
|
1540
|
+
|
|
1389
1541
|
async kill(reason = 'kill') {
|
|
1390
1542
|
if (this.closed) return;
|
|
1391
1543
|
// Parity P19: re-entry guard for concurrent kill() calls. Mirrors
|
|
@@ -1415,17 +1567,18 @@ class CliProcess extends Process {
|
|
|
1415
1567
|
this.logger.warn?.(`[${this.label}] _armHookTail: _hookNdjsonPath unset; hooks disabled. Phase 1.2 may have failed.`);
|
|
1416
1568
|
return;
|
|
1417
1569
|
}
|
|
1418
|
-
//
|
|
1419
|
-
//
|
|
1420
|
-
//
|
|
1421
|
-
//
|
|
1422
|
-
//
|
|
1423
|
-
//
|
|
1424
|
-
// --resume
|
|
1570
|
+
// Finding 0.12-M2: writeHookFiles opens the ndjson in APPEND mode
|
|
1571
|
+
// ('a') and never truncates, so on a --resume respawn the prior
|
|
1572
|
+
// session's hook lines are still on disk under the same path. Replaying
|
|
1573
|
+
// them re-drives the turn state machine from stale Stop/PreToolUse
|
|
1574
|
+
// events (a stale Stop can finalize the fresh turn). So skip existing
|
|
1575
|
+
// content when (and only when) this is a resumed session — the same
|
|
1576
|
+
// discipline the JSONL tail uses on --resume. A fresh spawn's ndjson is
|
|
1577
|
+
// empty, so skipExisting:false is correct there.
|
|
1425
1578
|
this._hookTail = createHookTail({
|
|
1426
1579
|
path: this._hookNdjsonPath,
|
|
1427
1580
|
logger: this.logger,
|
|
1428
|
-
skipExisting:
|
|
1581
|
+
skipExisting: this._resumedSession === true,
|
|
1429
1582
|
});
|
|
1430
1583
|
this._hookTail.on('event', (ev) => {
|
|
1431
1584
|
try {
|
|
@@ -1465,25 +1618,18 @@ class CliProcess extends Process {
|
|
|
1465
1618
|
// gates tag-out on median < 2s and p99 < 5s across the events DB.
|
|
1466
1619
|
if (Number.isFinite(ev.receivedAtMs)) {
|
|
1467
1620
|
const lagMs = Date.now() - ev.receivedAtMs;
|
|
1621
|
+
// L10: emit ONLY — the onHookLagSample callback owns the DB write
|
|
1622
|
+
// (CALLBACK_TO_EVENT → callbacks.js). Previously this ALSO wrote
|
|
1623
|
+
// directly via this.db.logEvent, double-persisting every sample and
|
|
1624
|
+
// inflating the Phase 1.8 soak-gate row count. Consistent with how
|
|
1625
|
+
// tool-result / subagent-start / subagent-done are handled (emit,
|
|
1626
|
+
// don't double-write).
|
|
1468
1627
|
this.emit('hook-lag-sample', {
|
|
1469
1628
|
hookEventName: ev.type,
|
|
1470
1629
|
lagMs,
|
|
1471
1630
|
toolName: ev.toolName || null,
|
|
1472
1631
|
backend: this.backend,
|
|
1473
1632
|
});
|
|
1474
|
-
// Log to events DB if wired. db is optional (factory injects when
|
|
1475
|
-
// available) — same pattern as the other parity-P1 _logEvent calls.
|
|
1476
|
-
if (this.db?.logEvent) {
|
|
1477
|
-
try {
|
|
1478
|
-
this.db.logEvent('hook-lag-sample', {
|
|
1479
|
-
session_key: this.sessionKey,
|
|
1480
|
-
backend: this.backend,
|
|
1481
|
-
hook_event_name: ev.type,
|
|
1482
|
-
tool_name: ev.toolName || null,
|
|
1483
|
-
lag_ms: lagMs,
|
|
1484
|
-
});
|
|
1485
|
-
} catch {}
|
|
1486
|
-
}
|
|
1487
1633
|
}
|
|
1488
1634
|
|
|
1489
1635
|
switch (ev.type) {
|
|
@@ -1503,6 +1649,16 @@ class CliProcess extends Process {
|
|
|
1503
1649
|
const subagentType = ev.toolInput?.subagent_type
|
|
1504
1650
|
|| ev.toolInput?.agent_type
|
|
1505
1651
|
|| 'general-purpose';
|
|
1652
|
+
// Finding 0.12-M4: SubagentStop carries agent_id/agent_type but
|
|
1653
|
+
// NOT the originating Agent tool_use_id, so without help the
|
|
1654
|
+
// subagent-start/subagent-done rows share no JOIN key (the
|
|
1655
|
+
// documented soak query on $.tool_use_id returns zero rows).
|
|
1656
|
+
// Track the in-flight Agent tool_use_id keyed by subagent type so
|
|
1657
|
+
// the paired SubagentStop below can stamp it onto subagent-done.
|
|
1658
|
+
(this._pendingSubagentStarts ||= []).push({
|
|
1659
|
+
agentType: subagentType,
|
|
1660
|
+
toolUseId: ev.toolUseId,
|
|
1661
|
+
});
|
|
1506
1662
|
this.emit('subagent-start', {
|
|
1507
1663
|
agentType: subagentType,
|
|
1508
1664
|
// PreToolUse for Agent carries no agent_id (set later on
|
|
@@ -1541,14 +1697,27 @@ class CliProcess extends Process {
|
|
|
1541
1697
|
});
|
|
1542
1698
|
return;
|
|
1543
1699
|
|
|
1544
|
-
case 'SubagentStop':
|
|
1700
|
+
case 'SubagentStop': {
|
|
1701
|
+
// Finding 0.12-M4: recover the originating Agent tool_use_id so the
|
|
1702
|
+
// subagent-start/subagent-done pair is JOINable. Prefer a match on
|
|
1703
|
+
// agent type (correct for parallel subagents of different types);
|
|
1704
|
+
// fall back to the oldest pending start when types don't line up.
|
|
1705
|
+
let subagentToolUseId = null;
|
|
1706
|
+
const pendingStarts = this._pendingSubagentStarts;
|
|
1707
|
+
if (pendingStarts && pendingStarts.length) {
|
|
1708
|
+
let idx = pendingStarts.findIndex(s => s.agentType === ev.agentType);
|
|
1709
|
+
if (idx < 0) idx = 0;
|
|
1710
|
+
subagentToolUseId = pendingStarts.splice(idx, 1)[0]?.toolUseId ?? null;
|
|
1711
|
+
}
|
|
1545
1712
|
this.emit('subagent-done', {
|
|
1546
1713
|
agentType: ev.agentType,
|
|
1547
1714
|
agentId: ev.agentId,
|
|
1548
1715
|
durationMs: ev.durationMs,
|
|
1716
|
+
toolUseId: subagentToolUseId,
|
|
1549
1717
|
backend: this.backend,
|
|
1550
1718
|
});
|
|
1551
1719
|
return;
|
|
1720
|
+
}
|
|
1552
1721
|
|
|
1553
1722
|
case 'Stop':
|
|
1554
1723
|
// Phase 1.7 (TODO) will use this as the authoritative turn-end
|
|
@@ -1665,6 +1834,50 @@ class CliProcess extends Process {
|
|
|
1665
1834
|
}
|
|
1666
1835
|
}
|
|
1667
1836
|
|
|
1837
|
+
/**
|
|
1838
|
+
* Drain on unexpected bridge socket loss (claude crash, bridge crash,
|
|
1839
|
+
* EOF). Extracted from the inline 'bridge-disconnected' handler so the
|
|
1840
|
+
* teardown is testable and consistent with _doKill.
|
|
1841
|
+
*
|
|
1842
|
+
* Findings 0.12-L5 + L6: in addition to clearing the per-turn timers
|
|
1843
|
+
* and rejecting pendings (the original P1 #5 behavior), this now also
|
|
1844
|
+
* (L5) removes each turn's stop-hook listener — this drain does NOT go
|
|
1845
|
+
* through Process.kill()'s blanket removeAllListeners, so a turn torn
|
|
1846
|
+
* down mid-stop-grace would otherwise leak its onStop closure — and
|
|
1847
|
+
* (L6) clears _interruptGraceTimer, matching _doKill (a /stop verdict
|
|
1848
|
+
* landing just before the disconnect would otherwise leave a stray
|
|
1849
|
+
* timer on the dead instance).
|
|
1850
|
+
*/
|
|
1851
|
+
_handleBridgeDisconnected() {
|
|
1852
|
+
this.bridgeReady = false;
|
|
1853
|
+
this.mcpReady = false;
|
|
1854
|
+
if (this.closed) return;
|
|
1855
|
+
this.logger.warn?.(`[${this.label}] channels: bridge disconnected unexpectedly`);
|
|
1856
|
+
// L6: clear the interrupt grace timer alongside the rest of the lifecycle.
|
|
1857
|
+
if (this._interruptGraceTimer) {
|
|
1858
|
+
clearTimeout(this._interruptGraceTimer);
|
|
1859
|
+
this._interruptGraceTimer = null;
|
|
1860
|
+
}
|
|
1861
|
+
// P1 #5: drain pendingTurns immediately so hardTimers don't run 10min.
|
|
1862
|
+
for (const [, pending] of this.pendingTurns) {
|
|
1863
|
+
if (pending.quietTimer) clearTimeout(pending.quietTimer);
|
|
1864
|
+
if (pending.hardTimer) clearTimeout(pending.hardTimer);
|
|
1865
|
+
if (pending.absoluteTimer) clearTimeout(pending.absoluteTimer);
|
|
1866
|
+
if (pending._stopGraceTimer) clearTimeout(pending._stopGraceTimer);
|
|
1867
|
+
// L5: remove the per-turn stop-hook listener (this path bypasses
|
|
1868
|
+
// Process.kill()'s removeAllListeners).
|
|
1869
|
+
if (pending._onStop) this.off('stop-hook', pending._onStop);
|
|
1870
|
+
const err = new Error('bridge disconnected');
|
|
1871
|
+
err.code = 'BRIDGE_DISCONNECTED';
|
|
1872
|
+
try { pending.reject(err); } catch {}
|
|
1873
|
+
}
|
|
1874
|
+
this.pendingTurns.clear();
|
|
1875
|
+
this.pendingQueue.length = 0;
|
|
1876
|
+
this.inFlight = false;
|
|
1877
|
+
this.emit('bridge-disconnected');
|
|
1878
|
+
this._logEvent('bridge-disconnected', { reason: 'socket-close' });
|
|
1879
|
+
}
|
|
1880
|
+
|
|
1668
1881
|
async _doKill(reason) {
|
|
1669
1882
|
this.closed = true;
|
|
1670
1883
|
this.inFlight = false;
|
|
@@ -1688,6 +1901,7 @@ class CliProcess extends Process {
|
|
|
1688
1901
|
if (pending.hardTimer) clearTimeout(pending.hardTimer);
|
|
1689
1902
|
if (pending.absoluteTimer) clearTimeout(pending.absoluteTimer);
|
|
1690
1903
|
if (pending._stopGraceTimer) clearTimeout(pending._stopGraceTimer);
|
|
1904
|
+
if (pending._onStop) this.off('stop-hook', pending._onStop); // L5
|
|
1691
1905
|
const err = new Error(`session killed: ${reason}`);
|
|
1692
1906
|
err.code = 'KILLED';
|
|
1693
1907
|
pending.reject(err);
|
|
@@ -1734,6 +1948,12 @@ class CliProcess extends Process {
|
|
|
1734
1948
|
if (this.botName && this.claudeSessionId) {
|
|
1735
1949
|
try { removeHookFiles({ botName: this.botName, sessionId: this.claudeSessionId }); } catch {}
|
|
1736
1950
|
}
|
|
1951
|
+
// File-send staging: remove the whole per-session dir on kill (purge only
|
|
1952
|
+
// empties it between turns; kill is end-of-life so drop it entirely).
|
|
1953
|
+
if (this.attachmentStagingDir) {
|
|
1954
|
+
try { fs.rmSync(this.attachmentStagingDir, { recursive: true, force: true }); } catch {}
|
|
1955
|
+
this.attachmentStagingDir = null;
|
|
1956
|
+
}
|
|
1737
1957
|
|
|
1738
1958
|
this.emit('close', 0);
|
|
1739
1959
|
}
|
|
@@ -1876,6 +2096,8 @@ class CliProcess extends Process {
|
|
|
1876
2096
|
if (pending.quietTimer) clearTimeout(pending.quietTimer);
|
|
1877
2097
|
if (pending.hardTimer) clearTimeout(pending.hardTimer);
|
|
1878
2098
|
if (pending.absoluteTimer) clearTimeout(pending.absoluteTimer);
|
|
2099
|
+
if (pending._stopGraceTimer) clearTimeout(pending._stopGraceTimer); // L5
|
|
2100
|
+
if (pending._onStop) this.off('stop-hook', pending._onStop); // L5
|
|
1879
2101
|
const err = new Error(`session reset: ${reason}`);
|
|
1880
2102
|
err.code = 'RESET';
|
|
1881
2103
|
try { pending.reject(err); } catch {}
|
package/lib/process/factory.js
CHANGED
|
@@ -91,10 +91,6 @@ function _maybeWarnR12Migration({ rawPm, canonical, chatId, threadId, chatCfg, t
|
|
|
91
91
|
* @param {number} [opts.queryCloseTimeoutMs]
|
|
92
92
|
* @param {object} [opts.tmuxRunner] — required when ANY chat routes to 'cli'
|
|
93
93
|
* @param {string} [opts.botName] — required when ANY chat routes to 'cli'
|
|
94
|
-
* @param {object} [opts.pollScheduler] — DEPRECATED in 0.12 — was used by the
|
|
95
|
-
* removed tmux backend to share one setInterval across all chats; CliProcess's
|
|
96
|
-
* per-session pongWatchdog handles its own cadence. Param kept for caller
|
|
97
|
-
* back-compat; ignored. Will be removed in 0.13.
|
|
98
94
|
* @param {Function} [opts.toolDispatcher] — required when ANY chat routes to 'cli'.
|
|
99
95
|
* async ({sessionKey, chatId, threadId, toolName, text, files}) => {ok, error?}.
|
|
100
96
|
* Called when Claude's reply (or react/edit_message) tool fires inside a
|
|
@@ -113,7 +109,6 @@ function createProcessFactory({
|
|
|
113
109
|
queryCloseTimeoutMs,
|
|
114
110
|
tmuxRunner = null,
|
|
115
111
|
botName = null,
|
|
116
|
-
pollScheduler = null,
|
|
117
112
|
toolDispatcher = null,
|
|
118
113
|
channelsClaudeBin = null,
|
|
119
114
|
} = {}) {
|
package/lib/process-manager.js
CHANGED
|
@@ -123,6 +123,19 @@ const CALLBACK_TO_EVENT = {
|
|
|
123
123
|
// menu auto-dismissed by `_waitForReady`. Surfacing the event so
|
|
124
124
|
// soak can count how often aged-session resumes hit this path.
|
|
125
125
|
onSessionAgePromptDismissed: 'session-age-prompt-dismissed',
|
|
126
|
+
// 0.12 CliProcess observability — typed hook events from cli-process.js
|
|
127
|
+
// _handleHookEvent. Each gets its own callback so polygram can persist
|
|
128
|
+
// structured rows to the events DB for soak-time aggregate queries.
|
|
129
|
+
// - hook-lag-sample: Phase 1.8 — per-event lag_ms (target: median<2s, p99<5s)
|
|
130
|
+
// - tool-result: Phase 1.3 — PostToolUse durationMs per tool
|
|
131
|
+
// - subagent-start / subagent-done: Phase 1.3 — typed subagent lifecycle
|
|
132
|
+
// (we DO get tool-use='Agent' via onToolUse, but agent_type + durationMs
|
|
133
|
+
// only fire on these typed events). SDK backend never emits — hooks
|
|
134
|
+
// are CliProcess-specific (and were tmux-specific in 0.10–0.11).
|
|
135
|
+
onHookLagSample: 'hook-lag-sample',
|
|
136
|
+
onToolResult: 'tool-result',
|
|
137
|
+
onSubagentStart: 'subagent-start',
|
|
138
|
+
onSubagentDone: 'subagent-done',
|
|
126
139
|
};
|
|
127
140
|
|
|
128
141
|
class ProcessManager {
|
package/lib/sdk/callbacks.js
CHANGED
|
@@ -464,7 +464,10 @@ function createSdkCallbacks({
|
|
|
464
464
|
const detail = {
|
|
465
465
|
chat_id: getChatIdFromKey(sessionKey),
|
|
466
466
|
session_key: sessionKey,
|
|
467
|
-
|
|
467
|
+
// Finding 0.12-M3: tmux backend was deleted in 0.12; these hook
|
|
468
|
+
// handlers only ever fire on the CLI driver now — default to 'cli'
|
|
469
|
+
// (honor an explicit payload.backend if a caller ever sets one).
|
|
470
|
+
backend: payload?.backend ?? 'cli',
|
|
468
471
|
hook_type: payload?.type ?? null,
|
|
469
472
|
claude_session_id: payload?.sessionId ?? null,
|
|
470
473
|
tool_name: payload?.toolName ?? null,
|
|
@@ -555,7 +558,7 @@ function createSdkCallbacks({
|
|
|
555
558
|
logEvent('turn-timeout', {
|
|
556
559
|
chat_id: getChatIdFromKey(sessionKey),
|
|
557
560
|
session_key: sessionKey,
|
|
558
|
-
backend: '
|
|
561
|
+
backend: payload?.backend ?? 'cli', // Finding 0.12-M3
|
|
559
562
|
turn_id: payload?.turnId ?? null,
|
|
560
563
|
reason: payload?.reason ?? null,
|
|
561
564
|
idle_ms: payload?.idleMs ?? null,
|
|
@@ -578,7 +581,7 @@ function createSdkCallbacks({
|
|
|
578
581
|
logEvent('hook-tail-error', {
|
|
579
582
|
chat_id: getChatIdFromKey(sessionKey),
|
|
580
583
|
session_key: sessionKey,
|
|
581
|
-
backend: '
|
|
584
|
+
backend: payload?.backend ?? 'cli', // Finding 0.12-M3 (fires on the CLI hook tail)
|
|
582
585
|
message: (payload?.message || '').slice(0, 200),
|
|
583
586
|
path: payload?.path ?? null,
|
|
584
587
|
claude_session_id: payload?.sessionId ?? null,
|
|
@@ -596,7 +599,7 @@ function createSdkCallbacks({
|
|
|
596
599
|
logEvent('stop-hook-resolved', {
|
|
597
600
|
chat_id: getChatIdFromKey(sessionKey),
|
|
598
601
|
session_key: sessionKey,
|
|
599
|
-
backend: '
|
|
602
|
+
backend: payload?.backend ?? 'cli', // Finding 0.12-M3
|
|
600
603
|
turn_id: payload?.turnId ?? null,
|
|
601
604
|
claude_session_id: payload?.sessionId ?? null,
|
|
602
605
|
});
|
|
@@ -614,7 +617,7 @@ function createSdkCallbacks({
|
|
|
614
617
|
logEvent('session-age-prompt-dismissed', {
|
|
615
618
|
chat_id: getChatIdFromKey(sessionKey),
|
|
616
619
|
session_key: sessionKey,
|
|
617
|
-
backend: '
|
|
620
|
+
backend: payload?.backend ?? 'cli', // Finding 0.12-M3
|
|
618
621
|
claude_session_id: payload?.sessionId ?? null,
|
|
619
622
|
});
|
|
620
623
|
} catch (err) {
|
|
@@ -622,6 +625,108 @@ function createSdkCallbacks({
|
|
|
622
625
|
}
|
|
623
626
|
},
|
|
624
627
|
|
|
628
|
+
// 0.12 Phase 1.8 — hook-lag persistence for the soak gate (median<2s,
|
|
629
|
+
// p99<5s). Each row carries the hookEventName + lagMs so we can:
|
|
630
|
+
// SELECT json_extract(detail_json, '$.hook_event_name') AS evt,
|
|
631
|
+
// AVG(json_extract(detail_json, '$.lag_ms')) AS avg_lag,
|
|
632
|
+
// MAX(json_extract(detail_json, '$.lag_ms')) AS max_lag
|
|
633
|
+
// FROM events WHERE kind='hook-lag-sample' AND ts>...
|
|
634
|
+
// GROUP BY evt;
|
|
635
|
+
onHookLagSample: (sessionKey, payload /* , entry */) => {
|
|
636
|
+
try {
|
|
637
|
+
logEvent('hook-lag-sample', {
|
|
638
|
+
chat_id: getChatIdFromKey(sessionKey),
|
|
639
|
+
session_key: sessionKey,
|
|
640
|
+
backend: payload?.backend ?? 'cli',
|
|
641
|
+
hook_event_name: payload?.hookEventName ?? null,
|
|
642
|
+
lag_ms: payload?.lagMs ?? null,
|
|
643
|
+
tool_name: payload?.toolName ?? null,
|
|
644
|
+
});
|
|
645
|
+
} catch (err) {
|
|
646
|
+
logger.error?.(`[${botName}] hook-lag-sample handler: ${err.message}`);
|
|
647
|
+
}
|
|
648
|
+
},
|
|
649
|
+
|
|
650
|
+
// 0.12 Phase 1.3 — tool-result with durationMs. Pairs with the
|
|
651
|
+
// existing onToolUse row (which fires on PreToolUse) so the soak can
|
|
652
|
+
// compute per-tool average + p99 durations:
|
|
653
|
+
// SELECT json_extract(detail_json, '$.tool_name') AS tool,
|
|
654
|
+
// AVG(json_extract(detail_json, '$.duration_ms')) AS avg_ms,
|
|
655
|
+
// MAX(json_extract(detail_json, '$.duration_ms')) AS max_ms
|
|
656
|
+
// FROM events WHERE kind='tool-result' GROUP BY tool;
|
|
657
|
+
// isError captures the rare PostToolUse where the tool itself failed
|
|
658
|
+
// (vs the tool succeeding but claude deciding to retry).
|
|
659
|
+
onToolResult: (sessionKey, payload /* , entry */) => {
|
|
660
|
+
try {
|
|
661
|
+
logEvent('tool-result', {
|
|
662
|
+
chat_id: getChatIdFromKey(sessionKey),
|
|
663
|
+
session_key: sessionKey,
|
|
664
|
+
backend: payload?.backend ?? 'cli',
|
|
665
|
+
tool_name: payload?.name ?? null,
|
|
666
|
+
duration_ms: payload?.durationMs ?? null,
|
|
667
|
+
agent_id: payload?.agentId ?? null,
|
|
668
|
+
agent_type: payload?.agentType ?? null,
|
|
669
|
+
tool_use_id: payload?.toolUseId ?? null,
|
|
670
|
+
is_error: payload?.isError === true,
|
|
671
|
+
});
|
|
672
|
+
} catch (err) {
|
|
673
|
+
logger.error?.(`[${botName}] tool-result handler: ${err.message}`);
|
|
674
|
+
}
|
|
675
|
+
},
|
|
676
|
+
|
|
677
|
+
// 0.12 Phase 1.3 — subagent lifecycle. PreToolUse with name='Agent'
|
|
678
|
+
// synthesizes 'subagent-start' (no agent_id yet — claude doesn't
|
|
679
|
+
// hand one out until the inner SubagentStop). 'subagent-done' carries
|
|
680
|
+
// the agent_id + duration_ms so a soak can correlate the pair:
|
|
681
|
+
// SELECT s.detail_json AS start, d.detail_json AS done
|
|
682
|
+
// FROM events s JOIN events d
|
|
683
|
+
// ON json_extract(s.detail_json, '$.tool_use_id') =
|
|
684
|
+
// json_extract(d.detail_json, '$.tool_use_id')
|
|
685
|
+
// WHERE s.kind='subagent-start' AND d.kind='subagent-done';
|
|
686
|
+
onSubagentStart: (sessionKey, payload, entry) => {
|
|
687
|
+
try {
|
|
688
|
+
logEvent('subagent-start', {
|
|
689
|
+
chat_id: getChatIdFromKey(sessionKey),
|
|
690
|
+
session_key: sessionKey,
|
|
691
|
+
backend: payload?.backend ?? 'cli',
|
|
692
|
+
agent_type: payload?.agentType ?? null,
|
|
693
|
+
tool_use_id: payload?.toolUseId ?? null,
|
|
694
|
+
});
|
|
695
|
+
// Findings L9/L14: drive the head reactor into the distinct SUBAGENT
|
|
696
|
+
// state so a running subagent shows 👾 rather than freezing on the
|
|
697
|
+
// prior tool's emoji. The plan promised this; previously the handler
|
|
698
|
+
// only persisted the DB row and never touched the reactor.
|
|
699
|
+
const r = entry?.pendingQueue?.[0]?.context?.reactor;
|
|
700
|
+
if (r) r.setState('SUBAGENT');
|
|
701
|
+
} catch (err) {
|
|
702
|
+
logger.error?.(`[${botName}] subagent-start handler: ${err.message}`);
|
|
703
|
+
}
|
|
704
|
+
},
|
|
705
|
+
|
|
706
|
+
onSubagentDone: (sessionKey, payload, entry) => {
|
|
707
|
+
try {
|
|
708
|
+
// L9/L14: heartbeat at subagent end so the cascade/stall clock
|
|
709
|
+
// resets; the next tool's PreToolUse sets the following state.
|
|
710
|
+
const r = entry?.pendingQueue?.[0]?.context?.reactor;
|
|
711
|
+
if (r && typeof r.heartbeat === 'function') r.heartbeat();
|
|
712
|
+
logEvent('subagent-done', {
|
|
713
|
+
chat_id: getChatIdFromKey(sessionKey),
|
|
714
|
+
session_key: sessionKey,
|
|
715
|
+
backend: payload?.backend ?? 'cli',
|
|
716
|
+
agent_type: payload?.agentType ?? null,
|
|
717
|
+
agent_id: payload?.agentId ?? null,
|
|
718
|
+
duration_ms: payload?.durationMs ?? null,
|
|
719
|
+
// Finding 0.12-M4: persist the originating Agent tool_use_id so the
|
|
720
|
+
// documented subagent-start/subagent-done soak JOIN on
|
|
721
|
+
// $.tool_use_id matches (subagent-done's tool_use_id is recovered
|
|
722
|
+
// in cli-process.js from the paired Agent PreToolUse).
|
|
723
|
+
tool_use_id: payload?.toolUseId ?? null,
|
|
724
|
+
});
|
|
725
|
+
} catch (err) {
|
|
726
|
+
logger.error?.(`[${botName}] subagent-done handler: ${err.message}`);
|
|
727
|
+
}
|
|
728
|
+
},
|
|
729
|
+
|
|
625
730
|
onInjectFail: (sessionKey, payload /* , entry */) => {
|
|
626
731
|
try {
|
|
627
732
|
const msgId = payload?.msgId;
|
package/lib/telegram/api.js
CHANGED
|
@@ -28,6 +28,7 @@ const {
|
|
|
28
28
|
getRetryAfterMs,
|
|
29
29
|
} = require('./format');
|
|
30
30
|
const { isSafeToRetry, redactBotToken } = require('../error/net');
|
|
31
|
+
const { coerceFileParams } = require('./input-file');
|
|
31
32
|
|
|
32
33
|
// Topic deletion race: a user can delete a forum topic while a turn is in
|
|
33
34
|
// flight, turning a valid `message_thread_id` into a 404. Telegram's error
|
|
@@ -112,6 +113,14 @@ async function send({ bot, method, params, db = null, meta = {}, logger = consol
|
|
|
112
113
|
const chatId = params.chat_id != null ? String(params.chat_id) : null;
|
|
113
114
|
const threadId = params.message_thread_id != null ? String(params.message_thread_id) : null;
|
|
114
115
|
|
|
116
|
+
// File-upload bug fix (2026-05-31): coerce a `{ source: '/abs/path' }`
|
|
117
|
+
// file param into a grammy InputFile so local-file uploads actually work.
|
|
118
|
+
// grammy doesn't recognize the bare envelope → it failed every send with
|
|
119
|
+
// "Wrong port number". Single choke point: fixes channels reply(files)
|
|
120
|
+
// AND the IPC send path at once. No-op for non-file methods / file_id /
|
|
121
|
+
// URL strings / existing InputFile instances.
|
|
122
|
+
coerceFileParams(method, params);
|
|
123
|
+
|
|
115
124
|
// 0.7.4: empty-text short-circuit. Pre-fix, an empty params.text on
|
|
116
125
|
// sendMessage/editMessageText reached Telegram and 400'd with
|
|
117
126
|
// "message text is empty"; the row was marked failed and propagated
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* input-file — coerce file-upload params into grammy InputFile instances.
|
|
3
|
+
*
|
|
4
|
+
* The bug (2026-05-31, shumorobot Music): callers passed a Telegraf-style
|
|
5
|
+
* `{ source: '/abs/path' }` envelope as the file param (document/photo/…).
|
|
6
|
+
* grammy 1.x does NOT recognize that shape — it's not an InputFile, so
|
|
7
|
+
* grammy serializes it as a plain object and Telegram tries to read it as
|
|
8
|
+
* a URL/file_id, failing with "invalid file HTTP URL: Wrong port number".
|
|
9
|
+
* Result: file-send NEVER worked (channels reply(files) AND the IPC path
|
|
10
|
+
* both produced this exact error). The existing dispatcher test used a fake
|
|
11
|
+
* `send` and only asserted the METHOD, so it couldn't catch the bad shape.
|
|
12
|
+
*
|
|
13
|
+
* grammy uploads a local file only when the param is `new InputFile(path)`.
|
|
14
|
+
* This helper normalizes, at the single send choke point (tg()), the
|
|
15
|
+
* `{ source: <abs path> }` envelope → `new InputFile(path)`, leaving every
|
|
16
|
+
* other shape untouched:
|
|
17
|
+
* - string file_id / https URL → pass through (Telegram resolves)
|
|
18
|
+
* - existing InputFile instance → pass through (already correct)
|
|
19
|
+
* - Buffer / stream → pass through (grammy handles)
|
|
20
|
+
*
|
|
21
|
+
* Only the explicit `{ source: string }` envelope is transformed — bare
|
|
22
|
+
* path strings are intentionally NOT coerced (a Telegram file_id is also a
|
|
23
|
+
* bare string; coercing would break sends-by-id).
|
|
24
|
+
*/
|
|
25
|
+
|
|
26
|
+
'use strict';
|
|
27
|
+
|
|
28
|
+
const { InputFile } = require('grammy');
|
|
29
|
+
|
|
30
|
+
// method → the params field that carries the file.
|
|
31
|
+
const FILE_FIELD_BY_METHOD = {
|
|
32
|
+
sendPhoto: 'photo',
|
|
33
|
+
sendDocument: 'document',
|
|
34
|
+
sendAudio: 'audio',
|
|
35
|
+
sendVideo: 'video',
|
|
36
|
+
sendAnimation: 'animation',
|
|
37
|
+
sendVoice: 'voice',
|
|
38
|
+
sendVideoNote: 'video_note',
|
|
39
|
+
};
|
|
40
|
+
|
|
41
|
+
/**
 * Return a grammy-uploadable value for a single file param.
 *
 * Only the `{ source: <non-empty string>, filename? }` envelope is
 * converted; every other value (file_id / URL string, existing InputFile,
 * null/undefined, or any object without a non-empty string `source`) is
 * returned unchanged.
 *
 *   - local path source  → `new InputFile(path, filename)`
 *   - http(s) URL source → `new InputFile(new URL(source), filename)`
 *
 * The URL case needs the explicit `URL` wrapper: per grammy's InputFile
 * documentation a plain string argument is a local file path, so passing
 * 'https://…' as a string would be opened as a file on disk and the upload
 * could never succeed.
 * NOTE(review): with a URL the fetch is performed by this process at send
 * time — confirm the callers' file validation is happy with remote sources.
 *
 * @param {*} val - candidate file param value
 * @returns {*} an InputFile for the envelope shape, otherwise `val` itself
 */
function coerceFileValue(val) {
  const isSourceEnvelope = Boolean(val)
    && typeof val === 'object'
    && !(val instanceof InputFile)
    && typeof val.source === 'string'
    && val.source.length > 0;
  if (!isSourceEnvelope) return val;

  if (/^https?:\/\//i.test(val.source)) {
    let remote = null;
    try {
      remote = new URL(val.source);
    } catch {
      // Not a parseable URL: fall through and treat it as a path, which is
      // what happened to every source string before this fix.
      remote = null;
    }
    if (remote) return new InputFile(remote, val.filename);
  }

  return new InputFile(val.source, val.filename);
}
|
|
53
|
+
|
|
54
|
+
/**
 * Make the file field of `params` (if the method has one and it is set)
 * grammy-uploadable, mutating `params` in place.
 *
 * Nothing is changed when `params` is not an object, when `method` has no
 * entry in FILE_FIELD_BY_METHOD, or when the mapped field is null/undefined.
 *
 * @param {string} method - Telegram Bot API method name, e.g. 'sendPhoto'
 * @param {object} params - request params for that method
 * @returns {object} the same `params` reference that was passed in
 */
function coerceFileParams(method, params) {
  const isObject = params != null && typeof params === 'object';
  if (!isObject) return params;

  const fileField = FILE_FIELD_BY_METHOD[method];
  if (fileField && params[fileField] != null) {
    params[fileField] = coerceFileValue(params[fileField]);
  }
  return params;
}
|
|
71
|
+
|
|
72
|
+
module.exports = {
|
|
73
|
+
coerceFileParams,
|
|
74
|
+
coerceFileValue,
|
|
75
|
+
FILE_FIELD_BY_METHOD,
|
|
76
|
+
};
|
|
@@ -55,6 +55,11 @@ const STATES = {
|
|
|
55
55
|
// mid-turn user message is buffered for the next PostToolBatch
|
|
56
56
|
// injection.
|
|
57
57
|
AUTOSTEERED: { label: 'autosteered', chain: ['✍', '👀'] },
|
|
58
|
+
// 0.12 (Findings L9/L14): distinct in-progress reaction for a running
|
|
59
|
+
// subagent (Agent PreToolUse → SubagentStop). Driven by onSubagentStart.
|
|
60
|
+
// Preferred 👾 (NOT 🤖 — 🤖 is REACTION_INVALID for bots, same class as
|
|
61
|
+
// the rc.37 🧐 bug); falls back to 🔥 then 🤔, all bot-usable.
|
|
62
|
+
SUBAGENT: { label: 'subagent', chain: ['👾', '🔥', '🤔'] },
|
|
58
63
|
DONE: { label: 'done', chain: ['👍'] },
|
|
59
64
|
ERROR: { label: 'error', chain: ['🤯', '🤔'] },
|
|
60
65
|
STALL: { label: 'stall', chain: ['🥱', '🤔'] },
|
package/lib/tmux/log-tail.js
CHANGED
|
@@ -42,6 +42,7 @@
|
|
|
42
42
|
const EventEmitter = require('events');
|
|
43
43
|
const fs = require('fs');
|
|
44
44
|
const path = require('path');
|
|
45
|
+
const { StringDecoder } = require('string_decoder');
|
|
45
46
|
|
|
46
47
|
const DEFAULT_INTERVAL_MS = 100;
|
|
47
48
|
// Slow safety-net poll when fs.watch is active. Catches any events
|
|
@@ -91,6 +92,13 @@ class LogTail extends EventEmitter {
|
|
|
91
92
|
this.fs = fsOverride || fs;
|
|
92
93
|
this._offset = 0;
|
|
93
94
|
this._buf = '';
|
|
95
|
+
// L8: decode bytes through a StringDecoder so a multibyte UTF-8 char
|
|
96
|
+
// split across two read chunks (the 64KB DEFAULT_CHUNK_BYTES boundary)
|
|
97
|
+
// isn't corrupted into U+FFFD. The decoder holds an incomplete trailing
|
|
98
|
+
// sequence until the continuation bytes arrive on the next read. The
|
|
99
|
+
// hook ndjson carries large non-ASCII tool payloads, so this is
|
|
100
|
+
// load-bearing on the CliProcess observability path.
|
|
101
|
+
this._decoder = new StringDecoder('utf8');
|
|
94
102
|
this._closed = false;
|
|
95
103
|
this._timer = null;
|
|
96
104
|
this._watcher = null;
|
|
@@ -260,7 +268,9 @@ class LogTail extends EventEmitter {
|
|
|
260
268
|
const readSize = Math.min(remaining, buffer.length);
|
|
261
269
|
const { bytesRead } = await fd.read(buffer, 0, readSize, this._offset + totalRead);
|
|
262
270
|
if (bytesRead === 0) break;
|
|
263
|
-
|
|
271
|
+
// L8: StringDecoder.write instead of per-chunk toString('utf8') so a
|
|
272
|
+
// multibyte char straddling the read boundary survives intact.
|
|
273
|
+
this._buf += this._decoder.write(buffer.subarray(0, bytesRead));
|
|
264
274
|
totalRead += bytesRead;
|
|
265
275
|
}
|
|
266
276
|
this._offset += totalRead;
|
package/lib/tmux/startup-gate.js
CHANGED
|
@@ -17,6 +17,19 @@
|
|
|
17
17
|
* - if `readySignal` regex matches the captured pane content, resolve
|
|
18
18
|
* - if `Date.now()` exceeds the deadline, throw with `err.code = timeoutCode`
|
|
19
19
|
*
|
|
20
|
+
* Progress-aware (stall) deadline — `stallMs`:
|
|
21
|
+
* The blind wall-clock `deadlineMs` can't tell "claude is mid-download
|
|
22
|
+
* (24% progress bar, genuinely working)" from "claude is wedged". The
|
|
23
|
+
* shumorobot General incident (2026-05-30) killed a cold-spawn that was
|
|
24
|
+
* actively downloading the runtime. When `stallMs` is set, the gate
|
|
25
|
+
* tracks pane ACTIVITY: any change in captured pane content — or a
|
|
26
|
+
* trigger key being sent — resets a stall clock. The gate fails early
|
|
27
|
+
* (with `timeoutCode`) only after `stallMs` elapses with NO activity,
|
|
28
|
+
* i.e. the pane is frozen. `deadlineMs` remains an absolute backstop so
|
|
29
|
+
* a pane that animates forever but never reaches `readySignal` still
|
|
30
|
+
* terminates. When `stallMs` is omitted (default), behavior is the pure
|
|
31
|
+
* `deadlineMs` wall-clock exactly as before.
|
|
32
|
+
*
|
|
20
33
|
* Each trigger is one-shot per gate run (tracked by `name` in a Set).
|
|
21
34
|
*
|
|
22
35
|
* Caller supplies:
|
|
@@ -40,7 +53,10 @@ const DEFAULT_SETTLE_MS = 500;
|
|
|
40
53
|
* @param {string} opts.tmuxName — tmux session name to poll
|
|
41
54
|
* @param {Array<{name:string, regex:RegExp, key:string}>} opts.triggers
|
|
42
55
|
* @param {RegExp} opts.readySignal — match → resolve
|
|
43
|
-
* @param {number} [opts.deadlineMs=30000]
|
|
56
|
+
* @param {number} [opts.deadlineMs=30000] — absolute backstop
|
|
57
|
+
* @param {number} [opts.stallMs] — if set, fail after this much
|
|
58
|
+
* wall-clock with NO pane activity (progress-aware). Omit for pure
|
|
59
|
+
* wall-clock behavior.
|
|
44
60
|
* @param {number} [opts.pollMs=300]
|
|
45
61
|
* @param {number} [opts.settleMs=500]
|
|
46
62
|
* @param {string} [opts.timeoutCode='TUI_STARTUP_TIMEOUT']
|
|
@@ -54,6 +70,7 @@ async function runStartupGate({
|
|
|
54
70
|
triggers = [],
|
|
55
71
|
readySignal,
|
|
56
72
|
deadlineMs = DEFAULT_DEADLINE_MS,
|
|
73
|
+
stallMs,
|
|
57
74
|
pollMs = DEFAULT_POLL_MS,
|
|
58
75
|
settleMs = DEFAULT_SETTLE_MS,
|
|
59
76
|
timeoutCode = 'TUI_STARTUP_TIMEOUT',
|
|
@@ -70,6 +87,7 @@ async function runStartupGate({
|
|
|
70
87
|
|
|
71
88
|
const startedAt = Date.now();
|
|
72
89
|
const deadline = startedAt + deadlineMs;
|
|
90
|
+
const stallEnabled = Number.isFinite(stallMs) && stallMs > 0;
|
|
73
91
|
const seen = new Set();
|
|
74
92
|
const matchedTriggers = [];
|
|
75
93
|
// rc.4: remember the most recent successful pane snapshot. If the gate
|
|
@@ -78,8 +96,37 @@ async function runStartupGate({
|
|
|
78
96
|
// this, "claude exits code 0 after dev-channels Enter" surfaces as a
|
|
79
97
|
// 30-second `can't find pane` spam with no diagnostic about WHY.
|
|
80
98
|
let lastPane = null;
|
|
99
|
+
// Progress-aware gate: timestamp of the last observed pane CHANGE (or
|
|
100
|
+
// trigger send). Only consulted when stallEnabled.
|
|
101
|
+
let lastActivityAt = startedAt;
|
|
102
|
+
// Music incident (2026-06-01): the stall timer must NOT arm while the pane
|
|
103
|
+
// is still BLANK. A blank-and-unchanging pane means claude hasn't started
|
|
104
|
+
// rendering yet (slow cold-start), NOT that it wedged — the TUI for some
|
|
105
|
+
// topics takes 30-45s to first-render. Arming the stall timer on a blank
|
|
106
|
+
// pane killed a legitimate slow spawn at stallMs with a false "wedged".
|
|
107
|
+
// So the stall clock only runs once the pane has shown non-whitespace
|
|
108
|
+
// content; before that, only the absolute `deadlineMs` governs.
|
|
109
|
+
let sawContent = false;
|
|
81
110
|
|
|
82
111
|
while (Date.now() < deadline) {
|
|
112
|
+
// Stall check (progress-aware): the pane RENDERED something and has then
|
|
113
|
+
// been static for stallMs → genuinely wedged. Gated on sawContent so a
|
|
114
|
+
// blank cold-start isn't mistaken for a wedge. Fires early so a truly
|
|
115
|
+
// hung TUI fails fast, while an actively-progressing one (download bar,
|
|
116
|
+
// dialog navigation) keeps resetting lastActivityAt below.
|
|
117
|
+
if (stallEnabled && sawContent && Date.now() - lastActivityAt >= stallMs) {
|
|
118
|
+
const err = new Error(
|
|
119
|
+
`[${label}] startup gate: pane rendered then went static for ${stallMs}ms for ${tmuxName} ` +
|
|
120
|
+
`(matched: ${matchedTriggers.length ? matchedTriggers.join(', ') : 'none'}). ` +
|
|
121
|
+
`Appears wedged. Last pane content:\n` +
|
|
122
|
+
_formatPaneTail(lastPane),
|
|
123
|
+
);
|
|
124
|
+
err.code = timeoutCode;
|
|
125
|
+
err.lastPane = lastPane;
|
|
126
|
+
err.matchedTriggers = matchedTriggers;
|
|
127
|
+
err.reason = 'stall';
|
|
128
|
+
throw err;
|
|
129
|
+
}
|
|
83
130
|
let pane;
|
|
84
131
|
try {
|
|
85
132
|
pane = await runner.captureWide(tmuxName);
|
|
@@ -107,6 +154,19 @@ async function runStartupGate({
|
|
|
107
154
|
await new Promise(r => setTimeout(r, settleMs));
|
|
108
155
|
continue;
|
|
109
156
|
}
|
|
157
|
+
// First non-whitespace content = the TUI has started rendering. Only
|
|
158
|
+
// from here does the stall timer become meaningful (before this, a blank
|
|
159
|
+
// pane is cold-start, governed by the absolute deadline). Seed
|
|
160
|
+
// lastActivityAt at the moment content first appears so the stall window
|
|
161
|
+
// is measured from "rendered", not from spawn.
|
|
162
|
+
if (!sawContent && pane && pane.trim().length > 0) {
|
|
163
|
+
sawContent = true;
|
|
164
|
+
lastActivityAt = Date.now();
|
|
165
|
+
}
|
|
166
|
+
// Progress signal: any change in pane content is activity → reset the
|
|
167
|
+
// stall clock. A captureWide that returns the SAME bytes is NOT
|
|
168
|
+
// activity (a frozen download bar at 24% reads identically each poll).
|
|
169
|
+
if (pane !== lastPane) lastActivityAt = Date.now();
|
|
110
170
|
lastPane = pane;
|
|
111
171
|
|
|
112
172
|
// Walk triggers in declaration order — first match (and not yet seen) wins
|
|
@@ -122,6 +182,10 @@ async function runStartupGate({
|
|
|
122
182
|
seen.add(trigger.name);
|
|
123
183
|
matchedTriggers.push(trigger.name);
|
|
124
184
|
matched = true;
|
|
185
|
+
// Sending a key is activity — navigating the TUI counts as progress
|
|
186
|
+
// even if the pre-transition pane text was static (e.g. a dialog we
|
|
187
|
+
// just answered). Reset the stall clock so we don't fail mid-nav.
|
|
188
|
+
lastActivityAt = Date.now();
|
|
125
189
|
// Settle window so the TUI transitions out of the dialog before next poll
|
|
126
190
|
await new Promise(r => setTimeout(r, settleMs));
|
|
127
191
|
break;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "polygram",
|
|
3
|
-
"version": "0.12.0-rc.1",
|
|
3
|
+
"version": "0.12.0-rc.10",
|
|
4
4
|
"description": "Telegram daemon for Claude Code that preserves the OpenClaw per-chat session model. Migration path for OpenClaw users moving to Claude Code.",
|
|
5
5
|
"main": "lib/ipc/client.js",
|
|
6
6
|
"bin": {
|
package/polygram.js
CHANGED
|
@@ -28,7 +28,7 @@ const {
|
|
|
28
28
|
migrateJsonToDb, getClaudeSessionId, resolveSessionForSpawn,
|
|
29
29
|
} = require('./lib/db/sessions');
|
|
30
30
|
const { buildPrompt } = require('./lib/prompt');
|
|
31
|
-
const { filterAttachments } = require('./lib/attachments');
|
|
31
|
+
const { filterAttachments, resolveFileCaps, MAX_TOTAL_BYTES } = require('./lib/attachments');
|
|
32
32
|
// 0.9.0: SDK ProcessManager is the only pm. CLI pm
|
|
33
33
|
// (lib/process-manager.js) deleted in commit 6.
|
|
34
34
|
// Both implementations expose the same public API (constructor +
|
|
@@ -51,7 +51,6 @@ const { extractAssistantText } = require('./lib/process/sdk-process');
|
|
|
51
51
|
const { createChannelsToolDispatcher } = require('./lib/process/channels-tool-dispatcher');
|
|
52
52
|
const { createTmuxRunner } = require('./lib/tmux/tmux-runner');
|
|
53
53
|
const { sweepTmuxOrphans } = require('./lib/tmux/orphan-sweep');
|
|
54
|
-
const { PollScheduler } = require('./lib/tmux/poll-scheduler');
|
|
55
54
|
// rc.42: autosteer-buffer module deleted. Native SDK priority push
|
|
56
55
|
// (pm.injectUserMessage) replaces the buffer + PostToolBatch detour.
|
|
57
56
|
const { createAutosteeredRefs } = require('./lib/autosteered-refs');
|
|
@@ -462,6 +461,10 @@ function buildSpawnContext(sessionKey) {
|
|
|
462
461
|
threadId: threadId || null,
|
|
463
462
|
label: getSessionLabel(chatConfig, threadId),
|
|
464
463
|
existingSessionId,
|
|
464
|
+
// File-send outbound cap inputs: localApi (bot-level) so CliProcess can
|
|
465
|
+
// resolve the per-chat/topic outbound cap (resolveFileCaps) the same way
|
|
466
|
+
// it resolves cwd/agent. Override itself lives in chatConfig/topic.
|
|
467
|
+
localApi: !!config.bot?.apiRoot,
|
|
465
468
|
};
|
|
466
469
|
}
|
|
467
470
|
|
|
@@ -755,7 +758,19 @@ async function handleMessage(sessionKey, chatId, msg, bot) {
|
|
|
755
758
|
const sessionCtx = !pm.has(sessionKey) ? await readSessionContext(sessionKey, chatConfig.cwd) : '';
|
|
756
759
|
|
|
757
760
|
const rawAtts = extractAttachments(msg);
|
|
758
|
-
|
|
761
|
+
// Backend-derived inbound cap with per-topic/chat override. Cloud → 20MB;
|
|
762
|
+
// a local Bot API server (config.bot.apiRoot) → 2GB; override via
|
|
763
|
+
// chats[id].maxFileBytes or topics[t].maxFileBytes, clamped to the
|
|
764
|
+
// backend ceiling. Bytes-valued config; resolveFileCaps does the clamp.
|
|
765
|
+
const _inTopicCfg = getTopicConfig(chatConfig, threadIdStr || null);
|
|
766
|
+
const _fileCaps = resolveFileCaps({
|
|
767
|
+
localApi: !!config.bot?.apiRoot,
|
|
768
|
+
override: _inTopicCfg.maxFileBytes ?? chatConfig.maxFileBytes ?? null,
|
|
769
|
+
});
|
|
770
|
+
const { accepted, rejected } = filterAttachments(rawAtts, {
|
|
771
|
+
maxFileBytes: _fileCaps.inBytes,
|
|
772
|
+
maxTotalBytes: Math.max(_fileCaps.inBytes, MAX_TOTAL_BYTES),
|
|
773
|
+
});
|
|
759
774
|
for (const { att, reason } of rejected) {
|
|
760
775
|
console.log(`[${label}] attachment skipped: ${att.name} (${reason})`);
|
|
761
776
|
logEvent('attachment-skipped', { chat_id: chatId, msg_id: msg.message_id, name: att.name, reason });
|
|
@@ -1673,9 +1688,23 @@ function shouldHandle(msg, chatConfig, botUsername) {
|
|
|
1673
1688
|
}
|
|
1674
1689
|
|
|
1675
1690
|
function createBot(token) {
|
|
1691
|
+
// Optional self-hosted Telegram Bot API server. When config.bot.apiRoot is
|
|
1692
|
+
// set (e.g. "http://localhost:8081" from a local `telegram-bot-api`
|
|
1693
|
+
// process), grammy routes all Bot API calls there instead of
|
|
1694
|
+
// api.telegram.org — which lifts file send/receive from cloud's 50 MB-out /
|
|
1695
|
+
// 20 MB-in to 2 GB both ways. Omit it (default) → cloud Telegram, unchanged.
|
|
1696
|
+
// The local server is a separate companion daemon; this is just the knob
|
|
1697
|
+
// that points polygram at it. See docs/0.12.0-file-send.md.
|
|
1698
|
+
const apiRoot = config.bot?.apiRoot;
|
|
1676
1699
|
const bot = new Bot(token, {
|
|
1677
|
-
client: {
|
|
1700
|
+
client: {
|
|
1701
|
+
timeoutSeconds: 60,
|
|
1702
|
+
...(apiRoot ? { apiRoot } : {}),
|
|
1703
|
+
},
|
|
1678
1704
|
});
|
|
1705
|
+
if (apiRoot) {
|
|
1706
|
+
console.log(`[polygram] using local Telegram Bot API server: ${apiRoot} (2GB file limit)`);
|
|
1707
|
+
}
|
|
1679
1708
|
let botUsername = '';
|
|
1680
1709
|
// Cached once @botUsername is known — was recompiling per inbound msg.
|
|
1681
1710
|
let mentionRe = null;
|
|
@@ -2244,19 +2273,13 @@ async function main() {
|
|
|
2244
2273
|
const binCheck = verifyPinnedClaudeBin(CLAUDE_CLI_PINNED_VERSION);
|
|
2245
2274
|
if (binCheck.ok) {
|
|
2246
2275
|
console.log(
|
|
2247
|
-
`[polygram]
|
|
2276
|
+
`[polygram] CliProcess pinned to claude CLI v${CLAUDE_CLI_PINNED_VERSION}: ${binCheck.path}`,
|
|
2248
2277
|
);
|
|
2249
2278
|
pinnedClaudeBin = binCheck.path;
|
|
2250
2279
|
} else {
|
|
2251
2280
|
console.warn(`[polygram] WARNING: ${binCheck.reason}`);
|
|
2252
2281
|
}
|
|
2253
2282
|
}
|
|
2254
|
-
// O1 optimization: shared poll-tick scheduler. N TmuxProcess
|
|
2255
|
-
// instances share ONE setInterval instead of spawning N independent
|
|
2256
|
-
// setTimeout chains. Idle when no chats are in flight (zero timers
|
|
2257
|
-
// running). Configurable via config.bot.tmuxPollIntervalMs.
|
|
2258
|
-
const tmuxPollIntervalMs = config.bot?.tmuxPollIntervalMs || 250;
|
|
2259
|
-
const pollScheduler = new PollScheduler({ intervalMs: tmuxPollIntervalMs });
|
|
2260
2283
|
// 0.11.0: channels backend wiring. Used when a chat opts in via
|
|
2261
2284
|
// `pm: 'channels'` config. Falls back to SDK gracefully if the pinned
|
|
2262
2285
|
// claude binary isn't present (see factory.js — channelsClaudeBin
|
|
@@ -2282,7 +2305,6 @@ async function main() {
|
|
|
2282
2305
|
logger: console,
|
|
2283
2306
|
tmuxRunner,
|
|
2284
2307
|
botName: BOT_NAME,
|
|
2285
|
-
pollScheduler,
|
|
2286
2308
|
// channels backend
|
|
2287
2309
|
toolDispatcher: channelsToolDispatcher,
|
|
2288
2310
|
channelsClaudeBin,
|