polygram 0.7.4 → 0.7.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "$schema": "https://anthropic.com/claude-code/plugin.schema.json",
3
3
  "name": "polygram",
4
- "version": "0.7.4",
4
+ "version": "0.7.6",
5
5
  "description": "Telegram integration for Claude Code that preserves the OpenClaw per-chat session model. Migration target for OpenClaw users. Multi-bot, multi-chat, per-topic isolation; SQLite transcripts; inline-keyboard approvals. Bundles /polygram:status|logs|pair-code|approvals admin commands and a history skill.",
6
6
  "keywords": [
7
7
  "telegram",
package/lib/db.js CHANGED
@@ -152,6 +152,26 @@ function wrap(db) {
152
152
  VALUES (?, ?, ?, ?)
153
153
  `);
154
154
 
155
+ // 0.7.6 (item F): per-turn cost / token / duration metrics. Persisted
156
+ // at turn end (onResult callback). One row per dispatched user
157
+ // message → final reply cycle, even if the cycle had multiple
158
+ // assistant messages. See migrations/009-turn-metrics.sql.
159
+ const insertTurnMetricStmt = db.prepare(`
160
+ INSERT INTO turn_metrics (
161
+ ts, chat_id, thread_id, msg_id, session_id, bot_name,
162
+ model, effort,
163
+ input_tokens, output_tokens, cache_creation_tokens, cache_read_tokens,
164
+ cost_usd, duration_ms, num_assistant_messages, num_tool_uses,
165
+ result_subtype, error
166
+ ) VALUES (
167
+ @ts, @chat_id, @thread_id, @msg_id, @session_id, @bot_name,
168
+ @model, @effort,
169
+ @input_tokens, @output_tokens, @cache_creation_tokens, @cache_read_tokens,
170
+ @cost_usd, @duration_ms, @num_assistant_messages, @num_tool_uses,
171
+ @result_subtype, @error
172
+ )
173
+ `);
174
+
155
175
  const logConfigChangeStmt = db.prepare(`
156
176
  INSERT INTO config_changes (
157
177
  chat_id, thread_id, field, old_value, new_value,
@@ -277,6 +297,29 @@ function wrap(db) {
277
297
  );
278
298
  },
279
299
 
300
+ insertTurnMetric(row) {
301
+ return insertTurnMetricStmt.run({
302
+ ts: row.ts || Date.now(),
303
+ chat_id: String(row.chat_id),
304
+ thread_id: row.thread_id != null ? String(row.thread_id) : null,
305
+ msg_id: row.msg_id,
306
+ session_id: row.session_id || null,
307
+ bot_name: row.bot_name || null,
308
+ model: row.model || null,
309
+ effort: row.effort || null,
310
+ input_tokens: row.input_tokens ?? null,
311
+ output_tokens: row.output_tokens ?? null,
312
+ cache_creation_tokens: row.cache_creation_tokens ?? null,
313
+ cache_read_tokens: row.cache_read_tokens ?? null,
314
+ cost_usd: row.cost_usd ?? null,
315
+ duration_ms: row.duration_ms ?? null,
316
+ num_assistant_messages: row.num_assistant_messages ?? null,
317
+ num_tool_uses: row.num_tool_uses ?? null,
318
+ result_subtype: row.result_subtype || null,
319
+ error: row.error || null,
320
+ });
321
+ },
322
+
280
323
  logConfigChange(row) {
281
324
  return logConfigChangeStmt.run({
282
325
  chat_id: String(row.chat_id),
@@ -0,0 +1,56 @@
1
+ /**
2
+ * Parse Claude's final-turn text into one of three outbound shapes:
3
+ * - sticker (single emoji that maps to a sticker, OR literal
4
+ * `[sticker:NAME]` mimic — see below)
5
+ * - reaction (single emoji not mapped to a sticker)
6
+ * - text (everything else)
7
+ *
8
+ * Why this lives in lib/: polygram.js is a top-level script (calls main()
9
+ * at bottom) and can't be require()'d from a test without starting a bot.
10
+ * Pulling parseResponse out lets tests cover the regex edge cases.
11
+ *
12
+ * 0.7.5 (item: sticker regression):
13
+ * deriveOutboundText (lib/telegram.js) synthesises `[sticker:<name>]` for
14
+ * sendSticker calls so the messages.text column has *something* legible.
15
+ * On session resume Claude reads its own past assistant rows and sees
16
+ * `[sticker:working]` as the assistant message text — and starts mimicking
17
+ * the format LITERALLY, emitting the string `[sticker:working]` as plain
18
+ * text. parseResponse used to fall through to the chunked-text path, so
19
+ * the placeholder ended up rendered in the user's chat instead of an
20
+ * actual sticker.
21
+ *
22
+ * Match shape: optional whitespace, `[sticker:`, NAME (alnum/_/-), `]`,
23
+ * optional whitespace. NAME must resolve in the supplied stickerMap;
24
+ * unknown NAMEs fall through to the text path so a genuine
25
+ * "[sticker:foo]" message (e.g. someone joking, or a stale name from an
26
+ * older deploy) still reaches the user verbatim.
27
+ */
28
+
29
const STICKER_TAG_RE = /^\s*\[sticker:([A-Za-z0-9_-]+)\]\s*$/;

// ASCII digits, '#' and '*' carry the Unicode `Emoji` property because they
// are keycap bases. On their own (with or without a trailing U+FE0F) they
// are ordinary text, so a reply of "5" must not be treated as a reaction.
const KEYCAP_BASE_RE = /^[0-9#*]\uFE0F?$/;

/**
 * Read `key` from `table` only when it is an OWN property.
 *
 * A plain `table[key]` also resolves inherited members such as
 * `constructor`, `toString` or `__proto__`, all of which are truthy and
 * all of which are valid sticker NAMEs under STICKER_TAG_RE.
 *
 * @param {object} table - lookup object (sticker map / emoji map)
 * @param {string} key - candidate key
 * @returns {*} the own value, or undefined when `key` is not an own property
 */
function lookupOwn(table, key) {
  return Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined;
}

/**
 * Classify Claude's final-turn text as a sticker, a reaction, or plain text.
 *
 * @param {string} text - raw reply text; null/undefined/'' are treated as ''
 * @param {object} [options]
 * @param {object} [options.stickerMap] - sticker NAME → file id
 * @param {object} [options.emojiToSticker] - single emoji → file id
 * @returns {{text: string, sticker: *, stickerLabel: (string|null), reaction: (string|null)}}
 *   - `[sticker:NAME]` with NAME present in stickerMap → sticker
 *   - a lone emoji present in emojiToSticker → sticker
 *   - any other lone emoji → reaction
 *   - everything else → trimmed text (unknown sticker NAMEs included)
 */
function parseResponse(text, { stickerMap = {}, emojiToSticker = {} } = {}) {
  const trimmed = (text || '').trim();

  const tagMatch = trimmed.match(STICKER_TAG_RE);
  if (tagMatch) {
    const name = tagMatch[1];
    // Own-property lookup: `[sticker:constructor]` and friends must fall
    // through to the text path rather than resolve to Object.prototype.
    const fileId = lookupOwn(stickerMap, name);
    if (fileId) {
      return { text: '', sticker: fileId, stickerLabel: name, reaction: null };
    }
  }

  // A lone emoji, optionally followed by the emoji variation selector.
  // Bare keycap bases are excluded — see KEYCAP_BASE_RE.
  const emojiOnly = !KEYCAP_BASE_RE.test(trimmed)
    && (/^\p{Emoji_Presentation}$/u.test(trimmed)
      || /^\p{Emoji}\uFE0F?$/u.test(trimmed));

  if (emojiOnly && trimmed) {
    const mapped = lookupOwn(emojiToSticker, trimmed);
    if (mapped) {
      return { text: '', sticker: mapped, stickerLabel: trimmed, reaction: null };
    }
    return { text: '', sticker: null, stickerLabel: null, reaction: trimmed };
  }

  return { text: trimmed, sticker: null, stickerLabel: null, reaction: null };
}
55
+
56
+ module.exports = { parseResponse, STICKER_TAG_RE };
@@ -29,6 +29,14 @@ const { createInterface } = require('readline');
29
29
 
30
30
  const DEFAULT_CAP = 10;
31
31
  const DEFAULT_KILL_TIMEOUT_MS = 3000;
32
+ // 0.7.6 (item H): hard cap on per-session pending queue depth.
33
+ // Pre-fix, a chat with rapid-fire user messages (or a stuck Claude that
34
+ // stops emitting `result`) could grow pendingQueue unbounded — each
35
+ // pending holds a streamer + reactor + timers, so a runaway client
36
+ // could exhaust memory or burn API quota for ack reactions on every
37
+ // dropped message. 50 is generous (a normal turn never queues more
38
+ // than a handful) but safely bounded.
39
+ const DEFAULT_QUEUE_CAP = 50;
32
40
 
33
41
  /**
34
42
  * Pull user-visible text from a stream-json `assistant` event.
@@ -47,9 +55,38 @@ function extractAssistantText(event) {
47
55
  return parts.join('\n\n').trim().replace(/([^:]):\s*$/, '$1…');
48
56
  }
49
57
 
58
// 0.7.6 (item F): roll per-message usage objects up into turn-wide totals.
//
// `usageByMessage` is iterated through its `.values()`; each value is the
// usage object recorded for one assistant message. Four counters are
// summed: input_tokens, output_tokens, cache_creation_input_tokens and
// cache_read_input_tokens.
//
// Tolerant of gaps: falsy entries are skipped, and a counter only
// contributes when it is a finite number, so absent cache_* fields (or
// NaN / Infinity / strings) simply add nothing.
function sumUsage(usageByMessage) {
  const COUNTERS = [
    'input_tokens',
    'output_tokens',
    'cache_creation_input_tokens',
    'cache_read_input_tokens',
  ];

  // Every counter starts at zero, so the result always has all four keys.
  const totals = {};
  for (const field of COUNTERS) {
    totals[field] = 0;
  }

  for (const usage of usageByMessage.values()) {
    if (!usage) continue;
    for (const field of COUNTERS) {
      const amount = usage[field];
      if (Number.isFinite(amount)) {
        totals[field] += amount;
      }
    }
  }

  return totals;
}
85
+
50
86
  class ProcessManager {
51
87
  constructor({
52
88
  cap = DEFAULT_CAP,
89
+ queueCap = DEFAULT_QUEUE_CAP,
53
90
  spawnFn,
54
91
  db = null,
55
92
  logger = console,
@@ -61,9 +98,11 @@ class ProcessManager {
61
98
  onToolUse = null, // (sessionKey, toolName, entry) → void — routes to pendingQueue[0]
62
99
  onAssistantMessageStart = null, // (sessionKey, entry) → void — fires when a NEW top-level assistant message begins (after a previous one ended). Used by polygram.js to call streamer.forceNewMessage() so each assistant message gets its own bubble.
63
100
  onRespawn = null, // (sessionKey, reason, entry) → void — fires after graceful drain-and-kill
101
+ onQueueDrop = null, // 0.7.6: (sessionKey, droppedPending, entry) → void — fired when a pending is dropped because pendingQueue exceeded queueCap. Polygram uses this to surface a warning on the dropped message.
64
102
  } = {}) {
65
103
  if (!spawnFn) throw new Error('spawnFn required');
66
104
  this.cap = cap;
105
+ this.queueCap = queueCap;
67
106
  this.spawnFn = spawnFn;
68
107
  this.db = db;
69
108
  this.logger = logger;
@@ -75,6 +114,7 @@ class ProcessManager {
75
114
  this.onToolUse = onToolUse;
76
115
  this.onAssistantMessageStart = onAssistantMessageStart;
77
116
  this.onRespawn = onRespawn;
117
+ this.onQueueDrop = onQueueDrop;
78
118
  this.procs = new Map();
79
119
  }
80
120
 
@@ -292,6 +332,27 @@ class ProcessManager {
292
332
  && event.message.content.some((b) => b?.type === 'tool_use'))) {
293
333
  head.fireFirstStream?.();
294
334
  }
335
+ // 0.7.6 (item F): accumulate usage + counters for turn telemetry.
336
+ // The `result` event carries total_cost_usd + duration_ms but NOT
337
+ // a usage breakdown; usage lives on each assistant.message.usage.
338
+ // Anthropic emits cumulative totals per assistant message id
339
+ // (so within a single message the last usage seen wins; across
340
+ // distinct messages they sum).
341
+ const usage = event.message?.usage;
342
+ if (usage) {
343
+ if (messageId != null && head.lastUsageMessageId === messageId) {
344
+ // same message, replace running totals for this message
345
+ head.usageByMessage.set(messageId, usage);
346
+ } else {
347
+ head.lastUsageMessageId = messageId;
348
+ head.usageByMessage.set(messageId, usage);
349
+ }
350
+ }
351
+ if (Array.isArray(event.message?.content)) {
352
+ for (const b of event.message.content) {
353
+ if (b?.type === 'tool_use') head.toolUseCount++;
354
+ }
355
+ }
295
356
  if (added) {
296
357
  // Pre-0.7.0 we did `streamText = streamText + '\n\n' + added`,
297
358
  // which DUPLICATED text on every update because `added` is
@@ -334,12 +395,26 @@ class ProcessManager {
334
395
  entry.pendingQueue.shift();
335
396
  head.clearTimers();
336
397
  if (this.onResult) this.onResult(sessionKey, event, entry, head);
398
+ // 0.7.6 (item F): sum usage across distinct assistant messages
399
+ // (each message id seen got its last-known usage stored; sum the
400
+ // map values). Yields a single-row metric summary the caller
401
+ // can persist via db.insertTurnMetric().
402
+ const usageTotals = sumUsage(head.usageByMessage);
337
403
  head.resolve({
338
404
  text: event.result || '',
339
405
  sessionId: event.session_id,
340
406
  cost: event.total_cost_usd,
341
407
  duration: event.duration_ms,
342
408
  error: event.subtype === 'success' ? null : (event.error || event.subtype),
409
+ metrics: {
410
+ inputTokens: usageTotals.input_tokens,
411
+ outputTokens: usageTotals.output_tokens,
412
+ cacheCreationTokens: usageTotals.cache_creation_input_tokens,
413
+ cacheReadTokens: usageTotals.cache_read_input_tokens,
414
+ numAssistantMessages: head.usageByMessage.size,
415
+ numToolUses: head.toolUseCount,
416
+ resultSubtype: event.subtype || null,
417
+ },
343
418
  });
344
419
  // Activate next head or settle idle state.
345
420
  if (entry.pendingQueue.length > 0) {
@@ -456,6 +531,14 @@ class ProcessManager {
456
531
  idleTimer: null,
457
532
  maxTimer: null,
458
533
  activated: false,
534
+ // 0.7.6 (item F): per-turn telemetry accumulators. usageByMessage
535
+ // collects each assistant message's last-seen usage; we sum
536
+ // across messages at result time (each id is summed once, not
537
+ // per stream chunk, since usage in stream-json is cumulative
538
+ // *within* a message — last-seen-per-message wins).
539
+ usageByMessage: new Map(),
540
+ lastUsageMessageId: null,
541
+ toolUseCount: 0,
459
542
  // 0.7.4 (item B): set true when the first stream event (assistant
460
543
  // text or tool_use) arrives for this pending. Fires
461
544
  // `context.onFirstStream` once. Used by polygram to flip the
@@ -524,8 +607,40 @@ class ProcessManager {
524
607
  pending.idleTimer = idleTimer;
525
608
  };
526
609
 
610
+ // 0.7.6 (item H): enforce per-session queue cap. Drop the OLDEST
611
+ // non-active pending (index 1 — index 0 is the in-flight head and
612
+ // killing it mid-turn would corrupt Claude's state). The dropped
613
+ // pending's promise rejects so its handler (polygram.js) can
614
+ // surface a "couldn't keep up — message dropped" warning to the
615
+ // user. We drop AFTER pushing the new pending so the cap means
616
+ // "at most queueCap pendings live", not "refuse to enqueue past N".
617
+ // Refusing the new write would lose the most recent message —
618
+ // usually the one the user actually cares about — whereas
619
+ // dropping the oldest preserves recency at the cost of a stale
620
+ // queued turn that the user has likely moved past anyway.
527
621
  entry.pendingQueue.push(pending);
528
622
  entry.inFlight = true;
623
+ while (entry.pendingQueue.length > this.queueCap) {
624
+ // Splice at index 1 to leave the active head intact.
625
+ const dropped = entry.pendingQueue.splice(1, 1)[0];
626
+ if (!dropped) break;
627
+ dropped.clearTimers?.();
628
+ const dropErr = new Error(
629
+ `queue overflow: dropped (queue cap ${this.queueCap})`,
630
+ );
631
+ dropErr.code = 'QUEUE_OVERFLOW';
632
+ this._logEvent('queue-overflow-drop', {
633
+ session_key: sessionKey,
634
+ chat_id: entry.chatId,
635
+ queue_len: entry.pendingQueue.length,
636
+ source_msg_id: dropped.context?.sourceMsgId ?? null,
637
+ });
638
+ if (this.onQueueDrop) {
639
+ try { this.onQueueDrop(sessionKey, dropped, entry); }
640
+ catch (err) { this.logger.error(`[${entry.label}] onQueueDrop: ${err.message}`); }
641
+ }
642
+ dropped.reject(dropErr);
643
+ }
529
644
 
530
645
  // If we're the only pending, activate immediately. Otherwise wait
531
646
  // until the preceding pending is shifted out.
@@ -552,4 +667,10 @@ class ProcessManager {
552
667
  }
553
668
  }
554
669
 
555
- module.exports = { ProcessManager, DEFAULT_CAP, extractAssistantText };
670
+ module.exports = {
671
+ ProcessManager,
672
+ DEFAULT_CAP,
673
+ DEFAULT_QUEUE_CAP,
674
+ extractAssistantText,
675
+ sumUsage,
676
+ };
@@ -0,0 +1,42 @@
1
+ -- 0.7.6 (item F): turn_metrics table.
2
+ --
3
+ -- Stream-json `result` events from `claude -p` carry total_cost_usd and
4
+ -- duration_ms (already pulled into pending.resolve()), plus a `usage`
5
+ -- block on each `assistant` event with token counts including cache hits.
6
+ -- Pre-fix all of this was logged to console only; once a turn was done
7
+ -- the cost was unrecoverable for analysis.
8
+ --
9
+ -- This table persists per-turn metrics; each row records the (chat_id, msg_id) of the inbound message that started the turn (no uniqueness is enforced), so we
10
+ -- can answer questions like:
11
+ -- - cost / day per bot
12
+ -- - cache hit rate per chat
13
+ -- - which chats have the longest turns
14
+ -- - which models are most expensive overall
15
+ --
16
+ -- Stored at turn end (in onResult callback). One row per dispatched
17
+ -- user-message-to-final-reply cycle, even if the cycle had multiple
18
+ -- assistant messages (those are aggregated).
19
+ CREATE TABLE IF NOT EXISTS turn_metrics (
20
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
21
+ ts INTEGER NOT NULL, -- turn end timestamp (ms); insertTurnMetric falls back to Date.now() when the caller omits it
22
+ chat_id TEXT NOT NULL, -- stringified by insertTurnMetric before binding
23
+ thread_id TEXT, -- stringified when present, NULL otherwise
24
+ msg_id INTEGER NOT NULL, -- inbound message_id that started turn
25
+ session_id TEXT, -- claude session UUID for resume
26
+ bot_name TEXT, -- 'shumabit' / 'umi-assistant' / etc
27
+ model TEXT, -- chatConfig.model at turn start
28
+ effort TEXT, -- chatConfig.effort
29
+ input_tokens INTEGER, -- token columns: per-message usage summed across the turn's assistant messages
30
+ output_tokens INTEGER,
31
+ cache_creation_tokens INTEGER, -- from usage.cache_creation_input_tokens
32
+ cache_read_tokens INTEGER, -- from usage.cache_read_input_tokens
33
+ cost_usd REAL, -- total_cost_usd from the result event
34
+ duration_ms INTEGER, -- duration_ms from the result event
35
+ num_assistant_messages INTEGER, -- number of assistant messages that carried a usage block this turn (usageByMessage.size)
36
+ num_tool_uses INTEGER, -- tool_use content blocks counted across the turn's assistant events
37
+ result_subtype TEXT, -- 'success' / 'error_max_turns' / etc
38
+ error TEXT -- NULL on success; otherwise the result event's error (or its subtype)
39
+ );
40
+ CREATE INDEX IF NOT EXISTS idx_turn_metrics_chat_ts ON turn_metrics(chat_id, ts DESC);
41
+ CREATE INDEX IF NOT EXISTS idx_turn_metrics_recent ON turn_metrics(ts DESC);
42
+ CREATE INDEX IF NOT EXISTS idx_turn_metrics_session ON turn_metrics(session_id);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "polygram",
3
- "version": "0.7.4",
3
+ "version": "0.7.6",
4
4
  "description": "Telegram daemon for Claude Code that preserves the OpenClaw per-chat session model. Migration path for OpenClaw users moving to Claude Code.",
5
5
  "main": "lib/ipc-client.js",
6
6
  "bin": {
package/polygram.js CHANGED
@@ -844,6 +844,14 @@ let isShuttingDown = false;
844
844
  // distinguish unique failures from the obvious "try again" cases.
845
845
  function errorReplyText(err) {
846
846
  const msg = err?.message || '';
847
+ // 0.7.6 (item H): queue overflow has a typed err.code so we don't have
848
+ // to grep error text. The dropped pending is OLDER than the current
849
+ // queue depth; its sender has likely sent more recent messages we're
850
+ // still working on. Tell them this one was skipped without making it
851
+ // sound like a crash.
852
+ if (err?.code === 'QUEUE_OVERFLOW') {
853
+ return '⏭ Couldn\'t keep up — this message was skipped while I was processing newer ones. Resend if it still matters.';
854
+ }
847
855
  if (/idle with no Claude activity/i.test(msg)) {
848
856
  return '⏳ I went quiet too long without finishing. Try resending or simplifying the task.';
849
857
  }
@@ -961,19 +969,21 @@ const stdinLock = createAsyncLock();
961
969
  // hammering sendChatAction every 4s for the full turn duration.
962
970
 
963
971
  // ─── Response parsing (stickers, reactions) ─────────────────────────
964
-
972
+ // Implementation lives in lib/parse-response.js so tests can require it
973
+ // without starting a bot (polygram.js is a top-level script that calls
974
+ // main() at bottom). The wrapper here supplies the runtime stickerMap /
975
+ // emojiToSticker that the parser looks up against.
976
+ //
977
+ // 0.7.5: parser also recognises a literal `[sticker:NAME]` pattern in
978
+ // addition to single-emoji shortcuts. Claude reads its own past outbound
979
+ // rows on session resume, sees `[sticker:working]` (the placeholder
980
+ // deriveOutboundText synthesises for sendSticker rows), and starts
981
+ // mimicking the format as plain text. Without the new branch the
982
+ // placeholder was rendered verbatim in the chat instead of swapped for
983
+ // the actual sticker.
984
+ const { parseResponse: parseResponseImpl } = require('./lib/parse-response');
965
985
  function parseResponse(text) {
966
- const trimmed = text.trim();
967
- const emojiOnly = /^\p{Emoji_Presentation}$/u.test(trimmed) || /^\p{Emoji}\uFE0F?$/u.test(trimmed);
968
-
969
- if (emojiOnly && trimmed) {
970
- if (emojiToSticker[trimmed]) {
971
- return { text: '', sticker: emojiToSticker[trimmed], stickerLabel: trimmed, reaction: null };
972
- }
973
- return { text: '', sticker: null, stickerLabel: null, reaction: trimmed };
974
- }
975
-
976
- return { text: trimmed, sticker: null, stickerLabel: null, reaction: null };
986
+ return parseResponseImpl(text, { stickerMap, emojiToSticker });
977
987
  }
978
988
 
979
989
  // ─── Cron/IPC send ─────────────────────────────────────────────────
@@ -1891,6 +1901,32 @@ async function handleMessage(sessionKey, chatId, msg, bot) {
1891
1901
  });
1892
1902
  const elapsed = ((Date.now() - t0) / 1000).toFixed(1);
1893
1903
 
1904
+ // 0.7.6 (item F): persist per-turn telemetry. Stream-json result
1905
+ // events carry total_cost_usd + duration_ms; sumUsage rolled up
1906
+ // input/output/cache token counts from per-message usage. One row
1907
+ // per dispatched user message; queryable via turn_metrics table.
1908
+ if (result.metrics) {
1909
+ dbWrite(() => db.insertTurnMetric({
1910
+ chat_id: chatId,
1911
+ thread_id: threadId,
1912
+ msg_id: msg.message_id,
1913
+ session_id: result.sessionId,
1914
+ bot_name: BOT_NAME,
1915
+ model: chatConfig.model,
1916
+ effort: chatConfig.effort,
1917
+ input_tokens: result.metrics.inputTokens,
1918
+ output_tokens: result.metrics.outputTokens,
1919
+ cache_creation_tokens: result.metrics.cacheCreationTokens,
1920
+ cache_read_tokens: result.metrics.cacheReadTokens,
1921
+ cost_usd: result.cost,
1922
+ duration_ms: result.duration,
1923
+ num_assistant_messages: result.metrics.numAssistantMessages,
1924
+ num_tool_uses: result.metrics.numToolUses,
1925
+ result_subtype: result.metrics.resultSubtype,
1926
+ error: result.error || null,
1927
+ }), 'insert turn_metric');
1928
+ }
1929
+
1894
1930
  stopTyping();
1895
1931
 
1896
1932
  if (result.error) {