npm - polygram - Versions diffs - 0.7.4 → 0.7.6 - Mend

polygram 0.7.4 → 0.7.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

package/.claude-plugin/plugin.json +1 -1
package/lib/db.js +43 -0
package/lib/parse-response.js +56 -0
package/lib/process-manager.js +122 -1
package/migrations/009-turn-metrics.sql +42 -0
package/package.json +1 -1
package/polygram.js +48 -12

package/.claude-plugin/plugin.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "$schema": "https://anthropic.com/claude-code/plugin.schema.json",
   "name": "polygram",
-  "version": "0.7.4",
+  "version": "0.7.6",
   "description": "Telegram integration for Claude Code that preserves the OpenClaw per-chat session model. Migration target for OpenClaw users. Multi-bot, multi-chat, per-topic isolation; SQLite transcripts; inline-keyboard approvals. Bundles /polygram:status|logs|pair-code|approvals admin commands and a history skill.",
   "keywords": [
     "telegram",

package/lib/db.js CHANGED Viewed

@@ -152,6 +152,26 @@ function wrap(db) {
     VALUES (?, ?, ?, ?)
   `);
+  // 0.7.6 (item F): per-turn cost / token / duration metrics. Persisted
+  // at turn end, after the turn's result resolves. One row per dispatched
+  // user message → final reply cycle, even if the cycle had multiple
+  // assistant messages. See migrations/009-turn-metrics.sql.
+  const insertTurnMetricStmt = db.prepare(`
+    INSERT INTO turn_metrics (
+      ts, chat_id, thread_id, msg_id, session_id, bot_name,
+      model, effort,
+      input_tokens, output_tokens, cache_creation_tokens, cache_read_tokens,
+      cost_usd, duration_ms, num_assistant_messages, num_tool_uses,
+      result_subtype, error
+    ) VALUES (
+      @ts, @chat_id, @thread_id, @msg_id, @session_id, @bot_name,
+      @model, @effort,
+      @input_tokens, @output_tokens, @cache_creation_tokens, @cache_read_tokens,
+      @cost_usd, @duration_ms, @num_assistant_messages, @num_tool_uses,
+      @result_subtype, @error
+    )
+  `);
   const logConfigChangeStmt = db.prepare(`
     INSERT INTO config_changes (
       chat_id, thread_id, field, old_value, new_value,
@@ -277,6 +297,29 @@ function wrap(db) {
       );
     },
+    insertTurnMetric(row) { // Bind one turn's metrics to the prepared turn_metrics INSERT and run it.
+      return insertTurnMetricStmt.run({
+        ts: row.ts || Date.now(), // falsy ts (missing or 0) falls back to the current time
+        chat_id: String(row.chat_id), // ids are bound as strings
+        thread_id: row.thread_id != null ? String(row.thread_id) : null, // null/undefined stays NULL
+        msg_id: row.msg_id, // passed through as-is; the column is declared NOT NULL
+        session_id: row.session_id || null, // `||`: empty strings are stored as NULL as well
+        bot_name: row.bot_name || null,
+        model: row.model || null,
+        effort: row.effort || null,
+        input_tokens: row.input_tokens ?? null, // `??`: a genuine 0 is kept; only null/undefined become NULL
+        output_tokens: row.output_tokens ?? null,
+        cache_creation_tokens: row.cache_creation_tokens ?? null,
+        cache_read_tokens: row.cache_read_tokens ?? null,
+        cost_usd: row.cost_usd ?? null,
+        duration_ms: row.duration_ms ?? null,
+        num_assistant_messages: row.num_assistant_messages ?? null,
+        num_tool_uses: row.num_tool_uses ?? null,
+        result_subtype: row.result_subtype || null,
+        error: row.error || null,
+      });
+    },
     logConfigChange(row) {
       return logConfigChangeStmt.run({
         chat_id: String(row.chat_id),

package/lib/parse-response.js ADDED Viewed

@@ -0,0 +1,56 @@
+/**
+ * Parse Claude's final-turn text into one of three outbound shapes:
+ *   - sticker (single emoji that maps to a sticker, OR literal
+ *     `[sticker:NAME]` mimic — see below)
+ *   - reaction (single emoji not mapped to a sticker)
+ *   - text (everything else)
+ *
+ * Why this lives in lib/: polygram.js is a top-level script (calls main()
+ * at bottom) and can't be require()'d from a test without starting a bot.
+ * Pulling parseResponse out lets tests cover the regex edge cases.
+ *
+ * 0.7.5 (item: sticker regression):
+ * deriveOutboundText (lib/telegram.js) synthesises `[sticker:<name>]` for
+ * sendSticker calls so the messages.text column has *something* legible.
+ * On session resume Claude reads its own past assistant rows and sees
+ * `[sticker:working]` as the assistant message text — and starts mimicking
+ * the format LITERALLY, emitting the string `[sticker:working]` as plain
+ * text. parseResponse used to fall through to the chunked-text path, so
+ * the placeholder ended up rendered in the user's chat instead of an
+ * actual sticker.
+ *
+ * Match shape: optional whitespace, `[sticker:`, NAME (alnum/_/-), `]`,
+ * optional whitespace. NAME must resolve in the supplied stickerMap;
+ * unknown NAMEs fall through to the text path so a genuine
+ * "[sticker:foo]" message (e.g. someone joking, or a stale name from an
+ * older deploy) still reaches the user verbatim.
+ *
+ * @param {string} text  Raw final-turn text; a falsy value is treated as ''.
+ * @param {object} [maps]  Lookup tables; both default to {}.
+ * @param {object} [maps.stickerMap]  sticker NAME → value returned as `sticker`.
+ * @param {object} [maps.emojiToSticker]  single emoji → value returned as `sticker`.
+ * @returns {{text: string, sticker: *, stickerLabel: ?string, reaction: ?string}}
+ *   At most one of text / sticker / reaction is non-empty; the others are
+ *   '' (text) or null. Empty input yields text '' with everything else null.
+ *
+ * NOTE(review): the Unicode `Emoji` property also covers ASCII digits, `#`
+ * and `*`, so a one-character reply such as "5" takes the reaction branch
+ * rather than the text branch — confirm whether that is intended.
+ */
+const STICKER_TAG_RE = /^\s*\[sticker:([A-Za-z0-9_-]+)\]\s*$/;
+function parseResponse(text, { stickerMap = {}, emojiToSticker = {} } = {}) {
+  const trimmed = (text || '').trim();
+  const tagMatch = trimmed.match(STICKER_TAG_RE); // literal `[sticker:NAME]` mimic is checked first
+  if (tagMatch) {
+    const name = tagMatch[1];
+    const fileId = stickerMap[name];
+    if (fileId) {
+      return { text: '', sticker: fileId, stickerLabel: name, reaction: null };
+    }
+    // unknown NAME: no return here, so the input continues to the emoji/text checks
+  }
+  const emojiOnly = /^\p{Emoji_Presentation}$/u.test(trimmed)
+    || /^\p{Emoji}️?$/u.test(trimmed); // the invisible character before `?` appears to be U+FE0F (variation selector-16)
+  if (emojiOnly && trimmed) {
+    if (emojiToSticker[trimmed]) {
+      return { text: '', sticker: emojiToSticker[trimmed], stickerLabel: trimmed, reaction: null };
+    }
+    return { text: '', sticker: null, stickerLabel: null, reaction: trimmed };
+  }
+  return { text: trimmed, sticker: null, stickerLabel: null, reaction: null };
+}
+module.exports = { parseResponse, STICKER_TAG_RE };

package/lib/process-manager.js CHANGED Viewed

@@ -29,6 +29,14 @@ const { createInterface } = require('readline');
 const DEFAULT_CAP = 10;
 const DEFAULT_KILL_TIMEOUT_MS = 3000;
+// 0.7.6 (item H): hard cap on per-session pending queue depth.
+// Pre-fix, a chat with rapid-fire user messages (or a stuck Claude that
+// stops emitting `result`) could grow pendingQueue unbounded — each
+// pending holds a streamer + reactor + timers, so a runaway client
+// could exhaust memory or burn API quota for ack reactions on every
+// dropped message. 50 is generous (a normal turn never queues more
+// than a handful) but safely bounded.
+const DEFAULT_QUEUE_CAP = 50;
 /**
  * Pull user-visible text from a stream-json `assistant` event.
@@ -47,9 +55,38 @@ function extractAssistantText(event) {
   return parts.join('\n\n').trim().replace(/([^:]):\s*$/, '$1…');
 }
+// 0.7.6 (item F): total the four usage counters (input_tokens,
+// output_tokens, cache_creation_input_tokens, cache_read_input_tokens)
+// over every value of `usageByMessage`, a Map whose values are usage
+// objects. The caller stores the last-seen usage per assistant message
+// id, so summing the values gives turn-wide totals. Returns a new object
+// with those four keys. Falsy values and non-finite fields add nothing —
+// older claude versions may not always emit cache_*_input_tokens.
+function sumUsage(usageByMessage) {
+  const out = {
+    input_tokens: 0,
+    output_tokens: 0,
+    cache_creation_input_tokens: 0,
+    cache_read_input_tokens: 0,
+  };
+  for (const u of usageByMessage.values()) {
+    if (!u) continue; // skip null/undefined usage entries
+    if (Number.isFinite(u.input_tokens)) out.input_tokens += u.input_tokens;
+    if (Number.isFinite(u.output_tokens)) out.output_tokens += u.output_tokens;
+    if (Number.isFinite(u.cache_creation_input_tokens)) {
+      out.cache_creation_input_tokens += u.cache_creation_input_tokens;
+    }
+    if (Number.isFinite(u.cache_read_input_tokens)) {
+      out.cache_read_input_tokens += u.cache_read_input_tokens;
+    }
+  }
+  return out; // all zeros when the map is empty
+}
 class ProcessManager {
   constructor({
     cap = DEFAULT_CAP,
+    queueCap = DEFAULT_QUEUE_CAP,
     spawnFn,
     db = null,
     logger = console,
@@ -61,9 +98,11 @@ class ProcessManager {
     onToolUse = null,     // (sessionKey, toolName, entry) → void — routes to pendingQueue[0]
     onAssistantMessageStart = null, // (sessionKey, entry) → void — fires when a NEW top-level assistant message begins (after a previous one ended). Used by polygram.js to call streamer.forceNewMessage() so each assistant message gets its own bubble.
     onRespawn = null,     // (sessionKey, reason, entry) → void — fires after graceful drain-and-kill
+    onQueueDrop = null,   // 0.7.6: (sessionKey, droppedPending, entry) → void — fired when a pending is dropped because pendingQueue exceeded queueCap. Polygram uses this to surface a warning on the dropped message.
   } = {}) {
     if (!spawnFn) throw new Error('spawnFn required');
     this.cap = cap;
+    this.queueCap = queueCap;
     this.spawnFn = spawnFn;
     this.db = db;
     this.logger = logger;
@@ -75,6 +114,7 @@ class ProcessManager {
     this.onToolUse = onToolUse;
     this.onAssistantMessageStart = onAssistantMessageStart;
     this.onRespawn = onRespawn;
+    this.onQueueDrop = onQueueDrop;
     this.procs = new Map();
   }
@@ -292,6 +332,27 @@ class ProcessManager {
             && event.message.content.some((b) => b?.type === 'tool_use'))) {
           head.fireFirstStream?.();
         }
+        // 0.7.6 (item F): accumulate usage + counters for turn telemetry.
+        // The `result` event carries total_cost_usd + duration_ms but NOT
+        // a usage breakdown; usage lives on each assistant.message.usage.
+        // Anthropic emits cumulative totals per assistant message id
+        // (so within a single message the last usage seen wins; across
+        // distinct messages they sum).
+        const usage = event.message?.usage;
+        if (usage) {
+          if (messageId != null && head.lastUsageMessageId === messageId) {
+            // same message, replace running totals for this message
+            head.usageByMessage.set(messageId, usage);
+          } else {
+            head.lastUsageMessageId = messageId;
+            head.usageByMessage.set(messageId, usage);
+          }
+        }
+        if (Array.isArray(event.message?.content)) {
+          for (const b of event.message.content) {
+            if (b?.type === 'tool_use') head.toolUseCount++;
+          }
+        }
         if (added) {
           // Pre-0.7.0 we did `streamText = streamText + '\n\n' + added`,
           // which DUPLICATED text on every update because `added` is
@@ -334,12 +395,26 @@ class ProcessManager {
         entry.pendingQueue.shift();
         head.clearTimers();
         if (this.onResult) this.onResult(sessionKey, event, entry, head);
+        // 0.7.6 (item F): sum usage across distinct assistant messages
+        // (each message id seen got its last-known usage stored; sum the
+        // map values). Yields a single-row metric summary the caller
+        // can persist via db.insertTurnMetric().
+        const usageTotals = sumUsage(head.usageByMessage);
         head.resolve({
           text: event.result || '',
           sessionId: event.session_id,
           cost: event.total_cost_usd,
           duration: event.duration_ms,
           error: event.subtype === 'success' ? null : (event.error || event.subtype),
+          metrics: {
+            inputTokens: usageTotals.input_tokens,
+            outputTokens: usageTotals.output_tokens,
+            cacheCreationTokens: usageTotals.cache_creation_input_tokens,
+            cacheReadTokens: usageTotals.cache_read_input_tokens,
+            numAssistantMessages: head.usageByMessage.size,
+            numToolUses: head.toolUseCount,
+            resultSubtype: event.subtype || null,
+          },
         });
         // Activate next head or settle idle state.
         if (entry.pendingQueue.length > 0) {
@@ -456,6 +531,14 @@ class ProcessManager {
         idleTimer: null,
         maxTimer: null,
         activated: false,
+        // 0.7.6 (item F): per-turn telemetry accumulators. usageByMessage
+        // collects each assistant message's last-seen usage; we sum
+        // across messages at result time (each id is summed once, not
+        // per stream chunk, since usage in stream-json is cumulative
+        // *within* a message — last-seen-per-message wins).
+        usageByMessage: new Map(),
+        lastUsageMessageId: null,
+        toolUseCount: 0,
         // 0.7.4 (item B): set true when the first stream event (assistant
         // text or tool_use) arrives for this pending. Fires
         // `context.onFirstStream` once. Used by polygram to flip the
@@ -524,8 +607,40 @@ class ProcessManager {
         pending.idleTimer = idleTimer;
       };
+      // 0.7.6 (item H): enforce per-session queue cap. Drop the OLDEST
+      // non-active pending (index 1 — index 0 is the in-flight head and
+      // killing it mid-turn would corrupt Claude's state). The dropped
+      // pending's promise rejects so its handler (polygram.js) can
+      // surface a "couldn't keep up — message dropped" warning to the
+      // user. We drop AFTER pushing the new pending so the cap means
+      // "at most queueCap pendings live", not "refuse to enqueue past N".
+      // Refusing the new write would lose the most recent message —
+      // usually the one the user actually cares about — whereas
+      // dropping the oldest preserves recency at the cost of a stale
+      // queued turn that the user has likely moved past anyway.
       entry.pendingQueue.push(pending);
       entry.inFlight = true;
+      while (entry.pendingQueue.length > this.queueCap) {
+        // Splice at index 1 to leave the active head intact.
+        const dropped = entry.pendingQueue.splice(1, 1)[0];
+        if (!dropped) break;
+        dropped.clearTimers?.();
+        const dropErr = new Error(
+          `queue overflow: dropped (queue cap ${this.queueCap})`,
+        );
+        dropErr.code = 'QUEUE_OVERFLOW';
+        this._logEvent('queue-overflow-drop', {
+          session_key: sessionKey,
+          chat_id: entry.chatId,
+          queue_len: entry.pendingQueue.length,
+          source_msg_id: dropped.context?.sourceMsgId ?? null,
+        });
+        if (this.onQueueDrop) {
+          try { this.onQueueDrop(sessionKey, dropped, entry); }
+          catch (err) { this.logger.error(`[${entry.label}] onQueueDrop: ${err.message}`); }
+        }
+        dropped.reject(dropErr);
+      }
       // If we're the only pending, activate immediately. Otherwise wait
       // until the preceding pending is shifted out.
@@ -552,4 +667,10 @@ class ProcessManager {
   }
 }
-module.exports = { ProcessManager, DEFAULT_CAP, extractAssistantText };
+module.exports = {
+  ProcessManager,
+  DEFAULT_CAP,
+  DEFAULT_QUEUE_CAP,
+  extractAssistantText,
+  sumUsage,
+};

package/migrations/009-turn-metrics.sql ADDED Viewed

@@ -0,0 +1,42 @@
+-- 0.7.6 (item F): turn_metrics table.
+--
+-- Stream-json `result` events from `claude -p` carry total_cost_usd and
+-- duration_ms (already pulled into pending.resolve()); each `assistant`
+-- event additionally carries a `usage` block with token counts, including
+-- cache hits. Pre-fix all of this was logged to console only; once a turn
+-- was done the cost was unrecoverable for analysis.
+--
+-- This table persists per-turn metrics tagged with (chat_id, msg_id) — the
+-- pair is not declared unique; `id` is the primary key — so we can answer:
+--   - cost / day per bot
+--   - cache hit rate per chat
+--   - which chats have the longest turns
+--   - which models are most expensive overall
+--
+-- Written at turn end, once the turn's result has resolved. One row per
+-- dispatched user-message-to-final-reply cycle, even if the cycle had
+-- multiple assistant messages (those are aggregated).
+CREATE TABLE IF NOT EXISTS turn_metrics (
+  id INTEGER PRIMARY KEY AUTOINCREMENT,
+  ts INTEGER NOT NULL,                  -- turn end timestamp (ms)
+  chat_id TEXT NOT NULL,
+  thread_id TEXT,
+  msg_id INTEGER NOT NULL,              -- inbound message_id that started turn
+  session_id TEXT,                      -- claude session UUID for resume
+  bot_name TEXT,                        -- 'shumabit' / 'umi-assistant' / etc
+  model TEXT,                           -- chatConfig.model at turn start
+  effort TEXT,                          -- chatConfig.effort
+  input_tokens INTEGER,
+  output_tokens INTEGER,
+  cache_creation_tokens INTEGER,
+  cache_read_tokens INTEGER,
+  cost_usd REAL,
+  duration_ms INTEGER,
+  num_assistant_messages INTEGER,       -- distinct assistant message ids that reported usage
+  num_tool_uses INTEGER,
+  result_subtype TEXT,                  -- 'success' / 'error_max_turns' / etc
+  error TEXT
+);
+CREATE INDEX IF NOT EXISTS idx_turn_metrics_chat_ts ON turn_metrics(chat_id, ts DESC);
+CREATE INDEX IF NOT EXISTS idx_turn_metrics_recent ON turn_metrics(ts DESC);
+CREATE INDEX IF NOT EXISTS idx_turn_metrics_session ON turn_metrics(session_id);

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "polygram",
-  "version": "0.7.4",
+  "version": "0.7.6",
   "description": "Telegram daemon for Claude Code that preserves the OpenClaw per-chat session model. Migration path for OpenClaw users moving to Claude Code.",
   "main": "lib/ipc-client.js",
   "bin": {

package/polygram.js CHANGED Viewed

@@ -844,6 +844,14 @@ let isShuttingDown = false;
 // distinguish unique failures from the obvious "try again" cases.
 function errorReplyText(err) {
   const msg = err?.message || '';
+  // 0.7.6 (item H): queue overflow has a typed err.code so we don't have
+  // to grep error text. The dropped pending is the OLDEST queued one
+  // (never the in-flight head); its sender has likely sent more recent
+  // messages we're still working on. Tell them this one was skipped
+  // without making it sound like a crash.
+  if (err?.code === 'QUEUE_OVERFLOW') {
+    return '⏭ Couldn\'t keep up — this message was skipped while I was processing newer ones. Resend if it still matters.';
+  }
   if (/idle with no Claude activity/i.test(msg)) {
     return '⏳ I went quiet too long without finishing. Try resending or simplifying the task.';
   }
@@ -961,19 +969,21 @@ const stdinLock = createAsyncLock();
 // hammering sendChatAction every 4s for the full turn duration.
 // ─── Response parsing (stickers, reactions) ─────────────────────────
+// Implementation lives in lib/parse-response.js so tests can require it
+// without starting a bot (polygram.js is a top-level script that calls
+// main() at bottom). The wrapper here supplies the runtime stickerMap /
+// emojiToSticker that the parser looks up against.
+//
+// 0.7.5: parser also recognises a literal `[sticker:NAME]` pattern in
+// addition to single-emoji shortcuts. Claude reads its own past outbound
+// rows on session resume, sees `[sticker:working]` (the placeholder
+// deriveOutboundText synthesises for sendSticker rows), and starts
+// mimicking the format as plain text. Without the new branch the
+// placeholder was rendered verbatim in the chat instead of swapped for
+// the actual sticker.
+const { parseResponse: parseResponseImpl } = require('./lib/parse-response');
 function parseResponse(text) {
-  const trimmed = text.trim();
-  const emojiOnly = /^\p{Emoji_Presentation}$/u.test(trimmed) || /^\p{Emoji}\uFE0F?$/u.test(trimmed);
-  if (emojiOnly && trimmed) {
-    if (emojiToSticker[trimmed]) {
-      return { text: '', sticker: emojiToSticker[trimmed], stickerLabel: trimmed, reaction: null };
-    }
-    return { text: '', sticker: null, stickerLabel: null, reaction: trimmed };
-  }
-  return { text: trimmed, sticker: null, stickerLabel: null, reaction: null };
+  return parseResponseImpl(text, { stickerMap, emojiToSticker });
 }
 // ─── Cron/IPC send ─────────────────────────────────────────────────
@@ -1891,6 +1901,32 @@ async function handleMessage(sessionKey, chatId, msg, bot) {
     });
     const elapsed = ((Date.now() - t0) / 1000).toFixed(1);
+    // 0.7.6 (item F): persist per-turn telemetry. Stream-json result
+    // events carry total_cost_usd + duration_ms; sumUsage rolled up
+    // input/output/cache token counts from per-message usage. One row
+    // per dispatched user message; queryable via turn_metrics table.
+    if (result.metrics) {
+      dbWrite(() => db.insertTurnMetric({
+        chat_id: chatId,
+        thread_id: threadId,
+        msg_id: msg.message_id,
+        session_id: result.sessionId,
+        bot_name: BOT_NAME,
+        model: chatConfig.model,
+        effort: chatConfig.effort,
+        input_tokens: result.metrics.inputTokens,
+        output_tokens: result.metrics.outputTokens,
+        cache_creation_tokens: result.metrics.cacheCreationTokens,
+        cache_read_tokens: result.metrics.cacheReadTokens,
+        cost_usd: result.cost,
+        duration_ms: result.duration,
+        num_assistant_messages: result.metrics.numAssistantMessages,
+        num_tool_uses: result.metrics.numToolUses,
+        result_subtype: result.metrics.resultSubtype,
+        error: result.error || null,
+      }), 'insert turn_metric');
+    }
     stopTyping();
     if (result.error) {