npm - polygram - Versions diffs - 0.7.5 → 0.7.7 - Mend

polygram 0.7.5 → 0.7.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

package/.claude-plugin/plugin.json +1 -1
package/lib/db.js +43 -0
package/lib/error-classify.js +290 -0
package/lib/process-manager.js +208 -3
package/migrations/009-turn-metrics.sql +42 -0
package/package.json +1 -1
package/polygram.js +54 -22

package/.claude-plugin/plugin.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "$schema": "https://anthropic.com/claude-code/plugin.schema.json",
   "name": "polygram",
-  "version": "0.7.5",
+  "version": "0.7.7",
   "description": "Telegram integration for Claude Code that preserves the OpenClaw per-chat session model. Migration target for OpenClaw users. Multi-bot, multi-chat, per-topic isolation; SQLite transcripts; inline-keyboard approvals. Bundles /polygram:status|logs|pair-code|approvals admin commands and a history skill.",
   "keywords": [
     "telegram",

package/lib/db.js CHANGED Viewed

@@ -152,6 +152,26 @@ function wrap(db) {
     VALUES (?, ?, ?, ?)
   `);
+  // 0.7.6 (item F): per-turn cost / token / duration metrics. Persisted
+  // at turn end (onResult callback). One row per dispatched user
+  // message → final reply cycle, even if the cycle had multiple
+  // assistant messages. See migrations/009-turn-metrics.sql.
+  const insertTurnMetricStmt = db.prepare(`
+    INSERT INTO turn_metrics (
+      ts, chat_id, thread_id, msg_id, session_id, bot_name,
+      model, effort,
+      input_tokens, output_tokens, cache_creation_tokens, cache_read_tokens,
+      cost_usd, duration_ms, num_assistant_messages, num_tool_uses,
+      result_subtype, error
+    ) VALUES (
+      @ts, @chat_id, @thread_id, @msg_id, @session_id, @bot_name,
+      @model, @effort,
+      @input_tokens, @output_tokens, @cache_creation_tokens, @cache_read_tokens,
+      @cost_usd, @duration_ms, @num_assistant_messages, @num_tool_uses,
+      @result_subtype, @error
+    )
+  `);
   const logConfigChangeStmt = db.prepare(`
     INSERT INTO config_changes (
       chat_id, thread_id, field, old_value, new_value,
@@ -277,6 +297,29 @@ function wrap(db) {
       );
     },
+    // Persist one turn_metrics row via the prepared INSERT statement.
+    // `row` carries the per-turn fields; anything absent is stored as
+    // NULL, except `ts` (defaults to the current time) and
+    // `chat_id` / `msg_id`, which are passed through as given.
+    insertTurnMetric(row) {
+      // Text-ish columns: any falsy value (undefined, '', 0) becomes NULL.
+      const textOrNull = (value) => (value ? value : null);
+      // Numeric columns: only null/undefined become NULL, so a real 0
+      // (zero tokens, zero cost) is kept.
+      const numberOrNull = (value) => (value === null || value === undefined ? null : value);
+      const threadId = row.thread_id != null ? String(row.thread_id) : null;
+      const params = {
+        ts: row.ts ? row.ts : Date.now(),
+        chat_id: String(row.chat_id),
+        thread_id: threadId,
+        msg_id: row.msg_id,
+        session_id: textOrNull(row.session_id),
+        bot_name: textOrNull(row.bot_name),
+        model: textOrNull(row.model),
+        effort: textOrNull(row.effort),
+        input_tokens: numberOrNull(row.input_tokens),
+        output_tokens: numberOrNull(row.output_tokens),
+        cache_creation_tokens: numberOrNull(row.cache_creation_tokens),
+        cache_read_tokens: numberOrNull(row.cache_read_tokens),
+        cost_usd: numberOrNull(row.cost_usd),
+        duration_ms: numberOrNull(row.duration_ms),
+        num_assistant_messages: numberOrNull(row.num_assistant_messages),
+        num_tool_uses: numberOrNull(row.num_tool_uses),
+        result_subtype: textOrNull(row.result_subtype),
+        error: textOrNull(row.error),
+      };
+      return insertTurnMetricStmt.run(params);
+    },
     logConfigChange(row) {
       return logConfigChangeStmt.run({
         chat_id: String(row.chat_id),

package/lib/error-classify.js ADDED Viewed

@@ -0,0 +1,290 @@
+/**
+ * Error classifier — maps any error from any source to a stable shape.
+ *
+ * Sources today (0.7.7): stream-json `result` events with error
+ * subtypes, child_process `'close'`/`'error'` event errors, idle
+ * timer fires, polygram-internal Errors with `err.code` set.
+ *
+ * Sources after 0.8.0 SDK migration: SDK iterator throws
+ * (`AbortError` named class plus plain `Error`s), `SDKResultMessage`
+ * with subtypes `error_during_execution` / `error_max_turns` /
+ * `error_max_budget_usd` / `error_max_structured_output_retries`,
+ * per-message `SDKAssistantMessage.error` subtypes
+ * (`authentication_failed` / `billing_error` / `rate_limit` /
+ * `invalid_request` / `server_error` / `unknown` / `max_output_tokens`),
+ * 5xx HTTP errors that bubble through the SDK transport.
+ *
+ * Returning the same shape regardless of transport means
+ * `errorReplyText` in polygram.js doesn't grow N branches every time
+ * a new error class shows up — we just add a row to PATTERNS or a
+ * `code:` short-circuit at the top.
+ *
+ * Layered ship order (per v4 plan §6.5.1):
+ *   - 0.7.7 (this file): transport-agnostic patterns and the public
+ *     `classify()` API. Polygram.js's `errorReplyText` consults this
+ *     module directly.
+ *   - Phase 1 of 0.8.0 (later): adds typed-code branches for
+ *     `INTERRUPTED`, plus SDK `error_max_structured_output_retries`,
+ *     plus per-message SDK error subtypes.
+ *   - Phase 2 of 0.8.0 (later): adds AUTO_RECOVER actions
+ *     (`reset_session` etc) so pm can self-heal stuck sessions
+ *     without waiting for the user to type /new.
+ */
+'use strict';
+// Substring/regex patterns matched against the error string. Order
+// is significant only when patterns overlap — `transient5xx` is last
+// because the others (auth/billing/format) carry their own status
+// codes too. First match wins.
+const PATTERNS = {
+  // Anthropic API rate limit (429) — "rate-limited", "Too Many
+  // Requests", token-bucket exhaustion text. The separator class
+  // includes `-` so the hyphenated spellings ("rate-limited",
+  // "rate-limit") match as well as "rate_limit" / "rate limit" /
+  // "ratelimit".
+  rateLimit:        /\b429\b|rate[-_ ]?limit|too[_ ]many[_ ]requests|tokens? per minute/i,
+  // Billing / quota (402, "insufficient credit"). Fires before any
+  // model call when the workspace is out of funds.
+  billing:          /\b402\b|payment[_ ]required|billing|insufficient[_ ]credit/i,
+  // Auth: 401/403, OAuth token expiry, refresh failure. The 0.8.0
+  // plan ships an explicit auth-expired UX (admin-chat notify +
+  // pause); 0.7.7 just maps to a friendlier user message.
+  authExpired:      /\b401\b|\b403\b|unauthor(ized|ised)|forbidden|token[_ ]expired|oauth[_ ]token[_ ]refresh[_ ]failed/i,
+  // Context window exceeded — too many tokens for the model. Usually
+  // surfaces as `prompt is too long` from Anthropic; sometimes as
+  // generic "exceeds maximum context" depending on SDK version.
+  contextOverflow:  /context[_ ](window|length)|prompt[_ ]too[_ ]large|exceeds[_ ]maximum[_ ]context|prompt is too long/i,
+  // Role alternation / message ordering — fires when transcript has
+  // consecutive same-role messages or a tool_use without matching
+  // tool_result. Polygram doesn't generate these directly, but they
+  // can surface after an interrupted turn.
+  roleOrdering:     /role.*alternat|message[_ ]ordering|consecutive (user|assistant)/i,
+  // Tool call missing required `input` field. Indicates corrupted
+  // history; user-facing message tells them to /new. Word order
+  // varies across Anthropic SDK versions — accept either
+  // "input...missing" or "missing...input" within a tool_use mention.
+  missingToolInput: /tool[_ ]use.*(input.*missing|missing.*input)|missing tool call input|tool input required/i,
+  // Idle/wall-clock timeout from polygram's pm timers, OR
+  // model-side timeout. Mapped to a single class; user message is
+  // identical either way.
+  timeout:          /timed[_ ]out|deadline|idle with no Claude activity|wall-clock ceiling/i,
+  // Generic format/validation errors (400 with no other class
+  // matching). Rare in practice; included so we don't fall through
+  // to "unknown".
+  format:           /invalid[_ ]request|invalid[_ ]json|malformed|bad request/i,
+  // Transient HTTP (5xx upstream Anthropic outage / overload). Only
+  // these get retried by pm. 521-524/529 are Cloudflare codes seen
+  // when Anthropic's edge is degraded.
+  transient5xx:     /\b5(00|02|03|2[1-4]|29)\b|temporarily overloaded|server[_ ]error|service unavailable/i,
+};
+// User-facing message per kind. Polygram-style emoji + concise
+// action hint. `null` means "suppress the user-facing reply" (used
+// for INTERRUPTED inside the abort-grace window — the user already
+// saw their /stop ack).
+const USER_MESSAGES = {
+  rateLimit:        '⚠️ Rate-limited by Anthropic. Try again in a minute.',
+  billing:          '💳 Billing issue on Anthropic — operator needs to top up credits.',
+  authExpired:      '🔑 Claude auth expired. Operator has been notified.',
+  contextOverflow:  '📚 Conversation got too long. Send /new to start fresh.',
+  roleOrdering:     '⚠️ Conversation got into a tangled state. Try /new.',
+  missingToolInput: '⚠️ Session history looks corrupted. Try /new.',
+  timeout:          '⏳ I went quiet too long without finishing. Try resending or simplifying.',
+  format:           '⚠️ Invalid request format. Try rephrasing or /new.',
+  transient5xx:     '☁️ Anthropic is temporarily unavailable. Retrying once…',
+};
+// Auto-recovery actions for kinds where the session is irrecoverable
+// without a reset. Phase 2 of 0.8.0 wires `pm.resetSession()` to
+// these; 0.7.7 just exports the table for forward-compat.
+//
+// Values map to action names that pm understands:
+//   'reset_session' — close current Query, clear sessionId, fresh start
+//   (future) 'compact' — manual compact request, if SDK exposes it
+const AUTO_RECOVER = {
+  roleOrdering:     'reset_session',
+  contextOverflow:  'reset_session',
+  missingToolInput: 'reset_session',
+};
+// Typed-code short-circuits — set on errors polygram throws itself
+// (see lib/process-manager.js), not pattern-matched. Keep these in
+// sync with the codes pm emits.
+const CODES = {
+  // 0.7.6 (item H): queue cap drop. Pre-empts pattern matching so
+  // the queue-overflow message is exact, not classified.
+  QUEUE_OVERFLOW: {
+    kind: 'queueOverflow',
+    userMessage: '⏭ Couldn\'t keep up — this message was skipped while I was processing newer ones. Resend if it still matters.',
+    isTransient: false,
+    autoRecover: null,
+  },
+  // 0.8.0 Phase 1 will set this on pendings rejected via
+  // pm.interrupt(). Matched here so the abort-grace silence works
+  // before the SDK migration lands (pm could start setting it
+  // earlier as a no-op).
+  INTERRUPTED: {
+    kind: 'interrupted',
+    userMessage: null, // suppressed; user already saw the /stop ack
+    isTransient: false,
+    autoRecover: null,
+  },
+  // Phase 2 will set this when pm.resetSession() drains the queue
+  // for any reason (auto-recovery, /new, /reset, auth-expired).
+  RESET_SESSION: {
+    kind: 'resetSession',
+    userMessage: '✨ Started a fresh session.',
+    isTransient: false,
+    autoRecover: null,
+  },
+  // 0.8.0 auth-expired path — set on every pending the daemon
+  // rejects after a 401 surface. Distinct from authExpired pattern
+  // because it's polygram saying "I already noticed and paused"
+  // rather than "I just hit a 401 and am about to handle it".
+  AUTH_EXPIRED: {
+    kind: 'authExpired',
+    userMessage: '🔑 The bot needs re-auth. The operator has been notified. Try again in a few minutes.',
+    isTransient: false,
+    autoRecover: null,
+  },
+};
+/**
+ * Classify an error from any source.
+ *
+ * Accepts:
+ *   - Error / object with `code` / `message`
+ *   - SDKResultMessage with `subtype` and optional `error`
+ *   - SDKAssistantMessage.error (string subtype like 'rate_limit')
+ *   - plain string
+ *   - null/undefined (returns the 'unknown' shape)
+ *
+ * Returns an object with stable shape:
+ *   {
+ *     kind: 'rateLimit' | 'billing' | ... | 'unknown' | code-keyed kind,
+ *     userMessage: string | null,   // null means suppress reply
+ *     isTransient: boolean,         // true → pm should retry once
+ *     autoRecover: 'reset_session' | null,
+ *   }
+ */
+function classify(err) {
+  // Typed-code short-circuit takes priority over pattern matching.
+  // Errors polygram constructs internally (QUEUE_OVERFLOW etc.) set
+  // `err.code` so we don't depend on string content.
+  //
+  // The lookup is restricted to CODES' own keys. CODES is a plain
+  // object, so a bare `CODES[code]` also resolves inherited names
+  // ('constructor', 'toString', '__proto__', …) to truthy values and
+  // would return an object missing the documented
+  // kind/userMessage/isTransient/autoRecover fields.
+  const code = err?.code;
+  if (typeof code === 'string' && Object.prototype.hasOwnProperty.call(CODES, code)) {
+    // Copy so callers can't mutate the shared table entry.
+    return { ...CODES[code] };
+  }
+  // SDKAssistantMessage.error is a short string code from a fixed
+  // union — match those directly, not via regex.
+  if (typeof err === 'string') {
+    const sdkMessageError = matchSdkMessageError(err);
+    if (sdkMessageError) return sdkMessageError;
+  }
+  if (err?.subtype && typeof err.subtype === 'string') {
+    const sdkResultSubtype = matchSdkResultSubtype(err.subtype);
+    if (sdkResultSubtype) return sdkResultSubtype;
+  }
+  // Pattern pass: first matching PATTERNS entry (in declaration
+  // order) decides the kind.
+  const msg = extractMessage(err);
+  for (const [kind, re] of Object.entries(PATTERNS)) {
+    if (re.test(msg)) {
+      return {
+        kind,
+        userMessage: USER_MESSAGES[kind],
+        isTransient: kind === 'transient5xx' || kind === 'rateLimit',
+        autoRecover: AUTO_RECOVER[kind] ?? null,
+      };
+    }
+  }
+  // Fall-through: surface a snippet of the raw error so users at
+  // least know SOMETHING happened. Same shape as before, just
+  // routed through the classifier so callers get a uniform return.
+  // Only the first line, capped at 120 characters, is shown.
+  const reason = msg.split('\n')[0].slice(0, 120);
+  return {
+    kind: 'unknown',
+    userMessage: `Hit a snag: ${reason || 'unknown error'}. Try resending.`,
+    isTransient: false,
+    autoRecover: null,
+  };
+}
+// Pull a string out of whatever shape the caller passed.
+//
+// Accepts null/undefined (→ ''), a plain string, or any object with a
+// truthy `message` or `error` field (`message` wins when both are
+// set). When that field is itself an object — e.g. a structured
+// error payload forwarded as `{ message: event.error }` — it is
+// unwrapped the same way instead of being stringified to
+// "[object Object]", which no pattern could ever match. Unwrapping
+// is bounded so a self-referencing payload cannot loop forever.
+// Anything else falls back to String(value).
+function extractMessage(err) {
+  const MAX_UNWRAP_DEPTH = 5;
+  let current = err;
+  for (let depth = 0; depth < MAX_UNWRAP_DEPTH; depth++) {
+    if (current == null) return '';
+    if (typeof current === 'string') return current;
+    const inner = current.message || current.error;
+    if (!inner) return String(current);
+    if (typeof inner !== 'object') return String(inner);
+    current = inner;
+  }
+  return String(current);
+}
+// SDKAssistantMessage.error fields are a small fixed union
+// (sdk.d.ts:2343). Map directly so we don't depend on transport-
+// specific error text.
+const SDK_MESSAGE_ERROR_MAP = {
+  authentication_failed: 'authExpired',
+  billing_error:         'billing',
+  rate_limit:            'rateLimit',
+  invalid_request:       'format',
+  server_error:          'transient5xx',
+  unknown:               'unknown',
+  max_output_tokens:     'format', // closest match — model gave up
+};
+// Map an SDKAssistantMessage.error code string to the classifier
+// shape, or return null when `s` is not one of the known codes (or
+// is the 'unknown' code) so the caller falls through to pattern
+// matching.
+function matchSdkMessageError(s) {
+  // Own keys only: a bare `SDK_MESSAGE_ERROR_MAP[s]` would resolve
+  // inherited names such as 'constructor' or 'toString' to a function
+  // and hand it back as `kind`.
+  if (!Object.prototype.hasOwnProperty.call(SDK_MESSAGE_ERROR_MAP, s)) return null;
+  const kind = SDK_MESSAGE_ERROR_MAP[s];
+  if (kind === 'unknown') return null; // fall through to pattern match
+  return {
+    kind,
+    userMessage: USER_MESSAGES[kind] ?? null,
+    isTransient: kind === 'transient5xx' || kind === 'rateLimit',
+    autoRecover: AUTO_RECOVER[kind] ?? null,
+  };
+}
+// SDKResultMessage.subtype values (sdk.d.ts:3121). Most are
+// terminal-error indicators that don't have a clean pattern equivalent.
+const SDK_RESULT_SUBTYPE_MAP = {
+  error_during_execution:           'unknown',
+  error_max_turns:                  'format',
+  error_max_budget_usd:             'billing',
+  error_max_structured_output_retries: 'format',
+};
+// Map an SDKResultMessage.subtype string to the classifier shape, or
+// return null for 'success', for unrecognised subtypes, and for
+// subtypes mapped to 'unknown' (the caller then falls through to
+// pattern matching).
+function matchSdkResultSubtype(s) {
+  if (s === 'success') return null;
+  // Own keys only: a bare `SDK_RESULT_SUBTYPE_MAP[s]` would resolve
+  // inherited names such as 'constructor' or 'toString' to a function
+  // and hand it back as `kind`.
+  if (!Object.prototype.hasOwnProperty.call(SDK_RESULT_SUBTYPE_MAP, s)) return null;
+  const kind = SDK_RESULT_SUBTYPE_MAP[s];
+  if (kind === 'unknown') return null;
+  return {
+    kind,
+    userMessage: USER_MESSAGES[kind] ?? null,
+    isTransient: false, // result subtypes don't auto-retry; the
+                        // turn already burned its budget.
+    autoRecover: AUTO_RECOVER[kind] ?? null,
+  };
+}
+// Convenience predicate: runs `err` through classify() and reports
+// its `isTransient` flag. The pattern and SDK-message-error paths of
+// the classifier set that flag only for the transient5xx and
+// rateLimit kinds.
+// Per v4 plan §6.6 H1/M2 the retry itself only fires when the turn
+// produced ZERO assistant messages (idempotency); that condition is
+// checked by pm, not here.
+function isTransientHttpError(err) {
+  const { isTransient } = classify(err);
+  return isTransient;
+}
+module.exports = {
+  classify,
+  isTransientHttpError,
+  PATTERNS,
+  USER_MESSAGES,
+  AUTO_RECOVER,
+  CODES,
+};

package/lib/process-manager.js CHANGED Viewed

@@ -26,9 +26,27 @@
  */
 const { createInterface } = require('readline');
+const { isTransientHttpError } = require('./error-classify');
 const DEFAULT_CAP = 10;
 const DEFAULT_KILL_TIMEOUT_MS = 3000;
+// 0.7.7: transient HTTP retry. When Anthropic returns a 5xx (or 429
+// rate-limit) and the turn produced ZERO assistant messages so far,
+// pm sleeps and retries the user message ONCE before surfacing the
+// error to the user. Matches OpenClaw's
+// pi-embedded-Vt2x_Jl3.js:39210-39216 — "single retry, then surface".
+// Idempotency-protected: we only retry if no assistant content has
+// streamed (otherwise re-sending would replay tools that already ran).
+const DEFAULT_TRANSIENT_RETRY_DELAY_MS = 2500;
+const MAX_TRANSIENT_RETRIES = 1;
+// 0.7.6 (item H): hard cap on per-session pending queue depth.
+// Pre-fix, a chat with rapid-fire user messages (or a stuck Claude that
+// stops emitting `result`) could grow pendingQueue unbounded — each
+// pending holds a streamer + reactor + timers, so a runaway client
+// could exhaust memory or burn API quota for ack reactions on every
+// dropped message. 50 is generous (a normal turn never queues more
+// than a handful) but safely bounded.
+const DEFAULT_QUEUE_CAP = 50;
 /**
  * Pull user-visible text from a stream-json `assistant` event.
@@ -47,9 +65,38 @@ function extractAssistantText(event) {
   return parts.join('\n\n').trim().replace(/([^:]):\s*$/, '$1…');
 }
+// 0.7.6 (item F): turn-wide token totals.
+//
+// Takes a Map whose values are per-message usage objects (the
+// last-seen usage for each assistant message id) and adds up the four
+// canonical counters across all of them. Returns a fresh object with
+// input_tokens, output_tokens, cache_creation_input_tokens and
+// cache_read_input_tokens, each starting at 0.
+//
+// Tolerant of gaps: empty map values are skipped, and a counter only
+// contributes when it is a finite number (older claude versions may
+// omit the cache_*_input_tokens fields).
+function sumUsage(usageByMessage) {
+  const counters = [
+    'input_tokens',
+    'output_tokens',
+    'cache_creation_input_tokens',
+    'cache_read_input_tokens',
+  ];
+  const totals = Object.fromEntries(counters.map((name) => [name, 0]));
+  for (const usage of usageByMessage.values()) {
+    if (!usage) continue;
+    for (const name of counters) {
+      const value = usage[name];
+      if (Number.isFinite(value)) totals[name] += value;
+    }
+  }
+  return totals;
+}
 class ProcessManager {
   constructor({
     cap = DEFAULT_CAP,
+    queueCap = DEFAULT_QUEUE_CAP,
     spawnFn,
     db = null,
     logger = console,
@@ -61,9 +108,11 @@ class ProcessManager {
     onToolUse = null,     // (sessionKey, toolName, entry) → void — routes to pendingQueue[0]
     onAssistantMessageStart = null, // (sessionKey, entry) → void — fires when a NEW top-level assistant message begins (after a previous one ended). Used by polygram.js to call streamer.forceNewMessage() so each assistant message gets its own bubble.
     onRespawn = null,     // (sessionKey, reason, entry) → void — fires after graceful drain-and-kill
+    onQueueDrop = null,   // 0.7.6: (sessionKey, droppedPending, entry) → void — fired when a pending is dropped because pendingQueue exceeded queueCap. Polygram uses this to surface a warning on the dropped message.
   } = {}) {
     if (!spawnFn) throw new Error('spawnFn required');
     this.cap = cap;
+    this.queueCap = queueCap;
     this.spawnFn = spawnFn;
     this.db = db;
     this.logger = logger;
@@ -75,6 +124,7 @@ class ProcessManager {
     this.onToolUse = onToolUse;
     this.onAssistantMessageStart = onAssistantMessageStart;
     this.onRespawn = onRespawn;
+    this.onQueueDrop = onQueueDrop;
     this.procs = new Map();
   }
@@ -288,9 +338,35 @@ class ProcessManager {
         // pending. Fire onFirstStream ONCE, regardless of whether the
         // assistant message has text or only tool_use blocks (some turns
         // emit tool_use first with no preamble).
-        if (added || (Array.isArray(event.message?.content)
-            && event.message.content.some((b) => b?.type === 'tool_use'))) {
+        const hasAssistantContent = !!added || (Array.isArray(event.message?.content)
+            && event.message.content.some((b) => b?.type === 'tool_use'));
+        if (hasAssistantContent) {
           head.fireFirstStream?.();
+          // 0.7.7: any assistant content (text OR tool_use) disqualifies
+          // the turn from transient-retry — re-sending the user prompt
+          // after this point would replay tools that already executed.
+          head.firstAssistantSeen = true;
+        }
+        // 0.7.6 (item F): accumulate usage + counters for turn telemetry.
+        // The `result` event carries total_cost_usd + duration_ms but NOT
+        // a usage breakdown; usage lives on each assistant.message.usage.
+        // Anthropic emits cumulative totals per assistant message id
+        // (so within a single message the last usage seen wins; across
+        // distinct messages they sum).
+        const usage = event.message?.usage;
+        if (usage) {
+          if (messageId != null && head.lastUsageMessageId === messageId) {
+            // same message, replace running totals for this message
+            head.usageByMessage.set(messageId, usage);
+          } else {
+            head.lastUsageMessageId = messageId;
+            head.usageByMessage.set(messageId, usage);
+          }
+        }
+        if (Array.isArray(event.message?.content)) {
+          for (const b of event.message.content) {
+            if (b?.type === 'tool_use') head.toolUseCount++;
+          }
         }
         if (added) {
           // Pre-0.7.0 we did `streamText = streamText + '\n\n' + added`,
@@ -331,15 +407,89 @@ class ProcessManager {
       }
       if (event.type === 'result' && head) {
+        // 0.7.7: transient HTTP retry. If Anthropic returned a
+        // retryable error AND the turn produced ZERO assistant
+        // content yet AND we haven't already retried, sleep and
+        // re-write the prompt instead of resolving the pending.
+        // Idempotency: firstAssistantSeen guards against replaying
+        // tools that already ran.
+        const errSignal = event.error || event.subtype;
+        const isError = event.subtype !== 'success';
+        const shouldTransientRetry = isError
+          && !head.firstAssistantSeen
+          && head.transientRetries < MAX_TRANSIENT_RETRIES
+          && head.prompt != null
+          && isTransientHttpError({ message: errSignal, subtype: event.subtype });
+        if (shouldTransientRetry) {
+          head.transientRetries++;
+          this._logEvent('transient-retry', {
+            session_key: sessionKey,
+            chat_id: entry.chatId,
+            attempt: head.transientRetries,
+            subtype: event.subtype,
+            error: typeof errSignal === 'string' ? errSignal.slice(0, 200) : null,
+          });
+          // Reset accumulators so the retried turn's metrics aren't
+          // contaminated by the failed-turn's totals (usage on a
+          // failed turn IS billed but we surface it as a separate
+          // event-log entry rather than mixing into turn_metrics).
+          head.usageByMessage = new Map();
+          head.lastUsageMessageId = null;
+          head.toolUseCount = 0;
+          head.streamText = '';
+          head.lastAssistantMessageId = null;
+          // Re-arm idle timer (the old one is still ticking from the
+          // previous activate; resetIdleTimer just re-arms).
+          head.resetIdleTimer?.();
+          // Sleep then re-write. Keep the pending in-place; the next
+          // 'result' event resolves it normally (or hits the same
+          // retry path if MAX_TRANSIENT_RETRIES hadn't been
+          // exhausted, which after the increment above it has).
+          setTimeout(() => {
+            // Edge case: pending was killed/aborted during the
+            // retry sleep — process exited, queue drained, etc.
+            // Skip the re-write if pendingQueue no longer holds us.
+            if (entry.pendingQueue[0] !== head || entry.closed) return;
+            try {
+              entry.proc.stdin.write(JSON.stringify({
+                type: 'user',
+                message: { role: 'user', content: head.prompt },
+              }) + '\n');
+            } catch (err) {
+              // stdin write failed — fall back to surfacing the
+              // error. Mark as not-retried-anymore so we don't loop.
+              this.logger.error(`[${entry.label}] transient-retry stdin write failed: ${err.message}`);
+              entry.pendingQueue.shift();
+              head.clearTimers();
+              head.reject(err);
+            }
+          }, DEFAULT_TRANSIENT_RETRY_DELAY_MS);
+          return; // don't shift / resolve; wait for next result
+        }
         entry.pendingQueue.shift();
         head.clearTimers();
         if (this.onResult) this.onResult(sessionKey, event, entry, head);
+        // 0.7.6 (item F): sum usage across distinct assistant messages
+        // (each message id seen got its last-known usage stored; sum the
+        // map values). Yields a single-row metric summary the caller
+        // can persist via db.insertTurnMetric().
+        const usageTotals = sumUsage(head.usageByMessage);
         head.resolve({
           text: event.result || '',
           sessionId: event.session_id,
           cost: event.total_cost_usd,
           duration: event.duration_ms,
           error: event.subtype === 'success' ? null : (event.error || event.subtype),
+          metrics: {
+            inputTokens: usageTotals.input_tokens,
+            outputTokens: usageTotals.output_tokens,
+            cacheCreationTokens: usageTotals.cache_creation_input_tokens,
+            cacheReadTokens: usageTotals.cache_read_input_tokens,
+            numAssistantMessages: head.usageByMessage.size,
+            numToolUses: head.toolUseCount,
+            resultSubtype: event.subtype || null,
+          },
         });
         // Activate next head or settle idle state.
         if (entry.pendingQueue.length > 0) {
@@ -456,6 +606,14 @@ class ProcessManager {
         idleTimer: null,
         maxTimer: null,
         activated: false,
+        // 0.7.6 (item F): per-turn telemetry accumulators. usageByMessage
+        // collects each assistant message's last-seen usage; we sum
+        // across messages at result time (each id is summed once, not
+        // per stream chunk, since usage in stream-json is cumulative
+        // *within* a message — last-seen-per-message wins).
+        usageByMessage: new Map(),
+        lastUsageMessageId: null,
+        toolUseCount: 0,
         // 0.7.4 (item B): set true when the first stream event (assistant
         // text or tool_use) arrives for this pending. Fires
         // `context.onFirstStream` once. Used by polygram to flip the
@@ -463,6 +621,15 @@ class ProcessManager {
         // producing output, not when the pending becomes queue head
         // (which can be ~hundreds of ms before the first token).
         firstStreamFired: false,
+        // 0.7.7: transient-retry support. We hold the prompt so we can
+        // re-write it on transient 5xx/429 if zero assistant content
+        // streamed yet. firstAssistantSeen flips on first assistant
+        // event with non-empty content OR tool_use blocks — once true,
+        // retry is no longer idempotent (we'd replay executed tools)
+        // and pm surfaces the error instead.
+        prompt,
+        transientRetries: 0,
+        firstAssistantSeen: false,
       };
       pending.fireFirstStream = () => {
@@ -524,8 +691,40 @@ class ProcessManager {
         pending.idleTimer = idleTimer;
       };
+      // 0.7.6 (item H): enforce per-session queue cap. Drop the OLDEST
+      // non-active pending (index 1 — index 0 is the in-flight head and
+      // killing it mid-turn would corrupt Claude's state). The dropped
+      // pending's promise rejects so its handler (polygram.js) can
+      // surface a "couldn't keep up — message dropped" warning to the
+      // user. We drop AFTER pushing the new pending so the cap means
+      // "at most queueCap pendings live", not "refuse to enqueue past N".
+      // Refusing the new write would lose the most recent message —
+      // usually the one the user actually cares about — whereas
+      // dropping the oldest preserves recency at the cost of a stale
+      // queued turn that the user has likely moved past anyway.
       entry.pendingQueue.push(pending);
       entry.inFlight = true;
+      while (entry.pendingQueue.length > this.queueCap) {
+        // Splice at index 1 to leave the active head intact.
+        const dropped = entry.pendingQueue.splice(1, 1)[0];
+        if (!dropped) break;
+        dropped.clearTimers?.();
+        const dropErr = new Error(
+          `queue overflow: dropped (queue cap ${this.queueCap})`,
+        );
+        dropErr.code = 'QUEUE_OVERFLOW';
+        this._logEvent('queue-overflow-drop', {
+          session_key: sessionKey,
+          chat_id: entry.chatId,
+          queue_len: entry.pendingQueue.length,
+          source_msg_id: dropped.context?.sourceMsgId ?? null,
+        });
+        if (this.onQueueDrop) {
+          try { this.onQueueDrop(sessionKey, dropped, entry); }
+          catch (err) { this.logger.error(`[${entry.label}] onQueueDrop: ${err.message}`); }
+        }
+        dropped.reject(dropErr);
+      }
       // If we're the only pending, activate immediately. Otherwise wait
       // until the preceding pending is shifted out.
@@ -552,4 +751,10 @@ class ProcessManager {
   }
 }
-module.exports = { ProcessManager, DEFAULT_CAP, extractAssistantText };
+module.exports = {
+  ProcessManager,
+  DEFAULT_CAP,
+  DEFAULT_QUEUE_CAP,
+  extractAssistantText,
+  sumUsage,
+};

package/migrations/009-turn-metrics.sql ADDED Viewed

@@ -0,0 +1,42 @@
+-- 0.7.6 (item F): turn_metrics table.
+--
+-- Stream-json `result` events from `claude -p` carry total_cost_usd and
+-- duration_ms (already pulled into pending.resolve()), plus a `usage`
+-- block on each `assistant` event with token counts including cache hits.
+-- Pre-fix all of this was logged to console only; once a turn was done
+-- the cost was unrecoverable for analysis.
+--
+-- This table persists per-turn metrics keyed by (chat_id, msg_id) so we
+-- can answer questions like:
+--   - cost / day per bot
+--   - cache hit rate per chat
+--   - which chats have the longest turns
+--   - which models are most expensive overall
+--
+-- Stored at turn end (in onResult callback). One row per dispatched
+-- user-message-to-final-reply cycle, even if the cycle had multiple
+-- assistant messages (those are aggregated).
+CREATE TABLE IF NOT EXISTS turn_metrics (
+  id INTEGER PRIMARY KEY AUTOINCREMENT,
+  ts INTEGER NOT NULL,                  -- turn end timestamp (ms)
+  chat_id TEXT NOT NULL,
+  thread_id TEXT,
+  msg_id INTEGER NOT NULL,              -- inbound message_id that started turn
+  session_id TEXT,                      -- claude session UUID for resume
+  bot_name TEXT,                        -- 'shumabit' / 'umi-assistant' / etc
+  model TEXT,                           -- chatConfig.model at turn start
+  effort TEXT,                          -- chatConfig.effort
+  input_tokens INTEGER,
+  output_tokens INTEGER,
+  cache_creation_tokens INTEGER,
+  cache_read_tokens INTEGER,
+  cost_usd REAL,
+  duration_ms INTEGER,
+  num_assistant_messages INTEGER,       -- top-level message count (forceNewMessage events)
+  num_tool_uses INTEGER,
+  result_subtype TEXT,                  -- 'success' / 'error_max_turns' / etc
+  error TEXT
+);
+CREATE INDEX IF NOT EXISTS idx_turn_metrics_chat_ts ON turn_metrics(chat_id, ts DESC);
+CREATE INDEX IF NOT EXISTS idx_turn_metrics_recent ON turn_metrics(ts DESC);
+CREATE INDEX IF NOT EXISTS idx_turn_metrics_session ON turn_metrics(session_id);

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "polygram",
-  "version": "0.7.5",
+  "version": "0.7.7",
   "description": "Telegram daemon for Claude Code that preserves the OpenClaw per-chat session model. Migration path for OpenClaw users moving to Claude Code.",
   "main": "lib/ipc-client.js",
   "bin": {

package/polygram.js CHANGED Viewed

@@ -40,6 +40,7 @@ const { startTyping } = require('./lib/typing-indicator');
 const { redactBotToken } = require('./lib/net-errors');
 const { createReactionManager, classifyToolName } = require('./lib/status-reactions');
 const { createMediaGroupBuffer } = require('./lib/media-group-buffer');
+const { classify: classifyError, isTransientHttpError } = require('./lib/error-classify');
 const {
   createStore: createApprovalsStore,
   matchesAnyPattern: matchesApprovalPattern,
@@ -842,22 +843,19 @@ let isShuttingDown = false;
 // killed). Anything we don't recognise falls back to a generic line
 // with a single-line snippet of the error so the user can at least
 // distinguish unique failures from the obvious "try again" cases.
+// 0.7.7: errorReplyText delegates to lib/error-classify.js so the
+// regex tables live in one place and stay in sync with future SDK
+// error subtypes (the 0.8.0 migration extends the classifier rather
+// than adding more if-branches here).
+//
+// classify() returns { kind, userMessage, isTransient, autoRecover }.
+// `userMessage: null` is a deliberate "suppress reply" signal —
+// today only used by INTERRUPTED in the abort-grace window. Callers
+// that already gate on isSessionRecentlyAborted will short-circuit
+// before reaching here, but we honour `null` defensively.
 function errorReplyText(err) {
-  const msg = err?.message || '';
-  if (/idle with no Claude activity/i.test(msg)) {
-    return '⏳ I went quiet too long without finishing. Try resending or simplifying the task.';
-  }
-  if (/wall-clock ceiling/i.test(msg)) {
-    return '⏱ This was taking too long, so I stopped. Try resending or simplifying the task.';
-  }
-  if (/Process (exited|killed)/i.test(msg)) {
-    return '💥 Something crashed on my end. Try again.';
-  }
-  if (/error_during_execution/i.test(msg)) {
-    return '💥 Something went wrong mid-stream. Try again.';
-  }
-  const reason = msg.split('\n')[0].slice(0, 120);
-  return `Hit a snag: ${reason || 'unknown error'}. Try resending.`;
+  const { userMessage } = classifyError(err);
+  return userMessage; // may be null — caller must handle
 }
 // Sessions the operator just /stop'd (or natural-language "стоп"). Keyed
@@ -937,13 +935,21 @@ function dispatchHandleMessage(sessionKey, chatId, msg, bot) {
     //    re-dispatch it on next start)
     //  - user just /stop'd (already saw their abort acknowledgement)
     if (!wasAborted && !isReplay && !isShuttingDown) {
-      tg(bot, 'sendMessage', {
-        chat_id: chatId,
-        text: errorReplyText(err),
-        reply_parameters: { message_id: msg.message_id },
-      }, { source: 'error-reply', botName: BOT_NAME }).catch((replyErr) => {
-        console.error(`[${sessionKey}] failed to send error reply: ${replyErr.message}`);
-      });
+      // 0.7.7: errorReplyText may return null when the classifier
+      // says "suppress reply" (e.g. INTERRUPTED inside abort grace —
+      // user already saw their /stop ack). Skip the send call in
+      // that case rather than dispatching empty text (which would
+      // 400 at the lib/telegram.js empty-text guard added in 0.7.4).
+      const replyText = errorReplyText(err);
+      if (replyText) {
+        tg(bot, 'sendMessage', {
+          chat_id: chatId,
+          text: replyText,
+          reply_parameters: { message_id: msg.message_id },
+        }, { source: 'error-reply', botName: BOT_NAME }).catch((replyErr) => {
+          console.error(`[${sessionKey}] failed to send error reply: ${replyErr.message}`);
+        });
+      }
     }
   }).finally(() => {
     const n = (inFlightHandlers.get(sessionKey) || 1) - 1;
@@ -1893,6 +1899,32 @@ async function handleMessage(sessionKey, chatId, msg, bot) {
     });
     const elapsed = ((Date.now() - t0) / 1000).toFixed(1);
+    // 0.7.6 (item F): persist per-turn telemetry. Stream-json result
+    // events carry total_cost_usd + duration_ms; sumUsage rolled up
+    // input/output/cache token counts from per-message usage. One row
+    // per dispatched user message; queryable via turn_metrics table.
+    if (result.metrics) {
+      dbWrite(() => db.insertTurnMetric({
+        chat_id: chatId,
+        thread_id: threadId,
+        msg_id: msg.message_id,
+        session_id: result.sessionId,
+        bot_name: BOT_NAME,
+        model: chatConfig.model,
+        effort: chatConfig.effort,
+        input_tokens: result.metrics.inputTokens,
+        output_tokens: result.metrics.outputTokens,
+        cache_creation_tokens: result.metrics.cacheCreationTokens,
+        cache_read_tokens: result.metrics.cacheReadTokens,
+        cost_usd: result.cost,
+        duration_ms: result.duration,
+        num_assistant_messages: result.metrics.numAssistantMessages,
+        num_tool_uses: result.metrics.numToolUses,
+        result_subtype: result.metrics.resultSubtype,
+        error: result.error || null,
+      }), 'insert turn_metric');
+    }
     stopTyping();
     if (result.error) {