polygram 0.7.5 → 0.7.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "$schema": "https://anthropic.com/claude-code/plugin.schema.json",
3
3
  "name": "polygram",
4
- "version": "0.7.5",
4
+ "version": "0.7.7",
5
5
  "description": "Telegram integration for Claude Code that preserves the OpenClaw per-chat session model. Migration target for OpenClaw users. Multi-bot, multi-chat, per-topic isolation; SQLite transcripts; inline-keyboard approvals. Bundles /polygram:status|logs|pair-code|approvals admin commands and a history skill.",
6
6
  "keywords": [
7
7
  "telegram",
package/lib/db.js CHANGED
@@ -152,6 +152,26 @@ function wrap(db) {
152
152
  VALUES (?, ?, ?, ?)
153
153
  `);
154
154
 
155
+ // 0.7.6 (item F): per-turn cost / token / duration metrics. Persisted
156
+ // at turn end (onResult callback). One row per dispatched user
157
+ // message → final reply cycle, even if the cycle had multiple
158
+ // assistant messages. See migrations/009-turn-metrics.sql.
159
+ const insertTurnMetricStmt = db.prepare(`
160
+ INSERT INTO turn_metrics (
161
+ ts, chat_id, thread_id, msg_id, session_id, bot_name,
162
+ model, effort,
163
+ input_tokens, output_tokens, cache_creation_tokens, cache_read_tokens,
164
+ cost_usd, duration_ms, num_assistant_messages, num_tool_uses,
165
+ result_subtype, error
166
+ ) VALUES (
167
+ @ts, @chat_id, @thread_id, @msg_id, @session_id, @bot_name,
168
+ @model, @effort,
169
+ @input_tokens, @output_tokens, @cache_creation_tokens, @cache_read_tokens,
170
+ @cost_usd, @duration_ms, @num_assistant_messages, @num_tool_uses,
171
+ @result_subtype, @error
172
+ )
173
+ `);
174
+
155
175
  const logConfigChangeStmt = db.prepare(`
156
176
  INSERT INTO config_changes (
157
177
  chat_id, thread_id, field, old_value, new_value,
@@ -277,6 +297,29 @@ function wrap(db) {
277
297
  );
278
298
  },
279
299
 
300
+ insertTurnMetric(row) {
301
+ return insertTurnMetricStmt.run({
302
+ ts: row.ts || Date.now(),
303
+ chat_id: String(row.chat_id),
304
+ thread_id: row.thread_id != null ? String(row.thread_id) : null,
305
+ msg_id: row.msg_id,
306
+ session_id: row.session_id || null,
307
+ bot_name: row.bot_name || null,
308
+ model: row.model || null,
309
+ effort: row.effort || null,
310
+ input_tokens: row.input_tokens ?? null,
311
+ output_tokens: row.output_tokens ?? null,
312
+ cache_creation_tokens: row.cache_creation_tokens ?? null,
313
+ cache_read_tokens: row.cache_read_tokens ?? null,
314
+ cost_usd: row.cost_usd ?? null,
315
+ duration_ms: row.duration_ms ?? null,
316
+ num_assistant_messages: row.num_assistant_messages ?? null,
317
+ num_tool_uses: row.num_tool_uses ?? null,
318
+ result_subtype: row.result_subtype || null,
319
+ error: row.error || null,
320
+ });
321
+ },
322
+
280
323
  logConfigChange(row) {
281
324
  return logConfigChangeStmt.run({
282
325
  chat_id: String(row.chat_id),
@@ -0,0 +1,290 @@
1
+ /**
2
+ * Error classifier — maps any error from any source to a stable shape.
3
+ *
4
+ * Sources today (0.7.7): stream-json `result` events with error
5
+ * subtypes, child_process `'close'`/`'error'` event errors, idle
6
+ * timer fires, polygram-internal Errors with `err.code` set.
7
+ *
8
+ * Sources after 0.8.0 SDK migration: SDK iterator throws
9
+ * (`AbortError` named class plus plain `Error`s), `SDKResultMessage`
10
+ * with subtypes `error_during_execution` / `error_max_turns` /
11
+ * `error_max_budget_usd` / `error_max_structured_output_retries`,
12
+ * per-message `SDKAssistantMessage.error` subtypes
13
+ * (`authentication_failed` / `billing_error` / `rate_limit` /
14
+ * `invalid_request` / `server_error` / `unknown` / `max_output_tokens`),
15
+ * 5xx HTTP errors that bubble through the SDK transport.
16
+ *
17
+ * Returning the same shape regardless of transport means
18
+ * `errorReplyText` in polygram.js doesn't grow N branches every time
19
+ * a new error class shows up — we just add a row to PATTERNS or a
20
+ * `code:` short-circuit at the top.
21
+ *
22
+ * Layered ship order (per v4 plan §6.5.1):
23
+ * - 0.7.7 (this file): transport-agnostic patterns and the public
24
+ * `classify()` API. Polygram.js's `errorReplyText` consults this
25
+ * module directly.
26
+ * - Phase 1 of 0.8.0 (later): adds typed-code branches for
27
+ * `INTERRUPTED`, plus SDK `error_max_structured_output_retries`,
28
+ * plus per-message SDK error subtypes.
29
+ * - Phase 2 of 0.8.0 (later): adds AUTO_RECOVER actions
30
+ * (`reset_session` etc) so pm can self-heal stuck sessions
31
+ * without waiting for the user to type /new.
32
+ */
33
+
34
+ 'use strict';
35
+
36
// Substring/regex patterns matched against the error string. Callers
// iterate the entries in insertion order and stop at the first match, so
// order matters whenever patterns overlap — `transient5xx` is last
// because the others (auth/billing/format) carry their own status codes.
//
// Multi-word phrases accept `-`, `_` or a space between words
// (`[-_ ]`). The hyphen matters: upstream text such as "rate-limited"
// or "timed-out" would otherwise fall through to the 'unknown' kind.
const PATTERNS = {
  // Anthropic API rate limit (429) — "rate-limited", "rate_limit",
  // "Too Many Requests", token-bucket exhaustion text.
  rateLimit: /\b429\b|rate[-_ ]?limit|too[-_ ]many[-_ ]requests|tokens? per minute/i,

  // Billing / quota (402, "insufficient credit"). Fires before any
  // model call when the workspace is out of funds.
  billing: /\b402\b|payment[-_ ]required|billing|insufficient[-_ ]credit/i,

  // Auth: 401/403, OAuth token expiry, refresh failure. The 0.8.0
  // plan ships an explicit auth-expired UX (admin-chat notify +
  // pause); 0.7.7 just maps to a friendlier user message.
  authExpired: /\b401\b|\b403\b|unauthor(ized|ised)|forbidden|token[-_ ]expired|oauth[-_ ]token[-_ ]refresh[-_ ]failed/i,

  // Context window exceeded — too many tokens for the model. Usually
  // surfaces as `prompt is too long` from Anthropic; sometimes as
  // generic "exceeds maximum context" depending on SDK version.
  contextOverflow: /context[-_ ](window|length)|prompt[-_ ]too[-_ ]large|exceeds[-_ ]maximum[-_ ]context|prompt is too long/i,

  // Role alternation / message ordering — fires when transcript has
  // consecutive same-role messages or a tool_use without matching
  // tool_result. Polygram doesn't generate these directly, but they
  // can surface after an interrupted turn.
  roleOrdering: /role.*alternat|message[-_ ]ordering|consecutive (user|assistant)/i,

  // Tool call missing required `input` field. Indicates corrupted
  // history; user-facing message tells them to /new. Word order
  // varies across Anthropic SDK versions — accept either
  // "input...missing" or "missing...input" within a tool_use mention.
  missingToolInput: /tool[-_ ]use.*(input.*missing|missing.*input)|missing tool call input|tool input required/i,

  // Idle/wall-clock timeout from polygram's pm timers, OR
  // model-side timeout. Mapped to a single class; user message is
  // identical either way.
  timeout: /timed[-_ ]out|deadline|idle with no Claude activity|wall-clock ceiling/i,

  // Generic format/validation errors (400 with no other class
  // matching). Rare in practice; included so we don't fall through
  // to "unknown".
  format: /invalid[-_ ]request|invalid[-_ ]json|malformed|bad request/i,

  // Transient HTTP (5xx upstream Anthropic outage / overload). Only
  // these get retried by pm. 521-524/529 are Cloudflare codes seen
  // when Anthropic's edge is degraded.
  transient5xx: /\b5(00|02|03|2[1-4]|29)\b|temporarily overloaded|server[-_ ]error|service unavailable/i,
};
86
+
87
// User-facing reply text for each pattern-derived kind. The keys mirror
// the keys of PATTERNS one-to-one; classify() looks the text up by kind.
// Style: leading emoji + one concise action hint.
//
// Every entry here is a non-null string. The `null` convention
// ("suppress the user-facing reply") is only used by the INTERRUPTED
// entry in CODES, where the user has already seen their /stop ack.
//
// NOTE(review): process-manager performs its single transient retry
// before the error is resolved to the caller, so by the time the
// `transient5xx` text is displayed the retry may already be spent —
// confirm "Retrying once…" is still the intended wording.
const USER_MESSAGES = {
  rateLimit: '⚠️ Rate-limited by Anthropic. Try again in a minute.',
  billing: '💳 Billing issue on Anthropic — operator needs to top up credits.',
  authExpired: '🔑 Claude auth expired. Operator has been notified.',
  contextOverflow: '📚 Conversation got too long. Send /new to start fresh.',
  roleOrdering: '⚠️ Conversation got into a tangled state. Try /new.',
  missingToolInput: '⚠️ Session history looks corrupted. Try /new.',
  timeout: '⏳ I went quiet too long without finishing. Try resending or simplifying.',
  format: '⚠️ Invalid request format. Try rephrasing or /new.',
  transient5xx: '☁️ Anthropic is temporarily unavailable. Retrying once…',
};
102
+
103
// Recovery action per kind, for kinds whose session cannot continue
// without a reset. classify() copies the value into `autoRecover`
// (kinds absent from this table get `null`). Phase 2 of 0.8.0 wires
// `pm.resetSession()` to these; 0.7.7 only exports the table for
// forward-compat.
//
// Action names pm understands:
//   'reset_session' — close current Query, clear sessionId, fresh start
//   (future) 'compact' — manual compact request, if SDK exposes it
const RESET_SESSION_ACTION = 'reset_session';

const AUTO_RECOVER = {
  roleOrdering: RESET_SESSION_ACTION,
  contextOverflow: RESET_SESSION_ACTION,
  missingToolInput: RESET_SESSION_ACTION,
};
115
+
116
// Typed-code short-circuits. These are keyed by the `err.code` that
// polygram sets on errors it throws itself (see
// lib/process-manager.js) and are resolved before any pattern
// matching. Keep the keys in sync with the codes pm emits.
//
// Every typed code is terminal: never transient, never auto-recovered.
// The builder below fills in those two fixed fields so each entry only
// states what actually varies.
const typedCode = (kind, userMessage) => ({
  kind,
  userMessage,
  isTransient: false,
  autoRecover: null,
});

const CODES = {
  // 0.7.6 (item H): queue cap drop. Pre-empts pattern matching so
  // the queue-overflow message is exact, not classified.
  QUEUE_OVERFLOW: typedCode(
    'queueOverflow',
    '⏭ Couldn\'t keep up — this message was skipped while I was processing newer ones. Resend if it still matters.',
  ),

  // 0.8.0 Phase 1 will set this on pendings rejected via
  // pm.interrupt(). Matched here so the abort-grace silence works
  // before the SDK migration lands (pm could start setting it
  // earlier as a no-op). `null` message = reply suppressed; the user
  // already saw the /stop ack.
  INTERRUPTED: typedCode('interrupted', null),

  // Phase 2 will set this when pm.resetSession() drains the queue
  // for any reason (auto-recovery, /new, /reset, auth-expired).
  RESET_SESSION: typedCode('resetSession', '✨ Started a fresh session.'),

  // 0.8.0 auth-expired path — set on every pending the daemon
  // rejects after a 401 surface. Distinct from the authExpired
  // pattern because it's polygram saying "I already noticed and
  // paused" rather than "I just hit a 401 and am about to handle it".
  AUTH_EXPIRED: typedCode(
    'authExpired',
    '🔑 The bot needs re-auth. The operator has been notified. Try again in a few minutes.',
  ),
};
157
+
158
/**
 * Classify an error from any source into one stable shape.
 *
 * Accepts:
 *   - Error / object with `code` / `message`
 *   - SDKResultMessage with `subtype` and optional `error`
 *   - SDKAssistantMessage.error (string subtype like 'rate_limit')
 *   - plain string
 *   - null/undefined (returns the 'unknown' shape)
 *
 * Resolution order (first hit wins):
 *   1. typed `err.code` found in CODES (own keys only),
 *   2. bare string matched by matchSdkMessageError,
 *   3. `err.subtype` matched by matchSdkResultSubtype,
 *   4. PATTERNS regexes, in table order, against the extracted message,
 *   5. the 'unknown' fall-through carrying a snippet of the raw text.
 *
 * @param {*} err - error in any of the shapes listed above.
 * @returns {{kind: string, userMessage: (string|null), isTransient: boolean,
 *   autoRecover: (string|null)}} `userMessage === null` means "suppress
 *   the reply"; `isTransient` is true only for the transient5xx and
 *   rateLimit pattern kinds; `autoRecover` is an AUTO_RECOVER action or null.
 */
function classify(err) {
  // Typed-code short-circuit takes priority over pattern matching.
  // Errors polygram constructs internally (QUEUE_OVERFLOW etc.) set
  // `err.code` so we don't depend on string content. Only OWN keys of
  // CODES count: a plain `CODES[code]` lookup would also resolve
  // inherited names such as 'constructor' or 'toString' and return a
  // shapeless object for them.
  const code = err?.code;
  if (typeof code === 'string' && Object.prototype.hasOwnProperty.call(CODES, code)) {
    // Copy so callers can't mutate the shared table entry.
    return { ...CODES[code] };
  }

  // SDKAssistantMessage.error is a short string code from a fixed
  // union — match those directly, not via regex.
  if (typeof err === 'string') {
    const sdkMessageError = matchSdkMessageError(err);
    if (sdkMessageError) return sdkMessageError;
  }
  if (err?.subtype && typeof err.subtype === 'string') {
    const sdkResultSubtype = matchSdkResultSubtype(err.subtype);
    if (sdkResultSubtype) return sdkResultSubtype;
  }

  const msg = extractMessage(err);
  for (const [kind, re] of Object.entries(PATTERNS)) {
    if (re.test(msg)) {
      return {
        kind,
        userMessage: USER_MESSAGES[kind],
        isTransient: kind === 'transient5xx' || kind === 'rateLimit',
        autoRecover: AUTO_RECOVER[kind] ?? null,
      };
    }
  }

  // Fall-through: surface a snippet of the raw error so users at
  // least know SOMETHING happened. Truncate by code point rather than
  // UTF-16 unit so an emoji or other astral character sitting on the
  // 120 boundary is never cut in half (a lone surrogate would corrupt
  // the outgoing reply text).
  const firstLine = msg.split('\n')[0];
  const reason = Array.from(firstLine).slice(0, 120).join('');
  return {
    kind: 'unknown',
    userMessage: `Hit a snag: ${reason || 'unknown error'}. Try resending.`,
    isTransient: false,
    autoRecover: null,
  };
}
219
+
220
/**
 * Pull a human-readable string out of whatever shape the caller passed.
 *
 * - null/undefined → ''
 * - string → returned as-is
 * - other primitives → String(value)
 * - objects → the first non-empty text found under `.message`, then
 *   `.error`. Those fields are unwrapped recursively, because callers
 *   sometimes wrap a structured payload (e.g. `{ message: event.error }`
 *   where `event.error` is itself an object); stringifying such a value
 *   directly yields the useless "[object Object]".
 * - objects with neither field → String(err) when that is meaningful
 *   (Error instances, arrays, custom toString), otherwise a JSON dump.
 *
 * @param {*} err - value to describe.
 * @param {number} [depth=0] - internal recursion guard; callers omit it.
 * @returns {string} always a string, possibly empty.
 */
function extractMessage(err, depth = 0) {
  if (err == null) return '';
  if (typeof err === 'string') return err;
  if (typeof err !== 'object') return String(err);

  // Bounded unwrapping keeps self-referencing payloads from recursing forever.
  if (depth < 3) {
    for (const nested of [err.message, err.error]) {
      if (!nested) continue;
      const text = extractMessage(nested, depth + 1);
      if (text) return text;
    }
  }

  const text = String(err);
  if (text !== '[object Object]') return text;
  try {
    // JSON.stringify can return undefined (e.g. toJSON yielding undefined).
    return JSON.stringify(err) ?? '';
  } catch {
    // Circular structure or throwing toJSON — keep the generic tag.
    return text;
  }
}
228
+
229
// SDKAssistantMessage.error fields are a small fixed union
// (sdk.d.ts:2343). Map directly so we don't depend on transport-
// specific error text.
const SDK_MESSAGE_ERROR_MAP = {
  authentication_failed: 'authExpired',
  billing_error: 'billing',
  rate_limit: 'rateLimit',
  invalid_request: 'format',
  server_error: 'transient5xx',
  unknown: 'unknown',
  max_output_tokens: 'format', // closest match — model gave up
};

/**
 * Translate a bare SDK per-message error code into the classifier shape.
 *
 * @param {*} s - candidate code; anything that is not one of the map's
 *   own keys yields null.
 * @returns {{kind: string, userMessage: (string|null), isTransient: boolean,
 *   autoRecover: (string|null)}|null} null when `s` is not a known code,
 *   or is the SDK's own 'unknown' code, so the caller falls through to
 *   pattern matching.
 */
function matchSdkMessageError(s) {
  // Own-key check: the input is arbitrary error text, and a plain
  // `MAP[s]` lookup would resolve inherited names ('constructor',
  // 'toString', …) to functions and produce a bogus result whose
  // `userMessage` is null — i.e. a silently suppressed reply.
  if (typeof s !== 'string' || !Object.prototype.hasOwnProperty.call(SDK_MESSAGE_ERROR_MAP, s)) {
    return null;
  }
  const kind = SDK_MESSAGE_ERROR_MAP[s];
  if (kind === 'unknown') return null; // fall through to pattern match
  return {
    kind,
    userMessage: USER_MESSAGES[kind] ?? null,
    isTransient: kind === 'transient5xx' || kind === 'rateLimit',
    autoRecover: AUTO_RECOVER[kind] ?? null,
  };
}
252
+
253
// SDKResultMessage.subtype values (sdk.d.ts:3121). Most are
// terminal-error indicators that don't have a clean pattern equivalent.
const SDK_RESULT_SUBTYPE_MAP = {
  error_during_execution: 'unknown',
  error_max_turns: 'format',
  error_max_budget_usd: 'billing',
  error_max_structured_output_retries: 'format',
};

/**
 * Translate an SDK result subtype into the classifier shape.
 *
 * @param {*} s - the `subtype` of a result message.
 * @returns {{kind: string, userMessage: (string|null), isTransient: boolean,
 *   autoRecover: (string|null)}|null} null for 'success', for subtypes
 *   not in the map, and for subtypes mapped to 'unknown' — in all three
 *   cases the caller falls through to pattern matching.
 */
function matchSdkResultSubtype(s) {
  // Own-key check so inherited names ('constructor', 'toString', …)
  // can't be mistaken for subtypes. 'success' is not a key of the map,
  // so it is rejected here as well.
  if (typeof s !== 'string' || !Object.prototype.hasOwnProperty.call(SDK_RESULT_SUBTYPE_MAP, s)) {
    return null;
  }
  const kind = SDK_RESULT_SUBTYPE_MAP[s];
  if (kind === 'unknown') return null;
  return {
    kind,
    userMessage: USER_MESSAGES[kind] ?? null,
    // Result subtypes don't auto-retry; the turn already burned its budget.
    isTransient: false,
    autoRecover: AUTO_RECOVER[kind] ?? null,
  };
}
273
+
274
/**
 * Report whether an error belongs to a retryable class.
 *
 * This is just the `isTransient` flag of classify(err); the pattern
 * path sets it only for the transient5xx and rateLimit kinds.
 *
 * This predicate knows nothing about the state of the turn. Per v4 plan
 * §6.6 H1/M2 a retry is only idempotent while the turn has produced
 * ZERO assistant content; that condition is the caller's
 * responsibility (pm tracks it on the pending) and is not checked here.
 *
 * @param {*} err - anything classify() accepts.
 * @returns {boolean} the `isTransient` value produced by classify().
 */
function isTransientHttpError(err) {
  const { isTransient } = classify(err);
  return isTransient;
}
282
+
283
+ module.exports = {
284
+ classify,
285
+ isTransientHttpError,
286
+ PATTERNS,
287
+ USER_MESSAGES,
288
+ AUTO_RECOVER,
289
+ CODES,
290
+ };
@@ -26,9 +26,27 @@
26
26
  */
27
27
 
28
28
  const { createInterface } = require('readline');
29
+ const { isTransientHttpError } = require('./error-classify');
29
30
 
30
31
  const DEFAULT_CAP = 10;
31
32
  const DEFAULT_KILL_TIMEOUT_MS = 3000;
33
+ // 0.7.7: transient HTTP retry. When Anthropic returns a 5xx (or 429
34
+ // rate-limit) and the turn produced ZERO assistant messages so far,
35
+ // pm sleeps and retries the user message ONCE before surfacing the
36
+ // error to the user. Matches OpenClaw's
37
+ // pi-embedded-Vt2x_Jl3.js:39210-39216 — "single retry, then surface".
38
+ // Idempotency-protected: we only retry if no assistant content has
39
+ // streamed (otherwise re-sending would replay tools that already ran).
40
+ const DEFAULT_TRANSIENT_RETRY_DELAY_MS = 2500;
41
+ const MAX_TRANSIENT_RETRIES = 1;
42
+ // 0.7.6 (item H): hard cap on per-session pending queue depth.
43
+ // Pre-fix, a chat with rapid-fire user messages (or a stuck Claude that
44
+ // stops emitting `result`) could grow pendingQueue unbounded — each
45
+ // pending holds a streamer + reactor + timers, so a runaway client
46
+ // could exhaust memory or burn API quota for ack reactions on every
47
+ // dropped message. 50 is generous (a normal turn never queues more
48
+ // than a handful) but safely bounded.
49
+ const DEFAULT_QUEUE_CAP = 50;
32
50
 
33
51
  /**
34
52
  * Pull user-visible text from a stream-json `assistant` event.
@@ -47,9 +65,38 @@ function extractAssistantText(event) {
47
65
  return parts.join('\n\n').trim().replace(/([^:]):\s*$/, '$1…');
48
66
  }
49
67
 
68
/**
 * 0.7.6 (item F): total the four canonical usage counters across a
 * collection of per-message usage objects.
 *
 * The caller stores, per assistant message id, the LAST usage object
 * seen for that message; adding up the stored values therefore gives
 * the turn-wide totals.
 *
 * Defensive by design:
 *   - a null/undefined collection yields all-zero totals instead of
 *     throwing, so the helper is safe to call before anything streamed;
 *   - any container exposing `values()` (Map, Set, Array) is accepted,
 *     as is a plain object keyed by message id;
 *   - falsy entries are skipped, and a counter is only added when it is
 *     a finite number — older claude versions may not always emit
 *     cache_*_input_tokens.
 *
 * @param {Map<*, object>|Iterable<object>|object|null|undefined} usageByMessage
 * @returns {{input_tokens: number, output_tokens: number,
 *   cache_creation_input_tokens: number, cache_read_input_tokens: number}}
 */
function sumUsage(usageByMessage) {
  const totals = {
    input_tokens: 0,
    output_tokens: 0,
    cache_creation_input_tokens: 0,
    cache_read_input_tokens: 0,
  };
  if (usageByMessage == null) return totals;

  const usages = typeof usageByMessage.values === 'function'
    ? usageByMessage.values()
    : Object.values(usageByMessage);

  // The keys of `totals` double as the list of counters to read.
  const counters = Object.keys(totals);
  for (const usage of usages) {
    if (!usage) continue;
    for (const counter of counters) {
      const value = usage[counter];
      if (Number.isFinite(value)) totals[counter] += value;
    }
  }
  return totals;
}
95
+
50
96
  class ProcessManager {
51
97
  constructor({
52
98
  cap = DEFAULT_CAP,
99
+ queueCap = DEFAULT_QUEUE_CAP,
53
100
  spawnFn,
54
101
  db = null,
55
102
  logger = console,
@@ -61,9 +108,11 @@ class ProcessManager {
61
108
  onToolUse = null, // (sessionKey, toolName, entry) → void — routes to pendingQueue[0]
62
109
  onAssistantMessageStart = null, // (sessionKey, entry) → void — fires when a NEW top-level assistant message begins (after a previous one ended). Used by polygram.js to call streamer.forceNewMessage() so each assistant message gets its own bubble.
63
110
  onRespawn = null, // (sessionKey, reason, entry) → void — fires after graceful drain-and-kill
111
+ onQueueDrop = null, // 0.7.6: (sessionKey, droppedPending, entry) → void — fired when a pending is dropped because pendingQueue exceeded queueCap. Polygram uses this to surface a warning on the dropped message.
64
112
  } = {}) {
65
113
  if (!spawnFn) throw new Error('spawnFn required');
66
114
  this.cap = cap;
115
+ this.queueCap = queueCap;
67
116
  this.spawnFn = spawnFn;
68
117
  this.db = db;
69
118
  this.logger = logger;
@@ -75,6 +124,7 @@ class ProcessManager {
75
124
  this.onToolUse = onToolUse;
76
125
  this.onAssistantMessageStart = onAssistantMessageStart;
77
126
  this.onRespawn = onRespawn;
127
+ this.onQueueDrop = onQueueDrop;
78
128
  this.procs = new Map();
79
129
  }
80
130
 
@@ -288,9 +338,35 @@ class ProcessManager {
288
338
  // pending. Fire onFirstStream ONCE, regardless of whether the
289
339
  // assistant message has text or only tool_use blocks (some turns
290
340
  // emit tool_use first with no preamble).
291
- if (added || (Array.isArray(event.message?.content)
292
- && event.message.content.some((b) => b?.type === 'tool_use'))) {
341
+ const hasAssistantContent = !!added || (Array.isArray(event.message?.content)
342
+ && event.message.content.some((b) => b?.type === 'tool_use'));
343
+ if (hasAssistantContent) {
293
344
  head.fireFirstStream?.();
345
+ // 0.7.7: any assistant content (text OR tool_use) disqualifies
346
+ // the turn from transient-retry — re-sending the user prompt
347
+ // after this point would replay tools that already executed.
348
+ head.firstAssistantSeen = true;
349
+ }
350
+ // 0.7.6 (item F): accumulate usage + counters for turn telemetry.
351
+ // The `result` event carries total_cost_usd + duration_ms but NOT
352
+ // a usage breakdown; usage lives on each assistant.message.usage.
353
+ // Anthropic emits cumulative totals per assistant message id
354
+ // (so within a single message the last usage seen wins; across
355
+ // distinct messages they sum).
356
+ const usage = event.message?.usage;
357
+ if (usage) {
358
+ if (messageId != null && head.lastUsageMessageId === messageId) {
359
+ // same message, replace running totals for this message
360
+ head.usageByMessage.set(messageId, usage);
361
+ } else {
362
+ head.lastUsageMessageId = messageId;
363
+ head.usageByMessage.set(messageId, usage);
364
+ }
365
+ }
366
+ if (Array.isArray(event.message?.content)) {
367
+ for (const b of event.message.content) {
368
+ if (b?.type === 'tool_use') head.toolUseCount++;
369
+ }
294
370
  }
295
371
  if (added) {
296
372
  // Pre-0.7.0 we did `streamText = streamText + '\n\n' + added`,
@@ -331,15 +407,89 @@ class ProcessManager {
331
407
  }
332
408
 
333
409
  if (event.type === 'result' && head) {
410
+ // 0.7.7: transient HTTP retry. If Anthropic returned a
411
+ // retryable error AND the turn produced ZERO assistant
412
+ // content yet AND we haven't already retried, sleep and
413
+ // re-write the prompt instead of resolving the pending.
414
+ // Idempotency: firstAssistantSeen guards against replaying
415
+ // tools that already ran.
416
+ const errSignal = event.error || event.subtype;
417
+ const isError = event.subtype !== 'success';
418
+ const shouldTransientRetry = isError
419
+ && !head.firstAssistantSeen
420
+ && head.transientRetries < MAX_TRANSIENT_RETRIES
421
+ && head.prompt != null
422
+ && isTransientHttpError({ message: errSignal, subtype: event.subtype });
423
+ if (shouldTransientRetry) {
424
+ head.transientRetries++;
425
+ this._logEvent('transient-retry', {
426
+ session_key: sessionKey,
427
+ chat_id: entry.chatId,
428
+ attempt: head.transientRetries,
429
+ subtype: event.subtype,
430
+ error: typeof errSignal === 'string' ? errSignal.slice(0, 200) : null,
431
+ });
432
+ // Reset accumulators so the retried turn's metrics aren't
433
+ // contaminated by the failed-turn's totals (usage on a
434
+ // failed turn IS billed but we surface it as a separate
435
+ // event-log entry rather than mixing into turn_metrics).
436
+ head.usageByMessage = new Map();
437
+ head.lastUsageMessageId = null;
438
+ head.toolUseCount = 0;
439
+ head.streamText = '';
440
+ head.lastAssistantMessageId = null;
441
+ // Re-arm idle timer (the old one is still ticking from the
442
+ // previous activate; resetIdleTimer just re-arms).
443
+ head.resetIdleTimer?.();
444
+ // Sleep then re-write. Keep the pending in-place; the next
445
+ // 'result' event resolves it normally (or hits the same
446
+ // retry path if MAX_TRANSIENT_RETRIES hadn't been
447
+ // exhausted, which after the increment above it has).
448
+ setTimeout(() => {
449
+ // Edge case: pending was killed/aborted during the
450
+ // retry sleep — process exited, queue drained, etc.
451
+ // Skip the re-write if pendingQueue no longer holds us.
452
+ if (entry.pendingQueue[0] !== head || entry.closed) return;
453
+ try {
454
+ entry.proc.stdin.write(JSON.stringify({
455
+ type: 'user',
456
+ message: { role: 'user', content: head.prompt },
457
+ }) + '\n');
458
+ } catch (err) {
459
+ // stdin write failed — fall back to surfacing the
460
+ // error. Mark as not-retried-anymore so we don't loop.
461
+ this.logger.error(`[${entry.label}] transient-retry stdin write failed: ${err.message}`);
462
+ entry.pendingQueue.shift();
463
+ head.clearTimers();
464
+ head.reject(err);
465
+ }
466
+ }, DEFAULT_TRANSIENT_RETRY_DELAY_MS);
467
+ return; // don't shift / resolve; wait for next result
468
+ }
469
+
334
470
  entry.pendingQueue.shift();
335
471
  head.clearTimers();
336
472
  if (this.onResult) this.onResult(sessionKey, event, entry, head);
473
+ // 0.7.6 (item F): sum usage across distinct assistant messages
474
+ // (each message id seen got its last-known usage stored; sum the
475
+ // map values). Yields a single-row metric summary the caller
476
+ // can persist via db.insertTurnMetric().
477
+ const usageTotals = sumUsage(head.usageByMessage);
337
478
  head.resolve({
338
479
  text: event.result || '',
339
480
  sessionId: event.session_id,
340
481
  cost: event.total_cost_usd,
341
482
  duration: event.duration_ms,
342
483
  error: event.subtype === 'success' ? null : (event.error || event.subtype),
484
+ metrics: {
485
+ inputTokens: usageTotals.input_tokens,
486
+ outputTokens: usageTotals.output_tokens,
487
+ cacheCreationTokens: usageTotals.cache_creation_input_tokens,
488
+ cacheReadTokens: usageTotals.cache_read_input_tokens,
489
+ numAssistantMessages: head.usageByMessage.size,
490
+ numToolUses: head.toolUseCount,
491
+ resultSubtype: event.subtype || null,
492
+ },
343
493
  });
344
494
  // Activate next head or settle idle state.
345
495
  if (entry.pendingQueue.length > 0) {
@@ -456,6 +606,14 @@ class ProcessManager {
456
606
  idleTimer: null,
457
607
  maxTimer: null,
458
608
  activated: false,
609
+ // 0.7.6 (item F): per-turn telemetry accumulators. usageByMessage
610
+ // collects each assistant message's last-seen usage; we sum
611
+ // across messages at result time (each id is summed once, not
612
+ // per stream chunk, since usage in stream-json is cumulative
613
+ // *within* a message — last-seen-per-message wins).
614
+ usageByMessage: new Map(),
615
+ lastUsageMessageId: null,
616
+ toolUseCount: 0,
459
617
  // 0.7.4 (item B): set true when the first stream event (assistant
460
618
  // text or tool_use) arrives for this pending. Fires
461
619
  // `context.onFirstStream` once. Used by polygram to flip the
@@ -463,6 +621,15 @@ class ProcessManager {
463
621
  // producing output, not when the pending becomes queue head
464
622
  // (which can be ~hundreds of ms before the first token).
465
623
  firstStreamFired: false,
624
+ // 0.7.7: transient-retry support. We hold the prompt so we can
625
+ // re-write it on transient 5xx/429 if zero assistant content
626
+ // streamed yet. firstAssistantSeen flips on first assistant
627
+ // event with non-empty content OR tool_use blocks — once true,
628
+ // retry is no longer idempotent (we'd replay executed tools)
629
+ // and pm surfaces the error instead.
630
+ prompt,
631
+ transientRetries: 0,
632
+ firstAssistantSeen: false,
466
633
  };
467
634
 
468
635
  pending.fireFirstStream = () => {
@@ -524,8 +691,40 @@ class ProcessManager {
524
691
  pending.idleTimer = idleTimer;
525
692
  };
526
693
 
694
+ // 0.7.6 (item H): enforce per-session queue cap. Drop the OLDEST
695
+ // non-active pending (index 1 — index 0 is the in-flight head and
696
+ // killing it mid-turn would corrupt Claude's state). The dropped
697
+ // pending's promise rejects so its handler (polygram.js) can
698
+ // surface a "couldn't keep up — message dropped" warning to the
699
+ // user. We drop AFTER pushing the new pending so the cap means
700
+ // "at most queueCap pendings live", not "refuse to enqueue past N".
701
+ // Refusing the new write would lose the most recent message —
702
+ // usually the one the user actually cares about — whereas
703
+ // dropping the oldest preserves recency at the cost of a stale
704
+ // queued turn that the user has likely moved past anyway.
527
705
  entry.pendingQueue.push(pending);
528
706
  entry.inFlight = true;
707
+ while (entry.pendingQueue.length > this.queueCap) {
708
+ // Splice at index 1 to leave the active head intact.
709
+ const dropped = entry.pendingQueue.splice(1, 1)[0];
710
+ if (!dropped) break;
711
+ dropped.clearTimers?.();
712
+ const dropErr = new Error(
713
+ `queue overflow: dropped (queue cap ${this.queueCap})`,
714
+ );
715
+ dropErr.code = 'QUEUE_OVERFLOW';
716
+ this._logEvent('queue-overflow-drop', {
717
+ session_key: sessionKey,
718
+ chat_id: entry.chatId,
719
+ queue_len: entry.pendingQueue.length,
720
+ source_msg_id: dropped.context?.sourceMsgId ?? null,
721
+ });
722
+ if (this.onQueueDrop) {
723
+ try { this.onQueueDrop(sessionKey, dropped, entry); }
724
+ catch (err) { this.logger.error(`[${entry.label}] onQueueDrop: ${err.message}`); }
725
+ }
726
+ dropped.reject(dropErr);
727
+ }
529
728
 
530
729
  // If we're the only pending, activate immediately. Otherwise wait
531
730
  // until the preceding pending is shifted out.
@@ -552,4 +751,10 @@ class ProcessManager {
552
751
  }
553
752
  }
554
753
 
555
- module.exports = { ProcessManager, DEFAULT_CAP, extractAssistantText };
754
+ module.exports = {
755
+ ProcessManager,
756
+ DEFAULT_CAP,
757
+ DEFAULT_QUEUE_CAP,
758
+ extractAssistantText,
759
+ sumUsage,
760
+ };
@@ -0,0 +1,42 @@
1
-- 0.7.6 (item F): turn_metrics table.
--
-- Stream-json `result` events from `claude -p` carry total_cost_usd and
-- duration_ms (already pulled into pending.resolve()), plus a `usage`
-- block on each `assistant` event with token counts including cache hits.
-- Pre-fix all of this was logged to console only; once a turn was done
-- the cost was unrecoverable for analysis.
--
-- This table persists one row of metrics per turn. A row is identified
-- by (chat_id, msg_id) by convention only — no UNIQUE constraint
-- enforces it, and the surrogate `id` is the primary key. It lets us
-- answer questions like:
--   - cost / day per bot
--   - cache hit rate per chat
--   - which chats have the longest turns
--   - which models are most expensive overall
--
-- Stored at turn end (in onResult callback). One row per dispatched
-- user-message-to-final-reply cycle, even if the cycle had multiple
-- assistant messages (those are aggregated).
CREATE TABLE IF NOT EXISTS turn_metrics (
  id INTEGER PRIMARY KEY AUTOINCREMENT,
  ts INTEGER NOT NULL, -- turn end timestamp (ms)
  chat_id TEXT NOT NULL,
  thread_id TEXT, -- NULL when the turn is not inside a topic/thread
  msg_id INTEGER NOT NULL, -- inbound message_id that started turn
  session_id TEXT, -- claude session UUID for resume
  bot_name TEXT, -- 'shumabit' / 'umi-assistant' / etc
  model TEXT, -- chatConfig.model at turn start
  effort TEXT, -- chatConfig.effort
  input_tokens INTEGER,
  output_tokens INTEGER,
  cache_creation_tokens INTEGER,
  cache_read_tokens INTEGER,
  cost_usd REAL,
  duration_ms INTEGER,
  num_assistant_messages INTEGER, -- assistant messages in the turn. NOTE(review): pm reports the number of distinct assistant message ids that carried usage — confirm that is what gets stored here
  num_tool_uses INTEGER,
  result_subtype TEXT, -- 'success' / 'error_max_turns' / etc
  error TEXT
);
-- Lookup paths: newest-first per chat, newest-first overall, and by session.
CREATE INDEX IF NOT EXISTS idx_turn_metrics_chat_ts ON turn_metrics(chat_id, ts DESC);
CREATE INDEX IF NOT EXISTS idx_turn_metrics_recent ON turn_metrics(ts DESC);
CREATE INDEX IF NOT EXISTS idx_turn_metrics_session ON turn_metrics(session_id);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "polygram",
3
- "version": "0.7.5",
3
+ "version": "0.7.7",
4
4
  "description": "Telegram daemon for Claude Code that preserves the OpenClaw per-chat session model. Migration path for OpenClaw users moving to Claude Code.",
5
5
  "main": "lib/ipc-client.js",
6
6
  "bin": {
package/polygram.js CHANGED
@@ -40,6 +40,7 @@ const { startTyping } = require('./lib/typing-indicator');
40
40
  const { redactBotToken } = require('./lib/net-errors');
41
41
  const { createReactionManager, classifyToolName } = require('./lib/status-reactions');
42
42
  const { createMediaGroupBuffer } = require('./lib/media-group-buffer');
43
+ const { classify: classifyError, isTransientHttpError } = require('./lib/error-classify');
43
44
  const {
44
45
  createStore: createApprovalsStore,
45
46
  matchesAnyPattern: matchesApprovalPattern,
@@ -842,22 +843,19 @@ let isShuttingDown = false;
842
843
  // killed). Anything we don't recognise falls back to a generic line
843
844
  // with a single-line snippet of the error so the user can at least
844
845
  // distinguish unique failures from the obvious "try again" cases.
846
+ // 0.7.7: errorReplyText delegates to lib/error-classify.js so the
847
+ // regex tables live in one place and stay in sync with future SDK
848
+ // error subtypes (the 0.8.0 migration extends the classifier rather
849
+ // than adding more if-branches here).
850
+ //
851
+ // classify() returns { kind, userMessage, isTransient, autoRecover }.
852
+ // `userMessage: null` is a deliberate "suppress reply" signal —
853
+ // today only used by INTERRUPTED in the abort-grace window. Callers
854
+ // that already gate on isSessionRecentlyAborted will short-circuit
855
+ // before reaching here, but we honour `null` defensively.
845
856
  function errorReplyText(err) {
846
- const msg = err?.message || '';
847
- if (/idle with no Claude activity/i.test(msg)) {
848
- return '⏳ I went quiet too long without finishing. Try resending or simplifying the task.';
849
- }
850
- if (/wall-clock ceiling/i.test(msg)) {
851
- return '⏱ This was taking too long, so I stopped. Try resending or simplifying the task.';
852
- }
853
- if (/Process (exited|killed)/i.test(msg)) {
854
- return '💥 Something crashed on my end. Try again.';
855
- }
856
- if (/error_during_execution/i.test(msg)) {
857
- return '💥 Something went wrong mid-stream. Try again.';
858
- }
859
- const reason = msg.split('\n')[0].slice(0, 120);
860
- return `Hit a snag: ${reason || 'unknown error'}. Try resending.`;
857
+ const { userMessage } = classifyError(err);
858
+ return userMessage; // may be null — caller must handle
861
859
  }
862
860
 
863
861
  // Sessions the operator just /stop'd (or natural-language "стоп"). Keyed
@@ -937,13 +935,21 @@ function dispatchHandleMessage(sessionKey, chatId, msg, bot) {
937
935
  // re-dispatch it on next start)
938
936
  // - user just /stop'd (already saw their abort acknowledgement)
939
937
  if (!wasAborted && !isReplay && !isShuttingDown) {
940
- tg(bot, 'sendMessage', {
941
- chat_id: chatId,
942
- text: errorReplyText(err),
943
- reply_parameters: { message_id: msg.message_id },
944
- }, { source: 'error-reply', botName: BOT_NAME }).catch((replyErr) => {
945
- console.error(`[${sessionKey}] failed to send error reply: ${replyErr.message}`);
946
- });
938
+ // 0.7.7: errorReplyText may return null when the classifier
939
+ // says "suppress reply" (e.g. INTERRUPTED inside abort grace —
940
+ // user already saw their /stop ack). Skip the send call in
941
+ // that case rather than dispatching empty text (which would
942
+ // 400 at the lib/telegram.js empty-text guard added in 0.7.4).
943
+ const replyText = errorReplyText(err);
944
+ if (replyText) {
945
+ tg(bot, 'sendMessage', {
946
+ chat_id: chatId,
947
+ text: replyText,
948
+ reply_parameters: { message_id: msg.message_id },
949
+ }, { source: 'error-reply', botName: BOT_NAME }).catch((replyErr) => {
950
+ console.error(`[${sessionKey}] failed to send error reply: ${replyErr.message}`);
951
+ });
952
+ }
947
953
  }
948
954
  }).finally(() => {
949
955
  const n = (inFlightHandlers.get(sessionKey) || 1) - 1;
@@ -1893,6 +1899,32 @@ async function handleMessage(sessionKey, chatId, msg, bot) {
1893
1899
  });
1894
1900
  const elapsed = ((Date.now() - t0) / 1000).toFixed(1);
1895
1901
 
1902
+ // 0.7.6 (item F): persist per-turn telemetry. Stream-json result
1903
+ // events carry total_cost_usd + duration_ms; sumUsage rolled up
1904
+ // input/output/cache token counts from per-message usage. One row
1905
+ // per dispatched user message; queryable via turn_metrics table.
1906
+ if (result.metrics) {
1907
+ dbWrite(() => db.insertTurnMetric({
1908
+ chat_id: chatId,
1909
+ thread_id: threadId,
1910
+ msg_id: msg.message_id,
1911
+ session_id: result.sessionId,
1912
+ bot_name: BOT_NAME,
1913
+ model: chatConfig.model,
1914
+ effort: chatConfig.effort,
1915
+ input_tokens: result.metrics.inputTokens,
1916
+ output_tokens: result.metrics.outputTokens,
1917
+ cache_creation_tokens: result.metrics.cacheCreationTokens,
1918
+ cache_read_tokens: result.metrics.cacheReadTokens,
1919
+ cost_usd: result.cost,
1920
+ duration_ms: result.duration,
1921
+ num_assistant_messages: result.metrics.numAssistantMessages,
1922
+ num_tool_uses: result.metrics.numToolUses,
1923
+ result_subtype: result.metrics.resultSubtype,
1924
+ error: result.error || null,
1925
+ }), 'insert turn_metric');
1926
+ }
1927
+
1896
1928
  stopTyping();
1897
1929
 
1898
1930
  if (result.error) {