polygram 0.7.6 → 0.7.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +1 -1
- package/lib/db.js +7 -1
- package/lib/error-classify.js +290 -0
- package/lib/process-manager.js +86 -2
- package/package.json +1 -1
- package/polygram.js +28 -30
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"$schema": "https://anthropic.com/claude-code/plugin.schema.json",
|
|
3
3
|
"name": "polygram",
|
|
4
|
-
"version": "0.7.6",
|
|
4
|
+
"version": "0.7.8",
|
|
5
5
|
"description": "Telegram integration for Claude Code that preserves the OpenClaw per-chat session model. Migration target for OpenClaw users. Multi-bot, multi-chat, per-topic isolation; SQLite transcripts; inline-keyboard approvals. Bundles /polygram:status|logs|pair-code|approvals admin commands and a history skill.",
|
|
6
6
|
"keywords": [
|
|
7
7
|
"telegram",
|
package/lib/db.js
CHANGED
|
@@ -8,7 +8,13 @@ const fs = require('fs');
|
|
|
8
8
|
const path = require('path');
|
|
9
9
|
const Database = require('better-sqlite3');
|
|
10
10
|
|
|
11
|
-
|
|
11
|
+
// 0.7.8: bumped from 8 → 9. 0.7.6 added migration 009-turn-metrics.sql
|
|
12
|
+
// but failed to bump SCHEMA_VERSION; the early-return on line ~36
|
|
13
|
+
// skipped the migration loop on any DB already at user_version=8 (any
|
|
14
|
+
// upgraded install) → turn_metrics table never created → INSERT prepare
|
|
15
|
+
// at startup crashed polygram. Both 0.7.6 and 0.7.7 shipped with the
|
|
16
|
+
// bug. Fixed by bumping the constant.
|
|
17
|
+
const SCHEMA_VERSION = 9;
|
|
12
18
|
|
|
13
19
|
// Sentinel `error` value for outbound rows whose API call may or may not
|
|
14
20
|
// have reached Telegram. markStalePending writes it; hasOutboundReplyTo
|
|
@@ -0,0 +1,290 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Error classifier — maps any error from any source to a stable shape.
|
|
3
|
+
*
|
|
4
|
+
* Sources today (0.7.7): stream-json `result` events with error
|
|
5
|
+
* subtypes, child_process `'close'`/`'error'` event errors, idle
|
|
6
|
+
* timer fires, polygram-internal Errors with `err.code` set.
|
|
7
|
+
*
|
|
8
|
+
* Sources after 0.8.0 SDK migration: SDK iterator throws
|
|
9
|
+
* (`AbortError` named class plus plain `Error`s), `SDKResultMessage`
|
|
10
|
+
* with subtypes `error_during_execution` / `error_max_turns` /
|
|
11
|
+
* `error_max_budget_usd` / `error_max_structured_output_retries`,
|
|
12
|
+
* per-message `SDKAssistantMessage.error` subtypes
|
|
13
|
+
* (`authentication_failed` / `billing_error` / `rate_limit` /
|
|
14
|
+
* `invalid_request` / `server_error` / `unknown` / `max_output_tokens`),
|
|
15
|
+
* 5xx HTTP errors that bubble through the SDK transport.
|
|
16
|
+
*
|
|
17
|
+
* Returning the same shape regardless of transport means
|
|
18
|
+
* `errorReplyText` in polygram.js doesn't grow N branches every time
|
|
19
|
+
* a new error class shows up — we just add a row to PATTERNS or a
|
|
20
|
+
* `code:` short-circuit at the top.
|
|
21
|
+
*
|
|
22
|
+
* Layered ship order (per v4 plan §6.5.1):
|
|
23
|
+
* - 0.7.7 (this file): transport-agnostic patterns and the public
|
|
24
|
+
* `classify()` API. Polygram.js's `errorReplyText` consults this
|
|
25
|
+
* module directly.
|
|
26
|
+
* - Phase 1 of 0.8.0 (later): adds typed-code branches for
|
|
27
|
+
* `INTERRUPTED`, plus SDK `error_max_structured_output_retries`,
|
|
28
|
+
* plus per-message SDK error subtypes.
|
|
29
|
+
* - Phase 2 of 0.8.0 (later): adds AUTO_RECOVER actions
|
|
30
|
+
* (`reset_session` etc) so pm can self-heal stuck sessions
|
|
31
|
+
* without waiting for the user to type /new.
|
|
32
|
+
*/
|
|
33
|
+
|
|
34
|
+
'use strict';
|
|
35
|
+
|
|
36
|
+
// Regex table matched against the extracted error text, one entry per
// error kind. `classify()` walks the entries in declaration order and the
// first match wins, so order only matters where patterns overlap:
// `transient5xx` stays last because the more specific kinds
// (auth/billing/format) can carry their own status codes too.
// All patterns are case-insensitive and deliberately have no `g`/`y`
// flag, so `.test()` is stateless and the objects can be shared.
const PATTERNS = {
  // Anthropic API rate limit (429) — "rate-limited", "rate_limit",
  // "rate limit", "Too Many Requests", token-bucket exhaustion text.
  // The separator class includes `-` so the hyphenated spelling
  // ("rate-limited") is recognised as well.
  rateLimit: /\b429\b|rate[-_ ]?limit|too[_ ]many[_ ]requests|tokens? per minute/i,

  // Billing / quota (402, "insufficient credit"). Fires before any
  // model call when the workspace is out of funds.
  billing: /\b402\b|payment[_ ]required|billing|insufficient[_ ]credit/i,

  // Auth: 401/403, OAuth token expiry, refresh failure. The 0.8.0
  // plan ships an explicit auth-expired UX (admin-chat notify +
  // pause); 0.7.7 just maps to a friendlier user message.
  authExpired: /\b401\b|\b403\b|unauthor(ized|ised)|forbidden|token[_ ]expired|oauth[_ ]token[_ ]refresh[_ ]failed/i,

  // Context window exceeded — too many tokens for the model. Usually
  // surfaces as `prompt is too long` from Anthropic; sometimes as
  // generic "exceeds maximum context" depending on SDK version.
  contextOverflow: /context[_ ](window|length)|prompt[_ ]too[_ ]large|exceeds[_ ]maximum[_ ]context|prompt is too long/i,

  // Role alternation / message ordering — fires when the transcript has
  // consecutive same-role messages or a tool_use without a matching
  // tool_result. Polygram doesn't generate these directly, but they
  // can surface after an interrupted turn.
  roleOrdering: /role.*alternat|message[_ ]ordering|consecutive (user|assistant)/i,

  // Tool call missing required `input` field. Indicates corrupted
  // history; the user-facing message tells them to /new. Word order
  // varies across Anthropic SDK versions — accept either
  // "input...missing" or "missing...input" within a tool_use mention.
  missingToolInput: /tool[_ ]use.*(input.*missing|missing.*input)|missing tool call input|tool input required/i,

  // Idle/wall-clock timeout from polygram's pm timers, OR a
  // model-side timeout. Mapped to a single class; the user message is
  // identical either way.
  timeout: /timed[_ ]out|deadline|idle with no Claude activity|wall-clock ceiling/i,

  // Generic format/validation errors (400 with no other class
  // matching). Rare in practice; included so we don't fall through
  // to "unknown".
  format: /invalid[_ ]request|invalid[_ ]json|malformed|bad request/i,

  // Transient HTTP (5xx upstream Anthropic outage / overload).
  // 521-524/529 are Cloudflare codes seen when Anthropic's edge is
  // degraded.
  transient5xx: /\b5(00|02|03|2[1-4]|29)\b|temporarily overloaded|server[_ ]error|service unavailable/i,
};
|
|
86
|
+
|
|
87
|
+
// User-facing reply text per pattern kind (keys mirror PATTERNS).
// Polygram-style emoji + concise action hint. A `null` user message
// elsewhere in the classifier means "suppress the user-facing reply"
// (used for INTERRUPTED inside the abort-grace window — the user
// already saw their /stop ack); every entry in this table is a string.
//
// These texts are shown when the error is finally surfaced to the
// user, i.e. after pm's single transient retry has already been spent
// (or was not allowed). The transient5xx wording therefore asks the
// user to try again rather than promising a retry that will not occur.
const USER_MESSAGES = {
  rateLimit: '⚠️ Rate-limited by Anthropic. Try again in a minute.',
  billing: '💳 Billing issue on Anthropic — operator needs to top up credits.',
  authExpired: '🔑 Claude auth expired. Operator has been notified.',
  contextOverflow: '📚 Conversation got too long. Send /new to start fresh.',
  roleOrdering: '⚠️ Conversation got into a tangled state. Try /new.',
  missingToolInput: '⚠️ Session history looks corrupted. Try /new.',
  timeout: '⏳ I went quiet too long without finishing. Try resending or simplifying.',
  format: '⚠️ Invalid request format. Try rephrasing or /new.',
  transient5xx: '☁️ Anthropic is temporarily unavailable. Try again in a minute.',
};
|
|
102
|
+
|
|
103
|
+
// Recovery action per error kind, for the kinds where the session
// cannot continue without a reset. 0.7.7 only exports this table for
// forward-compat; Phase 2 of 0.8.0 is planned to wire
// `pm.resetSession()` to it.
//
// Action names pm is expected to understand:
//   'reset_session' — close current Query, clear sessionId, fresh start
//   (future) 'compact' — manual compact request, if SDK exposes it
//
// Every kind listed today maps to the same action, so the table is
// derived from the list of kinds.
const AUTO_RECOVER = Object.fromEntries(
  ['roleOrdering', 'contextOverflow', 'missingToolInput'].map((kind) => [kind, 'reset_session']),
);
|
|
115
|
+
|
|
116
|
+
// Results keyed by `err.code` for errors polygram raises itself (see
// lib/process-manager.js). These bypass pattern matching entirely, so
// the keys must stay in sync with the codes pm emits.
//
// Every entry is non-transient with no auto-recovery action, so a small
// local factory fills in those two fields.
const CODES = (() => {
  const terminal = (kind, userMessage) => ({
    kind,
    userMessage,
    isTransient: false,
    autoRecover: null,
  });

  return {
    // 0.7.6 (item H): queue cap drop. Handled by code so the
    // queue-overflow wording is exact rather than pattern-classified.
    QUEUE_OVERFLOW: terminal(
      'queueOverflow',
      '⏭ Couldn\'t keep up — this message was skipped while I was processing newer ones. Resend if it still matters.',
    ),

    // 0.8.0 Phase 1 will set this on pendings rejected via
    // pm.interrupt(). Listed already so the abort-grace silence works
    // before the SDK migration lands. `null` suppresses the reply —
    // the user already saw the /stop ack.
    INTERRUPTED: terminal('interrupted', null),

    // Phase 2 will set this when pm.resetSession() drains the queue
    // for any reason (auto-recovery, /new, /reset, auth-expired).
    RESET_SESSION: terminal('resetSession', '✨ Started a fresh session.'),

    // 0.8.0 auth-expired path — set on every pending the daemon
    // rejects after a 401 surface. Separate from the authExpired
    // pattern: this is polygram saying "I already noticed and paused",
    // not "I just hit a 401 and am about to handle it".
    AUTH_EXPIRED: terminal(
      'authExpired',
      '🔑 The bot needs re-auth. The operator has been notified. Try again in a few minutes.',
    ),
  };
})();
|
|
157
|
+
|
|
158
|
+
/**
 * Classify an error from any source into one stable result shape.
 *
 * Accepts:
 *   - Error / object with `code` / `message`
 *   - SDKResultMessage with `subtype` and optional `error`
 *   - SDKAssistantMessage.error (string subtype like 'rate_limit')
 *   - plain string
 *   - null/undefined (returns the 'unknown' shape)
 *
 * Resolution order: typed `code` → SDK message-error string → SDK
 * result subtype → regex PATTERNS (first match wins) → 'unknown'.
 *
 * @param {*} err - the error-like value to classify
 * @returns {{kind: string, userMessage: (string|null), isTransient: boolean, autoRecover: (string|null)}}
 *   `userMessage: null` means "suppress the reply"; `isTransient: true`
 *   means pm may retry once; `autoRecover` is 'reset_session' or null.
 *   A fresh object is returned on every call.
 */
function classify(err) {
  // Typed-code short-circuit takes priority over pattern matching.
  // Errors polygram constructs internally (QUEUE_OVERFLOW etc.) set
  // `err.code` so we don't depend on string content. Only own keys of
  // CODES count: a foreign code such as 'constructor' or 'toString'
  // would otherwise resolve through Object.prototype and produce a
  // malformed result.
  const code = err?.code;
  if (typeof code === 'string' && Object.prototype.hasOwnProperty.call(CODES, code)) {
    return { ...CODES[code] };
  }

  // SDKAssistantMessage.error is a short string code from a fixed
  // union — match those directly, not via regex.
  if (typeof err === 'string') {
    const sdkMessageError = matchSdkMessageError(err);
    if (sdkMessageError) return sdkMessageError;
  }
  if (err?.subtype && typeof err.subtype === 'string') {
    const sdkResultSubtype = matchSdkResultSubtype(err.subtype);
    if (sdkResultSubtype) return sdkResultSubtype;
  }

  const msg = extractMessage(err);
  for (const [kind, re] of Object.entries(PATTERNS)) {
    if (re.test(msg)) {
      return {
        kind,
        userMessage: USER_MESSAGES[kind],
        isTransient: kind === 'transient5xx' || kind === 'rateLimit',
        autoRecover: AUTO_RECOVER[kind] ?? null,
      };
    }
  }

  // Fall-through: surface a snippet (first line, at most 120 chars) of
  // the raw error so users at least know SOMETHING happened, routed
  // through the classifier so callers get a uniform return.
  const reason = msg.split('\n')[0].slice(0, 120);
  return {
    kind: 'unknown',
    userMessage: `Hit a snag: ${reason || 'unknown error'}. Try resending.`,
    isTransient: false,
    autoRecover: null,
  };
}
|
|
219
|
+
|
|
220
|
+
/**
 * Pull a human-readable string out of whatever shape the caller passed.
 *
 * Lookup order: the value itself when it is a string, then `err.message`,
 * then `err.error` (using its own `message` when it is a nested error
 * object), then the value's string form. Never throws, and never
 * returns the useless "[object Object]" placeholder — an empty string
 * is returned instead so callers can fall back to their own default.
 *
 * @param {*} err - error-like value (Error, plain object, string, null…)
 * @returns {string} extracted text, or '' when nothing useful exists
 */
function extractMessage(err) {
  const PLACEHOLDER = '[object Object]';
  // String() throws on null-prototype objects and on objects whose
  // toString/valueOf misbehave; the classifier must not crash while
  // handling another error.
  const toText = (value) => {
    try {
      return String(value);
    } catch {
      return '';
    }
  };

  if (err == null) return '';
  if (typeof err === 'string') return err;
  if (err.message) return toText(err.message);

  const nested = err.error;
  if (nested) {
    // API error payloads may nest the text as { error: { message } }.
    if (typeof nested === 'object' && nested.message) return toText(nested.message);
    const nestedText = toText(nested);
    if (nestedText && nestedText !== PLACEHOLDER) return nestedText;
  }

  const text = toText(err);
  return text === PLACEHOLDER ? '' : text;
}
|
|
228
|
+
|
|
229
|
+
// SDKAssistantMessage.error fields are a small fixed union
// (sdk.d.ts:2343). Map them directly to classifier kinds so we don't
// depend on transport-specific error text.
const SDK_MESSAGE_ERROR_MAP = {
  authentication_failed: 'authExpired',
  billing_error: 'billing',
  rate_limit: 'rateLimit',
  invalid_request: 'format',
  server_error: 'transient5xx',
  unknown: 'unknown',
  max_output_tokens: 'format', // closest match — model gave up
};

/**
 * Translate an SDKAssistantMessage.error code into a classifier result.
 *
 * @param {string} s - candidate error code (e.g. 'rate_limit')
 * @returns {?{kind: string, userMessage: (string|null), isTransient: boolean, autoRecover: (string|null)}}
 *   the classified shape, or null when `s` is not a known code or maps
 *   to 'unknown' (the caller then falls through to pattern matching).
 */
function matchSdkMessageError(s) {
  // Own-property check: arbitrary strings such as 'constructor' or
  // '__proto__' must not resolve through Object.prototype and be
  // mistaken for a known code.
  if (!Object.prototype.hasOwnProperty.call(SDK_MESSAGE_ERROR_MAP, s)) return null;
  const kind = SDK_MESSAGE_ERROR_MAP[s];
  if (kind === 'unknown') return null; // fall through to pattern match
  return {
    kind,
    userMessage: USER_MESSAGES[kind] ?? null,
    isTransient: kind === 'transient5xx' || kind === 'rateLimit',
    autoRecover: AUTO_RECOVER[kind] ?? null,
  };
}
|
|
252
|
+
|
|
253
|
+
// SDKResultMessage.subtype values (sdk.d.ts:3121). Most are
// terminal-error indicators that don't have a clean pattern equivalent.
const SDK_RESULT_SUBTYPE_MAP = {
  error_during_execution: 'unknown',
  error_max_turns: 'format',
  error_max_budget_usd: 'billing',
  error_max_structured_output_retries: 'format',
};

/**
 * Translate an SDKResultMessage.subtype into a classifier result.
 *
 * @param {string} s - result subtype (e.g. 'error_max_turns')
 * @returns {?{kind: string, userMessage: (string|null), isTransient: boolean, autoRecover: (string|null)}}
 *   the classified shape, or null for 'success', for subtypes that are
 *   not in the map, and for subtypes mapped to 'unknown' (the caller
 *   then falls through to pattern matching).
 */
function matchSdkResultSubtype(s) {
  if (s === 'success') return null;
  // Own-property check: strings such as 'constructor' or 'valueOf'
  // must not resolve through Object.prototype and be mistaken for a
  // known subtype.
  if (!Object.prototype.hasOwnProperty.call(SDK_RESULT_SUBTYPE_MAP, s)) return null;
  const kind = SDK_RESULT_SUBTYPE_MAP[s];
  if (kind === 'unknown') return null;
  return {
    kind,
    userMessage: USER_MESSAGES[kind] ?? null,
    // Result subtypes don't auto-retry; the turn already burned its
    // budget.
    isTransient: false,
    autoRecover: AUTO_RECOVER[kind] ?? null,
  };
}
|
|
273
|
+
|
|
274
|
+
/**
 * Report whether `classify()` marks an error as transient, i.e. one
 * that pm may sleep on and retry ONCE before giving up. At present
 * only the transient5xx and rateLimit kinds qualify.
 *
 * Per v4 plan §6.6 H1/M2 the retry additionally requires that the turn
 * produced ZERO assistant messages (idempotency); pm tracks that flag
 * itself — it is not checked here.
 *
 * @param {*} err - any value accepted by `classify()`
 * @returns {boolean} the `isTransient` field of the classified result
 */
function isTransientHttpError(err) {
  const { isTransient } = classify(err);
  return isTransient;
}
|
|
282
|
+
|
|
283
|
+
module.exports = {
|
|
284
|
+
classify,
|
|
285
|
+
isTransientHttpError,
|
|
286
|
+
PATTERNS,
|
|
287
|
+
USER_MESSAGES,
|
|
288
|
+
AUTO_RECOVER,
|
|
289
|
+
CODES,
|
|
290
|
+
};
|
package/lib/process-manager.js
CHANGED
|
@@ -26,9 +26,19 @@
|
|
|
26
26
|
*/
|
|
27
27
|
|
|
28
28
|
const { createInterface } = require('readline');
|
|
29
|
+
const { isTransientHttpError } = require('./error-classify');
|
|
29
30
|
|
|
30
31
|
const DEFAULT_CAP = 10;
|
|
31
32
|
const DEFAULT_KILL_TIMEOUT_MS = 3000;
|
|
33
|
+
// 0.7.7: transient HTTP retry. When Anthropic returns a 5xx (or 429
|
|
34
|
+
// rate-limit) and the turn produced ZERO assistant messages so far,
|
|
35
|
+
// pm sleeps and retries the user message ONCE before surfacing the
|
|
36
|
+
// error to the user. Matches OpenClaw's
|
|
37
|
+
// pi-embedded-Vt2x_Jl3.js:39210-39216 — "single retry, then surface".
|
|
38
|
+
// Idempotency-protected: we only retry if no assistant content has
|
|
39
|
+
// streamed (otherwise re-sending would replay tools that already ran).
|
|
40
|
+
const DEFAULT_TRANSIENT_RETRY_DELAY_MS = 2500;
|
|
41
|
+
const MAX_TRANSIENT_RETRIES = 1;
|
|
32
42
|
// 0.7.6 (item H): hard cap on per-session pending queue depth.
|
|
33
43
|
// Pre-fix, a chat with rapid-fire user messages (or a stuck Claude that
|
|
34
44
|
// stops emitting `result`) could grow pendingQueue unbounded — each
|
|
@@ -328,9 +338,14 @@ class ProcessManager {
|
|
|
328
338
|
// pending. Fire onFirstStream ONCE, regardless of whether the
|
|
329
339
|
// assistant message has text or only tool_use blocks (some turns
|
|
330
340
|
// emit tool_use first with no preamble).
|
|
331
|
-
|
|
332
|
-
&& event.message.content.some((b) => b?.type === 'tool_use'))
|
|
341
|
+
const hasAssistantContent = !!added || (Array.isArray(event.message?.content)
|
|
342
|
+
&& event.message.content.some((b) => b?.type === 'tool_use'));
|
|
343
|
+
if (hasAssistantContent) {
|
|
333
344
|
head.fireFirstStream?.();
|
|
345
|
+
// 0.7.7: any assistant content (text OR tool_use) disqualifies
|
|
346
|
+
// the turn from transient-retry — re-sending the user prompt
|
|
347
|
+
// after this point would replay tools that already executed.
|
|
348
|
+
head.firstAssistantSeen = true;
|
|
334
349
|
}
|
|
335
350
|
// 0.7.6 (item F): accumulate usage + counters for turn telemetry.
|
|
336
351
|
// The `result` event carries total_cost_usd + duration_ms but NOT
|
|
@@ -392,6 +407,66 @@ class ProcessManager {
|
|
|
392
407
|
}
|
|
393
408
|
|
|
394
409
|
if (event.type === 'result' && head) {
|
|
410
|
+
// 0.7.7: transient HTTP retry. If Anthropic returned a
|
|
411
|
+
// retryable error AND the turn produced ZERO assistant
|
|
412
|
+
// content yet AND we haven't already retried, sleep and
|
|
413
|
+
// re-write the prompt instead of resolving the pending.
|
|
414
|
+
// Idempotency: firstAssistantSeen guards against replaying
|
|
415
|
+
// tools that already ran.
|
|
416
|
+
const errSignal = event.error || event.subtype;
|
|
417
|
+
const isError = event.subtype !== 'success';
|
|
418
|
+
const shouldTransientRetry = isError
|
|
419
|
+
&& !head.firstAssistantSeen
|
|
420
|
+
&& head.transientRetries < MAX_TRANSIENT_RETRIES
|
|
421
|
+
&& head.prompt != null
|
|
422
|
+
&& isTransientHttpError({ message: errSignal, subtype: event.subtype });
|
|
423
|
+
if (shouldTransientRetry) {
|
|
424
|
+
head.transientRetries++;
|
|
425
|
+
this._logEvent('transient-retry', {
|
|
426
|
+
session_key: sessionKey,
|
|
427
|
+
chat_id: entry.chatId,
|
|
428
|
+
attempt: head.transientRetries,
|
|
429
|
+
subtype: event.subtype,
|
|
430
|
+
error: typeof errSignal === 'string' ? errSignal.slice(0, 200) : null,
|
|
431
|
+
});
|
|
432
|
+
// Reset accumulators so the retried turn's metrics aren't
|
|
433
|
+
// contaminated by the failed-turn's totals (usage on a
|
|
434
|
+
// failed turn IS billed but we surface it as a separate
|
|
435
|
+
// event-log entry rather than mixing into turn_metrics).
|
|
436
|
+
head.usageByMessage = new Map();
|
|
437
|
+
head.lastUsageMessageId = null;
|
|
438
|
+
head.toolUseCount = 0;
|
|
439
|
+
head.streamText = '';
|
|
440
|
+
head.lastAssistantMessageId = null;
|
|
441
|
+
// Re-arm idle timer (the old one is still ticking from the
|
|
442
|
+
// previous activate; resetIdleTimer just re-arms).
|
|
443
|
+
head.resetIdleTimer?.();
|
|
444
|
+
// Sleep then re-write. Keep the pending in-place; the next
|
|
445
|
+
// 'result' event resolves it normally (or hits the same
|
|
446
|
+
// retry path if MAX_TRANSIENT_RETRIES hadn't been
|
|
447
|
+
// exhausted, which after the increment above it has).
|
|
448
|
+
setTimeout(() => {
|
|
449
|
+
// Edge case: pending was killed/aborted during the
|
|
450
|
+
// retry sleep — process exited, queue drained, etc.
|
|
451
|
+
// Skip the re-write if pendingQueue no longer holds us.
|
|
452
|
+
if (entry.pendingQueue[0] !== head || entry.closed) return;
|
|
453
|
+
try {
|
|
454
|
+
entry.proc.stdin.write(JSON.stringify({
|
|
455
|
+
type: 'user',
|
|
456
|
+
message: { role: 'user', content: head.prompt },
|
|
457
|
+
}) + '\n');
|
|
458
|
+
} catch (err) {
|
|
459
|
+
// stdin write failed — fall back to surfacing the
|
|
460
|
+
// error. Mark as not-retried-anymore so we don't loop.
|
|
461
|
+
this.logger.error(`[${entry.label}] transient-retry stdin write failed: ${err.message}`);
|
|
462
|
+
entry.pendingQueue.shift();
|
|
463
|
+
head.clearTimers();
|
|
464
|
+
head.reject(err);
|
|
465
|
+
}
|
|
466
|
+
}, DEFAULT_TRANSIENT_RETRY_DELAY_MS);
|
|
467
|
+
return; // don't shift / resolve; wait for next result
|
|
468
|
+
}
|
|
469
|
+
|
|
395
470
|
entry.pendingQueue.shift();
|
|
396
471
|
head.clearTimers();
|
|
397
472
|
if (this.onResult) this.onResult(sessionKey, event, entry, head);
|
|
@@ -546,6 +621,15 @@ class ProcessManager {
|
|
|
546
621
|
// producing output, not when the pending becomes queue head
|
|
547
622
|
// (which can be ~hundreds of ms before the first token).
|
|
548
623
|
firstStreamFired: false,
|
|
624
|
+
// 0.7.7: transient-retry support. We hold the prompt so we can
|
|
625
|
+
// re-write it on transient 5xx/429 if zero assistant content
|
|
626
|
+
// streamed yet. firstAssistantSeen flips on first assistant
|
|
627
|
+
// event with non-empty content OR tool_use blocks — once true,
|
|
628
|
+
// retry is no longer idempotent (we'd replay executed tools)
|
|
629
|
+
// and pm surfaces the error instead.
|
|
630
|
+
prompt,
|
|
631
|
+
transientRetries: 0,
|
|
632
|
+
firstAssistantSeen: false,
|
|
549
633
|
};
|
|
550
634
|
|
|
551
635
|
pending.fireFirstStream = () => {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "polygram",
|
|
3
|
-
"version": "0.7.6",
|
|
3
|
+
"version": "0.7.8",
|
|
4
4
|
"description": "Telegram daemon for Claude Code that preserves the OpenClaw per-chat session model. Migration path for OpenClaw users moving to Claude Code.",
|
|
5
5
|
"main": "lib/ipc-client.js",
|
|
6
6
|
"bin": {
|
package/polygram.js
CHANGED
|
@@ -40,6 +40,7 @@ const { startTyping } = require('./lib/typing-indicator');
|
|
|
40
40
|
const { redactBotToken } = require('./lib/net-errors');
|
|
41
41
|
const { createReactionManager, classifyToolName } = require('./lib/status-reactions');
|
|
42
42
|
const { createMediaGroupBuffer } = require('./lib/media-group-buffer');
|
|
43
|
+
const { classify: classifyError, isTransientHttpError } = require('./lib/error-classify');
|
|
43
44
|
const {
|
|
44
45
|
createStore: createApprovalsStore,
|
|
45
46
|
matchesAnyPattern: matchesApprovalPattern,
|
|
@@ -842,30 +843,19 @@ let isShuttingDown = false;
|
|
|
842
843
|
// killed). Anything we don't recognise falls back to a generic line
|
|
843
844
|
// with a single-line snippet of the error so the user can at least
|
|
844
845
|
// distinguish unique failures from the obvious "try again" cases.
|
|
846
|
+
// 0.7.7: errorReplyText delegates to lib/error-classify.js so the
|
|
847
|
+
// regex tables live in one place and stay in sync with future SDK
|
|
848
|
+
// error subtypes (the 0.8.0 migration extends the classifier rather
|
|
849
|
+
// than adding more if-branches here).
|
|
850
|
+
//
|
|
851
|
+
// classify() returns { kind, userMessage, isTransient, autoRecover }.
|
|
852
|
+
// `userMessage: null` is a deliberate "suppress reply" signal —
|
|
853
|
+
// today only used by INTERRUPTED in the abort-grace window. Callers
|
|
854
|
+
// that already gate on isSessionRecentlyAborted will short-circuit
|
|
855
|
+
// before reaching here, but we honour `null` defensively.
|
|
845
856
|
function errorReplyText(err) {
|
|
846
|
-
const
|
|
847
|
-
|
|
848
|
-
// to grep error text. The dropped pending is OLDER than the current
|
|
849
|
-
// queue depth; its sender has likely sent more recent messages we're
|
|
850
|
-
// still working on. Tell them this one was skipped without making it
|
|
851
|
-
// sound like a crash.
|
|
852
|
-
if (err?.code === 'QUEUE_OVERFLOW') {
|
|
853
|
-
return '⏭ Couldn\'t keep up — this message was skipped while I was processing newer ones. Resend if it still matters.';
|
|
854
|
-
}
|
|
855
|
-
if (/idle with no Claude activity/i.test(msg)) {
|
|
856
|
-
return '⏳ I went quiet too long without finishing. Try resending or simplifying the task.';
|
|
857
|
-
}
|
|
858
|
-
if (/wall-clock ceiling/i.test(msg)) {
|
|
859
|
-
return '⏱ This was taking too long, so I stopped. Try resending or simplifying the task.';
|
|
860
|
-
}
|
|
861
|
-
if (/Process (exited|killed)/i.test(msg)) {
|
|
862
|
-
return '💥 Something crashed on my end. Try again.';
|
|
863
|
-
}
|
|
864
|
-
if (/error_during_execution/i.test(msg)) {
|
|
865
|
-
return '💥 Something went wrong mid-stream. Try again.';
|
|
866
|
-
}
|
|
867
|
-
const reason = msg.split('\n')[0].slice(0, 120);
|
|
868
|
-
return `Hit a snag: ${reason || 'unknown error'}. Try resending.`;
|
|
857
|
+
const { userMessage } = classifyError(err);
|
|
858
|
+
return userMessage; // may be null — caller must handle
|
|
869
859
|
}
|
|
870
860
|
|
|
871
861
|
// Sessions the operator just /stop'd (or natural-language "стоп"). Keyed
|
|
@@ -945,13 +935,21 @@ function dispatchHandleMessage(sessionKey, chatId, msg, bot) {
|
|
|
945
935
|
// re-dispatch it on next start)
|
|
946
936
|
// - user just /stop'd (already saw their abort acknowledgement)
|
|
947
937
|
if (!wasAborted && !isReplay && !isShuttingDown) {
|
|
948
|
-
|
|
949
|
-
|
|
950
|
-
|
|
951
|
-
|
|
952
|
-
|
|
953
|
-
|
|
954
|
-
|
|
938
|
+
// 0.7.7: errorReplyText may return null when the classifier
|
|
939
|
+
// says "suppress reply" (e.g. INTERRUPTED inside abort grace —
|
|
940
|
+
// user already saw their /stop ack). Skip the send call in
|
|
941
|
+
// that case rather than dispatching empty text (which would
|
|
942
|
+
// 400 at the lib/telegram.js empty-text guard added in 0.7.4).
|
|
943
|
+
const replyText = errorReplyText(err);
|
|
944
|
+
if (replyText) {
|
|
945
|
+
tg(bot, 'sendMessage', {
|
|
946
|
+
chat_id: chatId,
|
|
947
|
+
text: replyText,
|
|
948
|
+
reply_parameters: { message_id: msg.message_id },
|
|
949
|
+
}, { source: 'error-reply', botName: BOT_NAME }).catch((replyErr) => {
|
|
950
|
+
console.error(`[${sessionKey}] failed to send error reply: ${replyErr.message}`);
|
|
951
|
+
});
|
|
952
|
+
}
|
|
955
953
|
}
|
|
956
954
|
}).finally(() => {
|
|
957
955
|
const n = (inFlightHandlers.get(sessionKey) || 1) - 1;
|