polygram 0.6.16 → 0.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +1 -1
- package/README.md +1 -1
- package/lib/announces.js +150 -0
- package/lib/deliver.js +78 -0
- package/lib/net-errors.js +52 -3
- package/lib/process-manager.js +30 -6
- package/lib/sent-cache.js +119 -0
- package/lib/stream-reply.js +133 -17
- package/lib/telegram-chunk.js +288 -0
- package/lib/telegram-format.js +107 -1
- package/lib/telegram.js +159 -39
- package/package.json +1 -1
- package/polygram.js +177 -51
package/lib/stream-reply.js
CHANGED
|
@@ -1,15 +1,31 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Live streaming-reply state machine for a single turn.
|
|
3
3
|
*
|
|
4
|
-
* Lifecycle
|
|
4
|
+
* Lifecycle:
|
|
5
5
|
* idle -> (text >= minChars) -> live
|
|
6
6
|
* live -> (subsequent chunks) -> live (throttled edits)
|
|
7
|
-
*
|
|
7
|
+
* live -> flushDraft() -> live (drains pending edit)
|
|
8
|
+
* live -> forceNewMessage() -> idle (next chunk = new bubble)
|
|
9
|
+
* live -> discard() -> finalized (bubble deleted)
|
|
10
|
+
* any -> finalize(finalText) -> finalized
|
|
8
11
|
*
|
|
9
12
|
* The streamer never talks to Telegram directly — callers inject
|
|
10
|
-
* `send(text)`
|
|
11
|
-
* polygram.js in charge of transcript writes, sticker/reaction
|
|
12
|
-
* error handling; this module is just a cadence machine.
|
|
13
|
+
* `send(text)`, `edit(msg_id, text)`, and (optional) `deleteMessage(msg_id)`.
|
|
14
|
+
* That keeps polygram.js in charge of transcript writes, sticker/reaction
|
|
15
|
+
* routing, and error handling; this module is just a cadence machine.
|
|
16
|
+
*
|
|
17
|
+
* `finalize()` returns a rich result so the caller can decide whether the
|
|
18
|
+
* preview's last edit IS the final reply, or whether to discard the
|
|
19
|
+
* preview and redeliver via deliverReplies (overflow / final edit failed):
|
|
20
|
+
*
|
|
21
|
+
* (there is no explicit `kind` field — the outcome is implied by the flags below)
|
|
22
|
+
* { streamed: false } — never went live
|
|
23
|
+
* { streamed: true, finalEditOk: true } — preview = final
|
|
24
|
+
* { streamed: true, finalEditOk: false, overflow: true } — body too long
|
|
25
|
+
* { streamed: true, finalEditOk: false, overflow: false } — edit failed
|
|
26
|
+
*
|
|
27
|
+
* Short replies preview-becomes-final (no flicker, single bubble); long
|
|
28
|
+
* replies preview-deleted-redelivered (chunks land at chat bottom).
|
|
13
29
|
*
|
|
14
30
|
* Test-friendly: inject `clock` (now() fn) and `schedule` (setTimeout-like)
|
|
15
31
|
* so a fake clock can drive throttle timing deterministically.
|
|
@@ -25,6 +41,7 @@ const DEFAULT_THROTTLE_MS = 1000;
|
|
|
25
41
|
function createStreamer({
|
|
26
42
|
send, // async (text) -> { message_id }
|
|
27
43
|
edit, // async (msg_id, text) -> void
|
|
44
|
+
deleteMessage = null, // async (msg_id) -> void [optional]
|
|
28
45
|
minChars = DEFAULT_MIN_CHARS,
|
|
29
46
|
throttleMs = DEFAULT_THROTTLE_MS,
|
|
30
47
|
maxLen = 4096,
|
|
@@ -41,7 +58,12 @@ function createStreamer({
|
|
|
41
58
|
let pendingEdit = null; // timer id
|
|
42
59
|
let flushPromise = null; // ongoing edit promise (for back-pressure)
|
|
43
60
|
|
|
44
|
-
|
|
61
|
+
// LIVE-EDIT truncation only — used during streaming when latestText
|
|
62
|
+
// overshoots maxLen. The trailing "..." signals to the user that more
|
|
63
|
+
// is coming. Finalize doesn't truncate: overflow is handled by
|
|
64
|
+
// signalling the caller to discard-and-redeliver via chunkMarkdownText,
|
|
65
|
+
// which preserves all content without any byte-cut.
|
|
66
|
+
// Clamp `s` to at most `maxLen` UTF-16 code units for a live (streaming)
// edit. Text that already fits is returned untouched; longer text is cut and
// suffixed with "..." so the reader can tell more is coming.
//
// The cut never lands between the two halves of a surrogate pair: a lone
// high surrogate is not valid text and would put a broken character at the
// end of the bubble. When `maxLen` is smaller than the ellipsis
// itself, the result is still clamped to `maxLen`.
function truncateForLive(s) {
  if (s.length <= maxLen) return s;
  const ellipsis = '...';
  let cut = Math.max(0, maxLen - ellipsis.length);
  if (cut > 0) {
    const lastUnit = s.charCodeAt(cut - 1);
    const isHighSurrogate = lastUnit >= 0xd800 && lastUnit <= 0xdbff;
    // Drop the orphaned high surrogate rather than emit half a character.
    if (isHighSurrogate) cut -= 1;
  }
  return (s.slice(0, cut) + ellipsis).slice(0, Math.max(0, maxLen));
}
|
|
@@ -56,7 +78,7 @@ function createStreamer({
|
|
|
56
78
|
if (state === 'idle') {
|
|
57
79
|
if (text.length < minChars) return;
|
|
58
80
|
state = 'live';
|
|
59
|
-
currentText =
|
|
81
|
+
currentText = truncateForLive(text);
|
|
60
82
|
try {
|
|
61
83
|
const res = await send(currentText);
|
|
62
84
|
msgId = res?.message_id ?? null;
|
|
@@ -90,7 +112,7 @@ function createStreamer({
|
|
|
90
112
|
async function flush() {
|
|
91
113
|
pendingEdit = null;
|
|
92
114
|
if (state !== 'live' || msgId == null) return;
|
|
93
|
-
const next =
|
|
115
|
+
const next = truncateForLive(latestText);
|
|
94
116
|
if (next === currentText) return;
|
|
95
117
|
lastEditTs = clock();
|
|
96
118
|
currentText = next;
|
|
@@ -98,38 +120,132 @@ function createStreamer({
|
|
|
98
120
|
flushPromise = edit(msgId, currentText);
|
|
99
121
|
await flushPromise;
|
|
100
122
|
} catch (err) {
|
|
101
|
-
// Non-fatal — maybe 429. Log and keep going; next
|
|
123
|
+
// Non-fatal — maybe 429 or transient. Log and keep going; next
|
|
124
|
+
// chunk will retry. The HTML→plain fallback in lib/telegram.js
|
|
125
|
+
// already handles the most common cause (parse error from
|
|
126
|
+
// truncate cutting mid-tag).
|
|
102
127
|
logger.error(`[stream] edit failed: ${err.message}`);
|
|
103
128
|
} finally {
|
|
104
129
|
flushPromise = null;
|
|
105
130
|
}
|
|
106
131
|
}
|
|
107
132
|
|
|
133
|
+
// 0.7.0: explicitly drain any pending edit. Useful when the caller
|
|
134
|
+
// is about to make a finalize/discard decision and wants the bubble's
|
|
135
|
+
// visual state to be accurate (no stale half-rendered text under a
|
|
136
|
+
// pending timer).
|
|
137
|
+
async function flushDraft() {
|
|
138
|
+
if (pendingEdit) { cancel(pendingEdit); pendingEdit = null; await flush(); }
|
|
139
|
+
if (flushPromise) { try { await flushPromise; } catch {} }
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
// Reset bubble state so the next onChunk creates a NEW message.
|
|
143
|
+
// Used by `onAssistantMessageStart` in process-manager.js when Claude
|
|
144
|
+
// emits a new top-level assistant message mid-turn (post tool-result):
|
|
145
|
+
// we want it in its own bubble below the previous one, not appended
|
|
146
|
+
// via editMessageText to the original.
|
|
147
|
+
function forceNewMessage() {
|
|
148
|
+
if (pendingEdit) { cancel(pendingEdit); pendingEdit = null; }
|
|
149
|
+
// Don't await flushPromise — the caller has decided to start a new
|
|
150
|
+
// message; whatever the old bubble shows is "done".
|
|
151
|
+
msgId = null;
|
|
152
|
+
currentText = '';
|
|
153
|
+
latestText = '';
|
|
154
|
+
state = 'idle';
|
|
155
|
+
lastEditTs = 0;
|
|
156
|
+
}
|
|
157
|
+
|
|
158
|
+
// 0.7.0: delete the current bubble via the injected deleteMessage
|
|
159
|
+
// callback. Used when the final reply overflows the preview's single-
|
|
160
|
+
// message capacity, so handleMessage will discard the preview and
|
|
161
|
+
// redeliver via deliverReplies (chunks land at chat bottom).
|
|
162
|
+
//
|
|
163
|
+
// Works whether state is 'live' OR 'finalized' — handleMessage's
|
|
164
|
+
// typical flow is finalize() → finalEditOk false → discard. The
|
|
165
|
+
// bubble's msgId is preserved through finalize so we can still
|
|
166
|
+
// delete it. If deleteMessage isn't provided, we just transition
|
|
167
|
+
// state without touching Telegram — the bubble stays at its last
|
|
168
|
+
// edited content, becoming a vestigial "head" of the conversation.
|
|
169
|
+
async function discard() {
|
|
170
|
+
if (pendingEdit) { cancel(pendingEdit); pendingEdit = null; }
|
|
171
|
+
if (flushPromise) { try { await flushPromise; } catch {} }
|
|
172
|
+
const idToDelete = msgId;
|
|
173
|
+
state = 'finalized';
|
|
174
|
+
msgId = null;
|
|
175
|
+
let deleted = false;
|
|
176
|
+
if (idToDelete && typeof deleteMessage === 'function') {
|
|
177
|
+
try {
|
|
178
|
+
await deleteMessage(idToDelete);
|
|
179
|
+
deleted = true;
|
|
180
|
+
} catch (err) {
|
|
181
|
+
// Telegram rejects deletions of messages older than 48h or
|
|
182
|
+
// already-deleted ones. Non-fatal — the redelivery happens
|
|
183
|
+
// either way.
|
|
184
|
+
logger.warn?.(`[stream] discard deleteMessage failed: ${err.message}`);
|
|
185
|
+
}
|
|
186
|
+
}
|
|
187
|
+
return { msgId: idToDelete, deleted };
|
|
188
|
+
}
|
|
189
|
+
|
|
190
|
+
// 0.7.0: snapshot for callers that want to track the bubble's id
|
|
191
|
+
// for later cleanup (e.g. archive a superseded preview when
|
|
192
|
+
// forceNewMessage was called and the previous bubble should be
|
|
193
|
+
// deleted at end-of-turn).
|
|
194
|
+
function archive() {
|
|
195
|
+
return { msgId, currentText };
|
|
196
|
+
}
|
|
197
|
+
|
|
198
|
+
// 0.7.0: rich result. `finalEditOk` tells caller whether the preview
|
|
199
|
+
// can stand as the final reply (true) or needs to be replaced via
|
|
200
|
+
// discard + deliverReplies (false). `overflow` is the one specific
|
|
201
|
+
// reason: body wouldn't fit in a single Telegram message.
|
|
108
202
|
async function finalize(finalText, { errorSuffix = null } = {}) {
|
|
109
|
-
if (state === 'finalized') return { streamed: false, msgId };
|
|
203
|
+
if (state === 'finalized') return { streamed: false, msgId, finalEditOk: false, overflow: false };
|
|
110
204
|
if (pendingEdit) { cancel(pendingEdit); pendingEdit = null; }
|
|
111
205
|
if (flushPromise) { try { await flushPromise; } catch {} }
|
|
112
206
|
|
|
113
207
|
if (state === 'idle') {
|
|
114
208
|
state = 'finalized';
|
|
115
|
-
return { streamed: false, msgId: null };
|
|
209
|
+
return { streamed: false, msgId: null, finalEditOk: false, overflow: false };
|
|
116
210
|
}
|
|
117
211
|
|
|
118
|
-
// live → finalize
|
|
212
|
+
// live → finalize.
|
|
119
213
|
state = 'finalized';
|
|
120
214
|
let body = finalText ?? latestText;
|
|
121
215
|
if (errorSuffix) body = `${body}\n\n⚠️ ${errorSuffix}`;
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
216
|
+
|
|
217
|
+
// If body overflows the single-message cap, the caller needs to
|
|
218
|
+
// discard this bubble and redeliver via chunks. Don't try to edit.
|
|
219
|
+
if (body.length > maxLen) {
|
|
220
|
+
return { streamed: true, msgId, finalText: body, finalEditOk: false, overflow: true };
|
|
221
|
+
}
|
|
222
|
+
|
|
223
|
+
// Body fits. Try one last edit to bring the bubble to the final
|
|
224
|
+
// text. If that succeeds, preview-IS-final and caller can return
|
|
225
|
+
// without redelivering. If it fails (e.g. parse error after our
|
|
226
|
+
// wrapper exhausts its retry, or a 5xx), caller should discard
|
|
227
|
+
// and redeliver — the bubble's content is unreliable.
|
|
228
|
+
if (body === currentText) {
|
|
229
|
+
// Already correct — no edit needed.
|
|
230
|
+
return { streamed: true, msgId, finalText: body, finalEditOk: true, overflow: false };
|
|
231
|
+
}
|
|
232
|
+
try {
|
|
233
|
+
await edit(msgId, body);
|
|
234
|
+
currentText = body;
|
|
235
|
+
return { streamed: true, msgId, finalText: body, finalEditOk: true, overflow: false };
|
|
236
|
+
} catch (err) {
|
|
237
|
+
logger.error(`[stream] final edit failed: ${err.message}`);
|
|
238
|
+
return { streamed: true, msgId, finalText: body, finalEditOk: false, overflow: false };
|
|
126
239
|
}
|
|
127
|
-
return { streamed: true, msgId, finalText: next };
|
|
128
240
|
}
|
|
129
241
|
|
|
130
242
|
return {
|
|
131
243
|
onChunk,
|
|
132
244
|
finalize,
|
|
245
|
+
flushDraft,
|
|
246
|
+
forceNewMessage,
|
|
247
|
+
discard,
|
|
248
|
+
archive,
|
|
133
249
|
// Introspection for tests:
|
|
134
250
|
get state() { return state; },
|
|
135
251
|
get msgId() { return msgId; },
|
|
@@ -0,0 +1,288 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Markdown-aware chunking for Telegram-bound replies.
|
|
3
|
+
*
|
|
4
|
+
* Direct port of OpenClaw's chunkMarkdownText. The naive byte-cut
|
|
5
|
+
* chunker we shipped before this would land boundaries mid-word and
|
|
6
|
+
* mid-HTML-tag, which Telegram's parse_mode=HTML rejected with
|
|
7
|
+
* `400 can't parse entities` — bubbles froze and content got dropped.
|
|
8
|
+
*
|
|
9
|
+
* Guarantees:
|
|
10
|
+
*
|
|
11
|
+
* 1. No chunk exceeds `limit`.
|
|
12
|
+
* 2. Breaks prefer newlines over whitespace over hard-cut.
|
|
13
|
+
* 3. Code fences (```...```) are never broken silently — if a chunk
|
|
14
|
+
* would land inside a fence, we close it on chunk N and re-open
|
|
15
|
+
* with the same marker + language on chunk N+1, so each chunk is
|
|
16
|
+
* independently parseable.
|
|
17
|
+
* 4. Parenthesised expressions `(...)` aren't broken at whitespace
|
|
18
|
+
* inside the parens (avoids splitting markdown-link syntax like
|
|
19
|
+
* `[label](http://example.com/...)`).
|
|
20
|
+
*
|
|
21
|
+
* Plain `chunkText` (no fence handling) is exported for callers that
|
|
22
|
+
* already know the input has no markdown — primarily code paths
|
|
23
|
+
* handling raw user input echoes or non-text payloads.
|
|
24
|
+
*/
|
|
25
|
+
|
|
26
|
+
// ─── Code-fence span detection ──────────────────────────────────────
|
|
27
|
+
|
|
28
|
+
// Scan `buffer` line by line for fenced code blocks (``` or ~~~) and return
// one span per fence, in document order:
//
//   { start, end, openLine, marker, indent }
//
// `start` is the offset of the opening line, `end` the offset just past the
// closing marker line (its newline excluded). `openLine` is the full opening
// line (marker + info string), `marker` the run of fence characters and
// `indent` the leading spaces. An unclosed fence at end-of-input is reported
// as spanning to `buffer.length`, so a later break-point inside it still
// knows it is "in fence".
//
// Fence recognition follows CommonMark:
//   - opener: up to 3 spaces of indent, then 3+ backticks or tildes. The
//     info string of a BACKTICK fence may not contain a backtick (otherwise
//     the line is inline code such as "```x``` text", not a fence).
//   - closer: same character as the opener, at least as many of them, and
//     nothing but spaces/tabs after it. A line like "```js" inside an open
//     fence is body text, not a closer.
// A trailing "\r" (CRLF input) is ignored when matching a line.
function parseFenceSpans(buffer) {
  const spans = [];
  let open;
  let offset = 0;
  while (offset <= buffer.length) {
    const nextNewline = buffer.indexOf('\n', offset);
    const lineEnd = nextNewline === -1 ? buffer.length : nextNewline;
    const rawLine = buffer.slice(offset, lineEnd);
    // `.` never matches "\r", so strip it or CRLF fences would go unseen.
    const line = rawLine.endsWith('\r') ? rawLine.slice(0, -1) : rawLine;
    const match = /^( {0,3})(`{3,}|~{3,})(.*)$/.exec(line);
    if (match) {
      const [, indent, marker, info] = match;
      const markerChar = marker[0];
      const markerLen = marker.length;
      if (!open) {
        const isInlineCode = markerChar === '`' && info.includes('`');
        if (!isInlineCode) {
          open = { start: offset, markerChar, markerLen, openLine: line, marker, indent };
        }
      } else if (
        open.markerChar === markerChar
        && markerLen >= open.markerLen
        && /^[ \t]*$/.test(info)
      ) {
        // Different-char, shorter, or info-carrying marker lines are body.
        spans.push({
          start: open.start,
          end: lineEnd,
          openLine: open.openLine,
          marker: open.marker,
          indent: open.indent,
        });
        open = undefined;
      }
    }
    if (nextNewline === -1) break;
    offset = nextNewline + 1;
  }
  if (open) {
    // Unclosed at EOF — treat as spanning to the end of the buffer.
    spans.push({
      start: open.start,
      end: buffer.length,
      openLine: open.openLine,
      marker: open.marker,
      indent: open.indent,
    });
  }
  return spans;
}
|
|
74
|
+
|
|
75
|
+
// Return the first fence span that strictly contains `index`, or undefined
// when `index` is outside every span. Both bounds are exclusive on purpose:
// a break exactly at `start` sits just before the opening fence and a break
// exactly at `end` sits just after the closing one — both are safe.
function findFenceSpanAt(spans, index) {
  for (const candidate of spans) {
    const isStrictlyInside = candidate.start < index && index < candidate.end;
    if (isStrictlyInside) return candidate;
  }
  return undefined;
}
|
|
80
|
+
|
|
81
|
+
// True when breaking at `index` would NOT land strictly inside any fence
// span, i.e. the break leaves every code fence intact.
function isSafeFenceBreak(spans, index) {
  const enclosingFence = findFenceSpanAt(spans, index);
  return enclosingFence ? false : true;
}
|
|
84
|
+
|
|
85
|
+
// ─── Paren-aware break-point scan ───────────────────────────────────
|
|
86
|
+
|
|
87
|
+
// Locate the last newline and the last other whitespace character in
// `window` that sit outside any `(...)` group. Shared by the plain and the
// markdown chunkers.
//
// `isAllowed(i)` is asked about every index before the character there is
// looked at; a disallowed index is skipped completely (it neither counts as
// a break candidate nor changes the paren depth). The markdown chunker uses
// this to ignore positions inside fence spans.
//
// Returns `{ lastNewline, lastWhitespace }`; each is -1 when nothing matched.
function scanParenAwareBreakpoints(window, isAllowed = () => true) {
  let newlineAt = -1;
  let whitespaceAt = -1;
  let parenDepth = 0;
  for (let pos = 0; pos < window.length; pos += 1) {
    if (!isAllowed(pos)) continue;
    const ch = window[pos];
    if (ch === '(') {
      parenDepth += 1;
    } else if (ch === ')' && parenDepth > 0) {
      parenDepth -= 1;
    } else if (parenDepth === 0) {
      // An unmatched ")" at depth 0 lands here and is simply not a break.
      if (ch === '\n') {
        newlineAt = pos;
      } else if (/\s/.test(ch)) {
        whitespaceAt = pos;
      }
    }
  }
  return { lastNewline: newlineAt, lastWhitespace: whitespaceAt };
}
|
|
107
|
+
|
|
108
|
+
// ─── Chunkers ────────────────────────────────────────────────────────
|
|
109
|
+
|
|
110
|
+
// Shared pre-flight for the chunkers. Returns the final chunk list when no
// real chunking is needed, or `undefined` when the caller must do the work.
//
//   - limit not a finite number > 0  -> throws RangeError. A bad limit is a
//     programmer/config error; quietly returning [text] would let an
//     over-long body through, which is what the chunker exists to prevent.
//   - text null / undefined / ''     -> []
//   - text not a string              -> throws TypeError
//   - text fits within limit         -> [text]
//   - otherwise                      -> undefined
function resolveChunkEarlyReturn(text, limit) {
  const limitIsUsable = typeof limit === 'number' && Number.isFinite(limit) && limit > 0;
  if (!limitIsUsable) {
    throw new RangeError(`chunk limit must be a positive number; got ${limit}`);
  }
  const isEmpty = text === null || text === undefined || text === '';
  if (isEmpty) return [];
  if (typeof text !== 'string') {
    throw new TypeError(`chunk text must be a string; got ${typeof text}`);
  }
  return text.length > limit ? undefined : [text];
}
|
|
126
|
+
|
|
127
|
+
// Split `text` into pieces of at most `limit` characters, asking
// `resolveBreakIndex(window)` where to cut each time. `window` is the first
// `limit` characters of what is still unsent. Any answer that is not a
// finite number in (0, limit] is ignored and the cut falls back to a hard
// cut at `limit`.
//
// Each piece is right-trimmed and empty pieces are dropped; the remainder is
// left-trimmed, so the whitespace a break landed on never starts the next
// piece.
function chunkTextByBreakResolver(text, limit, resolveBreakIndex) {
  if (!text) return [];
  if (text.length <= limit) return [text];
  const pieces = [];
  let rest = text;
  while (rest.length > limit) {
    const proposed = resolveBreakIndex(rest.slice(0, limit));
    const isUsable = Number.isFinite(proposed) && proposed > 0 && proposed <= limit;
    const cutAt = isUsable ? proposed : limit;
    const piece = rest.slice(0, cutAt).trimEnd();
    if (piece.length > 0) pieces.push(piece);
    // When the cut sits on a whitespace separator, step over it.
    const cutOnSeparator = cutAt < rest.length && /\s/.test(rest[cutAt]);
    const resumeAt = Math.min(rest.length, cutOnSeparator ? cutAt + 1 : cutAt);
    rest = rest.slice(resumeAt).trimStart();
  }
  if (rest.length) pieces.push(rest);
  return pieces;
}
|
|
151
|
+
|
|
152
|
+
// Plain-text chunker. Prefers to break at the last newline of each window,
// then at the last other whitespace, both only outside `(...)` groups; with
// neither available the shared loop hard-cuts at `limit`. Code fences are
// NOT considered, which makes this cheaper than chunkMarkdownText for input
// the caller knows contains no code blocks.
//
// Throws RangeError / TypeError for an invalid `limit` / non-string `text`
// (see resolveChunkEarlyReturn).
function chunkText(text, limit) {
  const shortcut = resolveChunkEarlyReturn(text, limit);
  if (shortcut !== undefined) return shortcut;
  const pickBreak = (window) => {
    const marks = scanParenAwareBreakpoints(window);
    if (marks.lastNewline > 0) return marks.lastNewline;
    return marks.lastWhitespace;
  };
  return chunkTextByBreakResolver(text, limit, pickBreak);
}
|
|
162
|
+
|
|
163
|
+
// Remove every "\n" at the very start of `value` (and nothing else — spaces
// and "\r" are kept). Applied to the remainder after a chunk break so the
// next bubble does not open with blank lines.
function stripLeadingNewlines(value) {
  return value.replace(/^\n+/, '');
}
|
|
170
|
+
|
|
171
|
+
// Choose a soft break inside `window` that does not cut a code fence.
// Candidates are restricted to indices outside every span in `spans` (and,
// via scanParenAwareBreakpoints, outside `(...)` groups). The last newline
// is preferred, then the last other whitespace. Returns -1 when neither is
// found at an index > 0, leaving the hard-cut decision to the caller.
function pickSafeBreakIndex(window, spans) {
  const outsideFence = (index) => isSafeFenceBreak(spans, index);
  const marks = scanParenAwareBreakpoints(window, outsideFence);
  for (const candidate of [marks.lastNewline, marks.lastWhitespace]) {
    if (candidate > 0) return candidate;
  }
  return -1;
}
|
|
182
|
+
|
|
183
|
+
// Markdown-aware chunker: splits `text` into chunks of at most `limit`
// characters without silently breaking code fences. This replaces the
// earlier `lastIndexOf('\n', maxLen)` chunker.
//
// Per iteration, while the remainder is longer than `limit`:
//   1. Parse the fence spans of the remainder.
//   2. Pick the best soft break (newline > whitespace) within the first
//      `limit` characters that is NOT inside a fence; with none available,
//      tentatively hard-cut at `limit`.
//   3. If that break is inside a fence, look backwards for a newline in the
//      fence body that is past the opening line (so progress is made) and
//      early enough for the closing marker to fit. Failing that, force a
//      break that leaves room for "\n" + closing marker.
//   4. If the fence can be split, close it on this chunk and reopen it with
//      the original opening line (marker + language) on the next one, so
//      each chunk parses on its own. Fence content is carried over verbatim:
//      indentation at the start of the next code line is never consumed.
//   5. If the fence cannot be split within `limit` (limit smaller than the
//      closing marker, or the fence opens too close to the limit), give up
//      on splitting and hard-cut at `limit`. The chunk is then malformed
//      markdown, but it never exceeds `limit` and the loop always advances.
//
// Throws RangeError / TypeError for an invalid `limit` / non-string `text`
// (see resolveChunkEarlyReturn).
function chunkMarkdownText(text, limit) {
  const early = resolveChunkEarlyReturn(text, limit);
  if (early !== undefined) return early;
  const chunks = [];
  let remaining = text;
  while (remaining.length > limit) {
    const spans = parseFenceSpans(remaining);
    const softBreak = pickSafeBreakIndex(remaining.slice(0, limit), spans);
    let breakIdx = softBreak > 0 ? softBreak : limit;
    const initialFence = findFenceSpanAt(spans, breakIdx);
    let fenceToSplit;
    if (initialFence) {
      const closeLine = `${initialFence.indent}${initialFence.marker}`;
      const maxIdxIfNeedNewline = limit - (closeLine.length + 1); // room for "\n" + close
      const maxIdxIfAlreadyNewline = limit - closeLine.length;
      // Past the open line plus at least one body character.
      const minProgressIdx = Math.min(
        remaining.length,
        initialFence.start + initialFence.openLine.length + 2,
      );
      // False once we decide the fence cannot be split within `limit`.
      let canSplit = maxIdxIfNeedNewline > 0;
      if (canSplit) {
        let pickedNewline = false;
        let lastNewline = remaining.lastIndexOf('\n', Math.max(0, maxIdxIfAlreadyNewline - 1));
        while (lastNewline !== -1) {
          const candidateBreak = lastNewline + 1;
          if (candidateBreak < minProgressIdx) break;
          const candidateFence = findFenceSpanAt(spans, candidateBreak);
          if (candidateFence && candidateFence.start === initialFence.start) {
            breakIdx = Math.max(1, candidateBreak);
            pickedNewline = true;
            break;
          }
          lastNewline = remaining.lastIndexOf('\n', lastNewline - 1);
        }
        if (!pickedNewline) {
          if (minProgressIdx > maxIdxIfAlreadyNewline) {
            // No usable in-fence newline and no room to add one.
            canSplit = false;
          } else {
            // Force the break; a synthetic newline precedes the close line.
            breakIdx = Math.max(minProgressIdx, maxIdxIfNeedNewline);
          }
        }
      }
      if (canSplit) {
        // Only split if the adjusted index is still inside the same fence.
        const fenceAtBreak = findFenceSpanAt(spans, breakIdx);
        if (fenceAtBreak && fenceAtBreak.start === initialFence.start) {
          fenceToSplit = fenceAtBreak;
        }
      } else {
        // Give up on splitting. This decision must not be re-evaluated:
        // closing the fence here would push the chunk past `limit`.
        breakIdx = limit;
      }
    }

    let rawChunk = remaining.slice(0, breakIdx);
    if (!rawChunk) break;

    let reopenLine;
    let addedNewline = false;
    if (fenceToSplit) {
      const closeLine = `${fenceToSplit.indent}${fenceToSplit.marker}`;
      addedNewline = !rawChunk.endsWith('\n');
      const closedChunk = addedNewline ? `${rawChunk}\n${closeLine}` : `${rawChunk}${closeLine}`;
      // Final guard for "no chunk exceeds limit": split only if it fits.
      if (closedChunk.length <= limit) {
        rawChunk = closedChunk;
        reopenLine = fenceToSplit.openLine;
      }
    }

    let next;
    if (reopenLine !== undefined) {
      // Keep fence content verbatim. The only character dropped is a "\n"
      // sitting exactly at the break when we already synthesised one above.
      const skip = addedNewline && remaining[breakIdx] === '\n' ? 1 : 0;
      next = `${reopenLine}\n${remaining.slice(breakIdx + skip)}`;
    } else {
      // Outside a split: consume the separator we broke on, then drop stray
      // leading newlines so the next chunk doesn't open with blank lines.
      const brokeOnSeparator = breakIdx < remaining.length && /\s/.test(remaining[breakIdx]);
      const nextStart = Math.min(remaining.length, breakIdx + (brokeOnSeparator ? 1 : 0));
      next = stripLeadingNewlines(remaining.slice(nextStart));
    }
    chunks.push(rawChunk);
    remaining = next;
  }
  if (remaining.length) chunks.push(remaining);
  return chunks;
}
|
|
281
|
+
|
|
282
|
+
module.exports = {
|
|
283
|
+
chunkText,
|
|
284
|
+
chunkMarkdownText,
|
|
285
|
+
// Internals exported for tests; not part of the stable API.
|
|
286
|
+
parseFenceSpans,
|
|
287
|
+
scanParenAwareBreakpoints,
|
|
288
|
+
};
|
package/lib/telegram-format.js
CHANGED
|
@@ -272,4 +272,110 @@ function toTelegramHtml(text) {
|
|
|
272
272
|
|
|
273
273
|
function toTelegramMarkdown(text) { return toTelegramHtml(text); }
|
|
274
274
|
|
|
275
|
-
|
|
275
|
+
// 0.7.0 (Phase K) audit note: polygram's `toTelegramHtml` is functionally
|
|
276
|
+
// aligned with OpenClaw's `markdownToTelegramHtml` + `renderTelegramHtmlText`
|
|
277
|
+
// (extensions/telegram, send.ts:828-898). Both produce parse_mode=HTML
|
|
278
|
+
// output, both run `wrapFileReferencesInHtml` as a post-processor, and
|
|
279
|
+
// both handle the same set of formatting features (bold, italic, code,
|
|
280
|
+
// pre/fence, links, lists, spoilers, blockquotes, tables).
|
|
281
|
+
//
|
|
282
|
+
// OpenClaw uses an internal markdown-IR (markdownToIR → renderTelegramHtml);
|
|
283
|
+
// polygram uses `marked` + custom renderers. Both work; the IR approach
|
|
284
|
+
// is more amenable to multi-output (HTML / plain / Slack), but polygram
|
|
285
|
+
// only needs HTML so the regex-based path is simpler.
|
|
286
|
+
//
|
|
287
|
+
// The HTML→plain fallback shipped in 0.7.0 phase 2 makes converter
|
|
288
|
+
// correctness less load-bearing: if any edge case slips through and
|
|
289
|
+
// Telegram rejects the HTML, we automatically retry as plain text and
|
|
290
|
+
// no content is lost.
|
|
291
|
+
|
|
292
|
+
// ─── Telegram error classification ──────────────────────────────────
|
|
293
|
+
|
|
294
|
+
// Ported from OpenClaw (`send-DVX_zY9w.js:1075-1077`). HTML parse errors
// fire when our markdown→HTML conversion produces malformed output (e.g.
// the streamer's truncate cut mid-tag pre-0.7.0). Caller reacts by
// retrying the same call as plain text without parse_mode.
const HTML_PARSE_ERR_RE = /can't parse entities|parse entities|find end of the entity/i;

// 'message is not modified' fires when an editMessageText payload
// matches the current message text exactly. Not a real failure — the
// streamer's debounced edit just happened to land on a no-op. Swallow
// to keep error logs clean. The phrase is unique enough that we don't
// need to guard with a 400 prefix (grammy strips the status code in
// some error shapes).
const MESSAGE_NOT_MODIFIED_RE = /message is not modified|MESSAGE_NOT_MODIFIED/i;

// grammy attaches the underlying API error in different shapes depending on
// transport. Returns the first non-empty of `description`, `message`,
// `error_message`, else the stringified error itself; '' for a falsy `err`.
function errorMessage(err) {
  if (!err) return '';
  return String(err.description || err.message || err.error_message || err);
}

// True when `err` reads like Telegram's "can't parse entities" family.
function isHtmlParseError(err) {
  return HTML_PARSE_ERR_RE.test(errorMessage(err));
}

// True when `err` is Telegram's harmless "message is not modified" reply.
function isMessageNotModifiedError(err) {
  return MESSAGE_NOT_MODIFIED_RE.test(errorMessage(err));
}

// 0.7.0 (Phase G): 429 rate-limit detection + retry_after extraction.
// Telegram returns "Too Many Requests: retry after N" (HTTP 429) when the
// per-bot rate limit is hit (~30 req/s). N is in seconds. grammy attaches
// retry_after as `err.parameters.retry_after` in some shapes; otherwise
// we parse it out of the message string.
//
// The status code is matched as a whole number only (`\b429\b`): a bare
// /429/ also fires on unrelated digit runs such as chat ids or bot tokens
// ("chat -1001429000 not found") and would trigger needless back-off.
const RATE_LIMIT_RE = /too many requests|\b429\b|retry after \d+/i;

// True when `err` is a rate-limit (HTTP 429) failure. The structured
// `error_code` is checked first; the message text is the fallback.
function isRateLimitError(err) {
  if (!err) return false;
  if (err.error_code === 429) return true;
  return RATE_LIMIT_RE.test(errorMessage(err));
}

// Returns retry_after in milliseconds, or null if not a 429 / not parseable.
// Negative values are clamped to 0.
function getRetryAfterMs(err) {
  if (!err) return null;
  // grammy / Telegram Bot API: err.parameters.retry_after (seconds)
  const fromParams = err.parameters?.retry_after ?? err.error_parameters?.retry_after;
  if (typeof fromParams === 'number' && Number.isFinite(fromParams)) {
    return Math.max(0, fromParams * 1000);
  }
  // Fall back to parsing the message: "retry after 5" / "retry after 12 seconds"
  const m = errorMessage(err).match(/retry after (\d+)/i);
  if (m) return Math.max(0, Number.parseInt(m[1], 10) * 1000);
  return null;
}
|
|
349
|
+
|
|
350
|
+
// ─── Caption length policy ──────────────────────────────────────────
|
|
351
|
+
|
|
352
|
+
// Telegram limits captions on sendPhoto / sendVideo / sendAudio /
// sendDocument to 1024 chars (sendMessage allows 4096). Following OpenClaw,
// a caption that would exceed the cap is not split: the media goes out with
// NO caption and the whole text follows as a separate sendMessage. That
// avoids fragmenting one description across the media bubble and a
// follow-up. Kept here for any future skill that sends media with rich text.
const TELEGRAM_MAX_CAPTION_LENGTH = 1024;

// Decide where `text` goes for a media send. Returns
// `{ caption, followUpText }` with at most one of the two set:
//   - empty / whitespace-only / missing text -> both undefined
//   - trimmed text within the cap             -> caption
//   - trimmed text over the cap               -> followUpText
function splitTelegramCaption(text) {
  const body = (text || '').trim();
  const placement = { caption: undefined, followUpText: undefined };
  if (!body) return placement;
  if (body.length > TELEGRAM_MAX_CAPTION_LENGTH) {
    placement.followUpText = body;
  } else {
    placement.caption = body;
  }
  return placement;
}
|
|
369
|
+
|
|
370
|
+
module.exports = {
|
|
371
|
+
toTelegramMarkdown,
|
|
372
|
+
toTelegramHtml,
|
|
373
|
+
wrapFileReferencesInHtml,
|
|
374
|
+
escapeHtml,
|
|
375
|
+
isHtmlParseError,
|
|
376
|
+
isMessageNotModifiedError,
|
|
377
|
+
splitTelegramCaption,
|
|
378
|
+
TELEGRAM_MAX_CAPTION_LENGTH,
|
|
379
|
+
isRateLimitError,
|
|
380
|
+
getRetryAfterMs,
|
|
381
|
+
};
|