@nubemclaw/channel-telegram 1.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,351 @@
1
+ import type { ChannelLogger } from "@nubemclaw/channel-sdk";
2
+ import { Agent, ProxyAgent, fetch as undiciFetch } from "undici";
3
+
4
+ /**
5
+ * Telegram operational transport (Phase 13, task #144 v2).
6
+ *
7
+ * Port of the operational hardening from OpenClaw
8
+ * `extensions/telegram/src/fetch.ts:862`. Telegram long-polling keeps
9
+ * connections to `api.telegram.org` hot for hours; the default global
10
+ * `fetch`/undici behavior is wrong for this access pattern in three
11
+ * concrete ways the OpenClaw codebase documented through real
12
+ * production incidents (openclaw#68128 is cited verbatim in
13
+ * fetch.ts:37-39):
14
+ *
15
+ * 1. **HTTP/2 ALPN stalls long-polling.** Undici 8 enables HTTP/2
16
+ * negotiation by default. The Telegram Bot API serves both /1.1
17
+ * and /2, but the /2 stream behavior interacts badly with the
18
+ * long-poll on Windows/IPv6 networks — connections hang past the
19
+ * `getUpdates(timeout=N)` budget. `allowH2: false` pins HTTP/1.1.
20
+ *
21
+ * 2. **Unbounded connection pool leaks sockets.** The default Agent
22
+ * has no per-origin connection limit and an effectively
23
+ * unbounded `keepAliveTimeout`. Long polling rotates dispatchers
24
+ * slowly; the leak grows monotonically until file descriptors
25
+ * exhaust. Pinning `connections: 10`, `keepAliveTimeout: 30s` and
26
+ * `keepAliveMaxTimeout: 600s` keeps the pool bounded.
27
+ *
28
+ * 3. **No way to release sockets on shutdown.** A polling session
29
+ * that stops without destroying its dispatcher leaves keep-alive
30
+ * sockets open. The exported `close()` calls `destroy()` on
31
+ * every owned dispatcher so the runner can swap or drop the
32
+ * transport cleanly.
33
+ *
34
+ * Two additional concerns the transport handles:
35
+ *
36
+ * - **IPv4 fallback with sticky attempts**: some operator networks
37
+ * (notably IPv6-only with broken NAT64) cannot reach
38
+ * `api.telegram.org` over IPv6. OpenClaw observed enough of this
39
+ * to ship sticky fallback to `family: 4`. We port the same
40
+ * behavior with a simpler health model: the first fallback-eligible
41
+ * failure flips to the IPv4-pinned dispatcher; N consecutive failures start a cooldown.
42
+ *
43
+ * - **Proxy support**: `NUBEMCLAW_PROXY_URL` env var or constructor
44
+ * param mounts an undici `ProxyAgent`. Corporate networks behind
45
+ * egress proxies need this — there is no built-in env support in
46
+ * grammy's default `globalThis.fetch`.
47
+ *
48
+ * What this implementation deliberately omits vs OpenClaw fetch.ts:
49
+ *
50
+ * - **DNS pinning to hardcoded Telegram IPs** (the `149.154.167.220`
51
+ * fallback list). Telegram rotates IPs on its own schedule; a
52
+ * hardcoded list is a hostile maintenance burden and we have no
53
+ * evidence it solved an incident outside OpenClaw's specific
54
+ * deployment. If we see DNS failures in v3 we revisit.
55
+ * - **HTTP exchange capture** (the `captureHttpExchange` calls).
56
+ * That is a debug-only feature wired to OpenClaw's internal
57
+ * observability; not portable to v3 today.
58
+ *
59
+ * The transport returns a `fetch` callable that grammy's `Bot`
60
+ * constructor accepts via `new Bot(token, { client: { fetcher } })`.
61
+ */
62
+
63
/** Hostname of the Telegram Bot API origin this transport is meant for. */
const TELEGRAM_API_HOSTNAME = "api.telegram.org";

/** Pool `keepAliveTimeout`, in milliseconds (30 s). */
const POOL_KEEP_ALIVE_TIMEOUT_MS = 30_000;

/** Pool `keepAliveMaxTimeout`, in milliseconds (10 min). */
const POOL_KEEP_ALIVE_MAX_TIMEOUT_MS = 600_000;

/** Pool `connections` limit handed to each dispatcher. */
const POOL_CONNECTIONS_PER_ORIGIN = 10;

/** Pool `pipelining` value handed to each dispatcher. */
const POOL_PIPELINING = 1;

/** Consecutive fallback-eligible failures after which an attempt starts cooling down. */
const FALLBACK_FAILURE_THRESHOLD = 5;

/** First cooldown duration, in milliseconds; doubled after each cooldown up to the max. */
const FALLBACK_COOLDOWN_INITIAL_MS = 10_000;

/** Upper bound for the cooldown duration, in milliseconds. */
const FALLBACK_COOLDOWN_MAX_MS = 60_000;
71
+
72
/**
 * Error codes (upper-case) that mark a failure as fallback-eligible.
 * Typed read-only so the shared set cannot be mutated by accident.
 */
const FALLBACK_NETWORK_ERROR_CODES: ReadonlySet<string> = new Set<string>([
  "ETIMEDOUT",
  "ENETUNREACH",
  "EHOSTUNREACH",
  "UND_ERR_CONNECT_TIMEOUT",
  "UND_ERR_SOCKET",
]);
79
+
80
/**
 * Returns a fresh object with the connection-pool settings shared by
 * every dispatcher this module builds: HTTP/2 disabled plus the
 * keep-alive, connection-count and pipelining constants defined above.
 */
const poolOptions = (): {
  allowH2: false;
  keepAliveTimeout: number;
  keepAliveMaxTimeout: number;
  connections: number;
  pipelining: number;
} => {
  return {
    allowH2: false,
    keepAliveTimeout: POOL_KEEP_ALIVE_TIMEOUT_MS,
    keepAliveMaxTimeout: POOL_KEEP_ALIVE_MAX_TIMEOUT_MS,
    connections: POOL_CONNECTIONS_PER_ORIGIN,
    pipelining: POOL_PIPELINING,
  };
};
93
+
94
/**
 * Gathers every string `code` found on `err` and on anything reachable
 * from it through `cause` links or `errors` arrays.
 *
 * Codes are trimmed and upper-cased; blank or non-string codes are
 * ignored. Non-object values contribute nothing, and each object is
 * visited at most once so cyclic `cause` chains terminate.
 *
 * @param err - The thrown value to inspect (may be anything).
 * @returns The set of normalized codes; empty when none were found.
 */
const collectErrorCodes = (err: unknown): Set<string> => {
  const codes = new Set<string>();
  const visited = new Set<unknown>();
  // Breadth-first walk; `pending` grows while we iterate over it.
  const pending: unknown[] = [err];
  for (let cursor = 0; cursor < pending.length; cursor += 1) {
    const node = pending[cursor];
    if (node === null || node === undefined || typeof node !== "object") continue;
    if (visited.has(node)) continue;
    visited.add(node);

    const { code, cause, errors } = node as {
      code?: unknown;
      cause?: unknown;
      errors?: unknown;
    };
    if (typeof code === "string") {
      const normalized = code.trim().toUpperCase();
      if (normalized !== "") codes.add(normalized);
    }
    if (cause !== undefined) pending.push(cause);
    if (Array.isArray(errors)) {
      for (const nested of errors) pending.push(nested);
    }
  }
  return codes;
};
115
+
116
/**
 * Decides whether a thrown value is a network-level failure that
 * justifies trying the next attempt.
 *
 * Returns `true` when any code collected from `err` (including its
 * `cause` / `errors` descendants) is in `FALLBACK_NETWORK_ERROR_CODES`,
 * or when no code was found at all and the top-level message contains
 * "fetch failed" (case-insensitive). Everything else is `false`.
 *
 * @param err - The thrown value to classify.
 */
const shouldFallback = (err: unknown): boolean => {
  if (err === null || err === undefined) return false;

  const codes = collectErrorCodes(err);
  for (const known of FALLBACK_NETWORK_ERROR_CODES) {
    if (codes.has(known)) return true;
  }

  // A code that is present but not on the list means a specific,
  // non-fallback failure; the message heuristic applies only to
  // code-less errors, so the message is not stringified otherwise.
  if (codes.size > 0) return false;
  if (typeof err !== "object" || !("message" in err)) return false;
  const message = String((err as { message: unknown }).message).toLowerCase();
  return message.includes("fetch failed");
};
128
+
129
/** The undici dispatcher flavors this module constructs. */
type Dispatcher = Agent | ProxyAgent;

/** Label for an entry in the ordered attempt list. */
type AttemptKind = "primary" | "fallback-ipv4";

/** One entry in the ordered attempt list. */
interface Attempt {
  readonly kind: AttemptKind;
  /** Returns the dispatcher to use for this attempt. */
  readonly create: () => Dispatcher;
}

/** Mutable failure bookkeeping kept per attempt. */
interface AttemptHealth {
  /** Fallback-eligible failures seen since the last success or cooldown. */
  consecutiveFailures: number;
  /** Duration of the next cooldown, in milliseconds. */
  cooldownMs: number;
  /** Epoch milliseconds until which the attempt is skipped; 0 when healthy. */
  unhealthyUntilMs: number;
}
143
+
144
/** Options accepted by `createTelegramTransport`. All fields are optional. */
export interface TelegramTransportOptions {
  /**
   * Explicit proxy URL (`http://...` / `https://...`). Defaults to the
   * `NUBEMCLAW_PROXY_URL` env var. When set, every dispatcher routes
   * through this proxy via `undici.ProxyAgent`.
   */
  readonly proxyUrl?: string;
  /** Custom logger; defaults to a silent one (transport doesn't own loggers). */
  readonly logger?: ChannelLogger;
  /** Test seam: environment map used for proxy resolution instead of `process.env`. */
  readonly envVars?: NodeJS.ProcessEnv;
}
156
+
157
/** Handle returned by `createTelegramTransport`. */
export interface TelegramTransport {
  /**
   * `fetch`-shaped callable usable directly as
   * `new Bot(token, { client: { fetcher } })`. Routes through the
   * primary dispatcher with automatic IPv4 fallback on
   * fallback-eligible network failures.
   */
  fetch: typeof globalThis.fetch;
  /**
   * Destroys every dispatcher this transport owns. Safe to call
   * multiple times; subsequent calls resolve immediately. The runner
   * MUST call this when stopping the channel — otherwise undici keeps
   * keep-alive sockets open indefinitely.
   */
  close(): Promise<void>;
}
172
+
173
/** Logger whose four methods do nothing; used when the caller supplies no logger. */
const silentLogger: ChannelLogger = {
  info: () => {},
  warn: () => {},
  error: () => {},
  debug: () => {},
};
179
+
180
/**
 * Picks the proxy URL for a transport.
 *
 * Precedence: a non-blank `opts.proxyUrl` wins; otherwise the
 * `NUBEMCLAW_PROXY_URL` entry of `opts.envVars` (or of `process.env`
 * when `envVars` is not given) is used. Values are trimmed, and blank
 * values count as absent.
 *
 * @param opts - Transport options to read the proxy settings from.
 * @returns The trimmed proxy URL, or `undefined` when none is configured.
 */
const resolveProxyUrl = (opts: TelegramTransportOptions): string | undefined => {
  const fromOptions = opts.proxyUrl?.trim();
  if (fromOptions !== undefined && fromOptions.length > 0) return fromOptions;

  const source = opts.envVars ?? process.env;
  const fromEnv = source["NUBEMCLAW_PROXY_URL"]?.trim();
  if (fromEnv === undefined || fromEnv.length === 0) return undefined;
  return fromEnv;
};
187
+
188
/**
 * Builds a Telegram transport: a `fetch`-shaped callable backed by
 * undici dispatchers this transport owns, plus a `close()` that
 * destroys them.
 *
 * Requests walk an ordered attempt list (`primary`, then
 * `fallback-ipv4`) starting at the current sticky index:
 *
 * - An attempt that is cooling down is skipped.
 * - A failure that `shouldFallback` rejects is rethrown immediately.
 * - A fallback-eligible failure is counted against the attempt, and if
 *   it happened on the sticky attempt the sticky index moves to the
 *   next attempt, which is then tried within the same call.
 * - After `FALLBACK_FAILURE_THRESHOLD` consecutive eligible failures an
 *   attempt is skipped for a cooldown that starts at
 *   `FALLBACK_COOLDOWN_INITIAL_MS` and doubles up to
 *   `FALLBACK_COOLDOWN_MAX_MS`; a success resets it.
 *
 * Once `close()` has been called the returned `fetch` rejects instead
 * of creating new dispatchers, so nothing can leak after shutdown.
 *
 * @param opts - Optional proxy URL, logger and env override.
 * @returns The transport handle (`fetch` + `close`).
 */
export const createTelegramTransport = (opts: TelegramTransportOptions = {}): TelegramTransport => {
  const logger = opts.logger ?? silentLogger;
  const proxyUrl = resolveProxyUrl(opts);
  const owned = new Set<Dispatcher>();
  let closed = false;

  const buildPrimary = (): Dispatcher =>
    proxyUrl !== undefined
      ? new ProxyAgent({ uri: proxyUrl, ...poolOptions() })
      : new Agent({ ...poolOptions() });

  const buildFallbackIpv4 = (): Dispatcher => {
    // Force IPv4. ProxyAgent doesn't take a connect.family override
    // directly via construction — when a proxy is in play we keep the
    // dispatcher identical to primary because the IPv4 vs IPv6 decision
    // is made by the proxy itself. Without a proxy we pin family: 4.
    if (proxyUrl !== undefined) {
      return new ProxyAgent({ uri: proxyUrl, ...poolOptions() });
    }
    return new Agent({ ...poolOptions(), connect: { family: 4 } });
  };

  // Lazy dispatcher instantiation per attempt so we only pay the cost
  // when the attempt is actually exercised.
  let primaryDispatcher: Dispatcher | null = null;
  let fallbackDispatcher: Dispatcher | null = null;
  const getPrimary = (): Dispatcher => {
    if (primaryDispatcher === null) {
      primaryDispatcher = buildPrimary();
      owned.add(primaryDispatcher);
    }
    return primaryDispatcher;
  };
  const getFallback = (): Dispatcher => {
    if (fallbackDispatcher === null) {
      fallbackDispatcher = buildFallbackIpv4();
      owned.add(fallbackDispatcher);
    }
    return fallbackDispatcher;
  };

  const attempts: Attempt[] = [
    { kind: "primary", create: getPrimary },
    { kind: "fallback-ipv4", create: getFallback },
  ];
  const health: AttemptHealth[] = attempts.map(() => ({
    consecutiveFailures: 0,
    cooldownMs: FALLBACK_COOLDOWN_INITIAL_MS,
    unhealthyUntilMs: 0,
  }));
  // Index of the attempt new requests start from.
  let stickyIndex = 0;

  /** Clears the attempt's failure state and, if it sits below the sticky index, moves the sticky index back to it. */
  const recordSuccess = (attemptIndex: number): void => {
    const h = health[attemptIndex];
    if (h === undefined) return;
    h.consecutiveFailures = 0;
    h.cooldownMs = FALLBACK_COOLDOWN_INITIAL_MS;
    h.unhealthyUntilMs = 0;
    // Requests start at stickyIndex, so this only triggers for a call
    // that began before another call promoted the sticky index.
    if (attemptIndex < stickyIndex) {
      logger.debug(
        { from: stickyIndex, to: attemptIndex },
        "telegram transport: recovered to lower-cost attempt",
      );
      stickyIndex = attemptIndex;
    }
  };

  /**
   * Counts one fallback-eligible failure against the attempt and starts
   * a cooldown once the threshold is reached. The caller has already
   * confirmed eligibility with `shouldFallback`.
   */
  const recordFailure = (attemptIndex: number): void => {
    const h = health[attemptIndex];
    if (h === undefined) return;
    h.consecutiveFailures += 1;
    if (h.consecutiveFailures < FALLBACK_FAILURE_THRESHOLD) return;
    const cooldownMs = Math.min(FALLBACK_COOLDOWN_MAX_MS, h.cooldownMs);
    h.consecutiveFailures = 0;
    h.cooldownMs = Math.min(FALLBACK_COOLDOWN_MAX_MS, cooldownMs * 2);
    h.unhealthyUntilMs = Date.now() + cooldownMs;
    logger.warn(
      { attempt: attempts[attemptIndex]?.kind, cooldownMs },
      "telegram transport: attempt marked temporarily unhealthy",
    );
  };

  /** Moves the sticky index one step towards the last attempt, if there is one left. */
  const promoteSticky = (): void => {
    if (stickyIndex >= attempts.length - 1) return;
    stickyIndex += 1;
    logger.warn(
      { newAttempt: attempts[stickyIndex]?.kind },
      "telegram transport: promoting to fallback attempt",
    );
  };

  const wrappedFetch: typeof globalThis.fetch = async (input, init) => {
    let lastErr: unknown;
    for (let attemptIndex = stickyIndex; attemptIndex < attempts.length; attemptIndex += 1) {
      // Checked per attempt, not just once: close() may land while an
      // earlier attempt was awaiting, and creating a dispatcher after
      // that point would leave it in `owned` with nobody to destroy it.
      if (closed) throw new Error("telegram transport: closed");
      const attempt = attempts[attemptIndex];
      if (attempt === undefined) break;
      const h = health[attemptIndex];
      if (h !== undefined && h.unhealthyUntilMs > Date.now()) {
        // Skip this attempt while it cools down.
        lastErr = new Error(`telegram transport: attempt '${attempt.kind}' cooling down`);
        continue;
      }
      try {
        const dispatcher = attempt.create();
        // Cast to RequestInit + dispatcher property — undici's RequestInit
        // extends the global with `dispatcher`, but the global `fetch`
        // type doesn't know that.
        const initWithDispatcher = {
          ...init,
          dispatcher,
        } as unknown as Parameters<typeof undiciFetch>[1];
        const res = await undiciFetch(
          input as Parameters<typeof undiciFetch>[0],
          initWithDispatcher,
        );
        recordSuccess(attemptIndex);
        return res as unknown as Response;
      } catch (cause) {
        lastErr = cause;
        if (!shouldFallback(cause)) throw cause;
        recordFailure(attemptIndex);
        if (attemptIndex === stickyIndex && attemptIndex < attempts.length - 1) {
          promoteSticky();
        }
      }
    }
    throw lastErr;
  };

  const close = async (): Promise<void> => {
    if (closed) return;
    closed = true;
    const toDestroy = [...owned];
    owned.clear();
    // Drop the cached references so destroyed dispatchers are not retained.
    primaryDispatcher = null;
    fallbackDispatcher = null;
    await Promise.all(
      toDestroy.map(async (d) => {
        try {
          await d.destroy();
        } catch {
          // Already destroyed — ignore.
        }
      }),
    );
  };

  return {
    fetch: wrappedFetch,
    close,
  };
};
344
+
345
/**
 * Sentinel constant identifying the Telegram API host. Exported for
 * tests that want to assert the transport is being aimed at the right
 * origin (the wrapped fetch does not enforce this — it passes whatever
 * URL the caller supplies straight to undici's `fetch`).
 */
export const TELEGRAM_HOST = TELEGRAM_API_HOSTNAME;