aiden-runtime 4.0.2 → 4.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108) hide show
  1. package/README.md +11 -7
  2. package/config/hardware.json +2 -2
  3. package/dist/api/server.js +50 -52
  4. package/dist/cli/v4/aidenCLI.js +421 -5
  5. package/dist/cli/v4/aidenPrompt.js +317 -0
  6. package/dist/cli/v4/box.js +105 -39
  7. package/dist/cli/v4/callbacks.js +39 -6
  8. package/dist/cli/v4/chatSession.js +256 -55
  9. package/dist/cli/v4/citationFooter.js +97 -0
  10. package/dist/cli/v4/commands/channel.js +656 -0
  11. package/dist/cli/v4/commands/clear.js +1 -1
  12. package/dist/cli/v4/commands/compress.js +1 -1
  13. package/dist/cli/v4/commands/cron.js +44 -16
  14. package/dist/cli/v4/commands/fanout.js +236 -0
  15. package/dist/cli/v4/commands/help.js +15 -4
  16. package/dist/cli/v4/commands/history.js +84 -0
  17. package/dist/cli/v4/commands/index.js +16 -1
  18. package/dist/cli/v4/commands/mcp.js +358 -0
  19. package/dist/cli/v4/commands/show.js +43 -0
  20. package/dist/cli/v4/commands/skills.js +169 -4
  21. package/dist/cli/v4/commands/status.js +84 -0
  22. package/dist/cli/v4/commands/subagent.js +78 -0
  23. package/dist/cli/v4/commands/verbose.js +1 -1
  24. package/dist/cli/v4/commands/voice.js +218 -0
  25. package/dist/cli/v4/cronCli.js +103 -0
  26. package/dist/cli/v4/display.js +297 -13
  27. package/dist/cli/v4/doctor.js +41 -0
  28. package/dist/cli/v4/envSources.js +105 -0
  29. package/dist/cli/v4/ghostMatch.js +74 -0
  30. package/dist/cli/v4/historyStore.js +163 -0
  31. package/dist/cli/v4/pasteCompression.js +124 -0
  32. package/dist/cli/v4/pasteIntercept.js +203 -0
  33. package/dist/cli/v4/replyRenderer.js +209 -0
  34. package/dist/cli/v4/resizeGuard.js +92 -0
  35. package/dist/cli/v4/shellInterpolation.js +139 -0
  36. package/dist/cli/v4/skinEngine.js +21 -1
  37. package/dist/cli/v4/streamingPrefix.js +121 -0
  38. package/dist/cli/v4/syntaxHighlight.js +345 -0
  39. package/dist/cli/v4/table.js +216 -0
  40. package/dist/cli/v4/themeDetect.js +81 -0
  41. package/dist/cli/v4/uiBuild.js +74 -0
  42. package/dist/cli/v4/voiceCli.js +113 -0
  43. package/dist/cli/v4/voicePromptApi.js +196 -0
  44. package/dist/core/channels/discord.js +16 -10
  45. package/dist/core/channels/email.js +13 -9
  46. package/dist/core/channels/imessage.js +13 -9
  47. package/dist/core/channels/manager.js +25 -7
  48. package/dist/core/channels/pdf-extract.js +180 -0
  49. package/dist/core/channels/photo-vision.js +157 -0
  50. package/dist/core/channels/signal.js +11 -7
  51. package/dist/core/channels/slack.js +13 -10
  52. package/dist/core/channels/telegram-commands.js +154 -0
  53. package/dist/core/channels/telegram-groups.js +198 -0
  54. package/dist/core/channels/telegram-rate-limit.js +124 -0
  55. package/dist/core/channels/telegram.js +1980 -0
  56. package/dist/core/channels/twilio.js +11 -7
  57. package/dist/core/channels/webhook.js +9 -5
  58. package/dist/core/channels/whatsapp.js +15 -11
  59. package/dist/core/channels/whisper-transcribe.js +163 -0
  60. package/dist/core/cronManager.js +33 -294
  61. package/dist/core/gateway.js +29 -8
  62. package/dist/core/playwrightBridge.js +90 -0
  63. package/dist/core/v4/aidenAgent.js +35 -0
  64. package/dist/core/v4/auxiliaryClient.js +2 -2
  65. package/dist/core/v4/cron/atomicWrite.js +18 -4
  66. package/dist/core/v4/cron/cronExecute.js +300 -0
  67. package/dist/core/v4/cron/cronManager.js +502 -0
  68. package/dist/core/v4/cron/cronState.js +314 -0
  69. package/dist/core/v4/cron/cronTick.js +90 -0
  70. package/dist/core/v4/cron/diagnostics.js +104 -0
  71. package/dist/core/v4/cron/graceWindow.js +79 -0
  72. package/dist/core/v4/logger/factory.js +110 -0
  73. package/dist/core/v4/logger/index.js +22 -0
  74. package/dist/core/v4/logger/logger.js +101 -0
  75. package/dist/core/v4/logger/sinks/fileSink.js +110 -0
  76. package/dist/core/v4/logger/sinks/multiSink.js +43 -0
  77. package/dist/core/v4/logger/sinks/nullSink.js +53 -0
  78. package/dist/core/v4/logger/sinks/stdSink.js +81 -0
  79. package/dist/core/v4/mcp/server/diagnostics.js +40 -0
  80. package/dist/core/v4/mcp/server/skillBridge.js +94 -0
  81. package/dist/core/v4/mcp/server/stdioServer.js +119 -0
  82. package/dist/core/v4/mcp/server/toolBridge.js +168 -0
  83. package/dist/core/v4/platformPaths.js +105 -0
  84. package/dist/core/v4/providerFallback.js +25 -0
  85. package/dist/core/v4/skillLoader.js +21 -5
  86. package/dist/core/v4/skillMining/candidateStore.js +164 -0
  87. package/dist/core/v4/skillMining/extractorPrompt.js +111 -0
  88. package/dist/core/v4/skillMining/proposalBuilder.js +139 -0
  89. package/dist/core/v4/skillMining/skillMiner.js +191 -0
  90. package/dist/core/v4/skillMining/traceFingerprint.js +51 -0
  91. package/dist/core/v4/subagent/budget.js +76 -0
  92. package/dist/core/v4/subagent/diagnostics.js +22 -0
  93. package/dist/core/v4/subagent/fanout.js +216 -0
  94. package/dist/core/v4/subagent/merger.js +148 -0
  95. package/dist/core/v4/subagent/providerRotation.js +54 -0
  96. package/dist/core/v4/voice/audioStream.js +373 -0
  97. package/dist/core/v4/voice/cliVoice.js +393 -0
  98. package/dist/core/v4/voice/diagnostics.js +66 -0
  99. package/dist/core/v4/voice/ttsStream.js +193 -0
  100. package/dist/core/version.js +1 -1
  101. package/dist/core/visionAnalyze.js +291 -90
  102. package/dist/core/voice/audio.js +61 -5
  103. package/dist/core/voice/audioBackend.js +134 -0
  104. package/dist/core/voice/stt.js +61 -6
  105. package/dist/core/voice/tts.js +19 -3
  106. package/dist/tools/v4/index.js +32 -1
  107. package/dist/tools/v4/subagent/subagentFanout.js +166 -0
  108. package/package.json +11 -2
@@ -0,0 +1,1980 @@
1
+ "use strict";
2
+ // ============================================================
3
+ // DevOS — Autonomous AI Execution System
4
+ // Copyright (c) 2026 Shiva Deore. All rights reserved.
5
+ // ============================================================
6
+ var __importDefault = (this && this.__importDefault) || function (mod) {
7
+ return (mod && mod.__esModule) ? mod : { "default": mod };
8
+ };
9
+ Object.defineProperty(exports, "__esModule", { value: true });
10
+ exports.TelegramAdapter = exports.TELEGRAM_ADAPTER_BUILD = void 0;
11
+ // core/channels/telegram.ts — Telegram channel adapter (Phase v4.1-1.2).
12
+ //
13
+ // Config (env vars):
14
+ // TELEGRAM_BOT_TOKEN — required; adapter stays disabled if absent
15
+ // TELEGRAM_ALLOWED_CHATS — optional comma-separated chat IDs allowlist
16
+ //
17
+ // Phase v4.1-1.2 fixes:
18
+ // - All adapter logs route through an injectable AidenFileLogger
19
+ // (~/.aiden/logs/telegram.log) instead of console.log / console.warn
20
+ // so polling-error spam never leaks into the CLI's chat REPL stdout.
21
+ // - 409 Conflict (concurrent-poller) is detected and counted; after
22
+ // CONFLICT_THRESHOLD consecutive 409s the adapter stops polling and
23
+ // transitions to a `conflict` state surfaced in the boot card and
24
+ // /channel telegram status.
25
+ //
26
+ // Phase 1 scope (original baseline — later phases in this file add group chats, voice, photos and documents):
27
+ // - Long-polling mode only (webhooks land in Phase 2)
28
+ // - Direct messages only — group chats are politely ignored
29
+ // - Plain-text messages — voice / files / inline mode are Phase 2
30
+ // - Outbound chunked at 4096 chars (Telegram hard limit on text messages)
31
+ // - Per-chat memory isolation routed via gateway.routeMessage(channelId)
32
+ // - Bot replies suppressed (no bot loops)
33
+ // - Token never logged or surfaced in error messages
34
+ const node_telegram_bot_api_1 = __importDefault(require("node-telegram-bot-api"));
35
+ const node_fs_1 = require("node:fs");
36
+ const node_path_1 = __importDefault(require("node:path"));
37
+ const node_crypto_1 = require("node:crypto");
38
+ const gateway_1 = require("../gateway");
39
+ const logger_1 = require("../v4/logger");
40
+ const telegram_rate_limit_1 = require("./telegram-rate-limit");
41
+ const telegram_groups_1 = require("./telegram-groups");
42
+ const telegram_commands_1 = require("./telegram-commands");
43
+ const paths_1 = require("../v4/paths");
44
+ const whisper_transcribe_1 = require("./whisper-transcribe");
45
+ const photo_vision_1 = require("./photo-vision");
46
+ const pdf_extract_1 = require("./pdf-extract");
47
// Telegram Bot API constants — public spec, see core.telegram.org/bots/api

// Telegram hard limit on the length of a single text message.
const MAX_MESSAGE_CHARS = 4096;

// Long-poll window in seconds; Telegram caps at 50.
const POLLING_TIMEOUT_S = 50;

// Delay between polls in milliseconds. Kept as a named constant so the
// liveness log can publish it.
const POLLING_INTERVAL_MS = 300;

// Aiden agent answers are markdown-friendly.
const DEFAULT_PARSE_MODE = 'Markdown';

// Number of consecutive 409s before the adapter surrenders.
const CONFLICT_THRESHOLD = 3;
54
+ /**
55
+ * Phase v4.1-3.2 — build fingerprint.
56
+ *
57
+ * Bumped manually per phase. Surfaced in the `polling launched` log
58
+ * so live tests can grep one line and confirm the running binary
59
+ * matches the expected phase. Today's example: live test of v4.1-3.1
60
+ * silently failed because the user ran an older `dist/` build —
61
+ * watching for the fingerprint in the log would have flagged that
62
+ * in seconds instead of hours.
63
+ */
64
+ exports.TELEGRAM_ADAPTER_BUILD = 'v4.1-4.2';
65
// Phase v4.1-3 — voice cache + janitor knobs. These are constants, not env
// vars, because they are maintenance defaults: once the cache grows past
// 500 MB on a long-running deployment the janitor fires once on startup.
// A real TTL system is parked for v4.2.

// Voice notes are cached under <aiden root>/<parent>/<dir name>.
const VOICE_CACHE_PARENT = 'cache';
const VOICE_CACHE_DIR_NAME = 'audio';

// 500 MB expressed in bytes.
const JANITOR_THRESHOLD_BYTES = 500 * 1024 ** 2;

// 7 days expressed in milliseconds.
const JANITOR_AGE_THRESHOLD_MS = 7 * 24 * 3600 * 1000;

const DEFAULT_CONFIDENCE_FLOOR = -0.5;

// Phase v4.1-4 — photo + document cache subdirs (siblings of `audio`).
const PHOTO_CACHE_DIR_NAME = 'photos';
const DOCUMENT_CACHE_DIR_NAME = 'documents';
77
+ /**
78
+ * Phase v4.1-4 — supported document MIME map. Extension → MIME.
79
+ * Anything outside this map gets a friendly reject reply. The list
80
+ * is intentionally tight for v4.1-4 (PDF + the four common image
81
+ * formats sent as files); we expand in a later sub-phase as demand
82
+ * surfaces. Keep the values lowercase to compare against
83
+ * `msg.document.mime_type` directly.
84
+ */
85
// Built from an [extension, mime] table; the result is the same plain
// object (same keys, same order) as an equivalent object literal.
const SUPPORTED_DOC_MIME = Object.fromEntries([
    ['.pdf', 'application/pdf'],
    ['.png', 'image/png'],
    ['.jpg', 'image/jpeg'],
    ['.jpeg', 'image/jpeg'],
    ['.gif', 'image/gif'],
    ['.webp', 'image/webp'],
]);
93
+ class TelegramAdapter {
94
+ constructor(opts = {}) {
95
+ this.name = 'telegram';
96
+ this.client = null;
97
+ this.healthy = false;
98
+ // Phase v4.1-1.1 — populated by getMe() on a successful connect so
99
+ // /channel list and /channel telegram status can show the live name.
100
+ this.botUsername = null;
101
+ // Wall-clock of the last inbound message — surfaced in /channel
102
+ // telegram status so the user can confirm polling is actually
103
+ // delivering messages, not just connected.
104
+ this.lastMessageAt = null;
105
+ this.errorCount = 0;
106
+ // Phase v4.1-1.2 — 409 Conflict tracking. Telegram only allows one
107
+ // concurrent poller per bot; if a second instance starts, both get
108
+ // 409s every poll cycle. We count consecutive 409s and surrender
109
+ // (stop polling) once we cross CONFLICT_THRESHOLD so we don't spam.
110
+ this.state = 'inactive';
111
+ this.consecutiveConflicts = 0;
112
+ // Phase v4.1-2 — group machinery. Lazily constructed in start() so
113
+ // tests that bypass start() don't need a paths fixture.
114
+ this.groupStore = null;
115
+ this.rateLimiter = null;
116
+ this.commandRouter = null;
117
+ this.allowedGroups = new Set();
118
+ /** When true, groups respond to every message — overrides mention-only. */
119
+ this.groupsRespondAll = false;
120
+ /** Number of voice messages successfully transcribed since adapter start. */
121
+ this.voiceTranscribedCount = 0;
122
+ /** Number of voice messages received (any outcome) since adapter start. */
123
+ this.voiceReceivedCount = 0;
124
+ this.recentMessageIds = new Set();
125
+ this.recentMessageOrder = [];
126
+ /**
127
+ * Phase v4.1-3.2 — first-inbound sentinel. Logs once per adapter
128
+ * lifecycle (reset on each `start()`). If "polling launched" fires
129
+ * but "polling first inbound" never does, polling is stalled at
130
+ * the network layer — the cheapest "is the loop alive?" tell.
131
+ */
132
+ this.firstInboundLogged = false;
133
+ this.repliedMessageIds = new Set();
134
+ this.repliedMessageOrder = [];
135
+ /**
136
+ * Phase v4.1-4.2 — Telegram delivery-split coalesce slot. When a
137
+ * Telegram client splits a single "send caption + photo" action
138
+ * into two separate updates (text-only + photo-with-caption, ~1ms
139
+ * apart in the same poll batch), the photo handler synchronously
140
+ * records the caption here BEFORE awaiting the download. The text
141
+ * fall-through then checks this slot and suppresses dispatch when
142
+ * the text matches a recent caption — the agent only sees the
143
+ * photo annotation, never the orphaned text. Per-chat single-slot
144
+ * because the only legitimate collision is on consecutive photos
145
+ * from the same chat (the latest one wins).
146
+ */
147
+ this.recentPhotoCaptions = new Map();
148
+ /**
149
+ * Phase v4.1-3.2 — captured polling params so the smoke can
150
+ * inspect them via `getDiagnostics()` without hitting NTBA's
151
+ * internal `_polling.options` shape. Set once per `start()`.
152
+ */
153
+ this.lastPollingParams = null;
154
+ /** Phase v4.1-3.2 — stored client factory (test seam). */
155
+ this.clientFactory = null;
156
+ /** Number of photos handled (any outcome) since adapter start. */
157
+ this.photoReceivedCount = 0;
158
+ /** Number of photos successfully described / attached since adapter start. */
159
+ this.photoProcessedCount = 0;
160
+ /** Number of documents handled (any outcome) since adapter start. */
161
+ this.documentReceivedCount = 0;
162
+ /** Number of documents successfully extracted / routed since adapter start. */
163
+ this.documentProcessedCount = 0;
164
+ // Snapshot env at construction so tests that bypass start() (stubbing
165
+ // the bot client directly) still see the configured token + allowlist.
166
+ // start() re-snapshots — that's the path that makes
167
+ // channelManager.restart('telegram') pick up a freshly-written .env.
168
+ this.token = (process.env.TELEGRAM_BOT_TOKEN ?? '').trim();
169
+ const rawChats = process.env.TELEGRAM_ALLOWED_CHATS ?? '';
170
+ this.allowedChats = rawChats
171
+ ? new Set(rawChats.split(',').map(s => s.trim()).filter(Boolean))
172
+ : new Set();
173
+ // Phase v4.1-1.3a — default to noopLogger so REPL stays clean
174
+ // even if nobody calls attachLogger. ChannelManager.register
175
+ // overrides with a scoped child logger.
176
+ this.log = opts.logger ?? (0, logger_1.noopLogger)();
177
+ // Phase v4.1-3 — production wiring of the channel-side Whisper adapter.
178
+ this.transcribeFn = opts.transcribe ?? whisper_transcribe_1.transcribeForChannel;
179
+ this.clientFactory = opts.clientFactory ?? null;
180
+ // Phase v4.1-4 — production wiring of the photo + pdf adapters.
181
+ this.analyzePhotoFn = opts.analyzePhoto ?? photo_vision_1.analyzePhotoForChannel;
182
+ this.extractPdfFn = opts.extractPdf ?? pdf_extract_1.extractPdfForChannel;
183
+ this.activeModelInfo = opts.activeModelInfo ?? (() => null);
184
+ }
185
+ attachLogger(logger) {
186
+ this.log = logger;
187
+ }
188
+ // ── Logging shims — kept for the file-internal call sites that
189
+ // were touched in v4.1-1.2. Each routes through the injected logger;
190
+ // there is no console.* fallback any more — REPL is sacred.
191
+ // Phase v4.1-2 — accept an optional structured context payload so
192
+ // group-routing diagnostics can include { chatId, userId, ... } that
193
+ // the FileSink renders as JSON after the message body.
194
+ logInfo(msg, ctx) { this.log.info(msg, ctx); }
195
+ logWarn(msg, ctx) { this.log.warn(msg, ctx); }
196
+ lockPath() {
197
+ // eslint-disable-next-line @typescript-eslint/no-var-requires
198
+ const path = require('node:path');
199
+ // eslint-disable-next-line @typescript-eslint/no-var-requires
200
+ const { resolveAidenPaths } = require('../v4/paths');
201
+ return path.join(resolveAidenPaths().root, 'telegram-polling.lock');
202
+ }
203
+ acquireLocalLock() {
204
+ // eslint-disable-next-line @typescript-eslint/no-var-requires
205
+ const fs = require('node:fs');
206
+ const p = this.lockPath();
207
+ try {
208
+ const raw = fs.readFileSync(p, 'utf8');
209
+ const obj = JSON.parse(raw);
210
+ const tokenTag = (this.token ?? '').slice(0, 8);
211
+ const sameToken = obj.token === tokenTag;
212
+ const fresh = typeof obj.ts === 'number' && (Date.now() - obj.ts) < TelegramAdapter.LOCK_STALE_MS;
213
+ const sameProcess = typeof obj.pid === 'number' && obj.pid === process.pid;
214
+ if (sameProcess) {
215
+ // Re-acquire after our own restart — refresh and proceed.
216
+ this.writeLockFile(p);
217
+ return true;
218
+ }
219
+ if (sameToken && fresh) {
220
+ return false; // another aiden on this box, this token, recent.
221
+ }
222
+ // Stale or different-token — overwrite.
223
+ this.writeLockFile(p);
224
+ return true;
225
+ }
226
+ catch {
227
+ // No lock file yet (or unreadable) — ours now.
228
+ this.writeLockFile(p);
229
+ return true;
230
+ }
231
+ }
232
+ writeLockFile(p) {
233
+ // eslint-disable-next-line @typescript-eslint/no-var-requires
234
+ const fs = require('node:fs');
235
+ try {
236
+ const dir = require('node:path').dirname(p);
237
+ if (!fs.existsSync(dir))
238
+ fs.mkdirSync(dir, { recursive: true });
239
+ fs.writeFileSync(p, JSON.stringify({
240
+ pid: process.pid,
241
+ ts: Date.now(),
242
+ token: (this.token ?? '').slice(0, 8),
243
+ }), 'utf8');
244
+ }
245
+ catch {
246
+ // Lock-write failure is non-fatal — fall through and let the
247
+ // 409 detection handle any actual conflict at runtime.
248
+ }
249
+ }
250
+ releaseLocalLock() {
251
+ // eslint-disable-next-line @typescript-eslint/no-var-requires
252
+ const fs = require('node:fs');
253
+ try {
254
+ const p = this.lockPath();
255
+ const raw = fs.readFileSync(p, 'utf8');
256
+ const obj = JSON.parse(raw);
257
+ // Only release a lock we own — don't unlink a sibling's claim.
258
+ if (obj.pid === process.pid)
259
+ fs.unlinkSync(p);
260
+ }
261
+ catch {
262
+ /* lock not present or unreadable — nothing to release */
263
+ }
264
+ }
265
+ logError(msg, ctx) { this.log.error(msg, ctx); }
266
+ // ── Lifecycle ────────────────────────────────────────────────
267
+ async start() {
268
+ // Phase v4.1-1.1: re-read env on every start() so the slash-command
269
+ // path can rotate the token by writing .env then calling
270
+ // channelManager.restart('telegram'). The earlier implementation
271
+ // only read env in the constructor, which froze the token for the
272
+ // adapter's lifetime.
273
+ this.token = (process.env.TELEGRAM_BOT_TOKEN ?? '').trim();
274
+ const rawChats = process.env.TELEGRAM_ALLOWED_CHATS ?? '';
275
+ this.allowedChats = rawChats
276
+ ? new Set(rawChats.split(',').map(s => s.trim()).filter(Boolean))
277
+ : new Set();
278
+ this.botUsername = null;
279
+ this.errorCount = 0;
280
+ this.lastMessageAt = null;
281
+ this.consecutiveConflicts = 0;
282
+ this.state = 'inactive';
283
+ this.firstInboundLogged = false;
284
+ this.lastPollingParams = null;
285
+ // Phase v4.1-2 — read group-side env knobs alongside the token.
286
+ const rawAllowedGroups = process.env.TELEGRAM_ALLOWED_GROUPS ?? '';
287
+ this.allowedGroups = rawAllowedGroups
288
+ ? new Set(rawAllowedGroups.split(',').map(s => s.trim()).filter(Boolean))
289
+ : new Set();
290
+ this.groupsRespondAll =
291
+ (process.env.TELEGRAM_GROUPS_RESPOND_ALL ?? '').toLowerCase() === 'true';
292
+ if (!this.token) {
293
+ this.logInfo('Disabled — set TELEGRAM_BOT_TOKEN to enable');
294
+ return;
295
+ }
296
+ // Phase v4.1-3 — voice cache janitor runs once at adapter start.
297
+ // No background timer (defer real TTL to v4.2). Best-effort: a
298
+ // janitor failure must not block polling.
299
+ this.runVoiceCacheJanitor().catch((e) => {
300
+ this.logWarn(`voice cache janitor failed: ${e?.message ?? String(e)}`);
301
+ });
302
+ // Phase v4.1-2 — construct group machinery (one rate limiter + one
303
+ // store + one command router shared across all groups). Failure
304
+ // here is non-fatal: the adapter falls back to DM-only.
305
+ if (!this.groupStore) {
306
+ try {
307
+ this.groupStore = new telegram_groups_1.TelegramGroupStore({
308
+ paths: (0, paths_1.resolveAidenPaths)(),
309
+ logger: this.log.child('groups'),
310
+ });
311
+ this.rateLimiter = new telegram_rate_limit_1.TelegramRateLimiter({ logger: this.log.child('ratelimit') });
312
+ this.commandRouter = new telegram_commands_1.TelegramCommandRouter({
313
+ store: this.groupStore,
314
+ logger: this.log.child('commands'),
315
+ botUsername: () => this.botUsername,
316
+ fetchGroupAdmins: async (chatId) => {
317
+ if (!this.client)
318
+ return [];
319
+ try {
320
+ const admins = await this.client.getChatAdministrators(chatId);
321
+ return admins.map((a) => String(a?.user?.id ?? '')).filter(Boolean);
322
+ }
323
+ catch {
324
+ return [];
325
+ }
326
+ },
327
+ });
328
+ await this.groupStore.load();
329
+ }
330
+ catch (e) {
331
+ this.logWarn(`group machinery init failed: ${this.scrubToken(e?.message)}`);
332
+ this.groupStore = null;
333
+ this.rateLimiter = null;
334
+ this.commandRouter = null;
335
+ }
336
+ }
337
+ // Phase v4.1-preship-cleanup — local same-machine lock so two
338
+ // aiden processes on the same dev box don't both start polling
339
+ // the same bot and trigger 409s. Lock file at
340
+ // <aidenHome>/telegram-polling.lock contains pid + timestamp.
341
+ // Stale locks (>60s old) are auto-released. Cross-machine /
342
+ // shared-token conflicts are NOT covered here — those still
343
+ // surface via the existing 409 detection + /channel telegram
344
+ // takeover path.
345
+ if (!this.acquireLocalLock()) {
346
+ this.state = 'conflict';
347
+ this.healthy = false;
348
+ this.logWarn('Skipping polling — another aiden process on THIS machine is already holding the Telegram bot. ' +
349
+ 'Run `tasklist | grep node` (Windows) or `pgrep node` (POSIX) to see the rival, ' +
350
+ 'or stop the other instance and re-run /channel telegram takeover here.');
351
+ return;
352
+ }
353
+ this.state = 'connecting';
354
+ // Phase v4.1-3.2 — explicit allowed_updates defends against
355
+ // Telegram's "sticky filter" behaviour (getUpdates spec: "If
356
+ // not specified, the previous setting will be used."). Empty
357
+ // list means "all default update types" per the same spec.
358
+ // Operators can override via TELEGRAM_ALLOWED_UPDATES (CSV) if
359
+ // they need exotic kinds like chat_member.
360
+ const allowedUpdates = readAllowedUpdates();
361
+ this.lastPollingParams = {
362
+ interval: POLLING_INTERVAL_MS,
363
+ timeout: POLLING_TIMEOUT_S,
364
+ allowedUpdates,
365
+ };
366
+ try {
367
+ const factory = this.clientFactory ?? ((t, o) => new node_telegram_bot_api_1.default(t, o));
368
+ this.client = factory(this.token, {
369
+ polling: {
370
+ interval: POLLING_INTERVAL_MS,
371
+ autoStart: true,
372
+ params: {
373
+ timeout: POLLING_TIMEOUT_S,
374
+ allowed_updates: allowedUpdates,
375
+ },
376
+ },
377
+ });
378
+ }
379
+ catch (e) {
380
+ // Constructor failure (rare — usually a malformed token shape).
381
+ // Strip any echoed token from the message defensively.
382
+ this.logError(`Failed to construct client: ${this.scrubToken(e?.message)}`);
383
+ this.healthy = false;
384
+ this.state = 'inactive';
385
+ return;
386
+ }
387
+ // Phase v4.1-3.2 — liveness log. Fires AFTER client construction
388
+ // and BEFORE getMe so a getMe-stall (auth failure, network) is
389
+ // distinguishable from a "never even tried polling" situation.
390
+ // The fingerprint here is the smoking-gun for build-staleness:
391
+ // grep this line in the live log, compare against the expected
392
+ // phase number; mismatch = user is running old compiled JS.
393
+ this.logInfo('polling launched', {
394
+ fingerprint: exports.TELEGRAM_ADAPTER_BUILD,
395
+ interval: POLLING_INTERVAL_MS,
396
+ timeout: POLLING_TIMEOUT_S,
397
+ allowedUpdates: allowedUpdates.length === 0 ? '[] (all default types)' : allowedUpdates,
398
+ });
399
+ this.client.on('polling_error', (err) => {
400
+ this.errorCount += 1;
401
+ const message = this.scrubToken(err?.message ?? '');
402
+ const is409 = isConflictError(err);
403
+ if (is409) {
404
+ this.consecutiveConflicts += 1;
405
+ // Phase v4.1-1.2 — log the FIRST 409 visibly (so /channel
406
+ // telegram status reflects reality), then go quiet to avoid
407
+ // spamming the file. After threshold, stop polling cleanly.
408
+ if (this.consecutiveConflicts === 1) {
409
+ this.logWarn('Polling 409 Conflict — another aiden instance is holding this bot. ' +
410
+ 'Will stop polling after ' + CONFLICT_THRESHOLD + ' consecutive 409s.');
411
+ }
412
+ if (this.consecutiveConflicts >= CONFLICT_THRESHOLD) {
413
+ this.state = 'conflict';
414
+ this.healthy = false;
415
+ this.logWarn(`Surrendering after ${CONFLICT_THRESHOLD} consecutive 409s. ` +
416
+ 'Run /channel telegram status — use /channel telegram takeover ' +
417
+ 'if you want this instance to take over the bot.');
418
+ // Stop polling without awaiting — the event handler must return
419
+ // promptly. node-telegram-bot-api accepts stopPolling() repeatedly.
420
+ this.client?.stopPolling({ cancel: true }).catch(() => undefined);
421
+ }
422
+ return;
423
+ }
424
+ // Non-409: regular transient (502/504 storms etc.) — counted but
425
+ // not threshold-tripping. Reset conflict streak so we don't OR
426
+ // with stale 409s.
427
+ this.consecutiveConflicts = 0;
428
+ // Mark degraded only if previously active so the pill flips to
429
+ // "degraded" rather than masking a healthy state.
430
+ if (this.state === 'active')
431
+ this.state = 'degraded';
432
+ this.logWarn(`Polling error: ${message}`);
433
+ });
434
+ this.client.on('webhook_error', (err) => {
435
+ // Defensive: not active in polling mode but could fire if a stale
436
+ // webhook URL is registered against this bot.
437
+ this.logWarn(`Webhook error: ${this.scrubToken(err?.message)}`);
438
+ });
439
+ // Phase v4.1-3.1 — extracted into wireMessageHandlers() so the
440
+ // smoke can drive the same code path with a stub EventEmitter-
441
+ // backed client (no real polling loop needed). This subscribes
442
+ // to 'message' AND defensive secondary subscriptions for 'voice'
443
+ // and 'audio' to make sure media never gets silently dropped if
444
+ // NTBA's emit policy ever shifts.
445
+ this.wireMessageHandlers();
446
+ // Identify self so the boot log is honest about which bot is alive.
447
+ try {
448
+ const me = await this.client.getMe();
449
+ this.botUsername = me.username ?? me.first_name ?? null;
450
+ // Phase v4.1-3.2 — second liveness log. If "polling launched"
451
+ // fires but "polling getMe ok" doesn't, getMe is hanging
452
+ // (auth issue, network filter, or DNS).
453
+ this.logInfo('polling getMe ok', {
454
+ fingerprint: exports.TELEGRAM_ADAPTER_BUILD,
455
+ botUsername: this.botUsername,
456
+ });
457
+ this.logInfo(`Connected as @${this.botUsername ?? 'bot'}`);
458
+ this.healthy = true;
459
+ this.state = 'active';
460
+ }
461
+ catch (e) {
462
+ // 401 here means the token is rejected — keep adapter inert
463
+ // rather than retry-storming.
464
+ this.logError(`getMe failed: ${this.scrubToken(e?.message)}`);
465
+ this.healthy = false;
466
+ this.state = 'inactive';
467
+ try {
468
+ await this.client.stopPolling();
469
+ }
470
+ catch { /* nothing to clean */ }
471
+ this.client = null;
472
+ return;
473
+ }
474
+ // Register outbound delivery so gateway.deliver() / broadcast() work.
475
+ gateway_1.gateway.registerChannel('telegram', async (msg) => {
476
+ return this.deliverToChat(msg.channelId, msg.text);
477
+ });
478
+ // Surface bot commands in the Telegram client's `/` menu. Best-effort —
479
+ // a 429 here doesn't block startup.
480
+ this.publishBotCommands().catch((e) => this.logWarn(`setMyCommands failed: ${this.scrubToken(e.message)}`));
481
+ }
482
+ async stop() {
483
+ this.healthy = false;
484
+ this.state = 'inactive';
485
+ // Phase v4.1-2 — flush group state + dispose rate-limiter timers.
486
+ // Best-effort; never let cleanup throw stop the shutdown chain.
487
+ try {
488
+ await this.groupStore?.flushNow();
489
+ }
490
+ catch { /* ignore */ }
491
+ try {
492
+ this.rateLimiter?.dispose();
493
+ }
494
+ catch { /* ignore */ }
495
+ if (!this.client) {
496
+ return;
497
+ }
498
+ gateway_1.gateway.unregisterChannel('telegram');
499
+ // stopPolling resolves once the in-flight long-poll ends; on a 50s
500
+ // poll window we wait at most that long. Wrap in try/catch because
501
+ // network errors during shutdown are not interesting.
502
+ try {
503
+ await this.client.stopPolling({ cancel: true });
504
+ }
505
+ catch {
506
+ /* shutdown best-effort */
507
+ }
508
+ this.client = null;
509
+ // Phase v4.1-preship-cleanup — release the local-machine lock
510
+ // so a follow-up adapter restart (or another aiden process)
511
+ // can acquire polling immediately rather than waiting for the
512
+ // 60s staleness timeout.
513
+ this.releaseLocalLock();
514
+ this.logInfo('Disconnected');
515
+ }
516
+ async send(chatId, message) {
517
+ await this.deliverToChat(chatId, message);
518
+ }
519
+ isHealthy() {
520
+ return this.healthy;
521
+ }
522
+ // ── Status accessors (Phase v4.1-1.1 — used by /channel commands) ──
523
+ /** Bot username from the last successful getMe() call, or null. */
524
+ getBotUsername() {
525
+ return this.botUsername;
526
+ }
527
+ /** True when a token was found in the environment at last start(). */
528
+ hasToken() {
529
+ return this.token.length > 0;
530
+ }
531
+ /** Phase v4.1-1.2 — coarse adapter state, drives boot pill + status. */
532
+ getState() {
533
+ return this.state;
534
+ }
535
+ /** Phase v4.1-2 — read-only group store accessor for CLI commands. */
536
+ getGroupStore() {
537
+ return this.groupStore;
538
+ }
539
+ /** Phase v4.1-2 — read-only allowlist accessor for CLI commands. */
540
+ getAllowedGroups() {
541
+ return this.allowedGroups;
542
+ }
543
+ /**
544
+ * Phase v4.1-3 — voice diagnostics for `/channel telegram voice
545
+ * status`. Safe to call any time; returns the in-memory counters
546
+ * (reset on adapter start) plus the cache footprint on disk.
547
+ */
548
+ async getVoiceDiagnostics() {
549
+ const cacheDir = this.getVoiceCacheDir();
550
+ let cacheBytes = 0;
551
+ let cacheFileCount = 0;
552
+ try {
553
+ const entries = await node_fs_1.promises.readdir(cacheDir, { withFileTypes: true });
554
+ for (const e of entries) {
555
+ if (!e.isFile())
556
+ continue;
557
+ try {
558
+ const st = await node_fs_1.promises.stat(node_path_1.default.join(cacheDir, e.name));
559
+ cacheBytes += st.size;
560
+ cacheFileCount += 1;
561
+ }
562
+ catch { /* skip unreadable */ }
563
+ }
564
+ }
565
+ catch { /* dir missing — ok, just zero */ }
566
+ return {
567
+ enabled: isVoiceEnabled(),
568
+ threshold: readConfidenceThreshold(),
569
+ language: (process.env.TELEGRAM_VOICE_LANGUAGE ?? '').trim() || null,
570
+ cacheDir,
571
+ cacheBytes,
572
+ cacheFileCount,
573
+ transcribedCount: this.voiceTranscribedCount,
574
+ receivedCount: this.voiceReceivedCount,
575
+ };
576
+ }
577
+ /**
578
+ * Phase v4.1-3 — public hook for tests + the CLI. Returns the
579
+ * absolute path to where voice notes are cached on disk.
580
+ */
581
+ getVoiceCacheDir() {
582
+ return node_path_1.default.join((0, paths_1.resolveAidenPaths)().root, VOICE_CACHE_PARENT, VOICE_CACHE_DIR_NAME);
583
+ }
584
+ /**
585
+ * Phase v4.1-4 — media (voice + photos + documents) aggregate
586
+ * diagnostics for `/channel telegram media status`. Walks the
587
+ * three cache subdirs, totals their sizes + file counts, returns
588
+ * the supported-types table.
589
+ */
590
+ async getMediaDiagnostics() {
591
+ return {
592
+ enabled: isMediaEnabled(),
593
+ supportedDocTypes: Object.keys(SUPPORTED_DOC_MIME).map((k) => k.replace(/^\./, '')),
594
+ voice: await this.summarizeCacheDir(this.getVoiceCacheDir()),
595
+ photos: {
596
+ ...(await this.summarizeCacheDir(this.getPhotoCacheDir())),
597
+ receivedCount: this.photoReceivedCount,
598
+ processedCount: this.photoProcessedCount,
599
+ },
600
+ documents: {
601
+ ...(await this.summarizeCacheDir(this.getDocumentCacheDir())),
602
+ receivedCount: this.documentReceivedCount,
603
+ processedCount: this.documentProcessedCount,
604
+ },
605
+ };
606
+ }
607
+ async summarizeCacheDir(dir) {
608
+ let bytes = 0;
609
+ let files = 0;
610
+ try {
611
+ const entries = await node_fs_1.promises.readdir(dir, { withFileTypes: true });
612
+ for (const e of entries) {
613
+ if (!e.isFile())
614
+ continue;
615
+ try {
616
+ const st = await node_fs_1.promises.stat(node_path_1.default.join(dir, e.name));
617
+ bytes += st.size;
618
+ files += 1;
619
+ }
620
+ catch { /* skip */ }
621
+ }
622
+ }
623
+ catch { /* dir missing — ok */ }
624
+ return { dir, bytes, files };
625
+ }
626
+ getDiagnostics() {
627
+ return {
628
+ healthy: this.healthy,
629
+ hasToken: this.hasToken(),
630
+ botUsername: this.botUsername,
631
+ lastMessageAt: this.lastMessageAt,
632
+ errorCount: this.errorCount,
633
+ // node-telegram-bot-api exposes isPolling() — guard for the lib being
634
+ // mid-construction in early start() failures.
635
+ pollingActive: this.client?.isPolling?.() === true,
636
+ state: this.state,
637
+ consecutiveConflicts: this.consecutiveConflicts,
638
+ buildFingerprint: exports.TELEGRAM_ADAPTER_BUILD,
639
+ pollingParams: this.lastPollingParams,
640
+ };
641
+ }
642
+ /**
643
+ * Phase v4.1-1.2 — `/channel telegram takeover` reaches the network
644
+ * directly to evict the other poller. Two-step:
645
+ * 1. POST /deleteWebhook?drop_pending_updates=true — clears any
646
+ * stale webhook AND drops the buffered update queue.
647
+ * 2. GET /getUpdates?offset=-1 — Telegram's documented way to
648
+ * "kick" the other concurrent long-poll. The other instance's
649
+ * in-flight getUpdates returns 409 + closes; on its retry it
650
+ * hits 409 too and (in adapters that detect it) surrenders.
651
+ * Returns ok/error so the slash command can render diagnostics.
652
+ */
653
+ async takeoverPolling() {
654
+ if (!this.token)
655
+ return { ok: false, reason: 'no token configured' };
656
+ const ctrl = new AbortController();
657
+ const timer = setTimeout(() => ctrl.abort(), 10000);
658
+ const base = `https://api.telegram.org/bot${this.token}`;
659
+ try {
660
+ // 1. Clear webhook + pending updates so we start from a clean queue.
661
+ const dw = await fetch(`${base}/deleteWebhook?drop_pending_updates=true`, { signal: ctrl.signal, method: 'POST' });
662
+ if (!dw.ok) {
663
+ return { ok: false, reason: `deleteWebhook HTTP ${dw.status}` };
664
+ }
665
+ // 2. Issue a synthetic getUpdates with offset=-1 to interrupt the
666
+ // rival poll and reserve the slot for us.
667
+ const gu = await fetch(`${base}/getUpdates?offset=-1&limit=1&timeout=0`, { signal: ctrl.signal });
668
+ if (!gu.ok) {
669
+ return { ok: false, reason: `getUpdates HTTP ${gu.status}` };
670
+ }
671
+ this.consecutiveConflicts = 0;
672
+ // After takeover we want a clean restart so polling resumes with
673
+ // fresh client state + the 409 counter zeroed.
674
+ await this.stop();
675
+ await this.start();
676
+ return { ok: this.healthy };
677
+ }
678
+ catch (err) {
679
+ return {
680
+ ok: false,
681
+ reason: err?.name === 'AbortError'
682
+ ? 'request timed out (10s)'
683
+ : (err?.message ?? 'network error'),
684
+ };
685
+ }
686
+ finally {
687
+ clearTimeout(timer);
688
+ }
689
+ }
690
+ // ── Inbound ──────────────────────────────────────────────────
691
+ /**
692
+ * Phase v4.1-3.1 — extracted message-event wiring. Subscribes to
693
+ * the primary `'message'` event AND defensive secondaries `'voice'`
694
+ * and `'audio'`. Idempotency via `markMessageSeen` so a message
695
+ * that fires both events (NTBA's normal behaviour) only runs the
696
+ * pipeline once.
697
+ *
698
+ * Marked `protected` so smoke harnesses can drive this directly
699
+ * against a stub EventEmitter without standing up a real polling
700
+ * loop. Tests do `(adapter as never).wireMessageHandlers()` after
701
+ * patching `client` to a fake bot.
702
+ */
703
+ wireMessageHandlers() {
704
+ if (!this.client)
705
+ return;
706
+ const dispatch = (source) => async (msg) => {
707
+ // Phase v4.1-3.2 — third liveness log. Fires once per process
708
+ // on the first inbound message regardless of content type. If
709
+ // "polling launched" + "polling getMe ok" fired but this never
710
+ // does, the polling loop is alive in the SDK sense but no
711
+ // messages are reaching us — likely a server-side filter,
712
+ // sticky `allowed_updates`, or a Telegram throttle.
713
+ if (!this.firstInboundLogged) {
714
+ this.firstInboundLogged = true;
715
+ this.logInfo('polling first inbound', {
716
+ fingerprint: exports.TELEGRAM_ADAPTER_BUILD,
717
+ source,
718
+ chatType: msg.chat?.type,
719
+ hasVoice: !!msg.voice,
720
+ hasAudio: !!msg.audio,
721
+ hasText: !!(msg.text ?? '').trim(),
722
+ });
723
+ }
724
+ // Idempotency: defensive 'voice' / 'audio' subs would otherwise
725
+ // double-process the same payload after 'message' already ran.
726
+ if (!this.markMessageSeen(msg)) {
727
+ this.logInfo('duplicate event suppressed', {
728
+ source,
729
+ messageId: msg.message_id,
730
+ });
731
+ return;
732
+ }
733
+ // A successful message means we're alive — clear the degraded
734
+ // flag and reset the 409 streak (something else is polling but
735
+ // we got through, or the conflict resolved).
736
+ this.consecutiveConflicts = 0;
737
+ if (this.state === 'degraded')
738
+ this.state = 'active';
739
+ try {
740
+ await this.handleIncoming(msg);
741
+ }
742
+ catch (e) {
743
+ this.logError(`Handler error: ${this.scrubToken(e?.message)}`, { source });
744
+ }
745
+ };
746
+ this.client.on('message', dispatch('message'));
747
+ // Defensive secondary subscriptions — Phase v4.1-3.1 + v4.1-4.
748
+ // NTBA emits both 'message' AND the type-specific event for media
749
+ // attachments. The 'message' handler is the primary path; these
750
+ // catch the same payload if the primary somehow misses it. The
751
+ // dedup guard on dispatch() makes this idempotent.
752
+ this.client.on('voice', dispatch('voice'));
753
+ this.client.on('audio', dispatch('audio'));
754
+ this.client.on('photo', dispatch('photo'));
755
+ this.client.on('document', dispatch('document'));
756
+ }
757
+ /**
758
+ * Phase v4.1-3.1 — message-id dedup. Returns true if this is the
759
+ * first time we've seen `${chat_id}:${message_id}` (caller should
760
+ * process), false if we've already handled it (caller should skip).
761
+ * Bounded FIFO of 256 entries — older keys get evicted.
762
+ */
763
+ markMessageSeen(msg) {
764
+ const chatId = msg.chat?.id;
765
+ const messageId = msg.message_id;
766
+ if (chatId === undefined || messageId === undefined) {
767
+ // Shape we don't recognize — let it through; handleIncoming will
768
+ // log and drop on the unrecognized-shape guard.
769
+ return true;
770
+ }
771
+ const key = `${chatId}:${messageId}`;
772
+ if (this.recentMessageIds.has(key))
773
+ return false;
774
+ this.recentMessageIds.add(key);
775
+ this.recentMessageOrder.push(key);
776
+ if (this.recentMessageOrder.length > TelegramAdapter.RECENT_MSG_LIMIT) {
777
+ const evicted = this.recentMessageOrder.shift();
778
+ if (evicted !== undefined)
779
+ this.recentMessageIds.delete(evicted);
780
+ }
781
+ return true;
782
+ }
783
+ async handleIncoming(msg) {
784
+ this.lastMessageAt = Date.now();
785
+ // Phase v4.1-3.1 — shape fingerprint at the dispatch boundary.
786
+ // Logs at info so the live test can see "we received the update,
787
+ // and here's what fields it has" before any gate or handler runs.
788
+ // Pure shape only — no payload contents, no PII beyond chat type.
789
+ const shape = {
790
+ chatType: msg.chat?.type,
791
+ messageId: msg.message_id,
792
+ hasText: !!(msg.text ?? '').trim(),
793
+ hasCaption: !!(msg.caption ?? '').trim(),
794
+ hasVoice: !!msg.voice,
795
+ hasAudio: !!msg.audio,
796
+ hasPhoto: !!msg.photo,
797
+ hasDocument: !!msg.document,
798
+ hasVideo: !!msg.video,
799
+ hasVideoNote: !!msg.video_note,
800
+ hasSticker: !!msg.sticker,
801
+ isReply: !!msg.reply_to_message,
802
+ isBot: !!msg.from?.is_bot,
803
+ };
804
+ this.logInfo('inbound update', shape);
805
+ // Skip bot-authored messages — prevents loops if another bot is
806
+ // also in the conversation, and avoids replying to our own forwards.
807
+ if (msg.from?.is_bot)
808
+ return;
809
+ // Phase v4.1-3 — detect voice/audio attachment up front so we
810
+ // don't drop voice messages on the empty-text early-return.
811
+ const voiceFile = msg.voice;
812
+ const audioFile = msg.audio;
813
+ const hasVoice = !!voiceFile;
814
+ const hasAudio = !!audioFile;
815
+ // Phase v4.1-4 — photo + document detection. msg.photo is an
816
+ // array of PhotoSize objects sorted small→large; we always pick
817
+ // the largest. msg.document is a single object with optional
818
+ // file_name / mime_type. Either signals "user sent media."
819
+ const photoArr = msg.photo;
820
+ const documentFile = msg.document;
821
+ const hasPhoto = Array.isArray(photoArr) && photoArr.length > 0;
822
+ const hasDocument = !!documentFile;
823
+ // Plain text path retains the old guard — but only when no voice
824
+ // attachment is present. Captions are handled separately below.
825
+ const text = (msg.text ?? '').trim();
826
+ const caption = (msg.caption ?? '').trim();
827
+ if (!text && !caption && !hasVoice && !hasAudio && !hasPhoto && !hasDocument) {
828
+ // Phase v4.1-3.1 — log the dropped shape so the live test can
829
+ // see "we got an update we didn't know how to handle." Includes
830
+ // the field-keys list so unrecognized media (`video_note`,
831
+ // `document` with audio MIME, future Telegram update kinds)
832
+ // shows up loudly in the log instead of being silently swallowed.
833
+ const shapeKeys = Object.keys(msg)
834
+ .filter((k) => k !== 'chat' && k !== 'from' && k !== 'date'
835
+ && k !== 'message_id' && msg[k] !== undefined);
836
+ this.logInfo('dropped: unrecognized message shape', {
837
+ chatType: msg.chat?.type,
838
+ keys: shapeKeys,
839
+ });
840
+ return;
841
+ }
842
+ const chatId = String(msg.chat.id);
843
+ const chatType = msg.chat.type;
844
+ const userId = msg.from?.id ? String(msg.from.id) : chatId;
845
+ // ── Group routing (Phase v4.1-2 + v4.1-3) ───────────────────────
846
+ if (chatType !== 'private') {
847
+ // Allowlist gate. Empty allowlist = open (default behaviour;
848
+ // works out of the box). Populated allowlist = strict opt-in.
849
+ if (this.allowedGroups.size > 0 && !this.allowedGroups.has(chatId)) {
850
+ this.logInfo(`group not on allowlist — ignored`, { chatId });
851
+ return;
852
+ }
853
+ // Rate-limit applies BEFORE all other gates so a spammer's
854
+ // commands (and voice notes) also get suppressed.
855
+ if (this.rateLimiter?.shouldThrottle(userId))
856
+ return;
857
+ // Cache title + bump lastMessageAt so /channel telegram groups list works.
858
+ this.groupStore?.observeMessage(chatId, { title: msg.chat.title });
859
+ // Slash commands only matter for the text path. Voice/audio
860
+ // attachments never carry a slash command.
861
+ if (text) {
862
+ const outcome = await this.routeCommand(msg, chatId, userId);
863
+ if (outcome)
864
+ return;
865
+ }
866
+ // Pause gate — admin can /pause without removing the bot.
867
+ // Applied AFTER command routing so /resume can break the spell.
868
+ if (this.groupStore?.isPaused(chatId)) {
869
+ this.logInfo(`group paused — ignored`, { chatId });
870
+ return;
871
+ }
872
+ // Per-group user allowlist (set by /allowusers).
873
+ if (this.groupStore && !this.groupStore.userIsAllowed(chatId, userId)) {
874
+ this.logInfo(`user not on group allowlist — ignored`, { chatId, userId });
875
+ return;
876
+ }
877
+ // Mention gate. In groups the bot only replies when explicitly
878
+ // addressed (mention or reply-to-bot), unless the operator has
879
+ // flipped TELEGRAM_GROUPS_RESPOND_ALL. For voice/audio with no
880
+ // text, the caption serves the same role; reply-to-bot also
881
+ // counts as an address even without a caption.
882
+ const addressedText = text || caption;
883
+ if (!this.groupsRespondAll && !this.isAddressedToBot(msg, addressedText)) {
884
+ this.logInfo(`group message not addressed to bot — ignored`, { chatId });
885
+ return;
886
+ }
887
+ // Voice/audio path — gates passed, hand off to the voice handler.
888
+ if (hasVoice || hasAudio) {
889
+ const groupCaption = this.stripBotMention(caption);
890
+ await this.handleVoiceMessage(msg, (voiceFile ?? audioFile), hasAudio, chatId, userId, groupCaption, true);
891
+ return;
892
+ }
893
+ // Phase v4.1-4 — photo path (group).
894
+ if (hasPhoto) {
895
+ const groupCaption = this.stripBotMention(caption);
896
+ await this.handlePhotoMessage(msg, photoArr, chatId, userId, groupCaption, true);
897
+ return;
898
+ }
899
+ // Phase v4.1-4 — document path (group).
900
+ if (hasDocument) {
901
+ const groupCaption = this.stripBotMention(caption);
902
+ await this.handleDocumentMessage(msg, documentFile, chatId, userId, groupCaption, true);
903
+ return;
904
+ }
905
+ const stripped = this.stripBotMention(text);
906
+ // Phase v4.1-4.2 — same Telegram-split coalesce as the DM path
907
+ // below. Compare the stripped text (post-mention) against the
908
+ // recent caption, since groups require an @mention but the
909
+ // caption is what the photo handler recorded.
910
+ if (this.shouldSuppressAsTelegramSplit(chatId, stripped)) {
911
+ return;
912
+ }
913
+ const wrapped = this.wrapGroupMessage(msg, stripped);
914
+ await this.deliverAgentReply(chatId, userId, wrapped, msg.message_id);
915
+ return;
916
+ }
917
+ // ── Direct message path (Phase 1 behaviour preserved) ───────────
918
+ if (!this.isAllowed(chatId)) {
919
+ this.logInfo(`Blocked unauthorized chat: ${chatId}`);
920
+ await this.deliverToChat(chatId, '⚠️ This chat is not on the allowlist. Add its ID to TELEGRAM_ALLOWED_CHATS to enable.').catch(() => undefined);
921
+ return;
922
+ }
923
+ // Rate-limit DMs too — same per-user budget across DMs and groups.
924
+ if (this.rateLimiter?.shouldThrottle(userId))
925
+ return;
926
+ // Slash commands first (text only).
927
+ if (text) {
928
+ const dmOutcome = await this.routeCommand(msg, chatId, userId);
929
+ if (dmOutcome)
930
+ return;
931
+ }
932
+ // Voice/audio DM path.
933
+ if (hasVoice || hasAudio) {
934
+ await this.handleVoiceMessage(msg, (voiceFile ?? audioFile), hasAudio, chatId, userId, caption, false);
935
+ return;
936
+ }
937
+ // Phase v4.1-4 — photo DM path.
938
+ if (hasPhoto) {
939
+ await this.handlePhotoMessage(msg, photoArr, chatId, userId, caption, false);
940
+ return;
941
+ }
942
+ // Phase v4.1-4 — document DM path.
943
+ if (hasDocument) {
944
+ await this.handleDocumentMessage(msg, documentFile, chatId, userId, caption, false);
945
+ return;
946
+ }
947
+ // Phase v4.1-4.2 — Telegram delivery-split coalesce. When a
948
+ // client splits a single "send caption + photo" action into a
949
+ // standalone text update + a photo-with-matching-caption update
950
+ // arriving 1-10ms apart in the same poll batch, the photo's
951
+ // `handlePhotoMessage` synchronously records its caption (BEFORE
952
+ // any await) into `recentPhotoCaptions`. By the time this text
953
+ // microtask resumes, the caption is in the slot — we suppress
954
+ // the orphaned text dispatch so the user gets ONE reply (the
955
+ // photo's annotated description), not two.
956
+ if (this.shouldSuppressAsTelegramSplit(chatId, text)) {
957
+ return;
958
+ }
959
+ await this.deliverAgentReply(chatId, userId, text, msg.message_id);
960
+ }
961
+ /**
962
+ * Phase v4.1-3 — voice/audio handler. Called only after the gate
963
+ * chain (allowlist → mention → pause → user-allow → rate-limit) has
964
+ * cleared, so we know we're allowed to spend bandwidth on getFile().
965
+ *
966
+ * Routing:
967
+ * - voice disabled (TELEGRAM_VOICE_ENABLED=false) → friendly reject
968
+ * - file_size > 25 MB → friendly reject (size cap)
969
+ * - download failure → friendly reject
970
+ * - transcription failure → smuggle [error annotation] into the agent
971
+ * - hallucination → smuggle [noise annotation] into the agent
972
+ * - low confidence → echo "🎤 _heard:_ ..." THEN smuggle transcript
973
+ * - confident → silent smuggle of [transcript: "X"]
974
+ *
975
+ * Caption (when present) is concatenated after the transcript
976
+ * annotation so the agent sees both signals on one user turn.
977
+ */
978
+ async handleVoiceMessage(msg, file, isAudio, chatId, userId, caption, isGroup) {
979
+ this.voiceReceivedCount += 1;
980
+ // Phase v4.1-3.1 — entry-point log. If this fires, dispatch made
981
+ // it through every gate; if it doesn't fire, the silent drop is
982
+ // upstream in handleIncoming and the inbound-update + drop logs
983
+ // tell us where.
984
+ this.logInfo('voice handler entered', {
985
+ isAudio,
986
+ isGroup,
987
+ fileSize: file.file_size,
988
+ hasCaption: caption.length > 0,
989
+ chatId,
990
+ });
991
+ if (!isVoiceEnabled()) {
992
+ await this.deliverToChat(chatId, '🎤 Voice messages are disabled. Re-enable with /channel telegram voice enable.').catch(() => undefined);
993
+ return;
994
+ }
995
+ // Pre-check the size hint Telegram supplies on the file. The
996
+ // whisper-transcribe module also size-checks the on-disk file,
997
+ // but rejecting here avoids the download round-trip when we
998
+ // already know the payload is too big.
999
+ const fileSize = typeof file.file_size === 'number' ? file.file_size : undefined;
1000
+ if (fileSize !== undefined && fileSize > 25 * 1024 * 1024) {
1001
+ await this.deliverToChat(chatId, `🎤 Voice message is too large (${(fileSize / (1024 * 1024)).toFixed(1)} MB; cap is 25 MB). Please record a shorter clip.`).catch(() => undefined);
1002
+ return;
1003
+ }
1004
+ // Download to local cache.
1005
+ let cachePath;
1006
+ try {
1007
+ cachePath = await this.downloadVoiceToCache(file.file_id, isAudio);
1008
+ }
1009
+ catch (e) {
1010
+ this.logError(`voice download failed: ${this.scrubToken(e?.message)}`);
1011
+ cachePath = null;
1012
+ }
1013
+ if (!cachePath) {
1014
+ await this.deliverToChat(chatId, '🎤 I couldn\'t download your voice message. Please try sending it again.').catch(() => undefined);
1015
+ return;
1016
+ }
1017
+ // Transcribe via the injected (or default) Whisper chain.
1018
+ const language = (process.env.TELEGRAM_VOICE_LANGUAGE ?? '').trim() || undefined;
1019
+ const transcribeOpts = {
1020
+ filePath: cachePath,
1021
+ logger: this.log.child('whisper'),
1022
+ };
1023
+ if (language)
1024
+ transcribeOpts.language = language;
1025
+ let result;
1026
+ try {
1027
+ result = await this.transcribeFn(transcribeOpts);
1028
+ }
1029
+ catch (e) {
1030
+ result = {
1031
+ success: false,
1032
+ error: `transcribe threw: ${e?.message ?? String(e)}`,
1033
+ };
1034
+ }
1035
+ // Failure / hallucination: hand the agent an "I heard noise" cue
1036
+ // so it composes the apology in its own voice. Don't echo to the
1037
+ // user directly — that's the agent's job.
1038
+ if (!result.success || result.isHallucination) {
1039
+ const reason = result.isHallucination
1040
+ ? 'the audio was unintelligible (Whisper returned a known noise pattern)'
1041
+ : (result.error ?? 'unknown error');
1042
+ const annotation = `[The user sent a voice message but transcription failed: ${reason}. ` +
1043
+ `Apologize briefly and ask them to type the message instead.]`;
1044
+ const wrapped = isGroup ? this.wrapGroupMessage(msg, annotation) : annotation;
1045
+ await this.deliverAgentReply(chatId, userId, wrapped, msg.message_id);
1046
+ return;
1047
+ }
1048
+ this.voiceTranscribedCount += 1;
1049
+ const transcript = (result.text ?? '').trim();
1050
+ const threshold = readConfidenceThreshold();
1051
+ // `avgLogprob` is negative; closer to 0 = more confident. Missing
1052
+ // = treat as confident (provider didn't surface logprobs at all,
1053
+ // most commonly the local Whisper.cpp path).
1054
+ const confident = result.avgLogprob === undefined
1055
+ ? true
1056
+ : result.avgLogprob >= threshold;
1057
+ // Low-confidence echo lands BEFORE the agent reply so the user
1058
+ // sees what the bot heard while the agent is still thinking.
1059
+ if (!confident) {
1060
+ await this.deliverToChat(chatId, `🎤 _heard:_ "${escapeMarkdownItalic(transcript)}"`).catch(() => undefined);
1061
+ }
1062
+ const annotation = caption
1063
+ ? `[The user sent a voice message. Transcript: "${transcript}"]\n\n${caption}`
1064
+ : `[The user sent a voice message. Transcript: "${transcript}"]`;
1065
+ const wrapped = isGroup ? this.wrapGroupMessage(msg, annotation) : annotation;
1066
+ await this.deliverAgentReply(chatId, userId, wrapped, msg.message_id);
1067
+ }
1068
+ /**
1069
+ * Phase v4.1-3 — pull the file off Telegram and write it under
1070
+ * `<aiden_root>/cache/audio/audio_<uuid12>.<ext>`. Returns the
1071
+ * absolute path on success, null on any failure (logged).
1072
+ */
1073
+ async downloadVoiceToCache(fileId, isAudio) {
1074
+ if (!this.client)
1075
+ return null;
1076
+ const cacheDir = this.getVoiceCacheDir();
1077
+ await node_fs_1.promises.mkdir(cacheDir, { recursive: true });
1078
+ const ext = isAudio ? '.mp3' : '.ogg';
1079
+ const id = (0, node_crypto_1.randomUUID)().replace(/-/g, '').slice(0, 12);
1080
+ const outPath = node_path_1.default.join(cacheDir, `audio_${id}${ext}`);
1081
+ // Phase v4.1-3.1 — visible "starting download" log so a hung
1082
+ // getFileStream() (silent network stall, partial response, etc.)
1083
+ // is distinguishable from "never tried" in the live log.
1084
+ this.logInfo('downloading voice file', {
1085
+ fileIdPrefix: fileId.slice(0, 12),
1086
+ ext,
1087
+ outPath,
1088
+ });
1089
+ try {
1090
+ const stream = this.client.getFileStream(fileId);
1091
+ const writer = (0, node_fs_1.createWriteStream)(outPath);
1092
+ await new Promise((resolve, reject) => {
1093
+ stream.on('error', reject);
1094
+ writer.on('error', reject);
1095
+ writer.on('finish', () => resolve());
1096
+ stream.pipe(writer);
1097
+ });
1098
+ return outPath;
1099
+ }
1100
+ catch (e) {
1101
+ this.logError(`getFileStream failed: ${this.scrubToken(e?.message)}`);
1102
+ try {
1103
+ await node_fs_1.promises.unlink(outPath);
1104
+ }
1105
+ catch { /* ignore */ }
1106
+ return null;
1107
+ }
1108
+ }
1109
+ /**
1110
+ * Phase v4.1-3 + v4.1-4 — startup-only cache janitor. Sweeps the
1111
+ * voice / photo / document cache subdirs. For each: if the dir is
1112
+ * over 500 MB, delete files older than 7 days. No background timer;
1113
+ * defer real TTL to v4.2.
1114
+ *
1115
+ * `protected` so the smoke can construct an adapter without start()
1116
+ * and invoke the janitor directly to verify it on pre-seeded dirs.
1117
+ */
1118
+ async runVoiceCacheJanitor() {
1119
+ await this.sweepCacheDir('voice', this.getVoiceCacheDir());
1120
+ await this.sweepCacheDir('photos', this.getPhotoCacheDir());
1121
+ await this.sweepCacheDir('documents', this.getDocumentCacheDir());
1122
+ }
1123
+ /**
1124
+ * Phase v4.1-4 — generic cache-dir sweep. Extracted so each media
1125
+ * type's directory uses identical policy.
1126
+ */
1127
+ async sweepCacheDir(label, dir) {
1128
+ try {
1129
+ await node_fs_1.promises.mkdir(dir, { recursive: true });
1130
+ const entries = await node_fs_1.promises.readdir(dir, { withFileTypes: true });
1131
+ let totalBytes = 0;
1132
+ const fileInfos = [];
1133
+ for (const e of entries) {
1134
+ if (!e.isFile())
1135
+ continue;
1136
+ const full = node_path_1.default.join(dir, e.name);
1137
+ try {
1138
+ const st = await node_fs_1.promises.stat(full);
1139
+ totalBytes += st.size;
1140
+ fileInfos.push({ path: full, mtimeMs: st.mtimeMs, size: st.size });
1141
+ }
1142
+ catch { /* skip unreadable */ }
1143
+ }
1144
+ if (totalBytes <= JANITOR_THRESHOLD_BYTES) {
1145
+ this.logInfo(`${label} cache: under threshold`, {
1146
+ bytes: totalBytes,
1147
+ files: fileInfos.length,
1148
+ });
1149
+ return;
1150
+ }
1151
+ const cutoff = Date.now() - JANITOR_AGE_THRESHOLD_MS;
1152
+ let cleaned = 0;
1153
+ let freed = 0;
1154
+ for (const f of fileInfos) {
1155
+ if (f.mtimeMs < cutoff) {
1156
+ try {
1157
+ await node_fs_1.promises.unlink(f.path);
1158
+ cleaned += 1;
1159
+ freed += f.size;
1160
+ }
1161
+ catch { /* ignore */ }
1162
+ }
1163
+ }
1164
+ this.logInfo(`${label} cache: cleaned ${cleaned} files, freed ${(freed / 1024 / 1024).toFixed(1)} MB`, { cleaned, freedBytes: freed, totalBefore: totalBytes });
1165
+ }
1166
+ catch (e) {
1167
+ this.logWarn(`${label} cache janitor: ${e?.message ?? String(e)}`);
1168
+ }
1169
+ }
1170
+ /** Phase v4.1-4 — photo cache directory. */
1171
+ getPhotoCacheDir() {
1172
+ return node_path_1.default.join((0, paths_1.resolveAidenPaths)().root, VOICE_CACHE_PARENT, PHOTO_CACHE_DIR_NAME);
1173
+ }
1174
+ /** Phase v4.1-4 — document cache directory. */
1175
+ getDocumentCacheDir() {
1176
+ return node_path_1.default.join((0, paths_1.resolveAidenPaths)().root, VOICE_CACHE_PARENT, DOCUMENT_CACHE_DIR_NAME);
1177
+ }
1178
+ /**
1179
+ * Phase v4.1-4 — photo handler. Called only after the gate chain
1180
+ * has cleared. Mirrors `handleVoiceMessage`'s shape: download to
1181
+ * cache, hand off to the photo-vision adapter, smuggle the result
1182
+ * into the agent's user turn (native attachment hint OR description
1183
+ * annotation), concat caption when present.
1184
+ */
1185
+ async handlePhotoMessage(msg, photoArr, chatId, userId, caption, isGroup) {
1186
+ this.photoReceivedCount += 1;
1187
+ // Phase v4.1-4.2 — record caption synchronously BEFORE any await
1188
+ // so a paired text-only message (Telegram delivery split) that
1189
+ // arrived ~1ms before this photo can detect the split when its
1190
+ // microtask resumes, and suppress its dispatch. Per-chat slot;
1191
+ // the latest photo wins which is fine — we only care about the
1192
+ // most recent caption when looking for a match.
1193
+ if (caption) {
1194
+ this.recentPhotoCaptions.set(chatId, {
1195
+ caption: normalizeText(caption),
1196
+ ts: Date.now(),
1197
+ });
1198
+ }
1199
+ if (!isMediaEnabled()) {
1200
+ await this.deliverToChat(chatId, '📎 Media (photos + documents) is disabled. Re-enable with /channel telegram media enable.').catch(() => undefined);
1201
+ return;
1202
+ }
1203
+ // Pick the largest PhotoSize variant Telegram offered us.
1204
+ const largest = photoArr[photoArr.length - 1];
1205
+ const fileSize = typeof largest?.file_size === 'number' ? largest.file_size : undefined;
1206
+ this.logInfo('photo handler entered', {
1207
+ isGroup,
1208
+ fileSize,
1209
+ hasCaption: caption.length > 0,
1210
+ chatId,
1211
+ width: largest?.width,
1212
+ height: largest?.height,
1213
+ });
1214
+ if (fileSize !== undefined && fileSize > 25 * 1024 * 1024) {
1215
+ await this.deliverToChat(chatId, `📎 Photo is too large (${(fileSize / (1024 * 1024)).toFixed(1)} MB; cap is 25 MB). Please send a smaller image.`).catch(() => undefined);
1216
+ return;
1217
+ }
1218
+ const cachePath = await this.downloadMediaToCache(largest.file_id, this.getPhotoCacheDir(), photoExtFor(largest), 'photo');
1219
+ if (!cachePath) {
1220
+ await this.deliverToChat(chatId, '📎 I couldn\'t download your photo. Please try sending it again.').catch(() => undefined);
1221
+ return;
1222
+ }
1223
+ const modelInfo = this.activeModelInfo() ?? {};
1224
+ const photoOpts = {
1225
+ filePath: cachePath,
1226
+ logger: this.log.child('vision'),
1227
+ };
1228
+ if (modelInfo.providerId)
1229
+ photoOpts.providerId = modelInfo.providerId;
1230
+ if (modelInfo.modelId)
1231
+ photoOpts.modelId = modelInfo.modelId;
1232
+ let result;
1233
+ try {
1234
+ result = await this.analyzePhotoFn(photoOpts);
1235
+ }
1236
+ catch (e) {
1237
+ result = { success: false, mode: 'text', error: `analyzePhoto threw: ${e?.message ?? String(e)}` };
1238
+ }
1239
+ if (!result.success) {
1240
+ // Phase v4.1-4.2 — preserve caption on the failure path so the
1241
+ // agent still sees what the user wanted to ask about even when
1242
+ // vision couldn't describe the image.
1243
+ const baseAnnotation = `[The user sent a photo but description failed: ${result.error ?? 'unknown error'}. ` +
1244
+ `Apologize briefly and ask them to type the question instead.]`;
1245
+ const annotation = caption ? `${baseAnnotation}\n\n${caption}` : baseAnnotation;
1246
+ const wrapped = isGroup ? this.wrapGroupMessage(msg, annotation) : annotation;
1247
+ await this.deliverAgentReply(chatId, userId, wrapped, msg.message_id);
1248
+ return;
1249
+ }
1250
+ this.photoProcessedCount += 1;
1251
+ // Build the smuggled annotation. In native mode the agent loop is
1252
+ // responsible for actually attaching pixels to the user turn —
1253
+ // for v4.1-4 we hand it the cache path so it can do so. In text
1254
+ // mode we splice the auxiliary description directly.
1255
+ let annotation;
1256
+ if (result.mode === 'native' && result.nativePath) {
1257
+ annotation = caption
1258
+ ? `[The user sent a photo. The local cache path is "${result.nativePath}". The active model supports vision — attach the photo on the user turn.]\n\n${caption}`
1259
+ : `[The user sent a photo. The local cache path is "${result.nativePath}". The active model supports vision — attach the photo on the user turn.]`;
1260
+ }
1261
+ else {
1262
+ annotation = caption
1263
+ ? `[The user sent a photo. Description: ${result.description}]\n\n${caption}`
1264
+ : `[The user sent a photo. Description: ${result.description}]`;
1265
+ }
1266
+ const wrapped = isGroup ? this.wrapGroupMessage(msg, annotation) : annotation;
1267
+ await this.deliverAgentReply(chatId, userId, wrapped, msg.message_id);
1268
+ }
1269
+ /**
1270
+ * Phase v4.1-4 — document handler. Splits inbound documents on
1271
+ * MIME type:
1272
+ * - PDF → extract text, smuggle truncated content
1273
+ * - PNG / JPG / GIF / WEBP (image-as-document) → route through
1274
+ * the photo-vision pipeline
1275
+ * - Anything else → friendly reject reply
1276
+ */
1277
+ async handleDocumentMessage(msg, file, chatId, userId, caption, isGroup) {
1278
+ this.documentReceivedCount += 1;
1279
+ // Phase v4.1-4.2 — same coalesce-record as photos. Documents
1280
+ // (PDFs + image-as-document) get caption-bundled annotations
1281
+ // too, so a paired text update with matching caption should be
1282
+ // suppressed.
1283
+ if (caption) {
1284
+ this.recentPhotoCaptions.set(chatId, {
1285
+ caption: normalizeText(caption),
1286
+ ts: Date.now(),
1287
+ });
1288
+ }
1289
+ if (!isMediaEnabled()) {
1290
+ await this.deliverToChat(chatId, '📎 Media (photos + documents) is disabled. Re-enable with /channel telegram media enable.').catch(() => undefined);
1291
+ return;
1292
+ }
1293
+ const ext = guessDocumentExtension(file);
1294
+ this.logInfo('document handler entered', {
1295
+ isGroup,
1296
+ fileName: file.file_name,
1297
+ mimeType: file.mime_type,
1298
+ fileSize: file.file_size,
1299
+ ext,
1300
+ hasCaption: caption.length > 0,
1301
+ chatId,
1302
+ });
1303
+ if (!ext || !(ext in SUPPORTED_DOC_MIME)) {
1304
+ const supportedList = Object.keys(SUPPORTED_DOC_MIME).map((k) => k.replace(/^\./, '')).join(', ');
1305
+ await this.deliverToChat(chatId, `📎 Unsupported document type. Supported: ${supportedList}.`).catch(() => undefined);
1306
+ return;
1307
+ }
1308
+ const fileSize = file.file_size;
1309
+ if (typeof fileSize === 'number' && ext === '.pdf' && fileSize > 20 * 1024 * 1024) {
1310
+ await this.deliverToChat(chatId, `📎 PDF too large (${(fileSize / (1024 * 1024)).toFixed(1)} MB; cap is 20 MB). Please send a smaller file.`).catch(() => undefined);
1311
+ return;
1312
+ }
1313
+ if (typeof fileSize === 'number' && ext !== '.pdf' && fileSize > 25 * 1024 * 1024) {
1314
+ await this.deliverToChat(chatId, `📎 Image-as-document too large (${(fileSize / (1024 * 1024)).toFixed(1)} MB; cap is 25 MB). Please send a smaller file.`).catch(() => undefined);
1315
+ return;
1316
+ }
1317
+ const sanitizedName = sanitizeDocumentName(file.file_name ?? `document${ext}`);
1318
+ const cachePath = await this.downloadDocumentToCache(file.file_id, sanitizedName);
1319
+ if (!cachePath) {
1320
+ await this.deliverToChat(chatId, '📎 I couldn\'t download your document. Please try sending it again.').catch(() => undefined);
1321
+ return;
1322
+ }
1323
+ // Route image-as-document through the photo-vision pipeline so
1324
+ // the agent gets the same UX whether the user sent a photo or a
1325
+ // PNG-as-file.
1326
+ if (ext !== '.pdf') {
1327
+ const modelInfo = this.activeModelInfo() ?? {};
1328
+ const photoOpts = {
1329
+ filePath: cachePath,
1330
+ logger: this.log.child('vision'),
1331
+ };
1332
+ if (modelInfo.providerId)
1333
+ photoOpts.providerId = modelInfo.providerId;
1334
+ if (modelInfo.modelId)
1335
+ photoOpts.modelId = modelInfo.modelId;
1336
+ let result;
1337
+ try {
1338
+ result = await this.analyzePhotoFn(photoOpts);
1339
+ }
1340
+ catch (e) {
1341
+ result = { success: false, mode: 'text', error: `analyzePhoto threw: ${e?.message ?? String(e)}` };
1342
+ }
1343
+ if (!result.success) {
1344
+ // Phase v4.1-4.2 — preserve caption on the failure path.
1345
+ const baseAnnotation = `[The user sent an image (as a document) but description failed: ${result.error ?? 'unknown error'}. ` +
1346
+ `Apologize briefly.]`;
1347
+ const annotation = caption ? `${baseAnnotation}\n\n${caption}` : baseAnnotation;
1348
+ const wrapped = isGroup ? this.wrapGroupMessage(msg, annotation) : annotation;
1349
+ await this.deliverAgentReply(chatId, userId, wrapped, msg.message_id);
1350
+ return;
1351
+ }
1352
+ this.documentProcessedCount += 1;
1353
+ const baseLabel = `image (as document, "${sanitizedName}")`;
1354
+ let annotation;
1355
+ if (result.mode === 'native' && result.nativePath) {
1356
+ annotation = caption
1357
+ ? `[The user sent an ${baseLabel}. Local path: "${result.nativePath}". The active model supports vision — attach on the user turn.]\n\n${caption}`
1358
+ : `[The user sent an ${baseLabel}. Local path: "${result.nativePath}". The active model supports vision — attach on the user turn.]`;
1359
+ }
1360
+ else {
1361
+ annotation = caption
1362
+ ? `[The user sent an ${baseLabel}. Description: ${result.description}]\n\n${caption}`
1363
+ : `[The user sent an ${baseLabel}. Description: ${result.description}]`;
1364
+ }
1365
+ const wrapped = isGroup ? this.wrapGroupMessage(msg, annotation) : annotation;
1366
+ await this.deliverAgentReply(chatId, userId, wrapped, msg.message_id);
1367
+ return;
1368
+ }
1369
+ // PDF path — extract text, truncate to budget, smuggle.
1370
+ const modelInfo = this.activeModelInfo() ?? {};
1371
+ const pdfOpts = {
1372
+ filePath: cachePath,
1373
+ logger: this.log.child('pdf'),
1374
+ };
1375
+ if (typeof modelInfo.contextWindow === 'number')
1376
+ pdfOpts.modelContextWindow = modelInfo.contextWindow;
1377
+ let pdfResult;
1378
+ try {
1379
+ pdfResult = await this.extractPdfFn(pdfOpts);
1380
+ }
1381
+ catch (e) {
1382
+ pdfResult = { success: false, truncated: false, error: `extractPdf threw: ${e?.message ?? String(e)}` };
1383
+ }
1384
+ if (!pdfResult.success) {
1385
+ // Phase v4.1-4.2 — preserve caption on PDF failure path.
1386
+ const baseAnnotation = `[The user sent a PDF "${sanitizedName}" but extraction failed: ${pdfResult.error ?? 'unknown error'}. ` +
1387
+ `Apologize briefly and ask them to retry or paste the relevant text.]`;
1388
+ const annotation = caption ? `${baseAnnotation}\n\n${caption}` : baseAnnotation;
1389
+ const wrapped = isGroup ? this.wrapGroupMessage(msg, annotation) : annotation;
1390
+ await this.deliverAgentReply(chatId, userId, wrapped, msg.message_id);
1391
+ return;
1392
+ }
1393
+ this.documentProcessedCount += 1;
1394
+ const truncationNote = pdfResult.truncated
1395
+ ? ` Note: PDF truncated to fit context. Original was ${pdfResult.originalChars ?? '?'} chars.`
1396
+ : '';
1397
+ let annotation = `[The user sent a PDF "${sanitizedName}". Extracted text:\n${pdfResult.text ?? ''}` +
1398
+ (truncationNote ? `\n${truncationNote.trim()}` : '') +
1399
+ `]`;
1400
+ if (caption)
1401
+ annotation = `${annotation}\n\n${caption}`;
1402
+ const wrapped = isGroup ? this.wrapGroupMessage(msg, annotation) : annotation;
1403
+ await this.deliverAgentReply(chatId, userId, wrapped, msg.message_id);
1404
+ }
1405
+ /**
1406
+ * Phase v4.1-4 — generic media downloader. Streams the file from
1407
+ * Telegram into the given cache directory under
1408
+ * `audio_<uuid12>.<ext>` for voice and `photo_<uuid12>.<ext>` for
1409
+ * photos, etc. Returns absolute path on success or null on failure.
1410
+ */
1411
+ async downloadMediaToCache(fileId, cacheDir, ext, label) {
1412
+ if (!this.client)
1413
+ return null;
1414
+ await node_fs_1.promises.mkdir(cacheDir, { recursive: true });
1415
+ const id = (0, node_crypto_1.randomUUID)().replace(/-/g, '').slice(0, 12);
1416
+ const outPath = node_path_1.default.join(cacheDir, `${label}_${id}${ext}`);
1417
+ this.logInfo(`downloading ${label} file`, {
1418
+ fileIdPrefix: fileId.slice(0, 12),
1419
+ ext,
1420
+ outPath,
1421
+ });
1422
+ try {
1423
+ const stream = this.client.getFileStream(fileId);
1424
+ const writer = (0, node_fs_1.createWriteStream)(outPath);
1425
+ await new Promise((resolve, reject) => {
1426
+ stream.on('error', reject);
1427
+ writer.on('error', reject);
1428
+ writer.on('finish', () => resolve());
1429
+ stream.pipe(writer);
1430
+ });
1431
+ return outPath;
1432
+ }
1433
+ catch (e) {
1434
+ this.logError(`getFileStream failed (${label}): ${this.scrubToken(e?.message)}`);
1435
+ try {
1436
+ await node_fs_1.promises.unlink(outPath);
1437
+ }
1438
+ catch { /* ignore */ }
1439
+ return null;
1440
+ }
1441
+ }
1442
+ /**
1443
+ * Phase v4.1-4 — document download with original-name preservation.
1444
+ * `doc_<uuid12>_<sanitized_filename>` so log lines and agent
1445
+ * annotations carry a human-readable name.
1446
+ */
1447
+ async downloadDocumentToCache(fileId, sanitizedName) {
1448
+ if (!this.client)
1449
+ return null;
1450
+ const cacheDir = this.getDocumentCacheDir();
1451
+ await node_fs_1.promises.mkdir(cacheDir, { recursive: true });
1452
+ const id = (0, node_crypto_1.randomUUID)().replace(/-/g, '').slice(0, 12);
1453
+ const outPath = node_path_1.default.join(cacheDir, `doc_${id}_${sanitizedName}`);
1454
+ this.logInfo('downloading document file', {
1455
+ fileIdPrefix: fileId.slice(0, 12),
1456
+ sanitizedName,
1457
+ outPath,
1458
+ });
1459
+ try {
1460
+ const stream = this.client.getFileStream(fileId);
1461
+ const writer = (0, node_fs_1.createWriteStream)(outPath);
1462
+ await new Promise((resolve, reject) => {
1463
+ stream.on('error', reject);
1464
+ writer.on('error', reject);
1465
+ writer.on('finish', () => resolve());
1466
+ stream.pipe(writer);
1467
+ });
1468
+ return outPath;
1469
+ }
1470
+ catch (e) {
1471
+ this.logError(`getFileStream failed (document): ${this.scrubToken(e?.message)}`);
1472
+ try {
1473
+ await node_fs_1.promises.unlink(outPath);
1474
+ }
1475
+ catch { /* ignore */ }
1476
+ return null;
1477
+ }
1478
+ }
1479
+ /**
1480
+ * Send a typing indicator + route through the agent + deliver reply.
1481
+ * Extracted from the original handleIncoming so DMs and groups share
1482
+ * the same outbound path.
1483
+ */
1484
+ async deliverAgentReply(chatId, userId, text, msgIdForDedup) {
1485
+ // Phase v4.1-4.2 — outbound delivery dedup. When `msgIdForDedup`
1486
+ // is provided, ensure exactly one reply ever fires for that
1487
+ // `${chat_id}:${msg_id}` regardless of how the call landed here
1488
+ // (race, future regression in dispatch dedup, multi-handler
1489
+ // bug). Bounded FIFO of 256 keys; eviction order = insertion.
1490
+ if (typeof msgIdForDedup === 'number') {
1491
+ const key = `${chatId}:${msgIdForDedup}`;
1492
+ if (this.repliedMessageIds.has(key)) {
1493
+ this.logInfo('duplicate delivery suppressed', { chatId, msgId: msgIdForDedup });
1494
+ return;
1495
+ }
1496
+ this.repliedMessageIds.add(key);
1497
+ this.repliedMessageOrder.push(key);
1498
+ if (this.repliedMessageOrder.length > TelegramAdapter.REPLIED_MSG_LIMIT) {
1499
+ const evicted = this.repliedMessageOrder.shift();
1500
+ if (evicted !== undefined)
1501
+ this.repliedMessageIds.delete(evicted);
1502
+ }
1503
+ }
1504
+ try {
1505
+ await this.client?.sendChatAction(chatId, 'typing');
1506
+ }
1507
+ catch {
1508
+ /* non-fatal — proceed with the answer */
1509
+ }
1510
+ const reply = await this.processMessage(chatId, userId, text);
1511
+ await this.deliverToChat(chatId, reply).catch((e) => this.logError(`Reply delivery failed: ${this.scrubToken(e.message)}`));
1512
+ }
1513
+ /**
1514
+ * Phase v4.1-4.2 — Telegram delivery-split detector. Returns true
1515
+ * when the given text matches a recent photo caption from the same
1516
+ * chat (within `TELEGRAM_PHOTO_COALESCE_MS`, default 500ms). The
1517
+ * text fall-through paths in `handleIncoming` use this to suppress
1518
+ * orphaned text dispatches that some clients emit alongside a
1519
+ * photo+caption upload. Logs the suppression with the time delta
1520
+ * so the live test can verify the coalesce window is sized right.
1521
+ */
1522
+ shouldSuppressAsTelegramSplit(chatId, text) {
1523
+ const recent = this.recentPhotoCaptions.get(chatId);
1524
+ if (!recent)
1525
+ return false;
1526
+ const windowMs = readCoalesceWindowMs();
1527
+ const delta = Date.now() - recent.ts;
1528
+ if (delta >= windowMs)
1529
+ return false;
1530
+ if (normalizeText(text) !== recent.caption)
1531
+ return false;
1532
+ this.logInfo('text suppressed: matches recent photo caption (telegram delivery split)', {
1533
+ chatId,
1534
+ delta,
1535
+ window: windowMs,
1536
+ });
1537
+ return true;
1538
+ }
1539
+ /**
1540
+ * Phase v4.1-2 — command routing. Returns true when the message was
1541
+ * consumed as a slash command (caller skips the agent dispatch).
1542
+ * Pulls out admin-only states (pause / resume / clear / allowusers)
1543
+ * and surfaces the right user-visible reply when one is needed.
1544
+ */
1545
+ async routeCommand(msg, chatId, userId) {
1546
+ if (!this.commandRouter) {
1547
+ // Fallback to the legacy handler when the router didn't init —
1548
+ // preserves Phase 1 behaviour for DMs even if group machinery
1549
+ // bricked at start().
1550
+ const text = msg.text ?? '';
1551
+ if (!text.startsWith('/'))
1552
+ return false;
1553
+ return this.handleSlashCommand(chatId, text);
1554
+ }
1555
+ const text = msg.text ?? '';
1556
+ if (!text.startsWith('/'))
1557
+ return false;
1558
+ const outcome = await this.commandRouter.route(msg);
1559
+ switch (outcome.kind) {
1560
+ case 'agent': return false;
1561
+ case 'handled': return true;
1562
+ case 'reply':
1563
+ await this.deliverToChat(chatId, outcome.text);
1564
+ return true;
1565
+ case 'cleared':
1566
+ await this.processMessage(chatId, userId, '/clear');
1567
+ await this.deliverToChat(chatId, '✓ Memory cleared for this chat.');
1568
+ return true;
1569
+ case 'paused':
1570
+ await this.deliverToChat(chatId, '⏸ Bot paused in this group. Run /resume to re-enable.');
1571
+ return true;
1572
+ case 'resumed':
1573
+ await this.deliverToChat(chatId, '▶ Bot resumed in this group.');
1574
+ return true;
1575
+ }
1576
+ }
1577
+ /**
1578
+ * True when the group message is explicitly addressed to the bot:
1579
+ * - The text begins with `@bot_username`.
1580
+ * - The message is a reply to one of the bot's own messages.
1581
+ * Both signals come from Telegram natively — we don't try to NLP
1582
+ * "hey aiden" or similar; that's a Phase 3 add-on.
1583
+ *
1584
+ * Phase v4.1-3 — also honours `caption_entities` so a voice message
1585
+ * with `@bot_username summarise this` in its caption counts as an
1586
+ * address. Voice/audio without text uses the caption (or
1587
+ * reply-to-bot) as its mention signal.
1588
+ */
1589
+ isAddressedToBot(msg, text) {
1590
+ const username = (this.botUsername ?? '').toLowerCase();
1591
+ if (username && text.toLowerCase().startsWith(`@${username}`))
1592
+ return true;
1593
+ // Telegram exposes `entities` (text messages) and `caption_entities`
1594
+ // (media messages with captions). Check both — for voice messages
1595
+ // in groups, the mention lives on the caption.
1596
+ const entitySources = [
1597
+ msg.entities,
1598
+ msg.caption_entities,
1599
+ ];
1600
+ for (const entities of entitySources) {
1601
+ if (!entities)
1602
+ continue;
1603
+ for (const e of entities) {
1604
+ if (e.type === 'mention') {
1605
+ const slice = text.slice(e.offset, e.offset + e.length).toLowerCase();
1606
+ if (username && slice === `@${username}`)
1607
+ return true;
1608
+ }
1609
+ }
1610
+ }
1611
+ if (msg.reply_to_message?.from?.is_bot && msg.reply_to_message.from.username
1612
+ && msg.reply_to_message.from.username.toLowerCase() === username) {
1613
+ return true;
1614
+ }
1615
+ return false;
1616
+ }
1617
+ /** Strip a leading `@bot_username ` so the agent sees a clean prompt. */
1618
+ stripBotMention(text) {
1619
+ const username = (this.botUsername ?? '').toLowerCase();
1620
+ if (!username)
1621
+ return text;
1622
+ const m = text.match(new RegExp(`^@${username}\\s+`, 'i'));
1623
+ return m ? text.slice(m[0].length) : text;
1624
+ }
1625
+ /**
1626
+ * Phase v4.1-2 prompt-injection defence-in-depth. Wrap group
1627
+ * messages in a delimiter that clearly separates the model's system
1628
+ * persona from user-supplied content. The agent's prompt stack treats
1629
+ * this as a quoted user payload — a malicious "ignore all previous
1630
+ * instructions" lands inside `<message>...</message>` rather than as
1631
+ * an apparent system override.
1632
+ */
1633
+ wrapGroupMessage(msg, content) {
1634
+ const username = msg.from?.username ? `@${msg.from.username}` : (msg.from?.first_name ?? 'unknown');
1635
+ const groupName = msg.chat.title ?? `chat ${msg.chat.id}`;
1636
+ return (`<message from="${escapeForXml(username)}" group="${escapeForXml(groupName)}">\n` +
1637
+ `${content}\n` +
1638
+ `</message>`);
1639
+ }
1640
+ async processMessage(chatId, userId, text) {
1641
+ try {
1642
+ return await gateway_1.gateway.routeMessage({
1643
+ channel: 'telegram',
1644
+ channelId: chatId,
1645
+ userId,
1646
+ text,
1647
+ timestamp: Date.now(),
1648
+ });
1649
+ }
1650
+ catch (e) {
1651
+ this.logError(`routeMessage error: ${this.scrubToken(e?.message)}`);
1652
+ return '❌ Something went wrong. Try again.';
1653
+ }
1654
+ }
1655
+ // In-channel slash commands. Returns true when handled — caller skips
1656
+ // the agent loop. Agent-style prompts (e.g. "/summarize this") fall
1657
+ // through and reach the agent unchanged.
1658
+ async handleSlashCommand(chatId, raw) {
1659
+ const cmd = raw.split(/\s+/)[0]?.toLowerCase().split('@')[0];
1660
+ if (!cmd)
1661
+ return false;
1662
+ if (cmd === '/start' || cmd === '/help') {
1663
+ await this.deliverToChat(chatId, '*Aiden* — your local AI assistant.\n\n' +
1664
+ 'Send any message to start. Built-in commands:\n' +
1665
+ '`/help` show this message\n' +
1666
+ '`/status` bot health check\n' +
1667
+ '`/clear` wipe this chat\'s memory');
1668
+ return true;
1669
+ }
1670
+ if (cmd === '/status') {
1671
+ await this.deliverToChat(chatId, this.healthy ? '✓ Online' : '⚠ Degraded');
1672
+ return true;
1673
+ }
1674
+ if (cmd === '/clear') {
1675
+ // Per-chat memory isolation hangs off `gateway.routeMessage` resolving
1676
+ // a stable session id from `(channel, channelId)`. Forward the clear
1677
+ // intent through the agent path so the existing /clear handler runs;
1678
+ // tolerates absence (early Phase 2 will give us a direct hook).
1679
+ await this.processMessage(chatId, chatId, '/clear');
1680
+ await this.deliverToChat(chatId, '✓ Memory cleared for this chat.');
1681
+ return true;
1682
+ }
1683
+ return false;
1684
+ }
1685
+ // Phase v4.1-2 — `replyToGroup` retired. Groups are now first-class
1686
+ // (mention-only by default, allowlist-gated, rate-limited). The
1687
+ // legacy refusal stub no longer fires.
1688
+ // ── Outbound ─────────────────────────────────────────────────
1689
+ async deliverToChat(chatId, text) {
1690
+ if (!this.client)
1691
+ return false;
1692
+ const chunks = this.chunkAtBoundary(text, MAX_MESSAGE_CHARS);
1693
+ for (const chunk of chunks) {
1694
+ try {
1695
+ await this.client.sendMessage(chatId, chunk, { parse_mode: DEFAULT_PARSE_MODE });
1696
+ }
1697
+ catch (e) {
1698
+ // Retry once without parse_mode in case the chunk's markdown is
1699
+ // malformed (Telegram is strict about Markdown V1). Keeps replies
1700
+ // visible even when the model emits broken backticks.
1701
+ const message = (e?.message ?? '').toLowerCase();
1702
+ if (message.includes('parse')) {
1703
+ try {
1704
+ await this.client.sendMessage(chatId, chunk);
1705
+ continue;
1706
+ }
1707
+ catch (inner) {
1708
+ this.logError(`sendMessage retry failed: ${this.scrubToken(inner?.message)}`);
1709
+ return false;
1710
+ }
1711
+ }
1712
+ // Telegram returns 429 with a `parameters.retry_after` field
1713
+ // surfaced by node-telegram-bot-api as `e.response.parameters`.
1714
+ const retryAfter = e?.response?.body?.parameters?.retry_after;
1715
+ if (typeof retryAfter === 'number' && retryAfter > 0 && retryAfter <= 10) {
1716
+ await new Promise(r => setTimeout(r, retryAfter * 1000));
1717
+ try {
1718
+ await this.client.sendMessage(chatId, chunk, { parse_mode: DEFAULT_PARSE_MODE });
1719
+ continue;
1720
+ }
1721
+ catch (inner) {
1722
+ this.logError(`sendMessage post-429 failed: ${this.scrubToken(inner?.message)}`);
1723
+ return false;
1724
+ }
1725
+ }
1726
+ this.logError(`sendMessage failed: ${this.scrubToken(e?.message)}`);
1727
+ return false;
1728
+ }
1729
+ }
1730
+ return true;
1731
+ }
1732
+ // ── Helpers ──────────────────────────────────────────────────
1733
+ isAllowed(chatId) {
1734
+ if (this.allowedChats.size === 0)
1735
+ return true;
1736
+ return this.allowedChats.has(chatId);
1737
+ }
1738
+ /**
1739
+ * Split `text` into pieces no larger than `limit` bytes, preferring
1740
+ * to break at a newline, falling back to a space, finally a hard cut.
1741
+ * The split point must fall in the second half of the chunk so a long
1742
+ * URL near the start doesn't force a tiny chunk.
1743
+ */
1744
+ chunkAtBoundary(text, limit) {
1745
+ if (text.length <= limit)
1746
+ return [text];
1747
+ const out = [];
1748
+ let cursor = text;
1749
+ while (cursor.length > 0) {
1750
+ if (cursor.length <= limit) {
1751
+ out.push(cursor);
1752
+ break;
1753
+ }
1754
+ let cut = cursor.lastIndexOf('\n', limit);
1755
+ if (cut < limit / 2)
1756
+ cut = cursor.lastIndexOf(' ', limit);
1757
+ if (cut < limit / 2)
1758
+ cut = limit;
1759
+ out.push(cursor.slice(0, cut));
1760
+ cursor = cursor.slice(cut).replace(/^\s+/, '');
1761
+ }
1762
+ return out;
1763
+ }
1764
+ /**
1765
+ * Telegram tokens take the form `<bot_id>:<secret>` and travel through
1766
+ * the request URL. A leaked token in stack traces / log lines is the
1767
+ * single biggest credential risk for this adapter, so every error
1768
+ * message gets scrubbed before it reaches the console.
1769
+ */
1770
+ scrubToken(message) {
1771
+ if (typeof message !== 'string')
1772
+ return String(message ?? '');
1773
+ if (!this.token)
1774
+ return message;
1775
+ return message.split(this.token).join('[redacted]');
1776
+ }
1777
+ /** Best-effort `setMyCommands` so the `/` menu in Telegram is helpful. */
1778
+ async publishBotCommands() {
1779
+ if (!this.client)
1780
+ return;
1781
+ await this.client.setMyCommands([
1782
+ { command: 'help', description: 'Show available commands' },
1783
+ { command: 'status', description: 'Bot health check' },
1784
+ { command: 'clear', description: 'Wipe this chat\'s memory' },
1785
+ ]);
1786
+ }
1787
+ }
1788
exports.TelegramAdapter = TelegramAdapter;
/**
 * Phase v4.1-3.1 — capacity of the inbound idempotency FIFO that guards
 * the defensive secondary subscriptions. NTBA fires 'message' AND the
 * type-specific event ('voice', 'audio') for media attachments; both
 * are subscribed so the bot stays alive even if a future lib version
 * stops emitting 'message' for some media type. The dedup key is
 * `${chat_id}:${message_id}` — Telegram guarantees uniqueness within a
 * chat. Eviction order is insertion order, so no real LRU policy is
 * needed.
 */
TelegramAdapter.RECENT_MSG_LIMIT = 256;
/**
 * Phase v4.1-4.2 — capacity of the outbound delivery-dedup FIFO. It
 * holds `${chat_id}:${msg_id}` keys for messages that have already had
 * `deliverAgentReply` fire, and protects against any path (race,
 * future regression in dispatch dedup, multi-handler bug) that could
 * double-fire a reply for the same inbound message.
 */
TelegramAdapter.REPLIED_MSG_LIMIT = 256;
/**
 * Phase v4.1-preship-cleanup — local-machine polling lock.
 * A small lock file under <aidenHome>/telegram-polling.lock, keyed on
 * the (truncated) bot token. Same-machine rivals see the lock and bail
 * out with a clear message instead of racing for 409s. A lock older
 * than this many milliseconds past its last refresh (60 s) is treated
 * as abandoned — a crashed or kill -9'd aiden won't permanently block
 * restarts.
 */
TelegramAdapter.LOCK_STALE_MS = 60 * 1000;
1813
/**
 * Phase v4.1-2 — minimal XML attribute escape so usernames and group
 * titles can't close the wrap-message envelope. Replaces exactly the
 * five predefined XML entities (`&`, `<`, `>`, `"`, `'`) in one pass.
 *
 * @param s String to escape.
 * @returns Escaped copy of `s`.
 */
function escapeForXml(s) {
    const entityFor = {
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        '\'': '&apos;',
    };
    return s.replace(/[&<>"']/g, (ch) => entityFor[ch]);
}
1826
/**
 * Phase v4.1-4 — TELEGRAM_MEDIA_ENABLED gate for photos + documents.
 * Voice has its own TELEGRAM_VOICE_ENABLED switch, so operators can
 * disable one without the other. Read at call time; enabled unless the
 * variable is the literal "false" (surrounding whitespace and case
 * ignored).
 *
 * @returns true when media handling is enabled.
 */
function isMediaEnabled() {
    const flag = process.env.TELEGRAM_MEDIA_ENABLED ?? '';
    return flag.trim().toLowerCase() !== 'false';
}
1835
/**
 * Phase v4.1-4 — file extension to use for a `PhotoSize` blob.
 * PhotoSize carries neither MIME type nor filename, so the answer is
 * always `.jpg` (per the original note, Telegram's photo flow always
 * re-encodes to JPEG).
 *
 * @param _p PhotoSize object; currently unused.
 * @returns The string `.jpg`.
 */
function photoExtFor(_p) {
    const PHOTO_EXTENSION = '.jpg';
    return PHOTO_EXTENSION;
}
1843
/**
 * Phase v4.1-4 — pick the `.<ext>` key of SUPPORTED_DOC_MIME for a
 * document. Resolution order:
 *   1. the lowercased suffix of `file_name` (from its last dot), when
 *      it is a key of SUPPORTED_DOC_MIME
 *   2. the first key whose value equals the lowercased, trimmed
 *      `mime_type`
 *   3. undefined (the caller rejects the document as unsupported)
 *
 * @param file Telegram document descriptor (`file_name`, `mime_type`).
 * @returns A supported extension including the leading dot, or undefined.
 */
function guessDocumentExtension(file) {
    const fileName = file.file_name ?? '';
    const lastDot = fileName.lastIndexOf('.');
    if (lastDot >= 0) {
        const suffix = fileName.slice(lastDot).toLowerCase();
        if (suffix in SUPPORTED_DOC_MIME) {
            return suffix;
        }
    }
    const mime = (file.mime_type ?? '').toLowerCase().trim();
    if (mime === '') {
        return undefined;
    }
    return Object.keys(SUPPORTED_DOC_MIME).find((key) => SUPPORTED_DOC_MIME[key] === mime);
}
1866
/**
 * Phase v4.1-4 — sanitize a filename for safe use on the local cache
 * path AND inside agent annotations / log lines.
 *
 * Every run of characters outside `[A-Za-z0-9_.\- ]` becomes a single
 * `_`, runs of spaces collapse to one, the result is trimmed and capped
 * at 80 characters. When the cap applies and the name ends in a short
 * alphanumeric extension, the stem is shortened instead of the
 * extension, so `<very long name>.pdf` still ends in `.pdf`. An empty
 * result falls back to "file", so the cache filename is never empty.
 *
 * @param name Original filename.
 * @returns Sanitized name, 1–80 characters long.
 */
function sanitizeDocumentName(name) {
    const MAX_LENGTH = 80;
    const cleaned = name
        .replace(/[^\w.\- ]+/g, '_')
        .replace(/\s+/g, ' ')
        .trim();
    if (cleaned.length <= MAX_LENGTH) {
        return cleaned || 'file';
    }
    // Over the cap: keep a trailing ".ext" (1–15 alphanumerics) intact.
    // `dotIdx > 0` skips dot-files that have no stem to shorten.
    const dotIdx = cleaned.lastIndexOf('.');
    const extension = dotIdx > 0 ? cleaned.slice(dotIdx) : '';
    if (/^\.[A-Za-z0-9]{1,15}$/.test(extension)) {
        return cleaned.slice(0, MAX_LENGTH - extension.length) + extension;
    }
    return cleaned.slice(0, MAX_LENGTH);
}
1881
/*
 * Phase v4.1-3 — TELEGRAM_VOICE_ENABLED is read at call time by
 * `isVoiceEnabled()`, which is defined further down in this file.
 * The default is true ("works out of the box"); only the literal
 * string "false" (case-insensitive) disables voice. Anything else
 * (empty, missing, "true", "1") leaves voice on.
 */
1887
/**
 * Phase v4.1-3.2 — whitelist of valid Telegram update types. Anything
 * outside this set that an operator passes via
 * `TELEGRAM_ALLOWED_UPDATES` is silently dropped (the rest of the list
 * still applies). Better than crashing the adapter on a typo.
 */
const VALID_UPDATE_TYPES = new Set([
    'message', 'edited_message', 'channel_post', 'edited_channel_post',
    'business_connection', 'business_message', 'edited_business_message',
    'deleted_business_messages', 'message_reaction', 'message_reaction_count',
    'inline_query', 'chosen_inline_result', 'callback_query',
    'shipping_query', 'pre_checkout_query', 'poll', 'poll_answer',
    'my_chat_member', 'chat_member', 'chat_join_request',
    'chat_boost', 'removed_chat_boost', 'purchased_paid_media',
]);
/**
 * Phase v4.1-3.2 — `allowed_updates` parser for the CSV in
 * `TELEGRAM_ALLOWED_UPDATES`.
 *
 * An empty or unset variable yields `[]`, which Telegram interprets as
 * "all default types" (everything except chat_member,
 * message_reaction, message_reaction_count). Entries are trimmed and
 * lowercased before validation (all valid type names are lowercase),
 * unknown entries are dropped, and duplicates are removed while
 * keeping first-seen order.
 *
 * @returns Array of recognised update type names; may be empty.
 */
function readAllowedUpdates() {
    const raw = (process.env.TELEGRAM_ALLOWED_UPDATES ?? '').trim();
    if (!raw)
        return [];
    const accepted = new Set();
    for (const part of raw.split(',')) {
        const type = part.trim().toLowerCase();
        if (VALID_UPDATE_TYPES.has(type))
            accepted.add(type);
    }
    return [...accepted];
}
1917
/**
 * Phase v4.1-4.2 — canonical form used by the delivery-split coalesce
 * comparison: lowercase, no leading/trailing whitespace, and every
 * internal whitespace run reduced to a single space. This lets a typed
 * "what image is this " (trailing space) match the photo caption
 * "what image is this".
 *
 * @param s Text to normalise.
 * @returns Normalised text.
 */
function normalizeText(s) {
    const words = s.toLowerCase().split(/\s+/).filter((word) => word !== '');
    return words.join(' ');
}
1926
/**
 * Phase v4.1-4.2 — coalesce window, in milliseconds, for the Telegram
 * delivery-split detector. Operators override it through
 * `TELEGRAM_PHOTO_COALESCE_MS`; the value is parsed as a base-10
 * integer. The default of 500 ms is used when the variable is unset,
 * blank, unparseable, or negative.
 *
 * @returns Non-negative window length in milliseconds.
 */
function readCoalesceWindowMs() {
    const DEFAULT_WINDOW_MS = 500;
    const configured = (process.env.TELEGRAM_PHOTO_COALESCE_MS ?? '').trim();
    if (configured === '') {
        return DEFAULT_WINDOW_MS;
    }
    const parsed = Number.parseInt(configured, 10);
    if (!Number.isFinite(parsed) || parsed < 0) {
        return DEFAULT_WINDOW_MS;
    }
    return parsed;
}
1939
/**
 * Phase v4.1-3 — TELEGRAM_VOICE_ENABLED gate, read at call time.
 * Voice is on unless the variable is the literal "false" (surrounding
 * whitespace and case ignored); empty, missing, "true", "1", etc. all
 * leave it enabled.
 *
 * @returns true when voice handling is enabled.
 */
function isVoiceEnabled() {
    const flag = process.env.TELEGRAM_VOICE_ENABLED ?? '';
    return flag.trim().toLowerCase() !== 'false';
}
1943
/**
 * Phase v4.1-3 — confidence threshold (avg_logprob floor) below which
 * the adapter echoes the heard transcript before answering. Read from
 * `TELEGRAM_VOICE_CONFIDENCE_THRESHOLD` and parsed as a float; falls
 * back to DEFAULT_CONFIDENCE_FLOOR when the variable is unset, blank,
 * or not a finite number. (The original note gives the default as
 * −0.5, Whisper's empirical "borderline" point — at −0.7 transcripts
 * are usually mostly-right but with a wobble.)
 *
 * @returns The configured threshold or DEFAULT_CONFIDENCE_FLOOR.
 */
function readConfidenceThreshold() {
    const configured = (process.env.TELEGRAM_VOICE_CONFIDENCE_THRESHOLD ?? '').trim();
    const parsed = configured === '' ? Number.NaN : Number.parseFloat(configured);
    return Number.isFinite(parsed) ? parsed : DEFAULT_CONFIDENCE_FLOOR;
}
1956
/**
 * Phase v4.1-3 — prepare a transcript for the `🎤 _heard:_ "..."`
 * echo: trim leading/trailing whitespace and backslash-escape every
 * underscore so it cannot close the Markdown italic span.
 * Deliberately conservative — no other Markdown characters are escaped.
 *
 * @param s Transcript text.
 * @returns Trimmed text with `_` replaced by `\_`.
 */
function escapeMarkdownItalic(s) {
    // The documented contract promised trimming, but the code never did it.
    return s.trim().replace(/_/g, '\\_');
}
1964
/**
 * Detect a Telegram "409 Conflict" polling error, i.e. another
 * getUpdates consumer is running for the same bot. Recognised shapes:
 *   "ETELEGRAM: 409 Conflict: terminated by other getUpdates request"
 *   "Conflict: terminated by other getUpdates"
 *   { code: 'ETELEGRAM', response: { statusCode: 409 } }
 *
 * @param err Error, string, or error-like object; falsy values yield false.
 * @returns true when the error looks like a polling conflict.
 */
function isConflictError(err) {
    if (!err) {
        return false;
    }
    const description = String(err?.message ?? err).toLowerCase();
    const mentions409Conflict = description.includes('409') && description.includes('conflict');
    const mentionsRivalPoller = description.includes('terminated by other getupdates');
    return mentions409Conflict || mentionsRivalPoller || err?.response?.statusCode === 409;
}