@agentprojectcontext/apx 1.42.0 → 1.42.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/core/agent/constants.js +10 -0
- package/src/core/agent/run-agent.js +36 -18
- package/src/core/channels/telegram/api.js +62 -0
- package/src/core/channels/telegram/ask-callbacks.js +238 -0
- package/src/core/channels/telegram/dispatch.js +60 -310
- package/src/core/channels/telegram/helpers.js +28 -1
- package/src/core/channels/telegram/inbound/audio.js +82 -0
- package/src/core/channels/telegram/inbound/photo.js +63 -0
- package/src/core/channels/telegram/reply.js +204 -0
- package/src/core/config/index.js +5 -0
- package/src/core/confirmation/adapters/telegram.js +20 -37
- package/src/core/i18n/en.js +4 -0
- package/src/core/i18n/es.js +4 -0
- package/src/core/i18n/pt.js +4 -0
- package/src/host/daemon/plugins/desktop/index.js +6 -1
- package/src/host/daemon/plugins/telegram/index.js +62 -360
- package/src/interfaces/web/package-lock.json +3 -3
|
@@ -11,25 +11,22 @@
|
|
|
11
11
|
// `appendGlobalMessage`, `CHANNELS`, `nowIso`, etc. Top-level imports here
|
|
12
12
|
// keep that scope intact — earlier splits forgot them and the bug only
|
|
13
13
|
// surfaced when a real telegram update arrived (ReferenceError at runtime).
|
|
14
|
-
import path from "node:path";
|
|
15
14
|
import { callEngine } from "#core/engines/index.js";
|
|
16
|
-
import {
|
|
17
|
-
import { stripThinking } from "#core/util/thinking.js";
|
|
15
|
+
import { isSuperAgentEnabled } from "#core/agent/super-agent.js";
|
|
18
16
|
import { getRecentTelegramTurnsFromFs, appendGlobalMessage } from "#core/stores/messages.js";
|
|
19
17
|
import { compactChannelIfNeeded } from "#core/memory/index.js";
|
|
20
18
|
import { readAgents } from "#core/apc/parser.js";
|
|
21
19
|
import { buildAgentSystem } from "#core/agent/build-agent-system.js";
|
|
22
|
-
import { transcribe as transcribeAudioFile } from "#core/voice/transcription.js";
|
|
23
20
|
import { resolveAgentName, SUPERAGENT_ACTOR_ID } from "#core/identity/index.js";
|
|
24
21
|
import { registerSender, resolveAllowedTools } from "#core/identity/telegram.js";
|
|
25
22
|
import { buildRelationshipBlock } from "#core/agent/index.js";
|
|
26
|
-
import { getConfirmationStore as getConfirmStore } from "#core/confirmation/pending-store.js";
|
|
27
23
|
import { CHANNELS } from "#core/constants/channels.js";
|
|
28
24
|
import { tryResolveSkillCommand } from "#core/agent/skills/trigger.js";
|
|
29
|
-
import { createTelegramConfirmAdapter } from "#core/confirmation/adapters/telegram.js";
|
|
30
25
|
import * as askFlow from "./ask.js";
|
|
31
|
-
import {
|
|
32
|
-
import {
|
|
26
|
+
import { telegramAuthorLabel } from "./helpers.js";
|
|
27
|
+
import { handleIncomingPhoto } from "./inbound/photo.js";
|
|
28
|
+
import { handleIncomingAudio } from "./inbound/audio.js";
|
|
29
|
+
import { buildStreamHandler, runTelegramSuperAgent, telegramErrorText, sendFinalReply } from "./reply.js";
|
|
33
30
|
import { t, resolveLang } from "#core/i18n/index.js";
|
|
34
31
|
|
|
35
32
|
const nowIso = () => new Date().toISOString().replace(/\.\d{3}Z$/, "Z");
|
|
@@ -50,10 +47,7 @@ export async function handleUpdate(self, u) {
|
|
|
50
47
|
self.log(`telegram[${self.channel.name}] update ${u.update_id} ignored — no target project`);
|
|
51
48
|
return;
|
|
52
49
|
}
|
|
53
|
-
const author =
|
|
54
|
-
msg.from?.username
|
|
55
|
-
? "@" + msg.from.username
|
|
56
|
-
: `${msg.from?.first_name || ""} ${msg.from?.last_name || ""}`.trim() || "unknown";
|
|
50
|
+
const author = telegramAuthorLabel(msg.from);
|
|
57
51
|
const chat_id = msg.chat?.id;
|
|
58
52
|
|
|
59
53
|
// Resolve WHO is writing (owner / known contact / guest), keyed by the
|
|
@@ -85,108 +79,18 @@ export async function handleUpdate(self, u) {
|
|
|
85
79
|
|
|
86
80
|
let text = msg.text || msg.caption || "";
|
|
87
81
|
|
|
88
|
-
// ── Incoming
|
|
82
|
+
// ── Incoming media ────────────────────────────────────────────────────
|
|
83
|
+
// Photo and voice/audio each download + archive the file and rewrite `text`
|
|
84
|
+
// so the rest of the pipeline treats them like a typed message. The handlers
|
|
85
|
+
// live in ./inbound/ to keep this dispatcher focused on routing. Photos have
|
|
86
|
+
// no vision yet, so the handler injects an `[image]` marker (never silent);
|
|
87
|
+
// audio injects its `[audio]` transcript.
|
|
89
88
|
if (msg.photo && msg.photo.length > 0) {
|
|
90
|
-
|
|
91
|
-
const bestPhoto = msg.photo.reduce((a, b) => (b.file_size > a.file_size ? b : a));
|
|
92
|
-
const token = resolveBotToken(self.channel);
|
|
93
|
-
const mediaDir = path.join(APX_HOME, "media");
|
|
94
|
-
fs.mkdirSync(mediaDir, { recursive: true });
|
|
95
|
-
try {
|
|
96
|
-
const localPath = await downloadTelegramFile(token, bestPhoto.file_id, mediaDir);
|
|
97
|
-
self.log(`telegram[${self.channel.name}] photo saved: ${localPath}`);
|
|
98
|
-
appendGlobalMessage({
|
|
99
|
-
channel: CHANNELS.TELEGRAM,
|
|
100
|
-
direction: "in",
|
|
101
|
-
type: "photo",
|
|
102
|
-
actor_id: msg.from?.id ? String(msg.from.id) : author,
|
|
103
|
-
external_id: String(u.update_id),
|
|
104
|
-
author,
|
|
105
|
-
body: text || "[photo]",
|
|
106
|
-
meta: {
|
|
107
|
-
chat_id,
|
|
108
|
-
user_id: msg.from?.id || null,
|
|
109
|
-
message_id: msg.message_id,
|
|
110
|
-
tg_channel: self.channel.name,
|
|
111
|
-
local_path: localPath,
|
|
112
|
-
file_id: bestPhoto.file_id,
|
|
113
|
-
width: bestPhoto.width,
|
|
114
|
-
height: bestPhoto.height,
|
|
115
|
-
},
|
|
116
|
-
});
|
|
117
|
-
} catch (e) {
|
|
118
|
-
self.log(`telegram[${self.channel.name}] photo download failed: ${e.message}`);
|
|
119
|
-
}
|
|
120
|
-
// If there's a caption, continue to handle it as text; otherwise return
|
|
121
|
-
if (!text) return;
|
|
89
|
+
({ text } = await handleIncomingPhoto(self, { msg, u, author, chat_id, text }));
|
|
122
90
|
}
|
|
123
|
-
|
|
124
|
-
// ── Incoming voice / audio handling ──────────────────────────────────
|
|
125
|
-
// Telegram sends `voice` for the press-and-hold mic recording (.oga/opus)
|
|
126
|
-
// and `audio` for uploaded audio files (mp3/m4a/etc.). Either way we
|
|
127
|
-
// download, run it through Whisper, prefix the result with `[audio] `
|
|
128
|
-
// and let the rest of the message flow handle it as plain text.
|
|
129
91
|
const incomingAudio = msg.voice || msg.audio;
|
|
130
92
|
if (incomingAudio && incomingAudio.file_id) {
|
|
131
|
-
|
|
132
|
-
const mediaDir = path.join(APX_HOME, "media");
|
|
133
|
-
fs.mkdirSync(mediaDir, { recursive: true });
|
|
134
|
-
// Show "typing…" right away — download + transcription is the slow part of
|
|
135
|
-
// a voice message, and the reply-path typing (below) only starts after it,
|
|
136
|
-
// so without this the chat sits silent for seconds with no feedback.
|
|
137
|
-
const stopVoiceTyping = self._startTyping(chat_id);
|
|
138
|
-
let localPath = null;
|
|
139
|
-
let transcript = "";
|
|
140
|
-
let transcribeError = null;
|
|
141
|
-
let transcribeBackend = null;
|
|
142
|
-
try {
|
|
143
|
-
localPath = await downloadTelegramFile(token, incomingAudio.file_id, mediaDir);
|
|
144
|
-
self.log(`telegram[${self.channel.name}] audio saved: ${localPath}`);
|
|
145
|
-
} catch (e) {
|
|
146
|
-
self.log(`telegram[${self.channel.name}] audio download failed: ${e.message}`);
|
|
147
|
-
}
|
|
148
|
-
if (localPath) {
|
|
149
|
-
try {
|
|
150
|
-
const result = await transcribeAudioFile(localPath);
|
|
151
|
-
transcript = result.text || "";
|
|
152
|
-
transcribeBackend = result.backend;
|
|
153
|
-
self.log(`telegram[${self.channel.name}] audio transcribed via ${transcribeBackend} (${transcript.length} chars, lang=${result.language || "?"})`);
|
|
154
|
-
} catch (e) {
|
|
155
|
-
transcribeError = e.message;
|
|
156
|
-
self.log(`telegram[${self.channel.name}] audio transcription failed: ${e.message}`);
|
|
157
|
-
}
|
|
158
|
-
}
|
|
159
|
-
stopVoiceTyping(); // reply-path typing takes over from here
|
|
160
|
-
const audioBody = transcript
|
|
161
|
-
? `[audio] ${transcript}`
|
|
162
|
-
: `[audio] (transcription unavailable${transcribeError ? ": " + transcribeError : ""})`;
|
|
163
|
-
|
|
164
|
-
appendGlobalMessage({
|
|
165
|
-
channel: CHANNELS.TELEGRAM,
|
|
166
|
-
direction: "in",
|
|
167
|
-
type: "audio",
|
|
168
|
-
actor_id: msg.from?.id ? String(msg.from.id) : author,
|
|
169
|
-
external_id: String(u.update_id),
|
|
170
|
-
author,
|
|
171
|
-
body: audioBody,
|
|
172
|
-
meta: {
|
|
173
|
-
chat_id,
|
|
174
|
-
user_id: msg.from?.id || null,
|
|
175
|
-
message_id: msg.message_id,
|
|
176
|
-
tg_channel: self.channel.name,
|
|
177
|
-
local_path: localPath,
|
|
178
|
-
file_id: incomingAudio.file_id,
|
|
179
|
-
duration: incomingAudio.duration,
|
|
180
|
-
mime_type: incomingAudio.mime_type,
|
|
181
|
-
transcription_backend: transcribeBackend,
|
|
182
|
-
transcription_error: transcribeError,
|
|
183
|
-
},
|
|
184
|
-
});
|
|
185
|
-
|
|
186
|
-
// Inject the transcribed text into `text` so the rest of the agent
|
|
187
|
-
// pipeline treats it identically to a typed message. If there was a
|
|
188
|
-
// caption alongside the audio, prepend the audio marker to it.
|
|
189
|
-
text = text ? `${audioBody}\n${text}` : audioBody;
|
|
93
|
+
({ text } = await handleIncomingAudio(self, { msg, u, author, chat_id, text, incomingAudio }));
|
|
190
94
|
}
|
|
191
95
|
|
|
192
96
|
// If there's a pending ask_questions flow for this chat AND the current
|
|
@@ -309,10 +213,14 @@ export async function handleUpdate(self, u) {
|
|
|
309
213
|
// Start "typing..." indicator. Stops when we send the reply (or fail).
|
|
310
214
|
const stopTyping = self._startTyping(chat_id);
|
|
311
215
|
|
|
216
|
+
// Preset to the super-agent defaults so every exit path (including one where
|
|
217
|
+
// neither the routed-agent nor the super-agent branch runs) has a valid
|
|
218
|
+
// actor — the routed-agent / super-agent branches override these on success,
|
|
219
|
+
// and their catch blocks reset all four together (no partial-overwrite gap).
|
|
312
220
|
let replyText;
|
|
313
221
|
let replyAuthor;
|
|
314
|
-
let replyActorId; // stable id: super_agent | agent slug
|
|
315
|
-
let replyKind;
|
|
222
|
+
let replyActorId = SUPERAGENT_ACTOR_ID; // stable id: super_agent | agent slug
|
|
223
|
+
let replyKind = "superagent"; // actor_kind: superagent | agent
|
|
316
224
|
const projectCfg = target.config || self.globalConfig;
|
|
317
225
|
// Display name for the super-agent persona on this channel (from identity.json).
|
|
318
226
|
const agentDisplay = resolveAgentName(self.globalConfig);
|
|
@@ -342,7 +250,10 @@ export async function handleUpdate(self, u) {
|
|
|
342
250
|
replyKind = "agent";
|
|
343
251
|
} catch (e) {
|
|
344
252
|
self.log(`telegram[${self.channel.name}] agent reply failed: ${e.message}`);
|
|
345
|
-
replyText =
|
|
253
|
+
replyText = t("telegram.error_agent", {
|
|
254
|
+
lang: resolveLang(self.globalConfig),
|
|
255
|
+
vars: { error: e.message.slice(0, 200) },
|
|
256
|
+
});
|
|
346
257
|
replyAuthor = agentDisplay;
|
|
347
258
|
replyActorId = SUPERAGENT_ACTOR_ID;
|
|
348
259
|
replyKind = "superagent";
|
|
@@ -354,140 +265,33 @@ export async function handleUpdate(self, u) {
|
|
|
354
265
|
}
|
|
355
266
|
}
|
|
356
267
|
|
|
357
|
-
// Fallback: super-agent — STREAMED.
|
|
358
|
-
//
|
|
359
|
-
//
|
|
360
|
-
//
|
|
361
|
-
//
|
|
362
|
-
// to channels that render tools — but NEVER sent to Telegram; tools are
|
|
363
|
-
// internal. The conversation saved on disk is the full, real exchange;
|
|
364
|
-
// Telegram is just the prose-only view of it.
|
|
268
|
+
// Fallback: super-agent — STREAMED. Each iteration's assistant text is sent
|
|
269
|
+
// to Telegram as its own message the moment the model produces it; tool
|
|
270
|
+
// calls are logged but never sent (internal). The streamed turn + its final
|
|
271
|
+
// send live in ./reply.js so this dispatcher and the ask-flow resume
|
|
272
|
+
// (_runResumedTurn in the host poller) share ONE reply path — no drift.
|
|
365
273
|
let saUsage = null;
|
|
366
274
|
let streamedCount = 0;
|
|
367
275
|
let lastStreamedText = "";
|
|
368
|
-
// Telegram shows the user ONLY prose — never the tool calls. On an action
|
|
369
|
-
// request the model often jumps straight to a tool with no preamble text,
|
|
370
|
-
// so the user would stare at a silent chat until the final reply. Send one
|
|
371
|
-
// short localized heads-up the moment real work starts (first tool_start),
|
|
372
|
-
// but only if the agent didn't already write its own "on it" line.
|
|
373
|
-
let sentHeadsUp = false;
|
|
374
|
-
const headsUpPhrase = () => t("telegram.heads_up", { lang: resolveLang(self.globalConfig) });
|
|
375
276
|
if (!replyText && isSuperAgentEnabled(self.globalConfig)) {
|
|
376
|
-
const onEvent =
|
|
377
|
-
try {
|
|
378
|
-
if (ev.type === "tool_start" && !sentHeadsUp && streamedCount === 0) {
|
|
379
|
-
sentHeadsUp = true;
|
|
380
|
-
const heads = headsUpPhrase();
|
|
381
|
-
await self._send({ chat_id, text: heads });
|
|
382
|
-
appendGlobalMessage({
|
|
383
|
-
channel: CHANNELS.TELEGRAM,
|
|
384
|
-
direction: "out",
|
|
385
|
-
type: "agent",
|
|
386
|
-
actor_id: SUPERAGENT_ACTOR_ID,
|
|
387
|
-
actor_kind: "superagent",
|
|
388
|
-
agent_slug: SUPERAGENT_ACTOR_ID,
|
|
389
|
-
author: agentDisplay,
|
|
390
|
-
body: heads,
|
|
391
|
-
meta: { chat_id, tg_channel: self.channel.name, in_reply_to: u.update_id, heads_up: true },
|
|
392
|
-
});
|
|
393
|
-
return;
|
|
394
|
-
}
|
|
395
|
-
if (ev.type === "assistant_text" && ev.text) {
|
|
396
|
-
const piece = stripThinking(ev.text).trim();
|
|
397
|
-
if (!piece) return;
|
|
398
|
-
await self._send({ chat_id, text: piece });
|
|
399
|
-
lastStreamedText = piece;
|
|
400
|
-
streamedCount += 1;
|
|
401
|
-
appendGlobalMessage({
|
|
402
|
-
channel: CHANNELS.TELEGRAM,
|
|
403
|
-
direction: "out",
|
|
404
|
-
type: "agent",
|
|
405
|
-
actor_id: SUPERAGENT_ACTOR_ID,
|
|
406
|
-
actor_kind: "superagent",
|
|
407
|
-
agent_slug: SUPERAGENT_ACTOR_ID,
|
|
408
|
-
author: agentDisplay,
|
|
409
|
-
body: piece,
|
|
410
|
-
meta: {
|
|
411
|
-
chat_id,
|
|
412
|
-
tg_channel: self.channel.name,
|
|
413
|
-
in_reply_to: u.update_id,
|
|
414
|
-
streamed: true,
|
|
415
|
-
iteration: ev.iteration,
|
|
416
|
-
},
|
|
417
|
-
});
|
|
418
|
-
} else if (ev.type === "tool_result" && ev.trace) {
|
|
419
|
-
// Logged for the audit trail / other channels — NOT sent to Telegram.
|
|
420
|
-
const t = ev.trace;
|
|
421
|
-
appendGlobalMessage({
|
|
422
|
-
channel: CHANNELS.TELEGRAM,
|
|
423
|
-
direction: "out",
|
|
424
|
-
type: "tool",
|
|
425
|
-
actor_id: t.tool,
|
|
426
|
-
actor_kind: "tool",
|
|
427
|
-
author: agentDisplay,
|
|
428
|
-
body: `${t.tool}(${JSON.stringify(t.args || {}).slice(0, 200)})`,
|
|
429
|
-
meta: {
|
|
430
|
-
chat_id,
|
|
431
|
-
tg_channel: self.channel.name,
|
|
432
|
-
in_reply_to: u.update_id,
|
|
433
|
-
tool: t.tool,
|
|
434
|
-
args: t.args,
|
|
435
|
-
result: t.result,
|
|
436
|
-
iteration: ev.iteration,
|
|
437
|
-
},
|
|
438
|
-
});
|
|
439
|
-
} else if (ev.type === "engine_failed") {
|
|
440
|
-
// A model in the fallback chain errored; the loop is rotating to
|
|
441
|
-
// the next one. Log it so a mid-turn provider failure (rate limit,
|
|
442
|
-
// tool-grammar 400, …) is diagnosable instead of invisible.
|
|
443
|
-
self.log(
|
|
444
|
-
`telegram[${self.channel.name}] engine_failed: ${ev.model || "?"} (${ev.reason || "?"}) → ${ev.retry_with || "end of chain"}`,
|
|
445
|
-
);
|
|
446
|
-
} else if (ev.type === "model_routed" || ev.type === "model_retry") {
|
|
447
|
-
self.log(
|
|
448
|
-
`telegram[${self.channel.name}] ${ev.type}: model=${ev.model || "?"}${ev.reason ? ` reason=${ev.reason}` : ""}${ev.from_fallback ? " (fallback)" : ""}`,
|
|
449
|
-
);
|
|
450
|
-
}
|
|
451
|
-
} catch (e) {
|
|
452
|
-
// A failed intermediate send must not abort the whole run.
|
|
453
|
-
self.log(`telegram[${self.channel.name}] stream event failed: ${e.message}`);
|
|
454
|
-
}
|
|
455
|
-
};
|
|
456
|
-
|
|
457
|
-
const confirmAdapter = createTelegramConfirmAdapter({
|
|
458
|
-
token: resolveBotToken(self.channel),
|
|
459
|
-
chatId: chat_id,
|
|
460
|
-
pendingStore: getConfirmStore(),
|
|
461
|
-
});
|
|
277
|
+
const { onEvent, state } = buildStreamHandler(self, { chat_id, update_id: u.update_id, agentDisplay });
|
|
462
278
|
|
|
463
|
-
// `/slug ...` shortcut: load the matching skill body into contextNote
|
|
464
|
-
//
|
|
279
|
+
// `/slug ...` shortcut: load the matching skill body into contextNote and
|
|
280
|
+
// strip the prefix from the user prompt before sending to the loop.
|
|
465
281
|
const slashed = tryResolveSkillCommand(text, { projectPath: target?.path });
|
|
466
|
-
const slashedPrompt = slashed.handled ? slashed.prompt : text;
|
|
467
|
-
const slashedContextNote = slashed.handled ? slashed.contextNote : "";
|
|
468
282
|
|
|
469
283
|
try {
|
|
470
|
-
const sa = await
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
plugins: self.plugins,
|
|
474
|
-
registries: self.registries,
|
|
475
|
-
prompt: slashedPrompt,
|
|
284
|
+
const sa = await runTelegramSuperAgent(self, {
|
|
285
|
+
chat_id,
|
|
286
|
+
prompt: slashed.handled ? slashed.prompt : text,
|
|
476
287
|
previousMessages,
|
|
477
|
-
|
|
288
|
+
target,
|
|
289
|
+
author,
|
|
478
290
|
relationshipBlock,
|
|
479
291
|
allowedTools,
|
|
480
|
-
contextNote:
|
|
481
|
-
channelMeta: buildTelegramMeta({
|
|
482
|
-
channelName: self.channel.name,
|
|
483
|
-
author,
|
|
484
|
-
chatId: chat_id,
|
|
485
|
-
target,
|
|
486
|
-
routeToAgent: self.channel.route_to_agent,
|
|
487
|
-
}),
|
|
292
|
+
contextNote: slashed.handled ? slashed.contextNote : "",
|
|
488
293
|
signal: abortCtrl.signal,
|
|
489
294
|
onEvent,
|
|
490
|
-
requestConfirmation: confirmAdapter.requestConfirmation,
|
|
491
295
|
});
|
|
492
296
|
replyText = sa.text;
|
|
493
297
|
replyAuthor = sa.name || agentDisplay;
|
|
@@ -496,13 +300,13 @@ export async function handleUpdate(self, u) {
|
|
|
496
300
|
saUsage = sa.usage;
|
|
497
301
|
|
|
498
302
|
// ── ask_questions integration ────────────────────────────────────
|
|
499
|
-
// If the super-agent ended this turn by calling ask_questions, hand
|
|
500
|
-
//
|
|
501
|
-
//
|
|
502
|
-
//
|
|
303
|
+
// If the super-agent ended this turn by calling ask_questions, hand off
|
|
304
|
+
// to the inline-keyboard flow instead of sending the bare assistant
|
|
305
|
+
// text. The flow keeps state per chat_id and re-runs the super-agent
|
|
306
|
+
// (via _runResumedTurn) once every answer is collected.
|
|
503
307
|
const askQuestions = askFlow.extractAskQuestionsFromTrace(sa.trace);
|
|
504
308
|
if (askQuestions && chat_id) {
|
|
505
|
-
|
|
309
|
+
self.activeRequests.delete(chat_id);
|
|
506
310
|
stopTyping();
|
|
507
311
|
try {
|
|
508
312
|
await self._startAskFlow({
|
|
@@ -523,21 +327,20 @@ export async function handleUpdate(self, u) {
|
|
|
523
327
|
}
|
|
524
328
|
return; // The reply for this turn IS the ask flow.
|
|
525
329
|
}
|
|
330
|
+
streamedCount = state.streamedCount;
|
|
331
|
+
lastStreamedText = state.lastStreamedText;
|
|
526
332
|
} catch (e) {
|
|
527
333
|
if (abortCtrl.signal.aborted) {
|
|
528
334
|
// A newer message superseded this one. Whatever streamed so far is
|
|
529
|
-
// already sent + logged; the newer message's run continues the
|
|
530
|
-
// thread from that history.
|
|
335
|
+
// already sent + logged; the newer message's run continues the thread.
|
|
531
336
|
self.log(`telegram[${self.channel.name}] request aborted for chat ${chat_id}`);
|
|
532
337
|
if (chat_id) self.activeRequests.delete(chat_id);
|
|
533
338
|
stopTyping();
|
|
534
339
|
return;
|
|
535
340
|
}
|
|
536
341
|
self.log(`telegram[${self.channel.name}] super-agent failed: ${e.message}`);
|
|
537
|
-
// Surface the failure to the user instead of silently dropping the
|
|
538
|
-
|
|
539
|
-
// the message. Keep the message short and non-leaking.
|
|
540
|
-
replyText = `⚠️ Could not generate a reply right now (${e.message || "internal error"}).`;
|
|
342
|
+
// Surface the failure to the user instead of silently dropping the turn.
|
|
343
|
+
replyText = telegramErrorText(self, e);
|
|
541
344
|
replyAuthor = agentDisplay;
|
|
542
345
|
replyActorId = SUPERAGENT_ACTOR_ID;
|
|
543
346
|
replyKind = "superagent";
|
|
@@ -545,71 +348,18 @@ export async function handleUpdate(self, u) {
|
|
|
545
348
|
}
|
|
546
349
|
|
|
547
350
|
if (chat_id) self.activeRequests.delete(chat_id);
|
|
548
|
-
|
|
549
|
-
// Final answer. The intermediate prose was already streamed; only send the
|
|
550
|
-
// final text if it's non-empty AND not a duplicate of the last streamed
|
|
551
|
-
// piece (the loop can end on an iteration whose text was already sent).
|
|
552
|
-
// If nothing streamed and there's no final text, send a minimal ack so the
|
|
553
|
-
// turn isn't silently empty.
|
|
554
|
-
const finalClean = replyText ? stripThinking(replyText).trim() : "";
|
|
555
|
-
let toSend = "";
|
|
556
|
-
if (finalClean && finalClean !== lastStreamedText) {
|
|
557
|
-
toSend = finalClean;
|
|
558
|
-
} else if (!finalClean) {
|
|
559
|
-
// Never end a turn on silence. The loop's tool-free wrap-up normally
|
|
560
|
-
// fills finalClean with a model-authored closing (handled above); this is
|
|
561
|
-
// the last-resort floor for the rare case it still came back empty. A
|
|
562
|
-
// pure chit-chat turn that did nothing gets the short ack; a turn that
|
|
563
|
-
// streamed/acted but produced no closing gets a neutral "continue?" that
|
|
564
|
-
// does NOT claim completion.
|
|
565
|
-
toSend = streamedCount === 0
|
|
566
|
-
? t("telegram.fallback_listo", { lang: resolveLang(self.globalConfig) })
|
|
567
|
-
: t("telegram.fallback_continue", { lang: resolveLang(self.globalConfig) });
|
|
568
|
-
}
|
|
569
|
-
|
|
570
351
|
stopTyping();
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
appendGlobalMessage({
|
|
584
|
-
channel: CHANNELS.TELEGRAM,
|
|
585
|
-
direction: "out",
|
|
586
|
-
type: "agent",
|
|
587
|
-
actor_id: replyActorId || SUPERAGENT_ACTOR_ID,
|
|
588
|
-
actor_kind: replyKind || "superagent",
|
|
589
|
-
agent_slug: replyActorId || SUPERAGENT_ACTOR_ID,
|
|
590
|
-
author: replyAuthor || agentDisplay,
|
|
591
|
-
body: toSend,
|
|
592
|
-
meta,
|
|
593
|
-
});
|
|
594
|
-
} catch (e) {
|
|
595
|
-
self.log(`telegram[${self.channel.name}] send-back error: ${e.message}`);
|
|
596
|
-
appendGlobalMessage({
|
|
597
|
-
channel: CHANNELS.TELEGRAM,
|
|
598
|
-
direction: "out",
|
|
599
|
-
type: "agent",
|
|
600
|
-
actor_id: replyActorId || SUPERAGENT_ACTOR_ID,
|
|
601
|
-
actor_kind: replyKind || "superagent",
|
|
602
|
-
agent_slug: replyActorId || SUPERAGENT_ACTOR_ID,
|
|
603
|
-
author: replyAuthor || agentDisplay,
|
|
604
|
-
body: `[send_failed] ${toSend}`,
|
|
605
|
-
meta: {
|
|
606
|
-
chat_id,
|
|
607
|
-
tg_channel: self.channel.name,
|
|
608
|
-
in_reply_to: u.update_id,
|
|
609
|
-
send_error: e.message,
|
|
610
|
-
...(saUsage ? { usage: saUsage } : {}),
|
|
611
|
-
},
|
|
612
|
-
});
|
|
613
|
-
}
|
|
352
|
+
await sendFinalReply(self, {
|
|
353
|
+
chat_id,
|
|
354
|
+
update_id: u.update_id,
|
|
355
|
+
replyText,
|
|
356
|
+
replyAuthor,
|
|
357
|
+
replyActorId,
|
|
358
|
+
replyKind,
|
|
359
|
+
saUsage,
|
|
360
|
+
streamedCount,
|
|
361
|
+
lastStreamedText,
|
|
362
|
+
agentDisplay,
|
|
363
|
+
});
|
|
614
364
|
}
|
|
615
365
|
|
|
@@ -2,10 +2,37 @@
|
|
|
2
2
|
// big poller class stays focused on lifecycle + message dispatch. Each
|
|
3
3
|
// function is pure (no `this`) — instances import them and call as needed.
|
|
4
4
|
import fs from "node:fs";
|
|
5
|
-
import
|
|
5
|
+
import path from "node:path";
|
|
6
|
+
import { TELEGRAM_STATE_PATH, APX_HOME } from "#core/config/index.js";
|
|
6
7
|
|
|
7
8
|
const nowIso = () => new Date().toISOString().replace(/\.\d{3}Z$/, "Z");
|
|
8
9
|
|
|
10
|
+
/**
 * Display label for a Telegram sender, used as the `author` / actor fallback.
 * Preference order: `@username` → "First Last" → "unknown".
 *
 * Single source of truth so every inbound branch (text/photo/audio) and the
 * message store agree. NOTE: this is the raw handle; the *resolved contact
 * name* (which prefers a saved roster name) is `resolveSender().name` in
 * core/identity/telegram.js — different purpose, don't conflate them.
 *
 * @param {{ username?: string, first_name?: string, last_name?: string } | null | undefined} from
 *   The `from` object of a Telegram message; may be absent.
 * @returns {string} `@username` when a username is set, otherwise the trimmed
 *   "first last" name, otherwise the literal "unknown". Never empty.
 */
export function telegramAuthorLabel(from) {
  if (from?.username) return `@${from.username}`;
  // Either name part may be missing; trim removes the stray separator space.
  const fullName = `${from?.first_name || ""} ${from?.last_name || ""}`.trim();
  return fullName || "unknown";
}
|
|
23
|
+
|
|
24
|
+
/**
 * Resolve the shared media-download directory (`<APX_HOME>/media`) and make
 * sure it exists before handing it back.
 *
 * Owns BOTH the path and the mkdir so callers never have to touch `fs` or
 * `APX_HOME` themselves.
 *
 * @returns {string} Path of the media directory (created recursively if missing).
 */
export function telegramMediaDir() {
  const mediaRoot = path.join(APX_HOME, "media");
  // `recursive: true` makes this a no-op when the directory already exists.
  fs.mkdirSync(mediaRoot, { recursive: true });
  return mediaRoot;
}
|
|
35
|
+
|
|
9
36
|
/**
|
|
10
37
|
* Build the channelMeta block the super-agent loop receives for a Telegram
|
|
11
38
|
* turn. The prompt template at src/core/agent/prompts/channels/telegram.md
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
// Inbound Telegram VOICE/AUDIO handling, split out of dispatch.js. Telegram
|
|
2
|
+
// sends `voice` for the press-and-hold mic recording (.oga/opus) and `audio`
|
|
3
|
+
// for uploaded audio files (mp3/m4a/etc.). Either way we download, run it
|
|
4
|
+
// through Whisper, prefix the result with `[audio] ` and let the rest of the
|
|
5
|
+
// message flow handle it as plain text.
|
|
6
|
+
//
|
|
7
|
+
// Takes the poller instance (`self`, for logging, channel + the typing
|
|
8
|
+
// indicator) plus the parsed update context, and returns the `text` the rest of
|
|
9
|
+
// the pipeline should run — the transcript merged into any existing caption.
|
|
10
|
+
import { appendGlobalMessage } from "#core/stores/messages.js";
|
|
11
|
+
import { CHANNELS } from "#core/constants/channels.js";
|
|
12
|
+
import { transcribe as transcribeAudioFile } from "#core/voice/transcription.js";
|
|
13
|
+
import { resolveBotToken, telegramMediaDir } from "../helpers.js";
|
|
14
|
+
import { downloadTelegramFile } from "../media.js";
|
|
15
|
+
|
|
16
|
+
/**
 * Handle an inbound Telegram voice/audio attachment: download it, try to
 * transcribe it, archive the inbound message, and return the text the rest of
 * the pipeline should continue with.
 *
 * Download and transcription failures are logged and tolerated — the function
 * still archives the message and returns an `[audio] (transcription unavailable…)`
 * marker instead of throwing.
 *
 * @param {object} self poller instance (uses self.log, self.channel, self._startTyping)
 * @param {object} ctx  { msg, u, author, chat_id, text, incomingAudio }
 * @returns {Promise<{ text: string }>} the `[audio] …` body, followed by the
 *   original caption on a new line when one was present
 */
export async function handleIncomingAudio(self, { msg, u, author, chat_id, text, incomingAudio }) {
  const botToken = resolveBotToken(self.channel);
  const downloadDir = telegramMediaDir();
  // Evaluated lazily at each log call, exactly like an inline template would be.
  const tag = () => `telegram[${self.channel.name}]`;

  // Start the typing indicator before the slow part (download + transcription)
  // so the chat shows feedback while this handler works.
  const stopVoiceTyping = self._startTyping(chat_id);

  const outcome = { localPath: null, transcript: "", error: null, backend: null };

  try {
    outcome.localPath = await downloadTelegramFile(botToken, incomingAudio.file_id, downloadDir);
    self.log(`${tag()} audio saved: ${outcome.localPath}`);
  } catch (err) {
    self.log(`${tag()} audio download failed: ${err.message}`);
  }

  // Transcription only makes sense when the file actually landed on disk.
  if (outcome.localPath) {
    try {
      const result = await transcribeAudioFile(outcome.localPath);
      outcome.transcript = result.text || "";
      outcome.backend = result.backend;
      self.log(`${tag()} audio transcribed via ${outcome.backend} (${outcome.transcript.length} chars, lang=${result.language || "?"})`);
    } catch (err) {
      outcome.error = err.message;
      self.log(`${tag()} audio transcription failed: ${err.message}`);
    }
  }

  stopVoiceTyping(); // reply-path typing takes over from here

  let audioBody;
  if (outcome.transcript) {
    audioBody = `[audio] ${outcome.transcript}`;
  } else {
    const reason = outcome.error ? `: ${outcome.error}` : "";
    audioBody = `[audio] (transcription unavailable${reason})`;
  }

  const senderId = msg.from?.id;
  const record = {
    channel: CHANNELS.TELEGRAM,
    direction: "in",
    type: "audio",
    actor_id: senderId ? String(senderId) : author,
    external_id: String(u.update_id),
    author,
    body: audioBody,
    meta: {
      chat_id,
      user_id: senderId || null,
      message_id: msg.message_id,
      tg_channel: self.channel.name,
      local_path: outcome.localPath,
      file_id: incomingAudio.file_id,
      duration: incomingAudio.duration,
      mime_type: incomingAudio.mime_type,
      transcription_backend: outcome.backend,
      transcription_error: outcome.error,
    },
  };
  appendGlobalMessage(record);

  // The audio marker goes first; an accompanying caption follows on its own line.
  if (!text) return { text: audioBody };
  return { text: `${audioBody}\n${text}` };
}
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
// Inbound Telegram PHOTO handling, split out of dispatch.js so the dispatcher
|
|
2
|
+
// stays focused on routing. Independent of the poller's lifecycle: it takes the poller
|
|
3
|
+
// instance (`self`, for logging + channel) plus the parsed update context, and
|
|
4
|
+
// returns the (possibly rewritten) `text` the rest of the pipeline should run.
|
|
5
|
+
//
|
|
6
|
+
// Vision note: we do NOT have image understanding yet — the engine layer can't
|
|
7
|
+
// pass image content to the model. So we download + archive the photo and then
|
|
8
|
+
// inject an internal `[image]` marker into `text` so the agent ALWAYS produces a
|
|
9
|
+
// reply in its own words (never goes silent on a no-caption photo). The reply is
|
|
10
|
+
// model-authored; the marker only tells the model an image arrived and that it
|
|
11
|
+
// can't see the pixels yet. Mirrors the `[audio]` marker convention.
|
|
12
|
+
import { appendGlobalMessage } from "#core/stores/messages.js";
|
|
13
|
+
import { CHANNELS } from "#core/constants/channels.js";
|
|
14
|
+
import { resolveBotToken, telegramMediaDir } from "../helpers.js";
|
|
15
|
+
import { downloadTelegramFile } from "../media.js";
|
|
16
|
+
|
|
17
|
+
/**
 * Pick the largest entry from a non-empty Telegram `photo` size list.
 *
 * Compares by `file_size` when both candidates report one. Telegram marks
 * `file_size` as optional, so when either side lacks it the comparison falls
 * back to pixel area (width × height). Ties keep the earlier entry.
 *
 * @param {Array<{ file_id: string, width?: number, height?: number, file_size?: number }>} sizes
 * @returns {object} the chosen size entry
 */
function pickLargestPhoto(sizes) {
  const area = (p) => (p.width || 0) * (p.height || 0);
  const isLarger = (candidate, best) => {
    if (Number.isFinite(candidate.file_size) && Number.isFinite(best.file_size)) {
      return candidate.file_size > best.file_size;
    }
    return area(candidate) > area(best);
  };
  return sizes.reduce((best, candidate) => (isLarger(candidate, best) ? candidate : best));
}

/**
 * Handle an inbound Telegram photo: download the largest size, archive the
 * inbound message, and return the text the pipeline should continue with.
 *
 * A failed download is logged and tolerated — the message is archived either
 * way (with `local_path: null`), and an in-band marker is always injected so
 * the turn never proceeds with empty text.
 *
 * @param {object} self poller instance (uses self.log, self.channel)
 * @param {object} ctx  { msg, u, author, chat_id, text }
 * @returns {Promise<{ text: string }>} the marker, followed by the caption when
 *   one was present; `text` unchanged if the message carries no photo sizes
 */
export async function handleIncomingPhoto(self, { msg, u, author, chat_id, text }) {
  // Defensive: callers are expected to check this, but reduce() on an empty
  // list would throw, so pass the text through untouched instead.
  if (!Array.isArray(msg.photo) || msg.photo.length === 0) return { text };

  // Telegram sends multiple sizes; pick the largest.
  const bestPhoto = pickLargestPhoto(msg.photo);
  const token = resolveBotToken(self.channel);
  const mediaDir = telegramMediaDir();

  let localPath = null;
  try {
    localPath = await downloadTelegramFile(token, bestPhoto.file_id, mediaDir);
    self.log(`telegram[${self.channel.name}] photo saved: ${localPath}`);
  } catch (e) {
    self.log(`telegram[${self.channel.name}] photo download failed: ${e.message}`);
  }

  // Archive the inbound photo regardless of download outcome, so chat history
  // records it even if the file fetch failed.
  appendGlobalMessage({
    channel: CHANNELS.TELEGRAM,
    direction: "in",
    type: "photo",
    actor_id: msg.from?.id ? String(msg.from.id) : author,
    external_id: String(u.update_id),
    author,
    body: text || "[photo]",
    meta: {
      chat_id,
      user_id: msg.from?.id || null,
      message_id: msg.message_id,
      tg_channel: self.channel.name,
      local_path: localPath,
      file_id: bestPhoto.file_id,
      width: bestPhoto.width,
      height: bestPhoto.height,
    },
  });

  // Guard: never go silent. Hand the agent an internal marker so it replies in
  // its own words. No vision yet → say so, in-band, so the model doesn't
  // hallucinate "seeing" the image.
  const marker = "[image attached — you cannot see its contents yet]";
  return { text: text ? `${marker} ${text}` : marker };
}
|