@agentprojectcontext/apx 1.42.0 → 1.42.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@agentprojectcontext/apx",
3
- "version": "1.42.0",
3
+ "version": "1.42.1",
4
4
  "description": "APX — unified CLI + daemon for the Agent Project Context (APC) standard.",
5
5
  "publishConfig": {
6
6
  "access": "public"
@@ -5,6 +5,16 @@
5
5
  // Coding surfaces (web Code / terminal Build) raise this via maxIters and use
6
6
  // the finish-tool completionContract instead.
7
7
  export const MAX_TOOL_ITERS = 10;
8
+ // Telegram is the "do real work for me" conversational surface (the super-agent
9
+ // Roby): it needs to chain explore→edit→verify→close autonomously, not stop
10
+ // after ~9 actions and ask "want me to continue?". A budget of 10 left only one
11
+ // usable action step before the reserved wrap-up, so multi-step tasks routinely
12
+ // cut off mid-job. We give it a real autonomy budget (mirroring the TUI Code
13
+ // surface's maxIters:40) while keeping it below the coding surfaces. The
14
+ // reserved final-step wrap-up still applies, but now only fires when a task
15
+ // genuinely exhausts this budget — a rare safety floor, not the default close.
16
+ // Overridable per-deployment via config.super_agent.telegram_max_iters.
17
+ export const TELEGRAM_TOOL_ITERS = 24;
8
18
  export const ACK_ONLY_TOOLS = new Set(["send_telegram"]);
9
19
  export const MAX_CONSECUTIVE_ACKS = 2;
10
20
  // Tools whose semantics REQUIRE handing control back to the user. After the
@@ -84,20 +84,32 @@ export const FINISH_TOOL_SCHEMA = {
84
84
  },
85
85
  };
86
86
 
87
- // Behavioral nudge appended to the system prompt for the ONE tool-free wrap-up
88
- // step at the end of a turn (see the loop's `isFinalWrapUp`). This shapes
89
- // BEHAVIOR only — it never dictates wording or supplies a canned/templated
90
- // sentence. The reply the user sees is 100% model-authored and varies with
91
- // what the model actually did this turn. We do NOT mention any "tool limit":
92
- // the model just speaks from where it is. Critically it must not claim work it
93
- // didn't do (weak models otherwise fabricate "all done").
94
- const WRAPUP_NUDGE =
95
- "\n\n[Internal note last step of this turn. No more tools will run now. " +
96
- "Reply in plain prose, in the user's language, from your own context: briefly " +
97
- "say what you actually accomplished so far (check the tool results above — do " +
98
- "NOT claim anything you didn't do), and if work is still pending, name what's " +
99
- "left and ask the user whether you should continue. Do not mention limits, " +
100
- "steps, or iterations just talk naturally.]";
87
+ // In-band signal injected as a CONVERSATION turn (not a system suffix) for the
88
+ // ONE tool-free wrap-up step at the end of a turn (see the loop's
89
+ // `isFinalWrapUp`). Delivering it through the message channel — the way a tool
90
+ // result arrives — makes weak models reliably author a reply instead of
91
+ // returning empty, because they always answer the latest turn. It shapes
92
+ // BEHAVIOR only: it never dictates wording or supplies a canned sentence. The
93
+ // reply the user sees is 100% model-authored and varies with what the model
94
+ // actually did this turn. Critically it must not claim work it didn't do (weak
95
+ // models otherwise fabricate "all done").
96
+ //
97
+ // Unlike a hard "iteration limit" message, it asks the model to surface the
98
+ // situation NATURALLY ("this is taking more steps than I expected") plus a
99
+ // concrete recap of what it found and did NOT find — so the closing reads like
100
+ // a human status update, never robotic system jargon.
101
+ const WRAPUP_SIGNAL =
102
+ "[Internal turn note — this is NOT from the user. You've taken several tool " +
103
+ "steps this turn and the task isn't finished; no more tools will run now. " +
104
+ "Write the user ONE short, natural closing message, in their language, " +
105
+ "entirely in your own words:\n" +
106
+ "- Concretely recap what you actually did and what you found so far — and be " +
107
+ "honest about what you did NOT find or couldn't resolve yet. Read the tool " +
108
+ "results above; do not claim anything you didn't do.\n" +
109
+ "- Mention plainly that this is taking more steps than expected and isn't done.\n" +
110
+ "- Ask whether they want you to keep going.\n" +
111
+ "Talk like a person giving a quick status update. Do NOT emit a tool call, " +
112
+ "JSON, or system jargon like \"iteration\" or \"limit\".]";
101
113
 
102
114
  /**
103
115
  * Shared tool-calling agent loop used by super-agent and future surfaces.
@@ -301,8 +313,8 @@ export async function runAgent({
301
313
  // Rather than cut off silently mid-tool-call, we run ONE tool-free step so
302
314
  // the model writes a natural closing in its OWN words — what it did, what's
303
315
  // left, and (if anything remains) whether to continue. We change only the
304
- // STRUCTURE (no tools this step) + a behavioral nudge; the wording is
305
- // entirely the model's. Coding surfaces keep their finish-tool flow, so
316
+ // STRUCTURE (no tools this step) + an in-band directive turn (WRAPUP_SIGNAL);
317
+ // the wording is entirely the model's. Coding surfaces keep their finish-tool flow, so
306
318
  // this never applies under completionContract.
307
319
  const isFinalWrapUp =
308
320
  !useContract && effectiveSchemas.length > 0 && iter === maxIters - 1;
@@ -322,8 +334,14 @@ export async function runAgent({
322
334
  let result;
323
335
  try {
324
336
  result = await tryCallEngine({
325
- system: isFinalWrapUp ? baseSystem + WRAPUP_NUDGE : baseSystem,
326
- messages: conversation,
337
+ system: baseSystem,
338
+ // Wrap-up: deliver the "you're out of steps, summarize + ask" directive
339
+ // as the latest CONVERSATION turn so the model treats it like any other
340
+ // turn it must answer — far more reliable than a system suffix on weak
341
+ // models. Ephemeral: built fresh here, never persisted to history.
342
+ messages: isFinalWrapUp
343
+ ? [...conversation, { role: "user", content: WRAPUP_SIGNAL }]
344
+ : conversation,
327
345
  config: globalConfig,
328
346
  // On the wrap-up step we withhold tools entirely so the model must
329
347
  // answer in prose — same as a real engine called with tools omitted.
@@ -11,25 +11,22 @@
11
11
  // `appendGlobalMessage`, `CHANNELS`, `nowIso`, etc. Top-level imports here
12
12
  // keep that scope intact — earlier splits forgot them and the bug only
13
13
  // surfaced when a real telegram update arrived (ReferenceError at runtime).
14
- import path from "node:path";
15
14
  import { callEngine } from "#core/engines/index.js";
16
- import { runSuperAgent, isSuperAgentEnabled } from "#core/agent/super-agent.js";
17
- import { stripThinking } from "#core/util/thinking.js";
15
+ import { isSuperAgentEnabled } from "#core/agent/super-agent.js";
18
16
  import { getRecentTelegramTurnsFromFs, appendGlobalMessage } from "#core/stores/messages.js";
19
17
  import { compactChannelIfNeeded } from "#core/memory/index.js";
20
18
  import { readAgents } from "#core/apc/parser.js";
21
19
  import { buildAgentSystem } from "#core/agent/build-agent-system.js";
22
- import { transcribe as transcribeAudioFile } from "#core/voice/transcription.js";
23
20
  import { resolveAgentName, SUPERAGENT_ACTOR_ID } from "#core/identity/index.js";
24
21
  import { registerSender, resolveAllowedTools } from "#core/identity/telegram.js";
25
22
  import { buildRelationshipBlock } from "#core/agent/index.js";
26
- import { getConfirmationStore as getConfirmStore } from "#core/confirmation/pending-store.js";
27
23
  import { CHANNELS } from "#core/constants/channels.js";
28
24
  import { tryResolveSkillCommand } from "#core/agent/skills/trigger.js";
29
- import { createTelegramConfirmAdapter } from "#core/confirmation/adapters/telegram.js";
30
25
  import * as askFlow from "./ask.js";
31
- import { buildTelegramMeta, resolveBotToken, sleep } from "./helpers.js";
32
- import { sendPhoto, sendVoice, sendDocument, sendAudio, downloadTelegramFile, API_BASE } from "./media.js";
26
+ import { telegramAuthorLabel } from "./helpers.js";
27
+ import { handleIncomingPhoto } from "./inbound/photo.js";
28
+ import { handleIncomingAudio } from "./inbound/audio.js";
29
+ import { buildStreamHandler, runTelegramSuperAgent, telegramErrorText, sendFinalReply } from "./reply.js";
33
30
  import { t, resolveLang } from "#core/i18n/index.js";
34
31
 
35
32
  const nowIso = () => new Date().toISOString().replace(/\.\d{3}Z$/, "Z");
@@ -50,10 +47,7 @@ export async function handleUpdate(self, u) {
50
47
  self.log(`telegram[${self.channel.name}] update ${u.update_id} ignored — no target project`);
51
48
  return;
52
49
  }
53
- const author =
54
- msg.from?.username
55
- ? "@" + msg.from.username
56
- : `${msg.from?.first_name || ""} ${msg.from?.last_name || ""}`.trim() || "unknown";
50
+ const author = telegramAuthorLabel(msg.from);
57
51
  const chat_id = msg.chat?.id;
58
52
 
59
53
  // Resolve WHO is writing (owner / known contact / guest), keyed by the
@@ -85,108 +79,18 @@ export async function handleUpdate(self, u) {
85
79
 
86
80
  let text = msg.text || msg.caption || "";
87
81
 
88
- // ── Incoming photo handling ───────────────────────────────────────────
82
+ // ── Incoming media ────────────────────────────────────────────────────
83
+ // Photo and voice/audio each download + archive the file and rewrite `text`
84
+ // so the rest of the pipeline treats them like a typed message. The handlers
85
+ // live in ./inbound/ to keep this dispatcher focused on routing. Photos have
86
+ // no vision yet, so the handler injects an `[image]` marker (never silent);
87
+ // audio injects its `[audio]` transcript.
89
88
  if (msg.photo && msg.photo.length > 0) {
90
- // Telegram sends multiple sizes; pick the largest
91
- const bestPhoto = msg.photo.reduce((a, b) => (b.file_size > a.file_size ? b : a));
92
- const token = resolveBotToken(self.channel);
93
- const mediaDir = path.join(APX_HOME, "media");
94
- fs.mkdirSync(mediaDir, { recursive: true });
95
- try {
96
- const localPath = await downloadTelegramFile(token, bestPhoto.file_id, mediaDir);
97
- self.log(`telegram[${self.channel.name}] photo saved: ${localPath}`);
98
- appendGlobalMessage({
99
- channel: CHANNELS.TELEGRAM,
100
- direction: "in",
101
- type: "photo",
102
- actor_id: msg.from?.id ? String(msg.from.id) : author,
103
- external_id: String(u.update_id),
104
- author,
105
- body: text || "[photo]",
106
- meta: {
107
- chat_id,
108
- user_id: msg.from?.id || null,
109
- message_id: msg.message_id,
110
- tg_channel: self.channel.name,
111
- local_path: localPath,
112
- file_id: bestPhoto.file_id,
113
- width: bestPhoto.width,
114
- height: bestPhoto.height,
115
- },
116
- });
117
- } catch (e) {
118
- self.log(`telegram[${self.channel.name}] photo download failed: ${e.message}`);
119
- }
120
- // If there's a caption, continue to handle it as text; otherwise return
121
- if (!text) return;
89
+ ({ text } = await handleIncomingPhoto(self, { msg, u, author, chat_id, text }));
122
90
  }
123
-
124
- // ── Incoming voice / audio handling ──────────────────────────────────
125
- // Telegram sends `voice` for the press-and-hold mic recording (.oga/opus)
126
- // and `audio` for uploaded audio files (mp3/m4a/etc.). Either way we
127
- // download, run it through Whisper, prefix the result with `[audio] `
128
- // and let the rest of the message flow handle it as plain text.
129
91
  const incomingAudio = msg.voice || msg.audio;
130
92
  if (incomingAudio && incomingAudio.file_id) {
131
- const token = resolveBotToken(self.channel);
132
- const mediaDir = path.join(APX_HOME, "media");
133
- fs.mkdirSync(mediaDir, { recursive: true });
134
- // Show "typing…" right away — download + transcription is the slow part of
135
- // a voice message, and the reply-path typing (below) only starts after it,
136
- // so without this the chat sits silent for seconds with no feedback.
137
- const stopVoiceTyping = self._startTyping(chat_id);
138
- let localPath = null;
139
- let transcript = "";
140
- let transcribeError = null;
141
- let transcribeBackend = null;
142
- try {
143
- localPath = await downloadTelegramFile(token, incomingAudio.file_id, mediaDir);
144
- self.log(`telegram[${self.channel.name}] audio saved: ${localPath}`);
145
- } catch (e) {
146
- self.log(`telegram[${self.channel.name}] audio download failed: ${e.message}`);
147
- }
148
- if (localPath) {
149
- try {
150
- const result = await transcribeAudioFile(localPath);
151
- transcript = result.text || "";
152
- transcribeBackend = result.backend;
153
- self.log(`telegram[${self.channel.name}] audio transcribed via ${transcribeBackend} (${transcript.length} chars, lang=${result.language || "?"})`);
154
- } catch (e) {
155
- transcribeError = e.message;
156
- self.log(`telegram[${self.channel.name}] audio transcription failed: ${e.message}`);
157
- }
158
- }
159
- stopVoiceTyping(); // reply-path typing takes over from here
160
- const audioBody = transcript
161
- ? `[audio] ${transcript}`
162
- : `[audio] (transcription unavailable${transcribeError ? ": " + transcribeError : ""})`;
163
-
164
- appendGlobalMessage({
165
- channel: CHANNELS.TELEGRAM,
166
- direction: "in",
167
- type: "audio",
168
- actor_id: msg.from?.id ? String(msg.from.id) : author,
169
- external_id: String(u.update_id),
170
- author,
171
- body: audioBody,
172
- meta: {
173
- chat_id,
174
- user_id: msg.from?.id || null,
175
- message_id: msg.message_id,
176
- tg_channel: self.channel.name,
177
- local_path: localPath,
178
- file_id: incomingAudio.file_id,
179
- duration: incomingAudio.duration,
180
- mime_type: incomingAudio.mime_type,
181
- transcription_backend: transcribeBackend,
182
- transcription_error: transcribeError,
183
- },
184
- });
185
-
186
- // Inject the transcribed text into `text` so the rest of the agent
187
- // pipeline treats it identically to a typed message. If there was a
188
- // caption alongside the audio, prepend the audio marker to it.
189
- text = text ? `${audioBody}\n${text}` : audioBody;
93
+ ({ text } = await handleIncomingAudio(self, { msg, u, author, chat_id, text, incomingAudio }));
190
94
  }
191
95
 
192
96
  // If there's a pending ask_questions flow for this chat AND the current
@@ -309,10 +213,14 @@ export async function handleUpdate(self, u) {
309
213
  // Start "typing..." indicator. Stops when we send the reply (or fail).
310
214
  const stopTyping = self._startTyping(chat_id);
311
215
 
216
+ // Preset to the super-agent defaults so every exit path (including one where
217
+ // neither the routed-agent nor the super-agent branch runs) has a valid
218
+ // actor — the routed-agent / super-agent branches override these on success,
219
+ // and their catch blocks reset all four together (no partial-overwrite gap).
312
220
  let replyText;
313
221
  let replyAuthor;
314
- let replyActorId; // stable id: super_agent | agent slug
315
- let replyKind; // actor_kind: superagent | agent
222
+ let replyActorId = SUPERAGENT_ACTOR_ID; // stable id: super_agent | agent slug
223
+ let replyKind = "superagent"; // actor_kind: superagent | agent
316
224
  const projectCfg = target.config || self.globalConfig;
317
225
  // Display name for the super-agent persona on this channel (from identity.json).
318
226
  const agentDisplay = resolveAgentName(self.globalConfig);
@@ -342,7 +250,10 @@ export async function handleUpdate(self, u) {
342
250
  replyKind = "agent";
343
251
  } catch (e) {
344
252
  self.log(`telegram[${self.channel.name}] agent reply failed: ${e.message}`);
345
- replyText = `[apx error] ${e.message.slice(0, 200)}`;
253
+ replyText = t("telegram.error_agent", {
254
+ lang: resolveLang(self.globalConfig),
255
+ vars: { error: e.message.slice(0, 200) },
256
+ });
346
257
  replyAuthor = agentDisplay;
347
258
  replyActorId = SUPERAGENT_ACTOR_ID;
348
259
  replyKind = "superagent";
@@ -354,140 +265,33 @@ export async function handleUpdate(self, u) {
354
265
  }
355
266
  }
356
267
 
357
- // Fallback: super-agent — STREAMED.
358
- // Each iteration's assistant text is sent to Telegram as its own message
359
- // the moment the model produces it (its running commentary), so the user
360
- // sees a real back-and-forth instead of one giant final dump. Tool calls
361
- // are logged to the message store — visible via apx log / apx search and
362
- // to channels that render tools — but NEVER sent to Telegram; tools are
363
- // internal. The conversation saved on disk is the full, real exchange;
364
- // Telegram is just the prose-only view of it.
268
+ // Fallback: super-agent — STREAMED. Each iteration's assistant text is sent
269
+ // to Telegram as its own message the moment the model produces it; tool
270
+ // calls are logged but never sent (internal). The streamed turn + its final
271
+ // send live in ./reply.js so this dispatcher and the ask-flow resume
272
+ // (_runResumedTurn in the host poller) share ONE reply path — no drift.
365
273
  let saUsage = null;
366
274
  let streamedCount = 0;
367
275
  let lastStreamedText = "";
368
- // Telegram shows the user ONLY prose — never the tool calls. On an action
369
- // request the model often jumps straight to a tool with no preamble text,
370
- // so the user would stare at a silent chat until the final reply. Send one
371
- // short localized heads-up the moment real work starts (first tool_start),
372
- // but only if the agent didn't already write its own "on it" line.
373
- let sentHeadsUp = false;
374
- const headsUpPhrase = () => t("telegram.heads_up", { lang: resolveLang(self.globalConfig) });
375
276
  if (!replyText && isSuperAgentEnabled(self.globalConfig)) {
376
- const onEvent = async (ev) => {
377
- try {
378
- if (ev.type === "tool_start" && !sentHeadsUp && streamedCount === 0) {
379
- sentHeadsUp = true;
380
- const heads = headsUpPhrase();
381
- await self._send({ chat_id, text: heads });
382
- appendGlobalMessage({
383
- channel: CHANNELS.TELEGRAM,
384
- direction: "out",
385
- type: "agent",
386
- actor_id: SUPERAGENT_ACTOR_ID,
387
- actor_kind: "superagent",
388
- agent_slug: SUPERAGENT_ACTOR_ID,
389
- author: agentDisplay,
390
- body: heads,
391
- meta: { chat_id, tg_channel: self.channel.name, in_reply_to: u.update_id, heads_up: true },
392
- });
393
- return;
394
- }
395
- if (ev.type === "assistant_text" && ev.text) {
396
- const piece = stripThinking(ev.text).trim();
397
- if (!piece) return;
398
- await self._send({ chat_id, text: piece });
399
- lastStreamedText = piece;
400
- streamedCount += 1;
401
- appendGlobalMessage({
402
- channel: CHANNELS.TELEGRAM,
403
- direction: "out",
404
- type: "agent",
405
- actor_id: SUPERAGENT_ACTOR_ID,
406
- actor_kind: "superagent",
407
- agent_slug: SUPERAGENT_ACTOR_ID,
408
- author: agentDisplay,
409
- body: piece,
410
- meta: {
411
- chat_id,
412
- tg_channel: self.channel.name,
413
- in_reply_to: u.update_id,
414
- streamed: true,
415
- iteration: ev.iteration,
416
- },
417
- });
418
- } else if (ev.type === "tool_result" && ev.trace) {
419
- // Logged for the audit trail / other channels — NOT sent to Telegram.
420
- const t = ev.trace;
421
- appendGlobalMessage({
422
- channel: CHANNELS.TELEGRAM,
423
- direction: "out",
424
- type: "tool",
425
- actor_id: t.tool,
426
- actor_kind: "tool",
427
- author: agentDisplay,
428
- body: `${t.tool}(${JSON.stringify(t.args || {}).slice(0, 200)})`,
429
- meta: {
430
- chat_id,
431
- tg_channel: self.channel.name,
432
- in_reply_to: u.update_id,
433
- tool: t.tool,
434
- args: t.args,
435
- result: t.result,
436
- iteration: ev.iteration,
437
- },
438
- });
439
- } else if (ev.type === "engine_failed") {
440
- // A model in the fallback chain errored; the loop is rotating to
441
- // the next one. Log it so a mid-turn provider failure (rate limit,
442
- // tool-grammar 400, …) is diagnosable instead of invisible.
443
- self.log(
444
- `telegram[${self.channel.name}] engine_failed: ${ev.model || "?"} (${ev.reason || "?"}) → ${ev.retry_with || "end of chain"}`,
445
- );
446
- } else if (ev.type === "model_routed" || ev.type === "model_retry") {
447
- self.log(
448
- `telegram[${self.channel.name}] ${ev.type}: model=${ev.model || "?"}${ev.reason ? ` reason=${ev.reason}` : ""}${ev.from_fallback ? " (fallback)" : ""}`,
449
- );
450
- }
451
- } catch (e) {
452
- // A failed intermediate send must not abort the whole run.
453
- self.log(`telegram[${self.channel.name}] stream event failed: ${e.message}`);
454
- }
455
- };
456
-
457
- const confirmAdapter = createTelegramConfirmAdapter({
458
- token: resolveBotToken(self.channel),
459
- chatId: chat_id,
460
- pendingStore: getConfirmStore(),
461
- });
277
+ const { onEvent, state } = buildStreamHandler(self, { chat_id, update_id: u.update_id, agentDisplay });
462
278
 
463
- // `/slug ...` shortcut: load the matching skill body into contextNote
464
- // and strip the prefix from the user prompt before sending to the loop.
279
+ // `/slug ...` shortcut: load the matching skill body into contextNote and
280
+ // strip the prefix from the user prompt before sending to the loop.
465
281
  const slashed = tryResolveSkillCommand(text, { projectPath: target?.path });
466
- const slashedPrompt = slashed.handled ? slashed.prompt : text;
467
- const slashedContextNote = slashed.handled ? slashed.contextNote : "";
468
282
 
469
283
  try {
470
- const sa = await runSuperAgent({
471
- globalConfig: self.globalConfig,
472
- projects: self.projects,
473
- plugins: self.plugins,
474
- registries: self.registries,
475
- prompt: slashedPrompt,
284
+ const sa = await runTelegramSuperAgent(self, {
285
+ chat_id,
286
+ prompt: slashed.handled ? slashed.prompt : text,
476
287
  previousMessages,
477
- channel: CHANNELS.TELEGRAM,
288
+ target,
289
+ author,
478
290
  relationshipBlock,
479
291
  allowedTools,
480
- contextNote: slashedContextNote || undefined,
481
- channelMeta: buildTelegramMeta({
482
- channelName: self.channel.name,
483
- author,
484
- chatId: chat_id,
485
- target,
486
- routeToAgent: self.channel.route_to_agent,
487
- }),
292
+ contextNote: slashed.handled ? slashed.contextNote : "",
488
293
  signal: abortCtrl.signal,
489
294
  onEvent,
490
- requestConfirmation: confirmAdapter.requestConfirmation,
491
295
  });
492
296
  replyText = sa.text;
493
297
  replyAuthor = sa.name || agentDisplay;
@@ -496,13 +300,13 @@ export async function handleUpdate(self, u) {
496
300
  saUsage = sa.usage;
497
301
 
498
302
  // ── ask_questions integration ────────────────────────────────────
499
- // If the super-agent ended this turn by calling ask_questions, hand
500
- // off to the inline-keyboard flow instead of sending the bare
501
- // assistant text. The flow keeps state per chat_id and re-runs the
502
- // super-agent once every answer is collected.
303
+ // If the super-agent ended this turn by calling ask_questions, hand off
304
+ // to the inline-keyboard flow instead of sending the bare assistant
305
+ // text. The flow keeps state per chat_id and re-runs the super-agent
306
+ // (via _runResumedTurn) once every answer is collected.
503
307
  const askQuestions = askFlow.extractAskQuestionsFromTrace(sa.trace);
504
308
  if (askQuestions && chat_id) {
505
- if (chat_id) self.activeRequests.delete(chat_id);
309
+ self.activeRequests.delete(chat_id);
506
310
  stopTyping();
507
311
  try {
508
312
  await self._startAskFlow({
@@ -523,21 +327,20 @@ export async function handleUpdate(self, u) {
523
327
  }
524
328
  return; // The reply for this turn IS the ask flow.
525
329
  }
330
+ streamedCount = state.streamedCount;
331
+ lastStreamedText = state.lastStreamedText;
526
332
  } catch (e) {
527
333
  if (abortCtrl.signal.aborted) {
528
334
  // A newer message superseded this one. Whatever streamed so far is
529
- // already sent + logged; the newer message's run continues the
530
- // thread from that history.
335
+ // already sent + logged; the newer message's run continues the thread.
531
336
  self.log(`telegram[${self.channel.name}] request aborted for chat ${chat_id}`);
532
337
  if (chat_id) self.activeRequests.delete(chat_id);
533
338
  stopTyping();
534
339
  return;
535
340
  }
536
341
  self.log(`telegram[${self.channel.name}] super-agent failed: ${e.message}`);
537
- // Surface the failure to the user instead of silently dropping the
538
- // turn — otherwise from the chat side it looks like the bot ignored
539
- // the message. Keep the message short and non-leaking.
540
- replyText = `⚠️ Could not generate a reply right now (${e.message || "internal error"}).`;
342
+ // Surface the failure to the user instead of silently dropping the turn.
343
+ replyText = telegramErrorText(self, e);
541
344
  replyAuthor = agentDisplay;
542
345
  replyActorId = SUPERAGENT_ACTOR_ID;
543
346
  replyKind = "superagent";
@@ -545,71 +348,18 @@ export async function handleUpdate(self, u) {
545
348
  }
546
349
 
547
350
  if (chat_id) self.activeRequests.delete(chat_id);
548
-
549
- // Final answer. The intermediate prose was already streamed; only send the
550
- // final text if it's non-empty AND not a duplicate of the last streamed
551
- // piece (the loop can end on an iteration whose text was already sent).
552
- // If nothing streamed and there's no final text, send a minimal ack so the
553
- // turn isn't silently empty.
554
- const finalClean = replyText ? stripThinking(replyText).trim() : "";
555
- let toSend = "";
556
- if (finalClean && finalClean !== lastStreamedText) {
557
- toSend = finalClean;
558
- } else if (!finalClean) {
559
- // Never end a turn on silence. The loop's tool-free wrap-up normally
560
- // fills finalClean with a model-authored closing (handled above); this is
561
- // the last-resort floor for the rare case it still came back empty. A
562
- // pure chit-chat turn that did nothing gets the short ack; a turn that
563
- // streamed/acted but produced no closing gets a neutral "continue?" that
564
- // does NOT claim completion.
565
- toSend = streamedCount === 0
566
- ? t("telegram.fallback_listo", { lang: resolveLang(self.globalConfig) })
567
- : t("telegram.fallback_continue", { lang: resolveLang(self.globalConfig) });
568
- }
569
-
570
351
  stopTyping();
571
- if (!toSend) return; // everything was already streamed — nothing left to send
572
-
573
- try {
574
- await self._send({ chat_id, text: toSend });
575
- const meta = {
576
- chat_id,
577
- tg_channel: self.channel.name,
578
- in_reply_to: u.update_id,
579
- final: true,
580
- };
581
- if (replyText && stripThinking(replyText) !== replyText) meta.thinking_stripped = true;
582
- if (saUsage) meta.usage = saUsage;
583
- appendGlobalMessage({
584
- channel: CHANNELS.TELEGRAM,
585
- direction: "out",
586
- type: "agent",
587
- actor_id: replyActorId || SUPERAGENT_ACTOR_ID,
588
- actor_kind: replyKind || "superagent",
589
- agent_slug: replyActorId || SUPERAGENT_ACTOR_ID,
590
- author: replyAuthor || agentDisplay,
591
- body: toSend,
592
- meta,
593
- });
594
- } catch (e) {
595
- self.log(`telegram[${self.channel.name}] send-back error: ${e.message}`);
596
- appendGlobalMessage({
597
- channel: CHANNELS.TELEGRAM,
598
- direction: "out",
599
- type: "agent",
600
- actor_id: replyActorId || SUPERAGENT_ACTOR_ID,
601
- actor_kind: replyKind || "superagent",
602
- agent_slug: replyActorId || SUPERAGENT_ACTOR_ID,
603
- author: replyAuthor || agentDisplay,
604
- body: `[send_failed] ${toSend}`,
605
- meta: {
606
- chat_id,
607
- tg_channel: self.channel.name,
608
- in_reply_to: u.update_id,
609
- send_error: e.message,
610
- ...(saUsage ? { usage: saUsage } : {}),
611
- },
612
- });
613
- }
352
+ await sendFinalReply(self, {
353
+ chat_id,
354
+ update_id: u.update_id,
355
+ replyText,
356
+ replyAuthor,
357
+ replyActorId,
358
+ replyKind,
359
+ saUsage,
360
+ streamedCount,
361
+ lastStreamedText,
362
+ agentDisplay,
363
+ });
614
364
  }
615
365
 
@@ -2,10 +2,37 @@
2
2
  // big poller class stays focused on lifecycle + message dispatch. Each
3
3
  // function is pure (no `this`) — instances import them and call as needed.
4
4
  import fs from "node:fs";
5
- import { TELEGRAM_STATE_PATH } from "#core/config/index.js";
5
+ import path from "node:path";
6
+ import { TELEGRAM_STATE_PATH, APX_HOME } from "#core/config/index.js";
6
7
 
7
8
  const nowIso = () => new Date().toISOString().replace(/\.\d{3}Z$/, "Z");
8
9
 
10
+ /**
11
+ * Display label for a Telegram sender used as the `author` / actor fallback:
12
+ * @username → "First Last" → "unknown".
13
+ * Single source of truth so every inbound branch (text/photo/audio) and the
14
+ * message store agree. NOTE: this is the raw handle; the *resolved contact
15
+ * name* (which prefers a saved roster name) is `resolveSender().name` in
16
+ * core/identity/telegram.js — different purpose, don't conflate them.
17
+ */
18
+ export function telegramAuthorLabel(from) {
19
+ if (from?.username) return "@" + from.username;
20
+ const full = `${from?.first_name || ""} ${from?.last_name || ""}`.trim();
21
+ return full || "unknown";
22
+ }
23
+
24
+ /**
25
+ * Ensure and return the shared media-download directory (~/.apx/media).
26
+ * Owns BOTH the path and the mkdir so callers never touch `fs`/`APX_HOME`
27
+ * directly — the inbound dispatcher used to inline this and a module split
28
+ * dropped its `fs`/`APX_HOME` imports, silently breaking every photo/voice.
29
+ */
30
+ export function telegramMediaDir() {
31
+ const dir = path.join(APX_HOME, "media");
32
+ fs.mkdirSync(dir, { recursive: true });
33
+ return dir;
34
+ }
35
+
9
36
  /**
10
37
  * Build the channelMeta block the super-agent loop receives for a Telegram
11
38
  * turn. The prompt template at src/core/agent/prompts/channels/telegram.md
@@ -0,0 +1,82 @@
1
+ // Inbound Telegram VOICE/AUDIO handling, split out of dispatch.js. Telegram
2
+ // sends `voice` for the press-and-hold mic recording (.oga/opus) and `audio`
3
+ // for uploaded audio files (mp3/m4a/etc.). Either way we download, run it
4
+ // through Whisper, prefix the result with `[audio] ` and let the rest of the
5
+ // message flow handle it as plain text.
6
+ //
7
+ // Takes the poller instance (`self`, for logging, channel + the typing
8
+ // indicator) plus the parsed update context, and returns the `text` the rest of
9
+ // the pipeline should run — the transcript merged into any existing caption.
10
+ import { appendGlobalMessage } from "#core/stores/messages.js";
11
+ import { CHANNELS } from "#core/constants/channels.js";
12
+ import { transcribe as transcribeAudioFile } from "#core/voice/transcription.js";
13
+ import { resolveBotToken, telegramMediaDir } from "../helpers.js";
14
+ import { downloadTelegramFile } from "../media.js";
15
+
16
/**
 * Download an inbound voice/audio attachment, transcribe it, archive the
 * message, and return the text the rest of the pipeline should run.
 *
 * Both the download and the transcription are best-effort: a failure is
 * logged and the message is still archived, with a
 * "(transcription unavailable…)" body instead of the transcript.
 *
 * @param {object} self poller instance (uses self.log, self.channel, self._startTyping)
 * @param {object} ctx  { msg, u, author, chat_id, text, incomingAudio }
 * @returns {Promise<{ text: string }>} `[audio] …` body, followed by the
 *   caption on a new line when one was sent alongside the audio.
 */
export async function handleIncomingAudio(self, { msg, u, author, chat_id, text, incomingAudio }) {
  const botToken = resolveBotToken(self.channel);
  const downloadDir = telegramMediaDir();
  const tag = () => `telegram[${self.channel.name}]`;

  // Download + transcription is the slow part of a voice message and the
  // reply-path typing indicator only starts afterwards, so show "typing…" now.
  const stopVoiceTyping = self._startTyping(chat_id);

  const outcome = { localPath: null, transcript: "", error: null, backend: null };

  try {
    outcome.localPath = await downloadTelegramFile(botToken, incomingAudio.file_id, downloadDir);
    self.log(`${tag()} audio saved: ${outcome.localPath}`);
  } catch (err) {
    self.log(`${tag()} audio download failed: ${err.message}`);
  }

  if (outcome.localPath) {
    try {
      const result = await transcribeAudioFile(outcome.localPath);
      outcome.transcript = result.text || "";
      outcome.backend = result.backend;
      self.log(`${tag()} audio transcribed via ${outcome.backend} (${outcome.transcript.length} chars, lang=${result.language || "?"})`);
    } catch (err) {
      outcome.error = err.message;
      self.log(`${tag()} audio transcription failed: ${err.message}`);
    }
  }

  // The reply-path typing indicator takes over from here.
  stopVoiceTyping();

  let audioBody;
  if (outcome.transcript) {
    audioBody = `[audio] ${outcome.transcript}`;
  } else {
    const reason = outcome.error ? ": " + outcome.error : "";
    audioBody = `[audio] (transcription unavailable${reason})`;
  }

  const senderId = msg.from?.id;
  appendGlobalMessage({
    channel: CHANNELS.TELEGRAM,
    direction: "in",
    type: "audio",
    actor_id: senderId ? String(senderId) : author,
    external_id: String(u.update_id),
    author,
    body: audioBody,
    meta: {
      chat_id,
      user_id: senderId || null,
      message_id: msg.message_id,
      tg_channel: self.channel.name,
      local_path: outcome.localPath,
      file_id: incomingAudio.file_id,
      duration: incomingAudio.duration,
      mime_type: incomingAudio.mime_type,
      transcription_backend: outcome.backend,
      transcription_error: outcome.error,
    },
  });

  // Hand the transcript on as if it had been typed; a caption sent with the
  // audio follows the audio marker on its own line.
  if (text) {
    return { text: `${audioBody}\n${text}` };
  }
  return { text: audioBody };
}
@@ -0,0 +1,63 @@
1
+ // Inbound Telegram PHOTO handling, split out of dispatch.js so the dispatcher
2
+ // stays focused on routing. Pure of the poller's lifecycle: it takes the poller
3
+ // instance (`self`, for logging + channel) plus the parsed update context, and
4
+ // returns the (possibly rewritten) `text` the rest of the pipeline should run.
5
+ //
6
+ // Vision note: we do NOT have image understanding yet — the engine layer can't
7
+ // pass image content to the model. So we download + archive the photo and then
8
+ // inject an internal `[image]` marker into `text` so the agent ALWAYS produces a
9
+ // reply in its own words (never goes silent on a no-caption photo). The reply is
10
+ // model-authored; the marker only tells the model an image arrived and that it
11
+ // can't see the pixels yet. Mirrors the `[audio]` marker convention.
12
+ import { appendGlobalMessage } from "#core/stores/messages.js";
13
+ import { CHANNELS } from "#core/constants/channels.js";
14
+ import { resolveBotToken, telegramMediaDir } from "../helpers.js";
15
+ import { downloadTelegramFile } from "../media.js";
16
+
17
/**
 * Pick the largest entry of a Telegram `PhotoSize[]`.
 *
 * `file_size` is optional on `PhotoSize`; comparing `undefined` values is
 * always false, which would keep the FIRST (smallest) entry. So byte size is
 * only used when both candidates report it, otherwise pixel area decides.
 * Ties keep the earlier entry.
 *
 * @param {Array<object>} sizes non-empty list of photo sizes.
 * @returns {object} the chosen size entry.
 */
function pickLargestPhoto(sizes) {
  const hasByteSize = (p) => Number.isFinite(p.file_size);
  const area = (p) => (Number(p.width) || 0) * (Number(p.height) || 0);
  return sizes.reduce((best, candidate) => {
    const isBigger = hasByteSize(best) && hasByteSize(candidate)
      ? candidate.file_size > best.file_size
      : area(candidate) > area(best);
    return isBigger ? candidate : best;
  });
}

/**
 * Download and archive an inbound photo, then return the text the rest of the
 * pipeline should run.
 *
 * The download is best-effort: on failure the error is logged and the message
 * is still archived with `local_path: null`.
 *
 * @param {object} self poller instance (uses self.log, self.channel)
 * @param {object} ctx  { msg, u, author, chat_id, text }; `msg.photo` must be
 *   a non-empty array.
 * @returns {Promise<{ text: string }>} the image marker, followed by the
 *   caption when one was sent.
 */
export async function handleIncomingPhoto(self, { msg, u, author, chat_id, text }) {
  // Telegram sends multiple sizes; pick the largest.
  const bestPhoto = pickLargestPhoto(msg.photo);
  const token = resolveBotToken(self.channel);
  const mediaDir = telegramMediaDir();

  let localPath = null;
  try {
    localPath = await downloadTelegramFile(token, bestPhoto.file_id, mediaDir);
    self.log(`telegram[${self.channel.name}] photo saved: ${localPath}`);
  } catch (e) {
    self.log(`telegram[${self.channel.name}] photo download failed: ${e.message}`);
  }

  // Archive the inbound photo regardless of download outcome, so chat history
  // records it even if the file fetch failed.
  appendGlobalMessage({
    channel: CHANNELS.TELEGRAM,
    direction: "in",
    type: "photo",
    actor_id: msg.from?.id ? String(msg.from.id) : author,
    external_id: String(u.update_id),
    author,
    body: text || "[photo]",
    meta: {
      chat_id,
      user_id: msg.from?.id || null,
      message_id: msg.message_id,
      tg_channel: self.channel.name,
      local_path: localPath,
      file_id: bestPhoto.file_id,
      width: bestPhoto.width,
      height: bestPhoto.height,
    },
  });

  // Guard: never go silent. Hand the agent an internal marker so it replies in
  // its own words. No vision yet → say so, in-band, so the model doesn't
  // hallucinate "seeing" the image.
  const marker = "[image attached — you cannot see its contents yet]";
  return { text: text ? `${marker} ${text}` : marker };
}
@@ -0,0 +1,204 @@
1
+ // Shared Telegram super-agent reply path. Both the inbound dispatcher
2
+ // (handleUpdate) and the ask-flow resume (_runResumedTurn in the host poller)
3
+ // drive the SAME streamed turn through these helpers, so behavior — autonomy
4
+ // budget, streaming, never-silent floor, localized errors, rich channelMeta —
5
+ // can't drift between the two entry points. It did drift: the resume path was a
6
+ // stale hand-rolled copy that missed maxIters, streaming and i18n. One source
7
+ // of truth fixes that for good.
8
+ import { runSuperAgent } from "#core/agent/super-agent.js";
9
+ import { TELEGRAM_TOOL_ITERS } from "#core/agent/constants.js";
10
+ import { stripThinking } from "#core/util/thinking.js";
11
+ import { appendGlobalMessage } from "#core/stores/messages.js";
12
+ import { CHANNELS } from "#core/constants/channels.js";
13
+ import { SUPERAGENT_ACTOR_ID } from "#core/identity/index.js";
14
+ import { createTelegramConfirmAdapter } from "#core/confirmation/adapters/telegram.js";
15
+ import { getConfirmationStore as getConfirmStore } from "#core/confirmation/pending-store.js";
16
+ import { t, resolveLang } from "#core/i18n/index.js";
17
+ import { buildTelegramMeta, resolveBotToken } from "./helpers.js";
18
+
19
/**
 * Build the streaming event handler for one Telegram super-agent turn.
 *
 * Per event type:
 *  - `tool_start`: sends a one-shot localized heads-up, but only if no
 *    heads-up was sent yet and nothing has been streamed so far;
 *  - `assistant_text`: sends the thinking-stripped text as its own chat
 *    message and records it;
 *  - `tool_result`: recorded in the message store only, never sent to Telegram;
 *  - `engine_failed` / `model_routed` / `model_retry`: logged only.
 *
 * Any error raised while handling an event is logged and swallowed, so a
 * failed intermediate send cannot abort the run.
 *
 * @param {object} self poller instance (uses self._send, self.log, self.channel, self.globalConfig)
 * @param {object} ctx  { chat_id, update_id, agentDisplay }
 * @returns {{ onEvent: Function, state: { streamedCount: number, lastStreamedText: string, sentHeadsUp: boolean } }}
 *   the handler plus the live state the caller reads AFTER the run.
 */
export function buildStreamHandler(self, { chat_id, update_id, agentDisplay }) {
  const state = { streamedCount: 0, lastStreamedText: "", sentHeadsUp: false };

  // Store an outbound super-agent message; `extra` is appended to the base meta.
  const recordAgentMessage = (body, extra) => appendGlobalMessage({
    channel: CHANNELS.TELEGRAM,
    direction: "out",
    type: "agent",
    actor_id: SUPERAGENT_ACTOR_ID,
    actor_kind: "superagent",
    agent_slug: SUPERAGENT_ACTOR_ID,
    author: agentDisplay,
    body,
    meta: { chat_id, tg_channel: self.channel.name, in_reply_to: update_id, ...extra },
  });

  const onEvent = async (ev) => {
    try {
      switch (ev.type) {
        case "tool_start": {
          if (state.sentHeadsUp || state.streamedCount !== 0) break;
          state.sentHeadsUp = true;
          const headsUpText = t("telegram.heads_up", { lang: resolveLang(self.globalConfig) });
          await self._send({ chat_id, text: headsUpText });
          recordAgentMessage(headsUpText, { heads_up: true });
          break;
        }
        case "assistant_text": {
          if (!ev.text) break;
          const chunk = stripThinking(ev.text).trim();
          if (!chunk) break;
          await self._send({ chat_id, text: chunk });
          state.lastStreamedText = chunk;
          state.streamedCount += 1;
          recordAgentMessage(chunk, { streamed: true, iteration: ev.iteration });
          break;
        }
        case "tool_result": {
          if (!ev.trace) break;
          // Logged for the audit trail / other channels — NOT sent to Telegram.
          const { tool, args, result } = ev.trace;
          appendGlobalMessage({
            channel: CHANNELS.TELEGRAM,
            direction: "out",
            type: "tool",
            actor_id: tool,
            actor_kind: "tool",
            author: agentDisplay,
            body: `${tool}(${JSON.stringify(args || {}).slice(0, 200)})`,
            meta: { chat_id, tg_channel: self.channel.name, in_reply_to: update_id, tool, args, result, iteration: ev.iteration },
          });
          break;
        }
        case "engine_failed":
          // A model in the fallback chain errored and the loop is rotating to
          // the next one; log it so a mid-turn provider failure is diagnosable.
          self.log(`telegram[${self.channel.name}] engine_failed: ${ev.model || "?"} (${ev.reason || "?"}) → ${ev.retry_with || "end of chain"}`);
          break;
        case "model_routed":
        case "model_retry":
          self.log(`telegram[${self.channel.name}] ${ev.type}: model=${ev.model || "?"}${ev.reason ? ` reason=${ev.reason}` : ""}${ev.from_fallback ? " (fallback)" : ""}`);
          break;
        default:
          break;
      }
    } catch (e) {
      // A failed intermediate send must not abort the whole run.
      self.log(`telegram[${self.channel.name}] stream event failed: ${e.message}`);
    }
  };

  return { onEvent, state };
}
93
+
94
/**
 * Run the super-agent for a Telegram turn with the canonical channel config:
 * the autonomy budget, channelMeta built by `buildTelegramMeta`, the Telegram
 * confirmation adapter, and the caller's streaming handler. Both entry points
 * call this, so the configuration lives in one place.
 *
 * Budget: `config.super_agent.telegram_max_iters` is used only when it
 * coerces to a finite number >= 1 (floored to an integer). Anything else —
 * 0, unset, negative, NaN, Infinity — falls back to TELEGRAM_TOOL_ITERS, so a
 * typo in the config cannot hand the loop a nonsensical budget.
 *
 * @param {object} self poller instance (uses globalConfig, projects, plugins,
 *   registries, channel)
 * @param {object} opts { chat_id, prompt, previousMessages, target, author,
 *   relationshipBlock, allowedTools, contextNote, signal, onEvent }
 * @returns {*} whatever `runSuperAgent` returns, unmodified; failures are not
 *   caught here (the caller decides abort-vs-error handling).
 */
export function runTelegramSuperAgent(self, {
  chat_id, prompt, previousMessages, target, author, relationshipBlock,
  allowedTools, contextNote, signal, onEvent,
}) {
  const confirmAdapter = createTelegramConfirmAdapter({
    token: resolveBotToken(self.channel),
    chatId: chat_id,
    pendingStore: getConfirmStore(),
  });

  // Autonomy budget: Telegram is the "do the whole task for me" surface, so it
  // gets a real multi-step budget instead of the conversational default.
  const configuredIters = Number(self.globalConfig?.super_agent?.telegram_max_iters);
  const maxIters = Number.isFinite(configuredIters) && configuredIters >= 1
    ? Math.floor(configuredIters)
    : TELEGRAM_TOOL_ITERS;

  return runSuperAgent({
    globalConfig: self.globalConfig,
    projects: self.projects,
    plugins: self.plugins,
    registries: self.registries,
    prompt,
    previousMessages,
    channel: CHANNELS.TELEGRAM,
    relationshipBlock,
    allowedTools,
    contextNote: contextNote || undefined,
    channelMeta: buildTelegramMeta({
      channelName: self.channel.name,
      author,
      chatId: chat_id,
      target,
      routeToAgent: self.channel.route_to_agent,
    }),
    signal,
    onEvent,
    requestConfirmation: confirmAdapter.requestConfirmation,
    maxIters,
  });
}
138
+
139
/**
 * Localized "couldn't reply" text for a failed super-agent turn. The model
 * itself failed, so it cannot author this message: it stays templated
 * (`telegram.error_generic`) but follows the configured language.
 *
 * @param {object} self poller instance (uses self.globalConfig)
 * @param {Error} [e]   the failure; its message fills the `{error}` slot,
 *   defaulting to "internal error" when missing or empty.
 * @returns {string} the localized error text.
 */
export function telegramErrorText(self, e) {
  const lang = resolveLang(self.globalConfig);
  const error = e?.message || "internal error";
  return t("telegram.error_generic", { lang, vars: { error } });
}
147
+
148
/**
 * Send the closing message of a turn and record it in the message store.
 *
 * What gets sent:
 *  - the thinking-stripped `replyText`, unless it equals the last streamed
 *    piece (then nothing is sent — it is already in the chat);
 *  - when `replyText` is empty: a localized short ack if nothing was streamed,
 *    or a localized "continue?" if something was, so a turn never ends silent.
 *
 * A failed send is logged and recorded with a `[send_failed]` body instead of
 * being thrown. The caller stops the typing indicator before calling.
 *
 * @param {object} self poller instance (uses self._send, self.log, self.channel, self.globalConfig)
 * @param {object} opts { chat_id, update_id, replyText, replyAuthor,
 *   replyActorId, replyKind, saUsage, streamedCount, lastStreamedText,
 *   agentDisplay, extraMeta }
 * @returns {Promise<void>}
 */
export async function sendFinalReply(self, {
  chat_id, update_id, replyText, replyAuthor, replyActorId, replyKind,
  saUsage = null, streamedCount = 0, lastStreamedText = "", agentDisplay,
  extraMeta = {},
}) {
  const closing = replyText ? stripThinking(replyText).trim() : "";

  let outgoing;
  if (closing) {
    // Already delivered verbatim as the last streamed piece — nothing to add.
    if (closing === lastStreamedText) return;
    outgoing = closing;
  } else {
    const lang = resolveLang(self.globalConfig);
    const fallbackKey = streamedCount === 0
      ? "telegram.fallback_listo"
      : "telegram.fallback_continue";
    outgoing = t(fallbackKey, { lang });
  }
  if (!outgoing) return;

  const actorId = replyActorId || SUPERAGENT_ACTOR_ID;
  const kind = replyKind || "superagent";

  // Both outcomes store the same record shape; only body and meta differ.
  const record = (body, meta) => appendGlobalMessage({
    channel: CHANNELS.TELEGRAM,
    direction: "out",
    type: "agent",
    actor_id: actorId,
    actor_kind: kind,
    agent_slug: actorId,
    author: replyAuthor || agentDisplay,
    body,
    meta,
  });

  try {
    await self._send({ chat_id, text: outgoing });
    const meta = { chat_id, tg_channel: self.channel.name, in_reply_to: update_id, final: true, ...extraMeta };
    if (replyText && stripThinking(replyText) !== replyText) {
      meta.thinking_stripped = true;
    }
    if (saUsage) {
      meta.usage = saUsage;
    }
    record(outgoing, meta);
  } catch (e) {
    self.log(`telegram[${self.channel.name}] send-back error: ${e.message}`);
    record(`[send_failed] ${outgoing}`, {
      chat_id,
      tg_channel: self.channel.name,
      in_reply_to: update_id,
      send_error: e.message,
      ...(saUsage ? { usage: saUsage } : {}),
    });
  }
}
@@ -57,6 +57,11 @@ const DEFAULT_CONFIG = {
57
57
  system: "", // optional override; defaults in src/core/agent/prompts/
58
58
  permission_mode: PERMISSION_MODES.AUTOMATICO, // total | automatico | permiso
59
59
  allowed_tools: [], // used by permission_mode="permiso"
60
+ // Per-turn tool-loop budget for the Telegram super-agent. Higher = more
61
+ // autonomous (chains explore→edit→verify→close before replying); lower =
62
+ // snappier but more "want me to continue?" hand-backs. 0/unset → built-in
63
+ // default (TELEGRAM_TOOL_ITERS in src/core/agent/constants.js).
64
+ telegram_max_iters: 0,
60
65
  // Model fallback: ordered list. Each item carries its own provider
61
66
  // prefix; the array order IS the attempt order. The router tries the
62
67
  // primary (super_agent.model) first, then walks this list, skipping
@@ -4,4 +4,8 @@ export default {
4
4
  "telegram.reset_ack": "Done, context cleared. Starting fresh. What do you need?",
5
5
  "telegram.fallback_listo": "Done.",
6
6
  "telegram.fallback_continue": "Made some headway. Want me to keep going?",
7
+ // Host-emitted error floors (the model itself failed, so it can't author
8
+ // these — they stay templated, but at least follow the user's language).
9
+ "telegram.error_agent": "⚠️ The agent hit an error ({error}).",
10
+ "telegram.error_generic": "⚠️ Couldn't reply right now ({error}).",
7
11
  };
@@ -6,4 +6,8 @@ export default {
6
6
  "telegram.reset_ack": "Listo, contexto borrado. Arranco un hilo nuevo, ¿qué necesitás?",
7
7
  "telegram.fallback_listo": "Listo.",
8
8
  "telegram.fallback_continue": "Avancé con eso. ¿Querés que siga?",
9
+ // Pisos de error emitidos por el host (el modelo falló, no puede redactarlos
10
+ // él mismo — quedan fijos, pero al menos respetan el idioma del usuario).
11
+ "telegram.error_agent": "⚠️ El agente tuvo un error ({error}).",
12
+ "telegram.error_generic": "⚠️ No pude responder ahora mismo ({error}).",
9
13
  };
@@ -4,4 +4,8 @@ export default {
4
4
  "telegram.reset_ack": "Pronto, contexto limpo. Começando do zero — do que você precisa?",
5
5
  "telegram.fallback_listo": "Pronto.",
6
6
  "telegram.fallback_continue": "Avancei com isso. Quer que eu continue?",
7
+ // Pisos de erro emitidos pelo host (o modelo falhou, não pode redigi-los —
8
+ // ficam fixos, mas ao menos seguem o idioma do usuário).
9
+ "telegram.error_agent": "⚠️ O agente encontrou um erro ({error}).",
10
+ "telegram.error_generic": "⚠️ Não consegui responder agora ({error}).",
7
11
  };
@@ -27,23 +27,15 @@
27
27
  // "poll_interval_ms": 1500
28
28
  // }
29
29
 
30
- import fs from "node:fs";
31
- import path from "node:path";
32
- import { TELEGRAM_STATE_PATH, APX_HOME } from "#core/config/index.js";
33
- import { callEngine } from "#core/engines/index.js";
34
- import { runSuperAgent, isSuperAgentEnabled } from "#core/agent/super-agent.js";
35
- import { stripThinking } from "#core/util/thinking.js";
30
+ // This poller is intentionally thin: per-update logic lives in core/channels/
31
+ // telegram/ (dispatch + reply + ask + inbound). It keeps only what the *running
32
+ // process* needs — lifecycle, the poll loop, offset state and the inline-keyboard
33
+ // callbacks. The earlier dispatch extraction left a pile of now-dead imports
34
+ // here; only what's actually referenced below remains.
36
35
  import { getRecentTelegramTurnsFromFs, appendGlobalMessage } from "#core/stores/messages.js";
37
- import { compactChannelIfNeeded } from "#core/memory/index.js";
38
- import { readAgents } from "#core/apc/parser.js";
39
- import { buildAgentSystem } from "#core/agent/build-agent-system.js";
40
- import { transcribe as transcribeAudioFile } from "#core/voice/transcription.js";
41
36
  import { resolveAgentName, SUPERAGENT_ACTOR_ID } from "#core/identity/index.js";
42
- import { registerSender, resolveAllowedTools } from "#core/identity/telegram.js";
43
- import { buildRelationshipBlock } from "#core/agent/index.js";
44
37
  import { getConfirmationStore as getConfirmStore } from "#core/confirmation/pending-store.js";
45
38
  import { CHANNELS } from "#core/constants/channels.js";
46
- import { tryResolveSkillCommand } from "#core/agent/skills/trigger.js";
47
39
  import { createTelegramConfirmAdapter } from "#core/confirmation/adapters/telegram.js";
48
40
  import * as askFlow from "#core/channels/telegram/ask.js";
49
41
 
@@ -53,7 +45,6 @@ const nowIso = () => new Date().toISOString().replace(/\.\d{3}Z$/, "Z");
53
45
  // All non-class-bound channel logic lives in core/channels/telegram/ — this
54
46
  // file stays focused on the poller class + plugin lifecycle wiring.
55
47
  import {
56
- buildTelegramMeta,
57
48
  loadState,
58
49
  saveState,
59
50
  resolveBotToken,
@@ -63,9 +54,10 @@ import {
63
54
  sleep,
64
55
  } from "#core/channels/telegram/helpers.js";
65
56
  import { handleUpdate } from "#core/channels/telegram/dispatch.js";
57
+ import { buildStreamHandler, runTelegramSuperAgent, telegramErrorText, sendFinalReply } from "#core/channels/telegram/reply.js";
66
58
 
67
59
  // ---------- media sending helpers (re-exports) ------------------------------
68
- import { sendPhoto, sendVoice, sendDocument, sendAudio, downloadTelegramFile, API_BASE } from "#core/channels/telegram/media.js";
60
+ import { sendPhoto, sendVoice, sendDocument, sendAudio, API_BASE } from "#core/channels/telegram/media.js";
69
61
  export { sendPhoto, sendVoice, sendDocument, sendAudio };
70
62
 
71
63
  // ---------- per-channel poller ----------------------------------------------
@@ -328,9 +320,9 @@ class ChannelPoller {
328
320
  }
329
321
 
330
322
  // Run a follow-up super-agent turn with the compiled answers as the user
331
- // prompt. Mirrors the post-runSuperAgent reply path in _handleUpdate but
332
- // skipped of the photo/audio/reset preamble. Re-enters the ask flow if the
333
- // model decides to ask again.
323
+ // prompt. Shares the exact reply path as a normal inbound turn (core/channels/
324
+ // telegram/reply.js); only the photo/audio/reset preamble is skipped.
325
+ // Re-enters the ask flow if the model decides to ask again.
334
326
  async _runResumedTurn(ctx) {
335
327
  const { chat_id, compiled, target, relationshipBlock, allowedTools, author, agentDisplay, update_id, sender, authorId } = ctx;
336
328
  if (!chat_id) return;
@@ -359,25 +351,32 @@ class ChannelPoller {
359
351
  max_age_hours: 24,
360
352
  });
361
353
 
354
+ // Drive the resume through the SAME shared reply path as a normal inbound
355
+ // turn (see core/channels/telegram/reply.js): streaming, the autonomy budget
356
+ // (maxIters), the never-silent floor, localized errors and rich channelMeta.
357
+ // This used to be a hand-rolled copy that silently lagged behind the main
358
+ // path — now there's one source of truth.
359
+ const { onEvent, state } = buildStreamHandler(this, { chat_id, update_id, agentDisplay });
362
360
  const stopTyping = this._startTyping(chat_id);
361
+ let replyText;
362
+ let replyAuthor;
363
+ let saUsage = null;
363
364
  try {
364
- const sa = await runSuperAgent({
365
- globalConfig: this.globalConfig,
366
- projects: this.projects,
367
- plugins: this.plugins,
368
- registries: this.registries,
365
+ const sa = await runTelegramSuperAgent(this, {
366
+ chat_id,
369
367
  prompt: compiled,
370
368
  previousMessages,
371
- channel: CHANNELS.TELEGRAM,
369
+ target,
370
+ author,
372
371
  relationshipBlock,
373
372
  allowedTools,
374
- channelMeta: { channel: CHANNELS.TELEGRAM, chat_id, author, route_to_agent: this.channel.route_to_agent },
373
+ onEvent,
375
374
  });
376
- stopTyping();
377
375
 
378
376
  // Did the model ask again? Restart the flow instead of replying.
379
377
  const followupAsk = askFlow.extractAskQuestionsFromTrace(sa.trace);
380
378
  if (followupAsk) {
379
+ stopTyping();
381
380
  await this._startAskFlow({
382
381
  chat_id,
383
382
  projectId: target?.id,
@@ -393,36 +392,29 @@ class ChannelPoller {
393
392
  });
394
393
  return;
395
394
  }
396
-
397
- const replyText = sa.text ? stripThinking(sa.text).trim() : "";
398
- if (replyText) {
399
- await this._send({ chat_id, text: replyText });
400
- appendGlobalMessage({
401
- channel: CHANNELS.TELEGRAM,
402
- direction: "out",
403
- type: "agent",
404
- actor_id: SUPERAGENT_ACTOR_ID,
405
- actor_kind: "superagent",
406
- agent_slug: SUPERAGENT_ACTOR_ID,
407
- author: sa.name || agentDisplay,
408
- body: replyText,
409
- meta: {
410
- chat_id,
411
- tg_channel: this.channel.name,
412
- in_reply_to: update_id,
413
- final: true,
414
- ask_resume: true,
415
- ...(sa.usage ? { usage: sa.usage } : {}),
416
- },
417
- });
418
- }
395
+ replyText = sa.text;
396
+ replyAuthor = sa.name || agentDisplay;
397
+ saUsage = sa.usage;
419
398
  } catch (e) {
420
- stopTyping();
421
399
  this.log(`telegram[${this.channel.name}] ask resume failed: ${e.message}`);
422
- try {
423
- await this._send({ chat_id, text: `⚠️ Error procesando tus respuestas (${e.message}).` });
424
- } catch { /* best-effort */ }
400
+ replyText = telegramErrorText(this, e);
401
+ replyAuthor = agentDisplay;
425
402
  }
403
+
404
+ stopTyping();
405
+ await sendFinalReply(this, {
406
+ chat_id,
407
+ update_id,
408
+ replyText,
409
+ replyAuthor,
410
+ replyActorId: SUPERAGENT_ACTOR_ID,
411
+ replyKind: "superagent",
412
+ saUsage,
413
+ streamedCount: state.streamedCount,
414
+ lastStreamedText: state.lastStreamedText,
415
+ agentDisplay,
416
+ extraMeta: { ask_resume: true },
417
+ });
426
418
  }
427
419
 
428
420
  // Show "typing..." indicator in the chat. Telegram clears it automatically