npm - @agentprojectcontext/apx - Versions diffs - 1.41.0 → 1.42.1 - Mend

@agentprojectcontext/apx 1.41.0 → 1.42.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19)

package/package.json +1 -1
package/src/core/agent/constants.js +10 -0
package/src/core/agent/run-agent.js +36 -18
package/src/core/channels/telegram/dispatch.js +60 -310
package/src/core/channels/telegram/helpers.js +28 -1
package/src/core/channels/telegram/inbound/audio.js +82 -0
package/src/core/channels/telegram/inbound/photo.js +63 -0
package/src/core/channels/telegram/reply.js +204 -0
package/src/core/config/index.js +5 -0
package/src/core/i18n/en.js +4 -0
package/src/core/i18n/es.js +4 -0
package/src/core/i18n/pt.js +4 -0
package/src/host/daemon/plugins/telegram/index.js +45 -53
package/src/interfaces/desktop/renderer.js +43 -41
package/src/interfaces/desktop/style.css +15 -6
package/src/interfaces/web/dist/assets/{index-DW7j3cXB.js → index-BReF4_xV.js} +21 -21
package/src/interfaces/web/dist/assets/{index-DW7j3cXB.js.map → index-BReF4_xV.js.map} +1 -1
package/src/interfaces/web/dist/index.html +1 -1
package/src/interfaces/web/package-lock.json +3 -3

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@agentprojectcontext/apx",
-  "version": "1.41.0",
+  "version": "1.42.1",
   "description": "APX — unified CLI + daemon for the Agent Project Context (APC) standard.",
   "publishConfig": {
     "access": "public"

package/src/core/agent/constants.js CHANGED

@@ -5,6 +5,16 @@
 // Coding surfaces (web Code / terminal Build) raise this via maxIters and use
 // the finish-tool completionContract instead.
 export const MAX_TOOL_ITERS = 10;
+// Telegram is the "do real work for me" conversational surface (the super-agent
+// Roby): it needs to chain explore→edit→verify→close autonomously, not stop
+// after ~9 actions and ask "want me to continue?". A budget of 10 left only one
+// usable action step before the reserved wrap-up, so multi-step tasks routinely
+// cut off mid-job. We give it a real autonomy budget (mirroring the TUI Code
+// surface's maxIters:40) while keeping it below the coding surfaces. The
+// reserved final-step wrap-up still applies, but now only fires when a task
+// genuinely exhausts this budget — a rare safety floor, not the default close.
+// Overridable per-deployment via config.super_agent.telegram_max_iters.
+export const TELEGRAM_TOOL_ITERS = 24;
 export const ACK_ONLY_TOOLS = new Set(["send_telegram"]);
 export const MAX_CONSECUTIVE_ACKS = 2;
 // Tools whose semantics REQUIRE handing control back to the user. After the

package/src/core/agent/run-agent.js CHANGED

@@ -84,20 +84,32 @@ export const FINISH_TOOL_SCHEMA = {
   },
 };
-// Behavioral nudge appended to the system prompt for the ONE tool-free wrap-up
-// step at the end of a turn (see the loop's `isFinalWrapUp`). This shapes
-// BEHAVIOR only — it never dictates wording or supplies a canned/templated
-// sentence. The reply the user sees is 100% model-authored and varies with
-// what the model actually did this turn. We do NOT mention any "tool limit":
-// the model just speaks from where it is. Critically it must not claim work it
-// didn't do (weak models otherwise fabricate "all done").
-const WRAPUP_NUDGE =
-  "\n\n[Internal note — last step of this turn. No more tools will run now. " +
-  "Reply in plain prose, in the user's language, from your own context: briefly " +
-  "say what you actually accomplished so far (check the tool results above — do " +
-  "NOT claim anything you didn't do), and if work is still pending, name what's " +
-  "left and ask the user whether you should continue. Do not mention limits, " +
-  "steps, or iterations — just talk naturally.]";
+// In-band signal injected as a CONVERSATION turn (not a system suffix) for the
+// ONE tool-free wrap-up step at the end of a turn (see the loop's
+// `isFinalWrapUp`). Delivering it through the message channel — the way a tool
+// result arrives — makes weak models reliably author a reply instead of
+// returning empty, because they always answer the latest turn. It shapes
+// BEHAVIOR only: it never dictates wording or supplies a canned sentence. The
+// reply the user sees is 100% model-authored and varies with what the model
+// actually did this turn. Critically it must not claim work it didn't do (weak
+// models otherwise fabricate "all done").
+//
+// Unlike a hard "iteration limit" message, it asks the model to surface the
+// situation NATURALLY ("this is taking more steps than I expected") plus a
+// concrete recap of what it found and did NOT find — so the closing reads like
+// a human status update, never robotic system jargon.
+const WRAPUP_SIGNAL =
+  "[Internal turn note — this is NOT from the user. You've taken several tool " +
+  "steps this turn and the task isn't finished; no more tools will run now. " +
+  "Write the user ONE short, natural closing message, in their language, " +
+  "entirely in your own words:\n" +
+  "- Concretely recap what you actually did and what you found so far — and be " +
+  "honest about what you did NOT find or couldn't resolve yet. Read the tool " +
+  "results above; do not claim anything you didn't do.\n" +
+  "- Mention plainly that this is taking more steps than expected and isn't done.\n" +
+  "- Ask whether they want you to keep going.\n" +
+  "Talk like a person giving a quick status update. Do NOT emit a tool call, " +
+  "JSON, or system jargon like \"iteration\" or \"limit\".]";
 /**
  * Shared tool-calling agent loop used by super-agent and future surfaces.
@@ -301,8 +313,8 @@ export async function runAgent({
     // Rather than cut off silently mid-tool-call, we run ONE tool-free step so
     // the model writes a natural closing in its OWN words — what it did, what's
     // left, and (if anything remains) whether to continue. We change only the
-    // STRUCTURE (no tools this step) + a behavioral nudge; the wording is
-    // entirely the model's. Coding surfaces keep their finish-tool flow, so
+    // STRUCTURE (no tools this step) + an in-band directive turn (WRAPUP_SIGNAL);
+    // the wording is entirely the model's. Coding surfaces keep their finish-tool flow, so
     // this never applies under completionContract.
     const isFinalWrapUp =
       !useContract && effectiveSchemas.length > 0 && iter === maxIters - 1;
@@ -322,8 +334,14 @@ export async function runAgent({
     let result;
     try {
       result = await tryCallEngine({
-        system: isFinalWrapUp ? baseSystem + WRAPUP_NUDGE : baseSystem,
-        messages: conversation,
+        system: baseSystem,
+        // Wrap-up: deliver the "you're out of steps, summarize + ask" directive
+        // as the latest CONVERSATION turn so the model treats it like any other
+        // turn it must answer — far more reliable than a system suffix on weak
+        // models. Ephemeral: built fresh here, never persisted to history.
+        messages: isFinalWrapUp
+          ? [...conversation, { role: "user", content: WRAPUP_SIGNAL }]
+          : conversation,
         config: globalConfig,
         // On the wrap-up step we withhold tools entirely so the model must
         // answer in prose — same as a real engine called with tools omitted.

package/src/core/channels/telegram/dispatch.js CHANGED

@@ -11,25 +11,22 @@
 // `appendGlobalMessage`, `CHANNELS`, `nowIso`, etc. Top-level imports here
 // keep that scope intact — earlier splits forgot them and the bug only
 // surfaced when a real telegram update arrived (ReferenceError at runtime).
-import path from "node:path";
 import { callEngine } from "#core/engines/index.js";
-import { runSuperAgent, isSuperAgentEnabled } from "#core/agent/super-agent.js";
-import { stripThinking } from "#core/util/thinking.js";
+import { isSuperAgentEnabled } from "#core/agent/super-agent.js";
 import { getRecentTelegramTurnsFromFs, appendGlobalMessage } from "#core/stores/messages.js";
 import { compactChannelIfNeeded } from "#core/memory/index.js";
 import { readAgents } from "#core/apc/parser.js";
 import { buildAgentSystem } from "#core/agent/build-agent-system.js";
-import { transcribe as transcribeAudioFile } from "#core/voice/transcription.js";
 import { resolveAgentName, SUPERAGENT_ACTOR_ID } from "#core/identity/index.js";
 import { registerSender, resolveAllowedTools } from "#core/identity/telegram.js";
 import { buildRelationshipBlock } from "#core/agent/index.js";
-import { getConfirmationStore as getConfirmStore } from "#core/confirmation/pending-store.js";
 import { CHANNELS } from "#core/constants/channels.js";
 import { tryResolveSkillCommand } from "#core/agent/skills/trigger.js";
-import { createTelegramConfirmAdapter } from "#core/confirmation/adapters/telegram.js";
 import * as askFlow from "./ask.js";
-import { buildTelegramMeta, resolveBotToken, sleep } from "./helpers.js";
-import { sendPhoto, sendVoice, sendDocument, sendAudio, downloadTelegramFile, API_BASE } from "./media.js";
+import { telegramAuthorLabel } from "./helpers.js";
+import { handleIncomingPhoto } from "./inbound/photo.js";
+import { handleIncomingAudio } from "./inbound/audio.js";
+import { buildStreamHandler, runTelegramSuperAgent, telegramErrorText, sendFinalReply } from "./reply.js";
 import { t, resolveLang } from "#core/i18n/index.js";
 const nowIso = () => new Date().toISOString().replace(/\.\d{3}Z$/, "Z");
@@ -50,10 +47,7 @@ export async function handleUpdate(self, u) {
       self.log(`telegram[${self.channel.name}] update ${u.update_id} ignored — no target project`);
       return;
     }
-    const author =
-      msg.from?.username
-        ? "@" + msg.from.username
-        : `${msg.from?.first_name || ""} ${msg.from?.last_name || ""}`.trim() || "unknown";
+    const author = telegramAuthorLabel(msg.from);
     const chat_id = msg.chat?.id;
     // Resolve WHO is writing (owner / known contact / guest), keyed by the
@@ -85,108 +79,18 @@ export async function handleUpdate(self, u) {
     let text = msg.text || msg.caption || "";
-    // ── Incoming photo handling ───────────────────────────────────────────
+    // ── Incoming media ────────────────────────────────────────────────────
+    // Photo and voice/audio each download + archive the file and rewrite `text`
+    // so the rest of the pipeline treats them like a typed message. The handlers
+    // live in ./inbound/ to keep this dispatcher focused on routing. Photos have
+    // no vision yet, so the handler injects an `[image]` marker (never silent);
+    // audio injects its `[audio]` transcript.
     if (msg.photo && msg.photo.length > 0) {
-      // Telegram sends multiple sizes; pick the largest
-      const bestPhoto = msg.photo.reduce((a, b) => (b.file_size > a.file_size ? b : a));
-      const token = resolveBotToken(self.channel);
-      const mediaDir = path.join(APX_HOME, "media");
-      fs.mkdirSync(mediaDir, { recursive: true });
-      try {
-        const localPath = await downloadTelegramFile(token, bestPhoto.file_id, mediaDir);
-        self.log(`telegram[${self.channel.name}] photo saved: ${localPath}`);
-        appendGlobalMessage({
-          channel: CHANNELS.TELEGRAM,
-          direction: "in",
-          type: "photo",
-          actor_id: msg.from?.id ? String(msg.from.id) : author,
-          external_id: String(u.update_id),
-          author,
-          body: text || "[photo]",
-          meta: {
-            chat_id,
-            user_id: msg.from?.id || null,
-            message_id: msg.message_id,
-            tg_channel: self.channel.name,
-            local_path: localPath,
-            file_id: bestPhoto.file_id,
-            width: bestPhoto.width,
-            height: bestPhoto.height,
-          },
-        });
-      } catch (e) {
-        self.log(`telegram[${self.channel.name}] photo download failed: ${e.message}`);
-      }
-      // If there's a caption, continue to handle it as text; otherwise return
-      if (!text) return;
+      ({ text } = await handleIncomingPhoto(self, { msg, u, author, chat_id, text }));
     }
-    // ── Incoming voice / audio handling ──────────────────────────────────
-    // Telegram sends `voice` for the press-and-hold mic recording (.oga/opus)
-    // and `audio` for uploaded audio files (mp3/m4a/etc.). Either way we
-    // download, run it through Whisper, prefix the result with `[audio] `
-    // and let the rest of the message flow handle it as plain text.
     const incomingAudio = msg.voice || msg.audio;
     if (incomingAudio && incomingAudio.file_id) {
-      const token = resolveBotToken(self.channel);
-      const mediaDir = path.join(APX_HOME, "media");
-      fs.mkdirSync(mediaDir, { recursive: true });
-      // Show "typing…" right away — download + transcription is the slow part of
-      // a voice message, and the reply-path typing (below) only starts after it,
-      // so without this the chat sits silent for seconds with no feedback.
-      const stopVoiceTyping = self._startTyping(chat_id);
-      let localPath = null;
-      let transcript = "";
-      let transcribeError = null;
-      let transcribeBackend = null;
-      try {
-        localPath = await downloadTelegramFile(token, incomingAudio.file_id, mediaDir);
-        self.log(`telegram[${self.channel.name}] audio saved: ${localPath}`);
-      } catch (e) {
-        self.log(`telegram[${self.channel.name}] audio download failed: ${e.message}`);
-      }
-      if (localPath) {
-        try {
-          const result = await transcribeAudioFile(localPath);
-          transcript = result.text || "";
-          transcribeBackend = result.backend;
-          self.log(`telegram[${self.channel.name}] audio transcribed via ${transcribeBackend} (${transcript.length} chars, lang=${result.language || "?"})`);
-        } catch (e) {
-          transcribeError = e.message;
-          self.log(`telegram[${self.channel.name}] audio transcription failed: ${e.message}`);
-        }
-      }
-      stopVoiceTyping(); // reply-path typing takes over from here
-      const audioBody = transcript
-        ? `[audio] ${transcript}`
-        : `[audio] (transcription unavailable${transcribeError ? ": " + transcribeError : ""})`;
-      appendGlobalMessage({
-        channel: CHANNELS.TELEGRAM,
-        direction: "in",
-        type: "audio",
-        actor_id: msg.from?.id ? String(msg.from.id) : author,
-        external_id: String(u.update_id),
-        author,
-        body: audioBody,
-        meta: {
-          chat_id,
-          user_id: msg.from?.id || null,
-          message_id: msg.message_id,
-          tg_channel: self.channel.name,
-          local_path: localPath,
-          file_id: incomingAudio.file_id,
-          duration: incomingAudio.duration,
-          mime_type: incomingAudio.mime_type,
-          transcription_backend: transcribeBackend,
-          transcription_error: transcribeError,
-        },
-      });
-      // Inject the transcribed text into `text` so the rest of the agent
-      // pipeline treats it identically to a typed message. If there was a
-      // caption alongside the audio, prepend the audio marker to it.
-      text = text ? `${audioBody}\n${text}` : audioBody;
+      ({ text } = await handleIncomingAudio(self, { msg, u, author, chat_id, text, incomingAudio }));
     }
     // If there's a pending ask_questions flow for this chat AND the current
@@ -309,10 +213,14 @@ export async function handleUpdate(self, u) {
     // Start "typing..." indicator. Stops when we send the reply (or fail).
     const stopTyping = self._startTyping(chat_id);
+    // Preset to the super-agent defaults so every exit path (including one where
+    // neither the routed-agent nor the super-agent branch runs) has a valid
+    // actor — the routed-agent / super-agent branches override these on success,
+    // and their catch blocks reset all four together (no partial-overwrite gap).
     let replyText;
     let replyAuthor;
-    let replyActorId;   // stable id: super_agent | agent slug
-    let replyKind;      // actor_kind: superagent | agent
+    let replyActorId = SUPERAGENT_ACTOR_ID;   // stable id: super_agent | agent slug
+    let replyKind = "superagent";             // actor_kind: superagent | agent
     const projectCfg = target.config || self.globalConfig;
     // Display name for the super-agent persona on this channel (from identity.json).
     const agentDisplay = resolveAgentName(self.globalConfig);
@@ -342,7 +250,10 @@ export async function handleUpdate(self, u) {
           replyKind = "agent";
         } catch (e) {
           self.log(`telegram[${self.channel.name}] agent reply failed: ${e.message}`);
-          replyText = `[apx error] ${e.message.slice(0, 200)}`;
+          replyText = t("telegram.error_agent", {
+            lang: resolveLang(self.globalConfig),
+            vars: { error: e.message.slice(0, 200) },
+          });
           replyAuthor = agentDisplay;
           replyActorId = SUPERAGENT_ACTOR_ID;
           replyKind = "superagent";
@@ -354,140 +265,33 @@ export async function handleUpdate(self, u) {
       }
     }
-    // Fallback: super-agent — STREAMED.
-    // Each iteration's assistant text is sent to Telegram as its own message
-    // the moment the model produces it (its running commentary), so the user
-    // sees a real back-and-forth instead of one giant final dump. Tool calls
-    // are logged to the message store — visible via apx log / apx search and
-    // to channels that render tools — but NEVER sent to Telegram; tools are
-    // internal. The conversation saved on disk is the full, real exchange;
-    // Telegram is just the prose-only view of it.
+    // Fallback: super-agent — STREAMED. Each iteration's assistant text is sent
+    // to Telegram as its own message the moment the model produces it; tool
+    // calls are logged but never sent (internal). The streamed turn + its final
+    // send live in ./reply.js so this dispatcher and the ask-flow resume
+    // (_runResumedTurn in the host poller) share ONE reply path — no drift.
     let saUsage = null;
     let streamedCount = 0;
     let lastStreamedText = "";
-    // Telegram shows the user ONLY prose — never the tool calls. On an action
-    // request the model often jumps straight to a tool with no preamble text,
-    // so the user would stare at a silent chat until the final reply. Send one
-    // short localized heads-up the moment real work starts (first tool_start),
-    // but only if the agent didn't already write its own "on it" line.
-    let sentHeadsUp = false;
-    const headsUpPhrase = () => t("telegram.heads_up", { lang: resolveLang(self.globalConfig) });
     if (!replyText && isSuperAgentEnabled(self.globalConfig)) {
-      const onEvent = async (ev) => {
-        try {
-          if (ev.type === "tool_start" && !sentHeadsUp && streamedCount === 0) {
-            sentHeadsUp = true;
-            const heads = headsUpPhrase();
-            await self._send({ chat_id, text: heads });
-            appendGlobalMessage({
-              channel: CHANNELS.TELEGRAM,
-              direction: "out",
-              type: "agent",
-              actor_id: SUPERAGENT_ACTOR_ID,
-              actor_kind: "superagent",
-              agent_slug: SUPERAGENT_ACTOR_ID,
-              author: agentDisplay,
-              body: heads,
-              meta: { chat_id, tg_channel: self.channel.name, in_reply_to: u.update_id, heads_up: true },
-            });
-            return;
-          }
-          if (ev.type === "assistant_text" && ev.text) {
-            const piece = stripThinking(ev.text).trim();
-            if (!piece) return;
-            await self._send({ chat_id, text: piece });
-            lastStreamedText = piece;
-            streamedCount += 1;
-            appendGlobalMessage({
-              channel: CHANNELS.TELEGRAM,
-              direction: "out",
-              type: "agent",
-              actor_id: SUPERAGENT_ACTOR_ID,
-              actor_kind: "superagent",
-              agent_slug: SUPERAGENT_ACTOR_ID,
-              author: agentDisplay,
-              body: piece,
-              meta: {
-                chat_id,
-                tg_channel: self.channel.name,
-                in_reply_to: u.update_id,
-                streamed: true,
-                iteration: ev.iteration,
-              },
-            });
-          } else if (ev.type === "tool_result" && ev.trace) {
-            // Logged for the audit trail / other channels — NOT sent to Telegram.
-            const t = ev.trace;
-            appendGlobalMessage({
-              channel: CHANNELS.TELEGRAM,
-              direction: "out",
-              type: "tool",
-              actor_id: t.tool,
-              actor_kind: "tool",
-              author: agentDisplay,
-              body: `${t.tool}(${JSON.stringify(t.args || {}).slice(0, 200)})`,
-              meta: {
-                chat_id,
-                tg_channel: self.channel.name,
-                in_reply_to: u.update_id,
-                tool: t.tool,
-                args: t.args,
-                result: t.result,
-                iteration: ev.iteration,
-              },
-            });
-          } else if (ev.type === "engine_failed") {
-            // A model in the fallback chain errored; the loop is rotating to
-            // the next one. Log it so a mid-turn provider failure (rate limit,
-            // tool-grammar 400, …) is diagnosable instead of invisible.
-            self.log(
-              `telegram[${self.channel.name}] engine_failed: ${ev.model || "?"} (${ev.reason || "?"}) → ${ev.retry_with || "end of chain"}`,
-            );
-          } else if (ev.type === "model_routed" || ev.type === "model_retry") {
-            self.log(
-              `telegram[${self.channel.name}] ${ev.type}: model=${ev.model || "?"}${ev.reason ? ` reason=${ev.reason}` : ""}${ev.from_fallback ? " (fallback)" : ""}`,
-            );
-          }
-        } catch (e) {
-          // A failed intermediate send must not abort the whole run.
-          self.log(`telegram[${self.channel.name}] stream event failed: ${e.message}`);
-        }
-      };
-      const confirmAdapter = createTelegramConfirmAdapter({
-        token: resolveBotToken(self.channel),
-        chatId: chat_id,
-        pendingStore: getConfirmStore(),
-      });
+      const { onEvent, state } = buildStreamHandler(self, { chat_id, update_id: u.update_id, agentDisplay });
-      // `/slug ...` shortcut: load the matching skill body into contextNote
-      // and strip the prefix from the user prompt before sending to the loop.
+      // `/slug ...` shortcut: load the matching skill body into contextNote and
+      // strip the prefix from the user prompt before sending to the loop.
       const slashed = tryResolveSkillCommand(text, { projectPath: target?.path });
-      const slashedPrompt = slashed.handled ? slashed.prompt : text;
-      const slashedContextNote = slashed.handled ? slashed.contextNote : "";
       try {
-        const sa = await runSuperAgent({
-          globalConfig: self.globalConfig,
-          projects: self.projects,
-          plugins: self.plugins,
-          registries: self.registries,
-          prompt: slashedPrompt,
+        const sa = await runTelegramSuperAgent(self, {
+          chat_id,
+          prompt: slashed.handled ? slashed.prompt : text,
           previousMessages,
-          channel: CHANNELS.TELEGRAM,
+          target,
+          author,
           relationshipBlock,
           allowedTools,
-          contextNote: slashedContextNote || undefined,
-          channelMeta: buildTelegramMeta({
-            channelName: self.channel.name,
-            author,
-            chatId: chat_id,
-            target,
-            routeToAgent: self.channel.route_to_agent,
-          }),
+          contextNote: slashed.handled ? slashed.contextNote : "",
           signal: abortCtrl.signal,
           onEvent,
-          requestConfirmation: confirmAdapter.requestConfirmation,
         });
         replyText = sa.text;
         replyAuthor = sa.name || agentDisplay;
@@ -496,13 +300,13 @@ export async function handleUpdate(self, u) {
         saUsage = sa.usage;
         // ── ask_questions integration ────────────────────────────────────
-        // If the super-agent ended this turn by calling ask_questions, hand
-        // off to the inline-keyboard flow instead of sending the bare
-        // assistant text. The flow keeps state per chat_id and re-runs the
-        // super-agent once every answer is collected.
+        // If the super-agent ended this turn by calling ask_questions, hand off
+        // to the inline-keyboard flow instead of sending the bare assistant
+        // text. The flow keeps state per chat_id and re-runs the super-agent
+        // (via _runResumedTurn) once every answer is collected.
         const askQuestions = askFlow.extractAskQuestionsFromTrace(sa.trace);
         if (askQuestions && chat_id) {
-          if (chat_id) self.activeRequests.delete(chat_id);
+          self.activeRequests.delete(chat_id);
           stopTyping();
           try {
             await self._startAskFlow({
@@ -523,21 +327,20 @@ export async function handleUpdate(self, u) {
           }
           return; // The reply for this turn IS the ask flow.
         }
+        streamedCount = state.streamedCount;
+        lastStreamedText = state.lastStreamedText;
       } catch (e) {
         if (abortCtrl.signal.aborted) {
           // A newer message superseded this one. Whatever streamed so far is
-          // already sent + logged; the newer message's run continues the
-          // thread from that history.
+          // already sent + logged; the newer message's run continues the thread.
           self.log(`telegram[${self.channel.name}] request aborted for chat ${chat_id}`);
           if (chat_id) self.activeRequests.delete(chat_id);
           stopTyping();
           return;
         }
         self.log(`telegram[${self.channel.name}] super-agent failed: ${e.message}`);
-        // Surface the failure to the user instead of silently dropping the
-        // turn — otherwise from the chat side it looks like the bot ignored
-        // the message. Keep the message short and non-leaking.
-        replyText = `⚠️ Could not generate a reply right now (${e.message || "internal error"}).`;
+        // Surface the failure to the user instead of silently dropping the turn.
+        replyText = telegramErrorText(self, e);
         replyAuthor = agentDisplay;
         replyActorId = SUPERAGENT_ACTOR_ID;
         replyKind = "superagent";
@@ -545,71 +348,18 @@ export async function handleUpdate(self, u) {
     }
     if (chat_id) self.activeRequests.delete(chat_id);
-    // Final answer. The intermediate prose was already streamed; only send the
-    // final text if it's non-empty AND not a duplicate of the last streamed
-    // piece (the loop can end on an iteration whose text was already sent).
-    // If nothing streamed and there's no final text, send a minimal ack so the
-    // turn isn't silently empty.
-    const finalClean = replyText ? stripThinking(replyText).trim() : "";
-    let toSend = "";
-    if (finalClean && finalClean !== lastStreamedText) {
-      toSend = finalClean;
-    } else if (!finalClean) {
-      // Never end a turn on silence. The loop's tool-free wrap-up normally
-      // fills finalClean with a model-authored closing (handled above); this is
-      // the last-resort floor for the rare case it still came back empty. A
-      // pure chit-chat turn that did nothing gets the short ack; a turn that
-      // streamed/acted but produced no closing gets a neutral "continue?" that
-      // does NOT claim completion.
-      toSend = streamedCount === 0
-        ? t("telegram.fallback_listo", { lang: resolveLang(self.globalConfig) })
-        : t("telegram.fallback_continue", { lang: resolveLang(self.globalConfig) });
-    }
     stopTyping();
-    if (!toSend) return; // everything was already streamed — nothing left to send
-    try {
-      await self._send({ chat_id, text: toSend });
-      const meta = {
-        chat_id,
-        tg_channel: self.channel.name,
-        in_reply_to: u.update_id,
-        final: true,
-      };
-      if (replyText && stripThinking(replyText) !== replyText) meta.thinking_stripped = true;
-      if (saUsage) meta.usage = saUsage;
-      appendGlobalMessage({
-        channel: CHANNELS.TELEGRAM,
-        direction: "out",
-        type: "agent",
-        actor_id: replyActorId || SUPERAGENT_ACTOR_ID,
-        actor_kind: replyKind || "superagent",
-        agent_slug: replyActorId || SUPERAGENT_ACTOR_ID,
-        author: replyAuthor || agentDisplay,
-        body: toSend,
-        meta,
-      });
-    } catch (e) {
-      self.log(`telegram[${self.channel.name}] send-back error: ${e.message}`);
-      appendGlobalMessage({
-        channel: CHANNELS.TELEGRAM,
-        direction: "out",
-        type: "agent",
-        actor_id: replyActorId || SUPERAGENT_ACTOR_ID,
-        actor_kind: replyKind || "superagent",
-        agent_slug: replyActorId || SUPERAGENT_ACTOR_ID,
-        author: replyAuthor || agentDisplay,
-        body: `[send_failed] ${toSend}`,
-        meta: {
-          chat_id,
-          tg_channel: self.channel.name,
-          in_reply_to: u.update_id,
-          send_error: e.message,
-          ...(saUsage ? { usage: saUsage } : {}),
-        },
-      });
-    }
+    await sendFinalReply(self, {
+      chat_id,
+      update_id: u.update_id,
+      replyText,
+      replyAuthor,
+      replyActorId,
+      replyKind,
+      saUsage,
+      streamedCount,
+      lastStreamedText,
+      agentDisplay,
+    });
   }

package/src/core/channels/telegram/helpers.js CHANGED

@@ -2,10 +2,37 @@
 // big poller class stays focused on lifecycle + message dispatch. Each
 // function is pure (no `this`) — instances import them and call as needed.
 import fs from "node:fs";
-import { TELEGRAM_STATE_PATH } from "#core/config/index.js";
+import path from "node:path";
+import { TELEGRAM_STATE_PATH, APX_HOME } from "#core/config/index.js";
 const nowIso = () => new Date().toISOString().replace(/\.\d{3}Z$/, "Z");
+/**
+ * Display label for a Telegram sender used as the `author` / actor fallback:
+ *   @username  →  "First Last"  →  "unknown".
+ * Single source of truth so every inbound branch (text/photo/audio) and the
+ * message store agree. NOTE: this is the raw handle; the *resolved contact
+ * name* (which prefers a saved roster name) is `resolveSender().name` in
+ * core/identity/telegram.js — different purpose, don't conflate them.
+ */
+export function telegramAuthorLabel(from) {
+  if (from?.username) return "@" + from.username;
+  const full = `${from?.first_name || ""} ${from?.last_name || ""}`.trim();
+  return full || "unknown";
+}
+/**
+ * Ensure and return the shared media-download directory (~/.apx/media).
+ * Owns BOTH the path and the mkdir so callers never touch `fs`/`APX_HOME`
+ * directly — the inbound dispatcher used to inline this and a module split
+ * dropped its `fs`/`APX_HOME` imports, silently breaking every photo/voice.
+ */
+export function telegramMediaDir() {
+  const dir = path.join(APX_HOME, "media");
+  fs.mkdirSync(dir, { recursive: true });
+  return dir;
+}
 /**
  * Build the channelMeta block the super-agent loop receives for a Telegram
  * turn. The prompt template at src/core/agent/prompts/channels/telegram.md