npm - @agentprojectcontext/apx - Versions diffs - 1.10.4 → 1.11.0 - Mend

@agentprojectcontext/apx 1.10.4 → 1.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

package/package.json +5 -1
package/src/cli/commands/search.js +62 -0
package/src/cli/index.js +21 -0
package/src/core/agent-system.js +15 -0
package/src/daemon/api.js +229 -0
package/src/daemon/engines/anthropic.js +19 -1
package/src/daemon/engines/index.js +2 -1
package/src/daemon/engines/openai.js +22 -2
package/src/daemon/plugins/telegram.js +248 -2
package/src/daemon/super-agent.js +42 -1
package/src/daemon/tools/browser.js +424 -0
package/src/daemon/tools/fetch.js +138 -0
package/src/daemon/tools/glob.js +165 -0
package/src/daemon/tools/grep.js +218 -0
package/src/daemon/tools/registry.js +729 -0
package/src/daemon/tools/search.js +290 -0

package/src/daemon/plugins/telegram.js CHANGED Viewed

@@ -28,7 +28,8 @@
 //   }
 import fs from "node:fs";
-import { TELEGRAM_STATE_PATH } from "../../core/config.js";
+import path from "node:path";
+import { TELEGRAM_STATE_PATH, APX_HOME } from "../../core/config.js";
 import { callEngine } from "../engines/index.js";
 import { runSuperAgent, isSuperAgentEnabled } from "../super-agent.js";
 import { stripThinking } from "../thinking.js";
@@ -39,6 +40,119 @@ import { buildAgentSystem } from "../../core/agent-system.js";
 const API_BASE = "https://api.telegram.org";
 const nowIso = () => new Date().toISOString().replace(/\.\d{3}Z$/, "Z");
+// ---------- media sending helpers -------------------------------------------
+/**
+ * Send a photo to a Telegram chat via the Bot API `sendPhoto` method.
+ *
+ * A string that begins with `http://` or `https://` is passed to Telegram as
+ * a URL. Any other string is treated as a file path and read synchronously
+ * from disk; a Buffer is uploaded as-is under the name "photo.jpg".
+ *
+ * @param {string} token     Bot token
+ * @param {string|number} chatId  Telegram chat_id
+ * @param {string|Buffer} photo   Public http(s) URL, file path, OR Buffer of image data
+ * @param {object} [opts]
+ * @param {string} [opts.caption]
+ * @param {string} [opts.parse_mode]  "HTML" | "Markdown" | "MarkdownV2"
+ * @returns {Promise<object>} The `result` field of Telegram's JSON response
+ * @throws {Error} If the file cannot be read, or the response JSON has a falsy `ok`
+ */
+export async function sendPhoto(token, chatId, photo, { caption, parse_mode } = {}) {
+  const url = `${API_BASE}/bot${token}/sendPhoto`;
+  const form = new FormData();
+  form.append("chat_id", String(chatId));
+  if (caption) form.append("caption", caption);
+  if (parse_mode) form.append("parse_mode", parse_mode);
+  // Require a real scheme: a bare startsWith("http") also swallows local paths
+  // such as "http_cache/shot.png" and would hand them to Telegram as a URL.
+  if (typeof photo === "string" && /^https?:\/\//i.test(photo)) {
+    // Public URL — send as string
+    form.append("photo", photo);
+  } else {
+    // Local file path or Buffer
+    const buf = Buffer.isBuffer(photo) ? photo : fs.readFileSync(photo);
+    const name = typeof photo === "string" ? path.basename(photo) : "photo.jpg";
+    // Compare case-insensitively so "SHOT.PNG" is not labelled as JPEG.
+    const type = name.toLowerCase().endsWith(".png") ? "image/png" : "image/jpeg";
+    form.append("photo", new Blob([buf], { type }), name);
+  }
+  const res = await fetch(url, { method: "POST", body: form });
+  const json = await res.json();
+  if (!json.ok) throw new Error(`sendPhoto failed: ${json.description || res.status}`);
+  return json.result;
+}
+/**
+ * Send a voice message (OGG/Opus preferred by Telegram).
+ * The payload is always uploaded as multipart form data labelled "audio/ogg";
+ * a string `audio` is read synchronously from disk.
+ * @param {string} token  Bot token
+ * @param {string|number} chatId  Telegram chat_id (stringified before sending)
+ * @param {string|Buffer} audio  Path or Buffer
+ * @param {object} [opts]
+ * @param {string} [opts.caption]
+ * @param {number} [opts.duration]  Left out of the request when falsy (including 0)
+ * @returns {Promise<object>} The `result` field of Telegram's JSON response
+ * @throws {Error} If the file cannot be read, or the response JSON has a falsy `ok`
+ */
+export async function sendVoice(token, chatId, audio, { caption, duration } = {}) {
+  const url = `${API_BASE}/bot${token}/sendVoice`;
+  const form = new FormData();
+  form.append("chat_id", String(chatId));
+  if (caption) form.append("caption", caption);
+  if (duration) form.append("duration", String(duration));
+  const buf = Buffer.isBuffer(audio) ? audio : fs.readFileSync(audio);
+  const name = typeof audio === "string" ? path.basename(audio) : "voice.ogg"; // Buffers get a placeholder filename
+  const blob = new Blob([buf], { type: "audio/ogg" }); // type is fixed, not derived from the file name
+  form.append("voice", blob, name);
+  const res = await fetch(url, { method: "POST", body: form });
+  const json = await res.json();
+  if (!json.ok) throw new Error(`sendVoice failed: ${json.description || res.status}`);
+  return json.result;
+}
+/**
+ * Send an audio file (MP3, M4A, etc — shown in Telegram music player).
+ * The payload is always uploaded as multipart form data labelled "audio/mpeg";
+ * a string `audio` is read synchronously from disk.
+ * NOTE(review): the MIME type is fixed even for non-MP3 input such as M4A —
+ * confirm Telegram accepts that label for those files.
+ * @param {string} token  Bot token
+ * @param {string|number} chatId  Telegram chat_id (stringified before sending)
+ * @param {string|Buffer} audio  Path or Buffer
+ * @param {object} [opts]
+ * @param {string} [opts.caption]
+ * @param {string} [opts.title]
+ * @param {string} [opts.performer]
+ * @returns {Promise<object>} The `result` field of Telegram's JSON response
+ * @throws {Error} If the file cannot be read, or the response JSON has a falsy `ok`
+ */
+export async function sendAudio(token, chatId, audio, { caption, title, performer } = {}) {
+  const url = `${API_BASE}/bot${token}/sendAudio`;
+  const form = new FormData();
+  form.append("chat_id", String(chatId));
+  if (caption) form.append("caption", caption);
+  if (title) form.append("title", title);
+  if (performer) form.append("performer", performer);
+  const buf = Buffer.isBuffer(audio) ? audio : fs.readFileSync(audio);
+  const name = typeof audio === "string" ? path.basename(audio) : "audio.mp3"; // Buffers get a placeholder filename
+  const blob = new Blob([buf], { type: "audio/mpeg" }); // type is fixed, not derived from the file name
+  form.append("audio", blob, name);
+  const res = await fetch(url, { method: "POST", body: form });
+  const json = await res.json();
+  if (!json.ok) throw new Error(`sendAudio failed: ${json.description || res.status}`);
+  return json.result;
+}
+/**
+ * Download a file from Telegram servers.
+ *
+ * Makes two requests: `getFile` to resolve the server-side path, then a GET on
+ * the file endpoint. The result is written synchronously into `destDir`, which
+ * is not created here and must already exist.
+ *
+ * @param {string} token    Bot token
+ * @param {string} fileId   Telegram file_id
+ * @param {string} destDir  Directory the file is written into
+ * @returns {Promise<string>} The local file path where it was saved
+ * @throws {Error} If getFile fails or returns no file_path, or the download is not OK
+ */
+async function downloadTelegramFile(token, fileId, destDir) {
+  // Step 1: get file path from Telegram. The id is escaped so it can neither
+  // terminate nor extend the query string.
+  const infoRes = await fetch(`${API_BASE}/bot${token}/getFile?file_id=${encodeURIComponent(fileId)}`);
+  const infoJson = await infoRes.json();
+  if (!infoJson.ok) throw new Error(`getFile failed: ${infoJson.description}`);
+  const filePath = infoJson.result?.file_path; // e.g. "photos/file_123.jpg"
+  // Fail with a clear message rather than a TypeError from path.extname(undefined).
+  if (!filePath) throw new Error("getFile failed: response has no file_path");
+  const ext = path.extname(filePath) || ".jpg";
+  // Tail of the id plus a timestamp keeps names short and distinct per download.
+  const fileName = `tg_${fileId.slice(-8)}_${Date.now()}${ext}`;
+  const localPath = path.join(destDir, fileName);
+  // Step 2: download
+  const dlRes = await fetch(`${API_BASE}/file/bot${token}/${filePath}`);
+  if (!dlRes.ok) throw new Error(`download failed: ${dlRes.status}`);
+  const buf = Buffer.from(await dlRes.arrayBuffer());
+  fs.writeFileSync(localPath, buf);
+  return localPath;
+}
 // ---------- shared state ----------------------------------------------------
 function loadState() {
@@ -237,7 +351,43 @@ class ChannelPoller {
         ? "@" + msg.from.username
         : `${msg.from?.first_name || ""} ${msg.from?.last_name || ""}`.trim() || "unknown";
     const chat_id = msg.chat?.id;
-    const text = msg.text || "";
+    const text = msg.text || msg.caption || "";
+    // ── Incoming photo handling ───────────────────────────────────────────
+    if (msg.photo && msg.photo.length > 0) {
+      // Telegram sends multiple sizes; pick the largest
+      const bestPhoto = msg.photo.reduce((a, b) => (b.file_size > a.file_size ? b : a));
+      const token = resolveBotToken(this.channel);
+      const mediaDir = path.join(APX_HOME, "media");
+      fs.mkdirSync(mediaDir, { recursive: true });
+      try {
+        const localPath = await downloadTelegramFile(token, bestPhoto.file_id, mediaDir);
+        this.log(`telegram[${this.channel.name}] photo saved: ${localPath}`);
+        appendGlobalMessage({
+          channel: "telegram",
+          direction: "in",
+          type: "photo",
+          actor_id: msg.from?.id ? String(msg.from.id) : author,
+          external_id: String(u.update_id),
+          author,
+          body: text || "[photo]",
+          meta: {
+            chat_id,
+            user_id: msg.from?.id || null,
+            message_id: msg.message_id,
+            tg_channel: this.channel.name,
+            local_path: localPath,
+            file_id: bestPhoto.file_id,
+            width: bestPhoto.width,
+            height: bestPhoto.height,
+          },
+        });
+      } catch (e) {
+        this.log(`telegram[${this.channel.name}] photo download failed: ${e.message}`);
+      }
+      // If there's a caption, continue to handle it as text; otherwise return
+      if (!text) return;
+    }
     // /reset or /new wipes the rolling context for this chat. We just
     // remember a marker timestamp; subsequent inbounds will only consider
@@ -488,6 +638,31 @@ class ChannelPoller {
     if (!json.ok) throw new Error(json.description || `send failed (${res.status})`);
     return json.result;
   }
+  /**
+   * Send a photo via this channel.
+   * The bot token is resolved from the channel. The destination is `chat_id`
+   * when given, otherwise the channel's resolved chat id. Throws when either
+   * the token or the destination is missing; otherwise returns whatever the
+   * module-level sendPhoto helper resolves to.
+   */
+  async _sendPhoto({ chat_id, photo, caption, parse_mode }) {
+    const token = resolveBotToken(this.channel);
+    if (!token) throw new Error(`channel ${this.channel.name}: no bot_token`);
+    const target = chat_id || resolveChatId(this.channel); // explicit chat_id wins over the channel default
+    if (!target) throw new Error(`channel ${this.channel.name}: no chat_id`);
+    return sendPhoto(token, target, photo, { caption, parse_mode });
+  }
+  /**
+   * Send a voice message via this channel.
+   * The bot token is resolved from the channel. The destination is `chat_id`
+   * when given, otherwise the channel's resolved chat id. Throws when either
+   * the token or the destination is missing.
+   */
+  async _sendVoice({ chat_id, audio, caption, duration }) {
+    const token = resolveBotToken(this.channel);
+    if (!token) throw new Error(`channel ${this.channel.name}: no bot_token`);
+    const target = chat_id || resolveChatId(this.channel);
+    // Same guard as _sendPhoto: without it a missing id is stringified to
+    // "undefined" and only fails later as an opaque Telegram API error.
+    if (!target) throw new Error(`channel ${this.channel.name}: no chat_id`);
+    return sendVoice(token, target, audio, { caption, duration });
+  }
+  /**
+   * Send an audio file via this channel.
+   * The bot token is resolved from the channel. The destination is `chat_id`
+   * when given, otherwise the channel's resolved chat id. Throws when either
+   * the token or the destination is missing.
+   */
+  async _sendAudio({ chat_id, audio, caption, title, performer }) {
+    const token = resolveBotToken(this.channel);
+    if (!token) throw new Error(`channel ${this.channel.name}: no bot_token`);
+    const target = chat_id || resolveChatId(this.channel);
+    // Same guard as _sendPhoto: without it a missing id is stringified to
+    // "undefined" and only fails later as an opaque Telegram API error.
+    if (!target) throw new Error(`channel ${this.channel.name}: no chat_id`);
+    return sendAudio(token, target, audio, { caption, title, performer });
+  }
 }
 function sleep(ms) {
@@ -557,6 +732,77 @@ export default {
         });
         return result;
       },
+      /**
+       * Send a photo to a Telegram chat.
+       * photo: local file path, Buffer, or public URL
+       * opts: { caption, parse_mode, channel, author }
+       *
+       * Poller selection: the poller whose channel name equals `channel` is
+       * used when one exists; otherwise (including when `channel` is given but
+       * matches nothing) the first poller with a resolvable bot token is used.
+       * Throws "no telegram channel available" when neither exists.
+       * After the send succeeds, an outbound "photo" entry is passed to
+       * appendGlobalMessage; `author` (default "apx") is used as both author
+       * and actor_id. Returns the value resolved by the poller's _sendPhoto.
+       */
+      async sendPhoto({ channel: channelName, chat_id, photo, caption, parse_mode, author = "apx" }) {
+        const p =
+          (channelName && pollers.find((pp) => pp.channel.name === channelName)) ||
+          pollers.find((pp) => resolveBotToken(pp.channel)) ||
+          null;
+        if (!p) throw new Error("no telegram channel available");
+        const result = await p._sendPhoto({ chat_id, photo, caption, parse_mode }); // a failed send throws here, so nothing is logged
+        appendGlobalMessage({
+          channel: "telegram",
+          direction: "out",
+          type: "photo",
+          actor_id: author,
+          author,
+          body: caption || "[photo]", // placeholder body when there is no caption
+          meta: { chat_id: chat_id || resolveChatId(p.channel), tg_channel: p.channel.name },
+        });
+        return result;
+      },
+      /**
+       * Send a voice message (OGG/Opus preferred).
+       * audio: local file path or Buffer
+       *
+       * Poller selection: the poller whose channel name equals `channel` is
+       * used when one exists; otherwise (including when `channel` is given but
+       * matches nothing) the first poller with a resolvable bot token is used.
+       * Throws "no telegram channel available" when neither exists.
+       * After the send succeeds, an outbound "voice" entry is passed to
+       * appendGlobalMessage; `author` (default "apx") is used as both author
+       * and actor_id. Returns the value resolved by the poller's _sendVoice.
+       */
+      async sendVoice({ channel: channelName, chat_id, audio, caption, duration, author = "apx" }) {
+        const p =
+          (channelName && pollers.find((pp) => pp.channel.name === channelName)) ||
+          pollers.find((pp) => resolveBotToken(pp.channel)) ||
+          null;
+        if (!p) throw new Error("no telegram channel available");
+        const result = await p._sendVoice({ chat_id, audio, caption, duration }); // a failed send throws here, so nothing is logged
+        appendGlobalMessage({
+          channel: "telegram",
+          direction: "out",
+          type: "voice",
+          actor_id: author,
+          author,
+          body: caption || "[voice]", // placeholder body when there is no caption
+          meta: { chat_id: chat_id || resolveChatId(p.channel), tg_channel: p.channel.name },
+        });
+        return result;
+      },
+      /**
+       * Send an audio file (MP3/M4A — shown in music player).
+       * audio: local file path or Buffer
+       *
+       * Poller selection: the poller whose channel name equals `channel` is
+       * used when one exists; otherwise (including when `channel` is given but
+       * matches nothing) the first poller with a resolvable bot token is used.
+       * Throws "no telegram channel available" when neither exists.
+       * After the send succeeds, an outbound "audio" entry is passed to
+       * appendGlobalMessage; `author` (default "apx") is used as both author
+       * and actor_id. Returns the value resolved by the poller's _sendAudio.
+       */
+      async sendAudio({ channel: channelName, chat_id, audio, caption, title, performer, author = "apx" }) {
+        const p =
+          (channelName && pollers.find((pp) => pp.channel.name === channelName)) ||
+          pollers.find((pp) => resolveBotToken(pp.channel)) ||
+          null;
+        if (!p) throw new Error("no telegram channel available");
+        const result = await p._sendAudio({ chat_id, audio, caption, title, performer }); // a failed send throws here, so nothing is logged
+        appendGlobalMessage({
+          channel: "telegram",
+          direction: "out",
+          type: "audio",
+          actor_id: author,
+          author,
+          body: caption || title || "[audio]", // caption first, then title, then a placeholder
+          meta: { chat_id: chat_id || resolveChatId(p.channel), tg_channel: p.channel.name },
+        });
+        return result;
+      },
       pollers,
     };
   },

package/src/daemon/super-agent.js CHANGED Viewed

@@ -60,7 +60,8 @@ HARD RULES (do not deviate):
 14. VAULT RULE: When the user wants a new existing agent/template, call list_vault_agents first. If a suitable vault agent exists, import_agent into the chosen project. If none fits, say briefly what is missing.
 15. NO-PENDING RULE: never say "give me a second", "I will do it", or "I will try later" as a final answer. Either call the tool in this same turn or say what blocks you.
 16. IDENTITY RULE: when the user asks you to change your name, call yourself something, or update your personality/language, call set_identity and persist the change. Then confirm with your new name.
-17. ROUTINES RULE: NEVER create a routine in the default project (id=0). Routines MUST be tied to a specific registered project. Before adding a routine, call list_projects to find the correct project id or name. Then pass --project <id|name> to apx routine add. If no project fits, ask the user which project to use. Creating routines in project 0/default mixes unrelated projects' schedules and corrupts state.`;
+17. ROUTINES RULE: NEVER create a routine in the default project (id=0). Routines MUST be tied to a specific registered project. Before adding a routine, call list_projects to find the correct project id or name. Then pass --project <id|name> to apx routine add. If no project fits, ask the user which project to use. Creating routines in project 0/default mixes unrelated projects' schedules and corrupts state.
+18. **NO EMPTY RESPONSES**: Never respond with only text when you have tools available and the user is asking you to DO something. Call the tool FIRST, then explain. Never say "I'll do X" without immediately calling the tool. Empty acknowledgments ("ok", "entendido", "dame un minuto", "voy", "checking", "stand by") without a tool call are invalid responses — they will be re-prompted and waste a turn.`;
 function isShortConfirmation(text) {
   return /^(yes|y|si|si dale|dale|ok|okay|confirm|confirmed|go|proceed|do it)\b/i
@@ -75,6 +76,26 @@ function lastAssistantAskedForConfirmation(messages) {
   return false;
 }
+/**
+ * Returns true if the model response looks like a bare acknowledgment or
+ * stalling phrase — the classic "ghost response" anti-pattern.
+ *
+ * The check is a heuristic: the trimmed text must be at most 200 characters
+ * and must START with one of the known English/Spanish openers ("ok",
+ * "let me", "dame un segundo", ...). Whatever follows the opener is not
+ * inspected. Empty or missing text returns false.
+ *
+ * @param {*} text  Model output; coerced to a string
+ * @returns {boolean}
+ */
+function isGhostResponse(text) {
+  const t = String(text || "").trim();
+  if (t.length > 200) return false; // long responses are probably real
+  // The negative lookahead requires the opener to end on a word edge, so
+  // "Okinawa", "Surely" or "I cannot" are not mistaken for "ok", "sure" or
+  // "i can". It is Unicode-aware because several openers are Spanish.
+  return /^(ok|okay|got it|understood|sure|of course|on it|dale|entendido|claro|voy|ya lo hago|dame un (segundo|momento)|un momento|let me|i (will|can|shall)|i'm (going|about)|give me a|ahora lo|enseguida|checking|looking|fetching|working on|stand by|please wait|un seg(undo)?|dame sec)(?![\p{L}\p{N}_])/iu
+    .test(t);
+}
+/**
+ * Returns true if the user's prompt looks like an instruction to act
+ * rather than just a question or statement.
+ *
+ * The check is a keyword heuristic: the lower-cased prompt must contain one
+ * of the listed English or Spanish action verbs as a whole word.
+ *
+ * @param {*} text  User prompt; coerced to a string
+ * @returns {boolean}
+ */
+function looksLikeActionRequest(text) {
+  // NFC so an accent typed as a combining mark still equals the literals below.
+  const t = String(text || "").trim().toLowerCase().normalize("NFC");
+  // \b only knows ASCII word characters: after "mandá" followed by a space or
+  // the end of the string it never fires, so the accented imperatives could
+  // not match as standalone words. Unicode-aware lookarounds fix that.
+  return /(?<![\p{L}\p{N}_])(list|show|find|get|fetch|search|run|execute|create|add|make|start|stop|delete|update|send|check|read|write|look|tell me|dame|mostra|busca|ejecuta|crea|agrega|mandá|revisá|corré|borrá|arrancá)(?![\p{L}\p{N}_])/u.test(t);
+}
 /**
  * Report whether the super agent is switched on in the given config.
  * True only when `cfg.super_agent.enabled` and `cfg.super_agent.model` are
  * both truthy; a missing config or missing `super_agent` section gives false.
  */
 export function isSuperAgentEnabled(cfg) {
   return !!(cfg && cfg.super_agent && cfg.super_agent.enabled && cfg.super_agent.model); // !! coerces the chain to a strict boolean
 }
@@ -144,12 +165,18 @@ export async function runSuperAgent({
   for (let iter = 0; iter < MAX_TOOL_ITERS; iter++) {
     await emitProgress(onEvent, { type: "model_start", iteration: iter + 1 });
+    // On the first iteration, force a tool call. This prevents the model from
+    // returning a bare acknowledgment ("ok", "dame un segundo") instead of
+    // acting on an action request. On later iterations (after tool results
+    // have been fed back) tool_choice is "auto" so the model can produce its
+    // final text summary.
     const result = await callEngine({
       modelId: activeModel,
       system,
       messages: conversation,
       config: globalConfig,
       tools: TOOL_SCHEMAS,
+      toolChoice: iter === 0 ? "required" : "auto",
       maxTokens: 1024,
     });
     totalUsage.input_tokens += result.usage?.input_tokens || 0;
@@ -172,6 +199,20 @@ export async function runSuperAgent({
     }
     if (!toolCalls || toolCalls.length === 0) {
+      // Ghost-response detection: if the model returned a pure acknowledgment
+      // (no tool calls, no real content) on the FIRST iteration in response to
+      // what looks like an action request, inject a re-prompt.
+      if (iter === 0 && isGhostResponse(lastText) && looksLikeActionRequest(prompt)) {
+        await emitProgress(onEvent, { type: "ghost_response_detected", text: lastText });
+        conversation.push({ role: "assistant", content: lastText });
+        conversation.push({
+          role: "user",
+          content:
+            "Remember: you must execute the action, not just confirm it. " +
+            "Call the tool now — action first, report after.",
+        });
+        continue; // give the model one more chance
+      }
       // Final answer — clean up any stray fence markers just in case
       lastText = cleanTextOfPseudoToolCalls(lastText) || lastText;
       break;