@agentprojectcontext/apx 1.10.4 → 1.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -28,7 +28,8 @@
28
28
  // }
29
29
 
30
30
  import fs from "node:fs";
31
- import { TELEGRAM_STATE_PATH } from "../../core/config.js";
31
+ import path from "node:path";
32
+ import { TELEGRAM_STATE_PATH, APX_HOME } from "../../core/config.js";
32
33
  import { callEngine } from "../engines/index.js";
33
34
  import { runSuperAgent, isSuperAgentEnabled } from "../super-agent.js";
34
35
  import { stripThinking } from "../thinking.js";
@@ -39,6 +40,119 @@ import { buildAgentSystem } from "../../core/agent-system.js";
39
40
  const API_BASE = "https://api.telegram.org";
40
41
  const nowIso = () => new Date().toISOString().replace(/\.\d{3}Z$/, "Z");
41
42
 
43
+ // ---------- media sending helpers -------------------------------------------
44
+
45
/**
 * POST a multipart form to a Telegram Bot API method and unwrap `result`.
 *
 * @param {string} token Bot token
 * @param {string} method Bot API method name, e.g. "sendPhoto"
 * @param {FormData} form Fully populated request body
 * @returns {Promise<object>} The `result` field of the API response
 * @throws {Error} `<method> failed: <description|status>` when the API does
 *   not answer with `ok: true`
 */
async function postTelegramForm(token, method, form) {
  const res = await fetch(`${API_BASE}/bot${token}/${method}`, { method: "POST", body: form });
  // Gateways can answer with a non-JSON body (e.g. an HTML error page); report
  // that as a failed call with the HTTP status instead of a JSON SyntaxError.
  const json = await res.json().catch(() => null);
  if (!json?.ok) throw new Error(`${method} failed: ${json?.description || res.status}`);
  return json.result;
}

/**
 * Resolve an upload argument into raw bytes plus the file name to advertise.
 *
 * @param {string|Buffer} media File path or in-memory Buffer
 * @param {string} fallbackName Name used when `media` is a Buffer
 * @returns {Promise<{bytes: Buffer, name: string}>}
 */
async function readTelegramUpload(media, fallbackName) {
  // Read asynchronously so a large media file does not block the event loop.
  const bytes = Buffer.isBuffer(media) ? media : await fs.promises.readFile(media);
  const name = typeof media === "string" ? path.basename(media) : fallbackName;
  return { bytes, name };
}

/**
 * Send a photo to a Telegram chat.
 * @param {string} token Bot token
 * @param {string|number} chatId Telegram chat_id
 * @param {string|Buffer} photo http(s) URL, file path, or Buffer of image data
 * @param {object} [opts]
 * @param {string} [opts.caption]
 * @param {string} [opts.parse_mode] "HTML" | "Markdown" | "MarkdownV2"
 * @returns {Promise<object>} The `result` field of the API response
 * @throws {Error} `sendPhoto failed: ...` when the API rejects the call
 */
export async function sendPhoto(token, chatId, photo, { caption, parse_mode } = {}) {
  const form = new FormData();
  form.append("chat_id", String(chatId));
  if (caption) form.append("caption", caption);
  if (parse_mode) form.append("parse_mode", parse_mode);

  if (typeof photo === "string" && /^https?:\/\//i.test(photo)) {
    // Public URL: pass it through as a plain string field. Requiring the
    // scheme separator keeps paths such as "httpdocs/a.png" on the upload path.
    form.append("photo", photo);
  } else {
    const { bytes, name } = await readTelegramUpload(photo, "photo.jpg");
    // Compare the extension case-insensitively so "IMG.PNG" is labelled as PNG.
    const type = name.toLowerCase().endsWith(".png") ? "image/png" : "image/jpeg";
    form.append("photo", new Blob([bytes], { type }), name);
  }

  return postTelegramForm(token, "sendPhoto", form);
}

/**
 * Send a voice message (OGG/Opus preferred by Telegram).
 * @param {string} token
 * @param {string|number} chatId
 * @param {string|Buffer} audio Path or Buffer
 * @param {object} [opts]
 * @param {string} [opts.caption]
 * @param {number} [opts.duration]
 * @returns {Promise<object>} The `result` field of the API response
 * @throws {Error} `sendVoice failed: ...` when the API rejects the call
 */
export async function sendVoice(token, chatId, audio, { caption, duration } = {}) {
  const form = new FormData();
  form.append("chat_id", String(chatId));
  if (caption) form.append("caption", caption);
  if (duration) form.append("duration", String(duration));

  const { bytes, name } = await readTelegramUpload(audio, "voice.ogg");
  form.append("voice", new Blob([bytes], { type: "audio/ogg" }), name);

  return postTelegramForm(token, "sendVoice", form);
}

/**
 * Send an audio file (MP3, M4A, etc — shown in Telegram music player).
 * @param {string} token
 * @param {string|number} chatId
 * @param {string|Buffer} audio Path or Buffer
 * @param {object} [opts]
 * @param {string} [opts.caption]
 * @param {string} [opts.title]
 * @param {string} [opts.performer]
 * @returns {Promise<object>} The `result` field of the API response
 * @throws {Error} `sendAudio failed: ...` when the API rejects the call
 */
export async function sendAudio(token, chatId, audio, { caption, title, performer } = {}) {
  const form = new FormData();
  form.append("chat_id", String(chatId));
  if (caption) form.append("caption", caption);
  if (title) form.append("title", title);
  if (performer) form.append("performer", performer);

  const { bytes, name } = await readTelegramUpload(audio, "audio.mp3");
  form.append("audio", new Blob([bytes], { type: "audio/mpeg" }), name);

  return postTelegramForm(token, "sendAudio", form);
}
133
+
134
/**
 * Download a file from Telegram servers into `destDir`.
 *
 * Calls `getFile` to resolve the server-side path for `fileId`, fetches the
 * bytes, and writes them to `tg_<id tail>_<timestamp><ext>`.
 *
 * @param {string} token Bot token
 * @param {string} fileId Telegram file_id
 * @param {string} destDir Directory to write into (must already exist)
 * @returns {Promise<string>} Local path of the saved file
 * @throws {Error} When `getFile` fails or returns no path, or when the
 *   download responds with a non-2xx status
 */
async function downloadTelegramFile(token, fileId, destDir) {
  const id = String(fileId);

  // Step 1: get file path from Telegram. The id goes into a query string, so
  // it must be percent-encoded.
  const infoRes = await fetch(`${API_BASE}/bot${token}/getFile?file_id=${encodeURIComponent(id)}`);
  const infoJson = await infoRes.json().catch(() => null);
  if (!infoJson?.ok) throw new Error(`getFile failed: ${infoJson?.description || infoRes.status}`);
  const filePath = infoJson.result?.file_path; // e.g. "photos/file_123.jpg"
  if (!filePath) throw new Error("getFile failed: response has no file_path");

  const ext = path.extname(filePath) || ".jpg";
  // Only keep filename-safe characters from the id tail so the result can
  // never contain a path separator.
  const idTail = id.slice(-8).replace(/[^A-Za-z0-9_-]/g, "_");
  const localPath = path.join(destDir, `tg_${idTail}_${Date.now()}${ext}`);

  // Step 2: download
  const dlRes = await fetch(`${API_BASE}/file/bot${token}/${filePath}`);
  if (!dlRes.ok) throw new Error(`download failed: ${dlRes.status}`);
  const buf = Buffer.from(await dlRes.arrayBuffer());
  await fs.promises.writeFile(localPath, buf);
  return localPath;
}
155
+
42
156
  // ---------- shared state ----------------------------------------------------
43
157
 
44
158
  function loadState() {
@@ -237,7 +351,43 @@ class ChannelPoller {
237
351
  ? "@" + msg.from.username
238
352
  : `${msg.from?.first_name || ""} ${msg.from?.last_name || ""}`.trim() || "unknown";
239
353
  const chat_id = msg.chat?.id;
240
- const text = msg.text || "";
354
+ const text = msg.text || msg.caption || "";
355
+
356
+ // ── Incoming photo handling ───────────────────────────────────────────
357
+ if (msg.photo && msg.photo.length > 0) {
358
+ // Telegram sends multiple sizes; pick the largest
359
+ const bestPhoto = msg.photo.reduce((a, b) => (b.file_size > a.file_size ? b : a));
360
+ const token = resolveBotToken(this.channel);
361
+ const mediaDir = path.join(APX_HOME, "media");
362
+ fs.mkdirSync(mediaDir, { recursive: true });
363
+ try {
364
+ const localPath = await downloadTelegramFile(token, bestPhoto.file_id, mediaDir);
365
+ this.log(`telegram[${this.channel.name}] photo saved: ${localPath}`);
366
+ appendGlobalMessage({
367
+ channel: "telegram",
368
+ direction: "in",
369
+ type: "photo",
370
+ actor_id: msg.from?.id ? String(msg.from.id) : author,
371
+ external_id: String(u.update_id),
372
+ author,
373
+ body: text || "[photo]",
374
+ meta: {
375
+ chat_id,
376
+ user_id: msg.from?.id || null,
377
+ message_id: msg.message_id,
378
+ tg_channel: this.channel.name,
379
+ local_path: localPath,
380
+ file_id: bestPhoto.file_id,
381
+ width: bestPhoto.width,
382
+ height: bestPhoto.height,
383
+ },
384
+ });
385
+ } catch (e) {
386
+ this.log(`telegram[${this.channel.name}] photo download failed: ${e.message}`);
387
+ }
388
+ // If there's a caption, continue to handle it as text; otherwise return
389
+ if (!text) return;
390
+ }
241
391
 
242
392
  // /reset or /new wipes the rolling context for this chat. We just
243
393
  // remember a marker timestamp; subsequent inbounds will only consider
@@ -488,6 +638,31 @@ class ChannelPoller {
488
638
  if (!json.ok) throw new Error(json.description || `send failed (${res.status})`);
489
639
  return json.result;
490
640
  }
641
+
642
+ /** Send a photo via this channel */
643
+ async _sendPhoto({ chat_id, photo, caption, parse_mode }) {
644
+ const token = resolveBotToken(this.channel);
645
+ if (!token) throw new Error(`channel ${this.channel.name}: no bot_token`);
646
+ const target = chat_id || resolveChatId(this.channel);
647
+ if (!target) throw new Error(`channel ${this.channel.name}: no chat_id`);
648
+ return sendPhoto(token, target, photo, { caption, parse_mode });
649
+ }
650
+
651
+ /** Send a voice message via this channel */
652
+ async _sendVoice({ chat_id, audio, caption, duration }) {
653
+ const token = resolveBotToken(this.channel);
654
+ if (!token) throw new Error(`channel ${this.channel.name}: no bot_token`);
655
+ const target = chat_id || resolveChatId(this.channel);
656
+ return sendVoice(token, target, audio, { caption, duration });
657
+ }
658
+
659
+ /** Send an audio file via this channel */
660
+ async _sendAudio({ chat_id, audio, caption, title, performer }) {
661
+ const token = resolveBotToken(this.channel);
662
+ if (!token) throw new Error(`channel ${this.channel.name}: no bot_token`);
663
+ const target = chat_id || resolveChatId(this.channel);
664
+ return sendAudio(token, target, audio, { caption, title, performer });
665
+ }
491
666
  }
492
667
 
493
668
  function sleep(ms) {
@@ -557,6 +732,77 @@ export default {
557
732
  });
558
733
  return result;
559
734
  },
735
+
736
+ /**
737
+ * Send a photo to a Telegram chat.
738
+ * photo: local file path, Buffer, or public URL
739
+ * opts: { caption, parse_mode, channel, author }
740
+ */
741
+ async sendPhoto({ channel: channelName, chat_id, photo, caption, parse_mode, author = "apx" }) {
742
+ const p =
743
+ (channelName && pollers.find((pp) => pp.channel.name === channelName)) ||
744
+ pollers.find((pp) => resolveBotToken(pp.channel)) ||
745
+ null;
746
+ if (!p) throw new Error("no telegram channel available");
747
+ const result = await p._sendPhoto({ chat_id, photo, caption, parse_mode });
748
+ appendGlobalMessage({
749
+ channel: "telegram",
750
+ direction: "out",
751
+ type: "photo",
752
+ actor_id: author,
753
+ author,
754
+ body: caption || "[photo]",
755
+ meta: { chat_id: chat_id || resolveChatId(p.channel), tg_channel: p.channel.name },
756
+ });
757
+ return result;
758
+ },
759
+
760
+ /**
761
+ * Send a voice message (OGG/Opus preferred).
762
+ * audio: local file path or Buffer
763
+ */
764
+ async sendVoice({ channel: channelName, chat_id, audio, caption, duration, author = "apx" }) {
765
+ const p =
766
+ (channelName && pollers.find((pp) => pp.channel.name === channelName)) ||
767
+ pollers.find((pp) => resolveBotToken(pp.channel)) ||
768
+ null;
769
+ if (!p) throw new Error("no telegram channel available");
770
+ const result = await p._sendVoice({ chat_id, audio, caption, duration });
771
+ appendGlobalMessage({
772
+ channel: "telegram",
773
+ direction: "out",
774
+ type: "voice",
775
+ actor_id: author,
776
+ author,
777
+ body: caption || "[voice]",
778
+ meta: { chat_id: chat_id || resolveChatId(p.channel), tg_channel: p.channel.name },
779
+ });
780
+ return result;
781
+ },
782
+
783
+ /**
784
+ * Send an audio file (MP3/M4A — shown in music player).
785
+ * audio: local file path or Buffer
786
+ */
787
+ async sendAudio({ channel: channelName, chat_id, audio, caption, title, performer, author = "apx" }) {
788
+ const p =
789
+ (channelName && pollers.find((pp) => pp.channel.name === channelName)) ||
790
+ pollers.find((pp) => resolveBotToken(pp.channel)) ||
791
+ null;
792
+ if (!p) throw new Error("no telegram channel available");
793
+ const result = await p._sendAudio({ chat_id, audio, caption, title, performer });
794
+ appendGlobalMessage({
795
+ channel: "telegram",
796
+ direction: "out",
797
+ type: "audio",
798
+ actor_id: author,
799
+ author,
800
+ body: caption || title || "[audio]",
801
+ meta: { chat_id: chat_id || resolveChatId(p.channel), tg_channel: p.channel.name },
802
+ });
803
+ return result;
804
+ },
805
+
560
806
  pollers,
561
807
  };
562
808
  },
@@ -20,8 +20,9 @@ import setIdentity from "./tools/set-identity.js";
20
20
  import setPermissionMode from "./tools/set-permission-mode.js";
21
21
  import searchFiles from "./tools/search-files.js";
22
22
  import { createPermissionGuard } from "./helpers.js";
23
+ import { buildBridgedTools, DEFAULT_CATEGORIES } from "./registry-bridge.js";
23
24
 
24
- const TOOLS = [
25
+ const NATIVE_TOOLS = [
25
26
  listProjects,
26
27
  listAgents,
27
28
  listVaultAgents,
@@ -45,6 +46,18 @@ const TOOLS = [
45
46
  searchFiles,
46
47
  ];
47
48
 
49
// Picks which registry categories get bridged into the super-agent tool list.
// Set env APX_BRIDGE_CATEGORIES to a comma-separated list (e.g.
// "browser,fetch,search") to override per process. When the variable is unset
// or blank, the DEFAULT_CATEGORIES set from registry-bridge.js is returned.
function resolveBridgeCategories() {
  const raw = process.env.APX_BRIDGE_CATEGORIES;
  const spec = (raw || "").trim();
  if (spec === "") return DEFAULT_CATEGORIES;

  const picked = new Set();
  for (const part of spec.split(",")) {
    const name = part.trim();
    // Ignore empty segments such as the gap in "a,,b" or a trailing comma.
    if (name) picked.add(name);
  }
  return picked;
}
57
+
58
+ const BRIDGED_TOOLS = buildBridgedTools({ categories: resolveBridgeCategories() });
59
+ const TOOLS = [...NATIVE_TOOLS, ...BRIDGED_TOOLS];
60
+
48
61
  export const TOOL_SCHEMAS = TOOLS.map((tool) => tool.schema);
49
62
 
50
63
  export function makeToolHandlers(ctx) {
@@ -56,3 +69,8 @@ export function makeToolHandlers(ctx) {
56
69
  };
57
70
  return Object.fromEntries(TOOLS.map((tool) => [tool.name, tool.makeHandler(toolCtx)]));
58
71
  }
72
+
73
// Diagnostic helper: names of the tools that came from the registry bridge,
// in registration order. Handy for `apx daemon status` or debug logging.
export function listBridgedToolNames() {
  return Array.from(BRIDGED_TOOLS, (tool) => tool.name);
}
@@ -0,0 +1,122 @@
1
+ // daemon/super-agent-tools/registry-bridge.js
2
+ //
3
+ // Generic bridge that exposes registry-backed HTTP tools (browser, fetch,
4
+ // search, glob, grep, etc.) to the super-agent — no per-tool import boilerplate.
5
+ //
6
+ // How it works:
7
+ // 1. Read TOOL_DEFINITIONS from daemon/tools/registry.js
8
+ // 2. Drop entries whose names collide with native super-agent tools (those
9
+ // win — they touch in-process state directly).
10
+ // 3. For each remaining entry, produce { name, schema, makeHandler } in the
11
+ // exact shape index.js expects, so they slot into TOOL_SCHEMAS alongside
12
+ // the native ones.
13
+ // 4. The generated handler POSTs/GETs to the daemon's own HTTP server on
14
+ // 127.0.0.1:<port>. Yes, the super-agent talks to its own daemon — that
15
+ // keeps the bridge dead-simple, lets the engine adapter format tool
16
+ // schemas uniformly, and reuses the exact code path external callers hit.
17
+ //
18
+ // Net result: adding a tool = adding one entry to registry.js. No file in
19
+ // super-agent-tools/tools/, no import in index.js.
20
+
21
+ import { TOOL_DEFINITIONS } from "../tools/registry.js";
22
+
23
+ // Native handlers in super-agent-tools/tools/ that own these names. The bridge
24
+ // MUST skip them or the registry version (HTTP roundtrip) would shadow the
25
+ // native one with possibly different semantics.
26
+ const NATIVE_NAMES = new Set([
27
+ "list_projects", "list_agents", "list_vault_agents", "import_agent",
28
+ "add_project", "list_mcps", "read_agent_memory",
29
+ "list_files", "read_file", "write_file", "edit_file", "search_files",
30
+ "run_shell", "tail_messages", "search_messages",
31
+ "call_agent", "call_mcp", "call_runtime",
32
+ "send_telegram", "set_identity", "set_permission_mode",
33
+ ]);
34
+
35
+ // Default allow-list of categories the bridge will expose. The NATIVE_NAMES
36
+ // filter handles duplicates inside these categories (e.g. "file" contains
37
+ // both read_file [native] and glob [bridged]). Anything outside is ignored
38
+ // — "shell"/"mcp"/"memory"/"session" have different semantics handled
39
+ // natively. Override with env APX_BRIDGE_CATEGORIES.
40
+ const DEFAULT_CATEGORIES = new Set(["browser", "fetch", "search", "file"]);
41
+
42
// Wraps a registry entry in the { type: "function", function: {...} } schema
// shape. Entries without `parameters` get an empty object schema.
function buildSchema(entry) {
  const { name, description } = entry;
  const parameters = entry.parameters || { type: "object", properties: {} };
  return {
    type: "function",
    function: { name, description, parameters },
  };
}
52
+
53
/**
 * Build the handler factory for one registry entry.
 *
 * The returned factory takes the tool context (`{ globalConfig }`) and yields
 * an async handler that forwards the tool arguments to the daemon's own HTTP
 * endpoint on 127.0.0.1. GET/HEAD send arguments as query parameters; every
 * other method sends them as a JSON body.
 *
 * The handler never throws for transport or HTTP failures:
 *  - network error  -> `{ error, url }`
 *  - non-2xx status -> `{ ...body, error, status }`
 *  - success        -> parsed JSON body, or `{ raw }` when the body is not JSON
 *
 * @param {object} entry Registry definition; reads `endpoint.method` and
 *   `endpoint.path`
 * @returns {(ctx: {globalConfig?: object}) => (args?: object) => Promise<object>}
 */
function buildHandler(entry) {
  return ({ globalConfig }) => async (args = {}) => {
    // The default parameter only covers `undefined`; an explicit `null` would
    // otherwise crash Object.entries below.
    const params = args ?? {};
    const port = globalConfig?.port || process.env.APX_PORT || 7430;
    const method = String(entry.endpoint?.method || "POST").toUpperCase();
    let url = `http://127.0.0.1:${port}${entry.endpoint?.path || ""}`;

    const opts = { method };

    if (method === "GET" || method === "HEAD") {
      const qs = new URLSearchParams();
      for (const [k, v] of Object.entries(params)) {
        if (v === undefined || v === null) continue;
        // Nested values travel as JSON text inside the query string.
        qs.set(k, typeof v === "object" ? JSON.stringify(v) : String(v));
      }
      const q = qs.toString();
      if (q) url += (url.includes("?") ? "&" : "?") + q;
    } else {
      // Only declare a JSON content type when a body is actually sent.
      opts.headers = { "content-type": "application/json" };
      opts.body = JSON.stringify(params);
    }

    let res, text;
    try {
      res = await fetch(url, opts);
      text = await res.text();
    } catch (e) {
      return { error: `bridge fetch failed: ${e.message}`, url };
    }

    let parsed;
    try { parsed = JSON.parse(text); }
    catch { parsed = { raw: text }; }

    if (!res.ok) {
      // Only merge plain-object bodies; arrays and null carry no named fields.
      const details =
        parsed !== null && typeof parsed === "object" && !Array.isArray(parsed) ? parsed : {};
      return {
        ...details,
        // Written after the spread so the body can neither blank out the error
        // (e.g. `"error": null`) nor replace the real HTTP status.
        error: details.error || `HTTP ${res.status}`,
        status: res.status,
      };
    }
    return parsed;
  };
}
98
+
99
/**
 * Produce the bridged tool list in the { name, schema, makeHandler } shape
 * that super-agent-tools/index.js consumes.
 *
 * A registry entry is kept when its category is allowed, its name is neither
 * native nor explicitly skipped, and it declares an endpoint path.
 *
 * @param {object} opts
 * @param {Set<string>=} opts.categories replaces DEFAULT_CATEGORIES (non-Set values are ignored)
 * @param {Set<string>=} opts.skipNames names to skip on top of NATIVE_NAMES (non-Set values are ignored)
 */
export function buildBridgedTools(opts = {}) {
  const { categories: wanted, skipNames: extraSkips } = opts;
  const categories = wanted instanceof Set ? wanted : DEFAULT_CATEGORIES;
  const skipNames = extraSkips instanceof Set ? extraSkips : new Set();

  const bridged = [];
  for (const entry of TOOL_DEFINITIONS) {
    if (!categories.has(entry.category)) continue;
    if (NATIVE_NAMES.has(entry.name) || skipNames.has(entry.name)) continue;
    if (!entry.endpoint?.path) continue;
    bridged.push({
      name: entry.name,
      schema: buildSchema(entry),
      makeHandler: buildHandler(entry),
    });
  }
  return bridged;
}
121
+
122
+ export { NATIVE_NAMES, DEFAULT_CATEGORIES };
@@ -60,7 +60,9 @@ HARD RULES (do not deviate):
60
60
  14. VAULT RULE: When the user wants a new existing agent/template, call list_vault_agents first. If a suitable vault agent exists, import_agent into the chosen project. If none fits, say briefly what is missing.
61
61
  15. NO-PENDING RULE: never say "give me a second", "I will do it", or "I will try later" as a final answer. Either call the tool in this same turn or say what blocks you.
62
62
  16. IDENTITY RULE: when the user asks you to change your name, call yourself something, or update your personality/language, call set_identity and persist the change. Then confirm with your new name.
63
- 17. ROUTINES RULE: NEVER create a routine in the default project (id=0). Routines MUST be tied to a specific registered project. Before adding a routine, call list_projects to find the correct project id or name. Then pass --project <id|name> to apx routine add. If no project fits, ask the user which project to use. Creating routines in project 0/default mixes unrelated projects' schedules and corrupts state.`;
63
+ 17. ROUTINES RULE: NEVER create a routine in the default project (id=0). Routines MUST be tied to a specific registered project. Before adding a routine, call list_projects to find the correct project id or name. Then pass --project <id|name> to apx routine add. If no project fits, ask the user which project to use. Creating routines in project 0/default mixes unrelated projects' schedules and corrupts state.
64
+ 18. **NO EMPTY RESPONSES**: Never respond with only text when you have tools available and the user is asking you to DO something. Call the tool FIRST, then explain. Never say "I'll do X" without immediately calling the tool. Empty acknowledgments ("ok", "entendido", "dame un minuto", "voy", "checking", "stand by") without a tool call are invalid responses — they will be re-prompted and waste a turn.
65
+ 19. **CWD RULE**: When the channel context includes a "CWD: <path>" line, that is the user's current working directory. References to "este directorio", "este proyecto", "esta carpeta", "acá", "aquí", "this directory", "this project", "current dir/folder" all mean that exact CWD path. Use it as the path argument directly — DO NOT ask the user "what's the path?" when CWD is already given. Example: if user says "agregá este proyecto a la lista", call add_project({path: <CWD>}) immediately.`;
64
66
 
65
67
  function isShortConfirmation(text) {
66
68
  return /^(yes|y|si|si dale|dale|ok|okay|confirm|confirmed|go|proceed|do it)\b/i
@@ -75,6 +77,26 @@ function lastAssistantAskedForConfirmation(messages) {
75
77
  return false;
76
78
  }
77
79
 
80
// Acknowledgment openers, anchored at the start of the reply. The trailing
// lookahead requires the opener to end at a word boundary (Unicode-aware), so
// "ok" does not match "okra" and "voy" does not match "Voyager".
const GHOST_OPENER_RE =
  /^(?:ok|okay|got it|understood|sure|of course|on it|dale|entendido|claro|voy|ya lo hago|dame un (?:segundo|momento)|un momento|let me|i (?:will|can|shall)|i['’]m (?:going|about)|give me a|ahora lo|enseguida|checking|looking|fetching|working on|stand by|please wait|un seg|dame sec)(?![\p{L}\p{N}_])/iu;

/**
 * Returns true if the model response looks like a bare acknowledgment
 * ("ok", "dame un segundo", "let me ...") rather than an answer: the classic
 * "ghost response" anti-pattern.
 *
 * Only replies of at most 200 characters (after trimming) are considered;
 * longer ones are assumed to carry real content. Empty input returns false.
 *
 * @param {unknown} text Model output; non-strings are coerced, nullish is ""
 * @returns {boolean}
 */
function isGhostResponse(text) {
  const t = String(text || "").trim();
  if (t.length > 200) return false; // long responses are probably real
  return GHOST_OPENER_RE.test(t);
}
90
+
91
// Action verbs bounded by Unicode-aware lookarounds instead of `\b`. JavaScript's
// `\b` only knows ASCII word characters, so it never matches after a trailing
// accented vowel ("mandá un mensaje") and those verbs would be dead entries.
// The accented imperatives may also carry up to two enclitic pronouns
// ("borrálo", "mandámelo").
const ACTION_REQUEST_RE =
  /(?<![\p{L}\p{N}_])(?:(?:list|show|find|get|fetch|search|run|execute|create|add|make|start|stop|delete|update|send|check|read|write|look|tell me|dame|mostra|busca|ejecuta|crea|agrega)|(?:mandá|revisá|corré|borrá|arrancá)(?:me|te|se|nos|los|las|les|lo|la|le){0,2})(?![\p{L}\p{N}_])/u;

/**
 * Returns true if the user's prompt looks like an instruction to act
 * rather than just a question or statement.
 *
 * Matching is case-insensitive and looks for a known English or Spanish
 * action verb as a whole word anywhere in the text.
 *
 * @param {unknown} text User prompt; non-strings are coerced, nullish is ""
 * @returns {boolean}
 */
function looksLikeActionRequest(text) {
  // NFC-normalize so "a" + combining acute compares equal to precomposed "á".
  const t = String(text || "").trim().toLowerCase().normalize("NFC");
  return ACTION_REQUEST_RE.test(t);
}
99
+
78
100
  export function isSuperAgentEnabled(cfg) {
79
101
  return !!(cfg && cfg.super_agent && cfg.super_agent.enabled && cfg.super_agent.model);
80
102
  }
@@ -144,12 +166,18 @@ export async function runSuperAgent({
144
166
 
145
167
  for (let iter = 0; iter < MAX_TOOL_ITERS; iter++) {
146
168
  await emitProgress(onEvent, { type: "model_start", iteration: iter + 1 });
169
+ // On the first iteration, force a tool call. This prevents the model from
170
+ // returning a bare acknowledgment ("ok", "dame un segundo") instead of
171
+ // acting on an action request. On later iterations (after tool results
172
+ // have been fed back) tool_choice is "auto" so the model can produce its
173
+ // final text summary.
147
174
  const result = await callEngine({
148
175
  modelId: activeModel,
149
176
  system,
150
177
  messages: conversation,
151
178
  config: globalConfig,
152
179
  tools: TOOL_SCHEMAS,
180
+ toolChoice: iter === 0 ? "required" : "auto",
153
181
  maxTokens: 1024,
154
182
  });
155
183
  totalUsage.input_tokens += result.usage?.input_tokens || 0;
@@ -172,6 +200,20 @@ export async function runSuperAgent({
172
200
  }
173
201
 
174
202
  if (!toolCalls || toolCalls.length === 0) {
203
+ // Ghost-response detection: if the model returned a pure acknowledgment
204
+ // (no tool calls, no real content) on the FIRST iteration in response to
205
+ // what looks like an action request, inject a re-prompt.
206
+ if (iter === 0 && isGhostResponse(lastText) && looksLikeActionRequest(prompt)) {
207
+ await emitProgress(onEvent, { type: "ghost_response_detected", text: lastText });
208
+ conversation.push({ role: "assistant", content: lastText });
209
+ conversation.push({
210
+ role: "user",
211
+ content:
212
+ "Remember: you must execute the action, not just confirm it. " +
213
+ "Call the tool now — action first, report after.",
214
+ });
215
+ continue; // give the model one more chance
216
+ }
175
217
  // Final answer — clean up any stray fence markers just in case
176
218
  lastText = cleanTextOfPseudoToolCalls(lastText) || lastText;
177
219
  break;