@agentprojectcontext/apx 1.10.3 → 1.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -28,7 +28,8 @@
28
28
  // }
29
29
 
30
30
  import fs from "node:fs";
31
- import { TELEGRAM_STATE_PATH } from "../../core/config.js";
31
+ import path from "node:path";
32
+ import { TELEGRAM_STATE_PATH, APX_HOME } from "../../core/config.js";
32
33
  import { callEngine } from "../engines/index.js";
33
34
  import { runSuperAgent, isSuperAgentEnabled } from "../super-agent.js";
34
35
  import { stripThinking } from "../thinking.js";
@@ -39,6 +40,119 @@ import { buildAgentSystem } from "../../core/agent-system.js";
39
40
  const API_BASE = "https://api.telegram.org";
40
41
  const nowIso = () => new Date().toISOString().replace(/\.\d{3}Z$/, "Z");
41
42
 
43
+ // ---------- media sending helpers -------------------------------------------
44
+
45
/**
 * Send a photo to a Telegram chat.
 *
 * A string that starts with an `http://` or `https://` scheme is forwarded
 * as-is so Telegram fetches it. Any other string is read from disk as a file
 * path, and a Buffer is uploaded directly.
 *
 * @param {string} token Bot token
 * @param {string|number} chatId Telegram chat_id
 * @param {string|Buffer} photo File path, public http(s) URL, or Buffer of image data
 * @param {object} [opts]
 * @param {string} [opts.caption]
 * @param {string} [opts.parse_mode] "HTML" | "Markdown" | "MarkdownV2"
 * @returns {Promise<object>} The `result` field of Telegram's JSON reply
 * @throws {Error} When the file cannot be read or the reply is not `ok`
 */
export async function sendPhoto(token, chatId, photo, { caption, parse_mode } = {}) {
  const url = `${API_BASE}/bot${token}/sendPhoto`;
  const form = new FormData();
  form.append("chat_id", String(chatId));
  if (caption) form.append("caption", caption);
  if (parse_mode) form.append("parse_mode", parse_mode);

  // Require a real scheme: a bare "http" prefix would also match local files
  // such as "http-cat.png" and send the file name to Telegram as a URL.
  if (typeof photo === "string" && /^https?:\/\//i.test(photo)) {
    form.append("photo", photo);
  } else {
    // Async read so a large image does not stall the event loop.
    const buf = Buffer.isBuffer(photo) ? photo : await fs.promises.readFile(photo);
    const name = typeof photo === "string" ? path.basename(photo) : "photo.jpg";
    const type = name.toLowerCase().endsWith(".png") ? "image/png" : "image/jpeg";
    form.append("photo", new Blob([buf], { type }), name);
  }

  const res = await fetch(url, { method: "POST", body: form });
  // A proxy/gateway error page is not JSON; report the HTTP status instead of
  // leaking a SyntaxError from the parser.
  let json = null;
  try {
    json = await res.json();
  } catch {
    json = null;
  }
  if (!json?.ok) throw new Error(`sendPhoto failed: ${json?.description || res.status}`);
  return json.result;
}
77
+
78
/**
 * Send a voice message (OGG/Opus preferred by Telegram).
 *
 * The payload is always uploaded with the `audio/ogg` content type.
 *
 * @param {string} token Bot token
 * @param {string|number} chatId Telegram chat_id
 * @param {string|Buffer} audio File path or Buffer of audio data
 * @param {object} [opts]
 * @param {string} [opts.caption]
 * @param {number} [opts.duration] Sent as the `duration` form field when truthy
 * @returns {Promise<object>} The `result` field of Telegram's JSON reply
 * @throws {Error} When the file cannot be read or the reply is not `ok`
 */
export async function sendVoice(token, chatId, audio, { caption, duration } = {}) {
  const url = `${API_BASE}/bot${token}/sendVoice`;
  const form = new FormData();
  form.append("chat_id", String(chatId));
  if (caption) form.append("caption", caption);
  if (duration) form.append("duration", String(duration));

  // Async read so a large recording does not stall the event loop.
  const buf = Buffer.isBuffer(audio) ? audio : await fs.promises.readFile(audio);
  const name = typeof audio === "string" ? path.basename(audio) : "voice.ogg";
  form.append("voice", new Blob([buf], { type: "audio/ogg" }), name);

  const res = await fetch(url, { method: "POST", body: form });
  // A proxy/gateway error page is not JSON; report the HTTP status instead of
  // leaking a SyntaxError from the parser.
  let json = null;
  try {
    json = await res.json();
  } catch {
    json = null;
  }
  if (!json?.ok) throw new Error(`sendVoice failed: ${json?.description || res.status}`);
  return json.result;
}
104
+
105
/**
 * Send an audio file (MP3, M4A, etc — shown in Telegram music player).
 *
 * The upload's content type is chosen from the file extension: `.m4a` is sent
 * as `audio/mp4`; everything else (including Buffers, which are named
 * `audio.mp3`) is sent as `audio/mpeg`.
 *
 * @param {string} token Bot token
 * @param {string|number} chatId Telegram chat_id
 * @param {string|Buffer} audio File path or Buffer of audio data
 * @param {object} [opts]
 * @param {string} [opts.caption]
 * @param {string} [opts.title]
 * @param {string} [opts.performer]
 * @returns {Promise<object>} The `result` field of Telegram's JSON reply
 * @throws {Error} When the file cannot be read or the reply is not `ok`
 */
export async function sendAudio(token, chatId, audio, { caption, title, performer } = {}) {
  const url = `${API_BASE}/bot${token}/sendAudio`;
  const form = new FormData();
  form.append("chat_id", String(chatId));
  if (caption) form.append("caption", caption);
  if (title) form.append("title", title);
  if (performer) form.append("performer", performer);

  // Async read so a large track does not stall the event loop.
  const buf = Buffer.isBuffer(audio) ? audio : await fs.promises.readFile(audio);
  const name = typeof audio === "string" ? path.basename(audio) : "audio.mp3";
  // M4A is an MPEG-4 container; labelling it audio/mpeg misdescribes the payload.
  const type = path.extname(name).toLowerCase() === ".m4a" ? "audio/mp4" : "audio/mpeg";
  form.append("audio", new Blob([buf], { type }), name);

  const res = await fetch(url, { method: "POST", body: form });
  // A proxy/gateway error page is not JSON; report the HTTP status instead of
  // leaking a SyntaxError from the parser.
  let json = null;
  try {
    json = await res.json();
  } catch {
    json = null;
  }
  if (!json?.ok) throw new Error(`sendAudio failed: ${json?.description || res.status}`);
  return json.result;
}
133
+
134
/**
 * Download a file from Telegram servers into `destDir`.
 *
 * Two requests are made: `getFile` to resolve the server-side path, then a
 * download of that path. The local name is `tg_<last 8 chars of fileId>_<ms
 * timestamp><ext>`, with `.jpg` used when the server path has no extension.
 * `destDir` is not created here; it must already exist.
 *
 * @param {string} token Bot token
 * @param {string} fileId Telegram file_id
 * @param {string} destDir Existing directory to write into
 * @returns {Promise<string>} The local file path where the file was saved
 * @throws {Error} When getFile fails, returns no file_path, or the download fails
 */
async function downloadTelegramFile(token, fileId, destDir) {
  // Step 1: get file path from Telegram. Encode the id so characters such as
  // "+", "/" or "=" cannot corrupt the query string.
  const infoRes = await fetch(
    `${API_BASE}/bot${token}/getFile?file_id=${encodeURIComponent(fileId)}`,
  );
  let infoJson = null;
  try {
    infoJson = await infoRes.json();
  } catch {
    infoJson = null; // non-JSON reply (e.g. gateway error page)
  }
  if (!infoJson?.ok) {
    throw new Error(`getFile failed: ${infoJson?.description || infoRes.status}`);
  }
  const filePath = infoJson.result?.file_path; // e.g. "photos/file_123.jpg"
  // Without this guard a missing file_path crashes inside path.extname with a
  // TypeError that hides the real cause.
  if (!filePath) throw new Error("getFile failed: response has no file_path");

  const ext = path.extname(filePath) || ".jpg";
  // Keep only filename-safe characters from the id tail so the result can
  // never contain a path separator.
  const idTail = String(fileId).slice(-8).replace(/[^A-Za-z0-9_-]/g, "_");
  const localPath = path.join(destDir, `tg_${idTail}_${Date.now()}${ext}`);

  // Step 2: download
  const dlRes = await fetch(`${API_BASE}/file/bot${token}/${filePath}`);
  if (!dlRes.ok) throw new Error(`download failed: ${dlRes.status}`);
  const buf = Buffer.from(await dlRes.arrayBuffer());
  // Async write so a large file does not stall the event loop.
  await fs.promises.writeFile(localPath, buf);
  return localPath;
}
155
+
42
156
  // ---------- shared state ----------------------------------------------------
43
157
 
44
158
  function loadState() {
@@ -237,7 +351,43 @@ class ChannelPoller {
237
351
  ? "@" + msg.from.username
238
352
  : `${msg.from?.first_name || ""} ${msg.from?.last_name || ""}`.trim() || "unknown";
239
353
  const chat_id = msg.chat?.id;
240
- const text = msg.text || "";
354
+ const text = msg.text || msg.caption || "";
355
+
356
+ // ── Incoming photo handling ───────────────────────────────────────────
357
+ if (msg.photo && msg.photo.length > 0) {
358
+ // Telegram sends multiple sizes; pick the largest
359
+ const bestPhoto = msg.photo.reduce((a, b) => (b.file_size > a.file_size ? b : a));
360
+ const token = resolveBotToken(this.channel);
361
+ const mediaDir = path.join(APX_HOME, "media");
362
+ fs.mkdirSync(mediaDir, { recursive: true });
363
+ try {
364
+ const localPath = await downloadTelegramFile(token, bestPhoto.file_id, mediaDir);
365
+ this.log(`telegram[${this.channel.name}] photo saved: ${localPath}`);
366
+ appendGlobalMessage({
367
+ channel: "telegram",
368
+ direction: "in",
369
+ type: "photo",
370
+ actor_id: msg.from?.id ? String(msg.from.id) : author,
371
+ external_id: String(u.update_id),
372
+ author,
373
+ body: text || "[photo]",
374
+ meta: {
375
+ chat_id,
376
+ user_id: msg.from?.id || null,
377
+ message_id: msg.message_id,
378
+ tg_channel: this.channel.name,
379
+ local_path: localPath,
380
+ file_id: bestPhoto.file_id,
381
+ width: bestPhoto.width,
382
+ height: bestPhoto.height,
383
+ },
384
+ });
385
+ } catch (e) {
386
+ this.log(`telegram[${this.channel.name}] photo download failed: ${e.message}`);
387
+ }
388
+ // If there's a caption, continue to handle it as text; otherwise return
389
+ if (!text) return;
390
+ }
241
391
 
242
392
  // /reset or /new wipes the rolling context for this chat. We just
243
393
  // remember a marker timestamp; subsequent inbounds will only consider
@@ -488,6 +638,31 @@ class ChannelPoller {
488
638
  if (!json.ok) throw new Error(json.description || `send failed (${res.status})`);
489
639
  return json.result;
490
640
  }
641
+
642
+ /** Send a photo via this channel */
643
+ async _sendPhoto({ chat_id, photo, caption, parse_mode }) {
644
+ const token = resolveBotToken(this.channel);
645
+ if (!token) throw new Error(`channel ${this.channel.name}: no bot_token`);
646
+ const target = chat_id || resolveChatId(this.channel);
647
+ if (!target) throw new Error(`channel ${this.channel.name}: no chat_id`);
648
+ return sendPhoto(token, target, photo, { caption, parse_mode });
649
+ }
650
+
651
+ /** Send a voice message via this channel */
652
+ async _sendVoice({ chat_id, audio, caption, duration }) {
653
+ const token = resolveBotToken(this.channel);
654
+ if (!token) throw new Error(`channel ${this.channel.name}: no bot_token`);
655
+ const target = chat_id || resolveChatId(this.channel);
656
+ return sendVoice(token, target, audio, { caption, duration });
657
+ }
658
+
659
+ /** Send an audio file via this channel */
660
+ async _sendAudio({ chat_id, audio, caption, title, performer }) {
661
+ const token = resolveBotToken(this.channel);
662
+ if (!token) throw new Error(`channel ${this.channel.name}: no bot_token`);
663
+ const target = chat_id || resolveChatId(this.channel);
664
+ return sendAudio(token, target, audio, { caption, title, performer });
665
+ }
491
666
  }
492
667
 
493
668
  function sleep(ms) {
@@ -557,6 +732,77 @@ export default {
557
732
  });
558
733
  return result;
559
734
  },
735
+
736
+ /**
737
+ * Send a photo to a Telegram chat.
738
+ * photo: local file path, Buffer, or public URL
739
+ * opts: { caption, parse_mode, channel, author }
740
+ */
741
+ async sendPhoto({ channel: channelName, chat_id, photo, caption, parse_mode, author = "apx" }) {
742
+ const p =
743
+ (channelName && pollers.find((pp) => pp.channel.name === channelName)) ||
744
+ pollers.find((pp) => resolveBotToken(pp.channel)) ||
745
+ null;
746
+ if (!p) throw new Error("no telegram channel available");
747
+ const result = await p._sendPhoto({ chat_id, photo, caption, parse_mode });
748
+ appendGlobalMessage({
749
+ channel: "telegram",
750
+ direction: "out",
751
+ type: "photo",
752
+ actor_id: author,
753
+ author,
754
+ body: caption || "[photo]",
755
+ meta: { chat_id: chat_id || resolveChatId(p.channel), tg_channel: p.channel.name },
756
+ });
757
+ return result;
758
+ },
759
+
760
+ /**
761
+ * Send a voice message (OGG/Opus preferred).
762
+ * audio: local file path or Buffer
763
+ */
764
+ async sendVoice({ channel: channelName, chat_id, audio, caption, duration, author = "apx" }) {
765
+ const p =
766
+ (channelName && pollers.find((pp) => pp.channel.name === channelName)) ||
767
+ pollers.find((pp) => resolveBotToken(pp.channel)) ||
768
+ null;
769
+ if (!p) throw new Error("no telegram channel available");
770
+ const result = await p._sendVoice({ chat_id, audio, caption, duration });
771
+ appendGlobalMessage({
772
+ channel: "telegram",
773
+ direction: "out",
774
+ type: "voice",
775
+ actor_id: author,
776
+ author,
777
+ body: caption || "[voice]",
778
+ meta: { chat_id: chat_id || resolveChatId(p.channel), tg_channel: p.channel.name },
779
+ });
780
+ return result;
781
+ },
782
+
783
+ /**
784
+ * Send an audio file (MP3/M4A — shown in music player).
785
+ * audio: local file path or Buffer
786
+ */
787
+ async sendAudio({ channel: channelName, chat_id, audio, caption, title, performer, author = "apx" }) {
788
+ const p =
789
+ (channelName && pollers.find((pp) => pp.channel.name === channelName)) ||
790
+ pollers.find((pp) => resolveBotToken(pp.channel)) ||
791
+ null;
792
+ if (!p) throw new Error("no telegram channel available");
793
+ const result = await p._sendAudio({ chat_id, audio, caption, title, performer });
794
+ appendGlobalMessage({
795
+ channel: "telegram",
796
+ direction: "out",
797
+ type: "audio",
798
+ actor_id: author,
799
+ author,
800
+ body: caption || title || "[audio]",
801
+ meta: { chat_id: chat_id || resolveChatId(p.channel), tg_channel: p.channel.name },
802
+ });
803
+ return result;
804
+ },
805
+
560
806
  pollers,
561
807
  };
562
808
  },
@@ -1,5 +1,5 @@
1
1
  // OpenAI Codex CLI runtime adapter.
2
- // codex exec "<prompt>"
2
+ // codex exec --sandbox workspace-write --skip-git-repo-check "<prompt>"
3
3
  // System prompt is prepended to the prompt body since Codex doesn't have a
4
4
  // dedicated --system flag in `exec` mode.
5
5
  // Reference: https://github.com/openai/codex
@@ -15,7 +15,7 @@ export default {
15
15
  const fullPrompt = system ? `${system}\n\n---\n\n${prompt}` : prompt;
16
16
  const r = await runProcess({
17
17
  command: "codex",
18
- args: ["exec", fullPrompt],
18
+ args: ["exec", "--sandbox", "workspace-write", "--skip-git-repo-check", fullPrompt],
19
19
  cwd,
20
20
  env,
21
21
  timeoutMs,
@@ -60,7 +60,8 @@ HARD RULES (do not deviate):
60
60
  14. VAULT RULE: When the user wants a new existing agent/template, call list_vault_agents first. If a suitable vault agent exists, import_agent into the chosen project. If none fits, say briefly what is missing.
61
61
  15. NO-PENDING RULE: never say "give me a second", "I will do it", or "I will try later" as a final answer. Either call the tool in this same turn or say what blocks you.
62
62
  16. IDENTITY RULE: when the user asks you to change your name, call yourself something, or update your personality/language, call set_identity and persist the change. Then confirm with your new name.
63
- 17. ROUTINES RULE: NEVER create a routine in the default project (id=0). Routines MUST be tied to a specific registered project. Before adding a routine, call list_projects to find the correct project id or name. Then pass --project <id|name> to apx routine add. If no project fits, ask the user which project to use. Creating routines in project 0/default mixes unrelated projects' schedules and corrupts state.`;
63
+ 17. ROUTINES RULE: NEVER create a routine in the default project (id=0). Routines MUST be tied to a specific registered project. Before adding a routine, call list_projects to find the correct project id or name. Then pass --project <id|name> to apx routine add. If no project fits, ask the user which project to use. Creating routines in project 0/default mixes unrelated projects' schedules and corrupts state.
64
+ 18. **NO EMPTY RESPONSES**: Never respond with only text when you have tools available and the user is asking you to DO something. Call the tool FIRST, then explain. Never say "I'll do X" without immediately calling the tool. Empty acknowledgments ("ok", "entendido", "dame un minuto", "voy", "checking", "stand by") without a tool call are invalid responses — they will be re-prompted and waste a turn.`;
64
65
 
65
66
  function isShortConfirmation(text) {
66
67
  return /^(yes|y|si|si dale|dale|ok|okay|confirm|confirmed|go|proceed|do it)\b/i
@@ -75,6 +76,26 @@ function lastAssistantAskedForConfirmation(messages) {
75
76
  return false;
76
77
  }
77
78
 
79
/**
 * Returns true if the model response looks like a pure acknowledgment
 * with no actual content — the classic "ghost response" anti-pattern.
 *
 * This is a heuristic: the text (at most 200 characters after trimming) must
 * BEGIN with one of the known acknowledgment / stalling phrases, matched as a
 * whole phrase. The phrase must not be followed by a letter, digit,
 * underscore or apostrophe, so "ok" does not match "okra", "sure" does not
 * match "surely", and "I can" does not match "I cannot" / "I can't".
 *
 * @param {*} text Model output; non-strings are coerced, falsy values become ""
 * @returns {boolean}
 */
function isGhostResponse(text) {
  const t = String(text || "").trim();
  if (t.length > 200) return false; // long responses are probably real
  // The trailing negative lookahead is a Unicode-aware word boundary. Spelled-out
  // variants ("okey", "un segundo", "ahora los") are listed explicitly because
  // they are no longer caught by accident through bare prefix matching.
  return /^(?:ok|okay|okey|got it|understood|sure|of course|on it|dale|entendido|claro|voy|ya lo hago|dame un (?:segundo|momento)|un momento|let me|i (?:will|can|shall)|i'm (?:going|about)|give me a|ahora los?|enseguida|checking|looking|fetching|working on|stand by|please wait|un seg(?:undo)?|dame sec)(?![\p{L}\p{N}_'’])/iu
    .test(t);
}
89
+
90
/**
 * Returns true if the user's prompt looks like an instruction to act
 * rather than just a question or statement.
 *
 * The prompt is lowercased and searched for any of a fixed list of English
 * and Spanish action verbs appearing as whole words.
 *
 * @param {*} text User prompt; non-strings are coerced, falsy values become ""
 * @returns {boolean}
 */
function looksLikeActionRequest(text) {
  // NFC so "a" + combining acute compares equal to the precomposed "á" used below.
  const t = String(text || "").normalize("NFC").trim().toLowerCase();
  // Unicode-aware word boundaries. ASCII `\b` treats "á"/"é" as non-word
  // characters, so `mandá\b` can never match when the verb is followed by a
  // space, punctuation or end of input.
  return /(?<![\p{L}\p{N}_])(?:list|show|find|get|fetch|search|run|execute|create|add|make|start|stop|delete|update|send|check|read|write|look|tell me|dame|mostra|busca|ejecuta|crea|agrega|mandá|revisá|corré|borrá|arrancá)(?![\p{L}\p{N}_])/u.test(t);
}
98
+
78
99
/**
 * Report whether the super agent is switched on in the given config.
 *
 * True only when `cfg.super_agent` exists and both its `enabled` and `model`
 * fields are truthy.
 *
 * @param {object} [cfg] Configuration object
 * @returns {boolean}
 */
export function isSuperAgentEnabled(cfg) {
  const settings = cfg?.super_agent;
  if (!settings) return false;
  return Boolean(settings.enabled && settings.model);
}
@@ -144,12 +165,18 @@ export async function runSuperAgent({
144
165
 
145
166
  for (let iter = 0; iter < MAX_TOOL_ITERS; iter++) {
146
167
  await emitProgress(onEvent, { type: "model_start", iteration: iter + 1 });
168
+ // On the first iteration, force a tool call. This prevents the model from
169
+ // returning a bare acknowledgment ("ok", "dame un segundo") instead of
170
+ // acting on an action request. On later iterations (after tool results
171
+ // have been fed back) tool_choice is "auto" so the model can produce its
172
+ // final text summary.
147
173
  const result = await callEngine({
148
174
  modelId: activeModel,
149
175
  system,
150
176
  messages: conversation,
151
177
  config: globalConfig,
152
178
  tools: TOOL_SCHEMAS,
179
+ toolChoice: iter === 0 ? "required" : "auto",
153
180
  maxTokens: 1024,
154
181
  });
155
182
  totalUsage.input_tokens += result.usage?.input_tokens || 0;
@@ -172,6 +199,20 @@ export async function runSuperAgent({
172
199
  }
173
200
 
174
201
  if (!toolCalls || toolCalls.length === 0) {
202
+ // Ghost-response detection: if the model returned a pure acknowledgment
203
+ // (no tool calls, no real content) on the FIRST iteration in response to
204
+ // what looks like an action request, inject a re-prompt.
205
+ if (iter === 0 && isGhostResponse(lastText) && looksLikeActionRequest(prompt)) {
206
+ await emitProgress(onEvent, { type: "ghost_response_detected", text: lastText });
207
+ conversation.push({ role: "assistant", content: lastText });
208
+ conversation.push({
209
+ role: "user",
210
+ content:
211
+ "Remember: you must execute the action, not just confirm it. " +
212
+ "Call the tool now — action first, report after.",
213
+ });
214
+ continue; // give the model one more chance
215
+ }
175
216
  // Final answer — clean up any stray fence markers just in case
176
217
  lastText = cleanTextOfPseudoToolCalls(lastText) || lastText;
177
218
  break;