@agentprojectcontext/apx 1.15.6 → 1.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (222):
  1. package/package.json +46 -5
  2. package/src/cli/commands/log.js +113 -0
  3. package/src/cli/commands/overlay.js +253 -0
  4. package/src/cli/commands/sys.js +88 -16
  5. package/src/cli/index.js +23 -1
  6. package/src/cli/terminal-chat/renderer.js +71 -56
  7. package/src/cli-ts/commands/agent.ts +173 -0
  8. package/src/cli-ts/commands/chat.ts +119 -0
  9. package/src/cli-ts/commands/daemon.ts +112 -0
  10. package/src/cli-ts/commands/exec.ts +109 -0
  11. package/src/cli-ts/commands/mcp.ts +235 -0
  12. package/src/cli-ts/commands/session.ts +224 -0
  13. package/src/cli-ts/commands/status.ts +61 -0
  14. package/src/cli-ts/http.ts +36 -0
  15. package/src/cli-ts/index.ts +73 -0
  16. package/src/cli-ts/ui.ts +107 -0
  17. package/src/core/logging.js +81 -0
  18. package/src/daemon/api.js +58 -0
  19. package/src/daemon/engines/anthropic.js +60 -1
  20. package/src/daemon/engines/index.js +2 -1
  21. package/src/daemon/engines/ollama.js +70 -3
  22. package/src/daemon/index.js +58 -0
  23. package/src/daemon/overlay-ws.js +40 -0
  24. package/src/daemon/plugins/index.js +2 -1
  25. package/src/daemon/plugins/overlay.js +177 -0
  26. package/src/daemon/plugins/telegram.js +15 -3
  27. package/src/daemon/super-agent-langchain.js +296 -0
  28. package/src/daemon/super-agent.js +115 -19
  29. package/src/daemon/transcription.js +262 -59
  30. package/src/daemon/whisper-server.py +57 -6
  31. package/src/overlay/index.html +44 -0
  32. package/src/overlay/main.js +480 -0
  33. package/src/overlay/package.json +3 -0
  34. package/src/overlay/preload.js +34 -0
  35. package/src/overlay/renderer.js +371 -0
  36. package/src/overlay/style.css +250 -0
  37. package/src/tui/_shims/cli-error.ts +6 -0
  38. package/src/tui/_shims/cli-logo.ts +18 -0
  39. package/src/tui/_shims/cli-ui.ts +1 -0
  40. package/src/tui/_shims/config-console-state.ts +7 -0
  41. package/src/tui/_shims/core-any.ts +30 -0
  42. package/src/tui/_shims/core-binary.ts +13 -0
  43. package/src/tui/_shims/core-flag.ts +3 -0
  44. package/src/tui/_shims/core-log.ts +14 -0
  45. package/src/tui/_shims/lsp-language.ts +1 -0
  46. package/src/tui/_shims/opencode-any.ts +135 -0
  47. package/src/tui/_shims/opencode-sdk-v2.ts +48 -0
  48. package/src/tui/_shims/plugin-tui.ts +13 -0
  49. package/src/tui/_shims/provider-provider.ts +10 -0
  50. package/src/tui/_shims/session-retry.ts +1 -0
  51. package/src/tui/_shims/session-schema.ts +15 -0
  52. package/src/tui/_shims/session-session.ts +3 -0
  53. package/src/tui/_shims/snapshot.ts +4 -0
  54. package/src/tui/_shims/tool-any.ts +18 -0
  55. package/src/tui/_shims/util-error.ts +7 -0
  56. package/src/tui/_shims/util-filesystem.ts +79 -0
  57. package/src/tui/_shims/util-format.ts +7 -0
  58. package/src/tui/_shims/util-iife.ts +3 -0
  59. package/src/tui/_shims/util-locale.ts +10 -0
  60. package/src/tui/_shims/util-process.ts +38 -0
  61. package/src/tui/app.tsx +783 -0
  62. package/src/tui/asset/charge.wav +0 -0
  63. package/src/tui/asset/pulse-a.wav +0 -0
  64. package/src/tui/asset/pulse-b.wav +0 -0
  65. package/src/tui/asset/pulse-c.wav +0 -0
  66. package/src/tui/attach.ts +100 -0
  67. package/src/tui/component/bg-pulse-render.ts +436 -0
  68. package/src/tui/component/bg-pulse.tsx +99 -0
  69. package/src/tui/component/border.tsx +21 -0
  70. package/src/tui/component/dialog-agent.tsx +31 -0
  71. package/src/tui/component/dialog-console-org.tsx +103 -0
  72. package/src/tui/component/dialog-mcp.tsx +85 -0
  73. package/src/tui/component/dialog-model.tsx +175 -0
  74. package/src/tui/component/dialog-provider.tsx +456 -0
  75. package/src/tui/component/dialog-retry-action.tsx +160 -0
  76. package/src/tui/component/dialog-session-delete-failed.tsx +99 -0
  77. package/src/tui/component/dialog-session-list.tsx +323 -0
  78. package/src/tui/component/dialog-session-rename.tsx +31 -0
  79. package/src/tui/component/dialog-skill.tsx +36 -0
  80. package/src/tui/component/dialog-stash.tsx +87 -0
  81. package/src/tui/component/dialog-status.tsx +168 -0
  82. package/src/tui/component/dialog-tag.tsx +44 -0
  83. package/src/tui/component/dialog-theme-list.tsx +50 -0
  84. package/src/tui/component/dialog-variant.tsx +39 -0
  85. package/src/tui/component/dialog-workspace-create.tsx +302 -0
  86. package/src/tui/component/dialog-workspace-file-changes.tsx +138 -0
  87. package/src/tui/component/dialog-workspace-unavailable.tsx +69 -0
  88. package/src/tui/component/error-component.tsx +92 -0
  89. package/src/tui/component/logo.tsx +896 -0
  90. package/src/tui/component/plugin-route-missing.tsx +14 -0
  91. package/src/tui/component/prompt/autocomplete.tsx +869 -0
  92. package/src/tui/component/prompt/cwd.ts +0 -0
  93. package/src/tui/component/prompt/frecency.tsx +90 -0
  94. package/src/tui/component/prompt/history.tsx +108 -0
  95. package/src/tui/component/prompt/index.tsx +1809 -0
  96. package/src/tui/component/prompt/part.ts +16 -0
  97. package/src/tui/component/prompt/stash.tsx +101 -0
  98. package/src/tui/component/prompt/traits.ts +35 -0
  99. package/src/tui/component/spinner.tsx +24 -0
  100. package/src/tui/component/startup-loading.tsx +63 -0
  101. package/src/tui/component/todo-item.tsx +32 -0
  102. package/src/tui/component/use-connected.tsx +9 -0
  103. package/src/tui/component/workspace-label.tsx +19 -0
  104. package/src/tui/config/cwd.ts +5 -0
  105. package/src/tui/config/keybind.ts +432 -0
  106. package/src/tui/config/tui-migrate.ts +154 -0
  107. package/src/tui/config/tui-schema.ts +34 -0
  108. package/src/tui/config/tui.ts +46 -0
  109. package/src/tui/context/aggregate-failures.ts +34 -0
  110. package/src/tui/context/args.tsx +15 -0
  111. package/src/tui/context/command-palette.tsx +163 -0
  112. package/src/tui/context/directory.ts +15 -0
  113. package/src/tui/context/editor-zed.ts +283 -0
  114. package/src/tui/context/editor.ts +468 -0
  115. package/src/tui/context/event-apx.ts +22 -0
  116. package/src/tui/context/event.ts +6 -0
  117. package/src/tui/context/exit.tsx +60 -0
  118. package/src/tui/context/helper.tsx +25 -0
  119. package/src/tui/context/kv.tsx +81 -0
  120. package/src/tui/context/local.tsx +608 -0
  121. package/src/tui/context/path-format.tsx +39 -0
  122. package/src/tui/context/project-apx.tsx +48 -0
  123. package/src/tui/context/project.tsx +7 -0
  124. package/src/tui/context/prompt.tsx +18 -0
  125. package/src/tui/context/route.tsx +52 -0
  126. package/src/tui/context/sdk-apx.tsx +185 -0
  127. package/src/tui/context/sdk.tsx +6 -0
  128. package/src/tui/context/sync-apx.tsx +178 -0
  129. package/src/tui/context/sync-v2.tsx +16 -0
  130. package/src/tui/context/sync.tsx +118 -0
  131. package/src/tui/context/theme/aura.json +69 -0
  132. package/src/tui/context/theme/ayu.json +80 -0
  133. package/src/tui/context/theme/carbonfox.json +248 -0
  134. package/src/tui/context/theme/catppuccin-frappe.json +230 -0
  135. package/src/tui/context/theme/catppuccin-macchiato.json +230 -0
  136. package/src/tui/context/theme/catppuccin.json +112 -0
  137. package/src/tui/context/theme/cobalt2.json +225 -0
  138. package/src/tui/context/theme/cursor.json +249 -0
  139. package/src/tui/context/theme/dracula.json +219 -0
  140. package/src/tui/context/theme/everforest.json +241 -0
  141. package/src/tui/context/theme/flexoki.json +237 -0
  142. package/src/tui/context/theme/github.json +233 -0
  143. package/src/tui/context/theme/gruvbox.json +242 -0
  144. package/src/tui/context/theme/kanagawa.json +77 -0
  145. package/src/tui/context/theme/lucent-orng.json +234 -0
  146. package/src/tui/context/theme/material.json +235 -0
  147. package/src/tui/context/theme/matrix.json +77 -0
  148. package/src/tui/context/theme/mercury.json +252 -0
  149. package/src/tui/context/theme/monokai.json +221 -0
  150. package/src/tui/context/theme/nightowl.json +221 -0
  151. package/src/tui/context/theme/nord.json +223 -0
  152. package/src/tui/context/theme/one-dark.json +84 -0
  153. package/src/tui/context/theme/opencode.json +245 -0
  154. package/src/tui/context/theme/orng.json +249 -0
  155. package/src/tui/context/theme/osaka-jade.json +93 -0
  156. package/src/tui/context/theme/palenight.json +222 -0
  157. package/src/tui/context/theme/rosepine.json +234 -0
  158. package/src/tui/context/theme/solarized.json +223 -0
  159. package/src/tui/context/theme/synthwave84.json +226 -0
  160. package/src/tui/context/theme/tokyonight.json +243 -0
  161. package/src/tui/context/theme/vercel.json +245 -0
  162. package/src/tui/context/theme/vesper.json +218 -0
  163. package/src/tui/context/theme/zenburn.json +223 -0
  164. package/src/tui/context/theme.tsx +1247 -0
  165. package/src/tui/context/tui-config.tsx +9 -0
  166. package/src/tui/event.ts +16 -0
  167. package/src/tui/feature-plugins/home/footer.tsx +94 -0
  168. package/src/tui/feature-plugins/home/tips-view.tsx +166 -0
  169. package/src/tui/feature-plugins/home/tips.tsx +59 -0
  170. package/src/tui/feature-plugins/sidebar/context.tsx +65 -0
  171. package/src/tui/feature-plugins/sidebar/files.tsx +63 -0
  172. package/src/tui/feature-plugins/sidebar/footer.tsx +94 -0
  173. package/src/tui/feature-plugins/sidebar/lsp.tsx +65 -0
  174. package/src/tui/feature-plugins/sidebar/mcp.tsx +97 -0
  175. package/src/tui/feature-plugins/sidebar/todo.tsx +49 -0
  176. package/src/tui/feature-plugins/system/plugins.tsx +269 -0
  177. package/src/tui/feature-plugins/system/session-v2.tsx +1143 -0
  178. package/src/tui/feature-plugins/system/which-key.tsx +608 -0
  179. package/src/tui/keymap.tsx +166 -0
  180. package/src/tui/layer.ts +6 -0
  181. package/src/tui/plugin/api.tsx +381 -0
  182. package/src/tui/plugin/command-shim.ts +109 -0
  183. package/src/tui/plugin/internal.ts +33 -0
  184. package/src/tui/plugin/runtime.ts +1069 -0
  185. package/src/tui/plugin/slots.tsx +60 -0
  186. package/src/tui/routes/home.tsx +96 -0
  187. package/src/tui/routes/session/dialog-fork-from-timeline.tsx +76 -0
  188. package/src/tui/routes/session/dialog-message.tsx +108 -0
  189. package/src/tui/routes/session/dialog-subagent.tsx +26 -0
  190. package/src/tui/routes/session/dialog-timeline.tsx +47 -0
  191. package/src/tui/routes/session/footer.tsx +91 -0
  192. package/src/tui/routes/session/index.tsx +188 -0
  193. package/src/tui/routes/session/permission.tsx +722 -0
  194. package/src/tui/routes/session/question.tsx +490 -0
  195. package/src/tui/routes/session/sidebar.tsx +102 -0
  196. package/src/tui/routes/session/subagent-footer.tsx +133 -0
  197. package/src/tui/run.ts +84 -0
  198. package/src/tui/thread.ts +261 -0
  199. package/src/tui/tsconfig.json +40 -0
  200. package/src/tui/ui/dialog-alert.tsx +66 -0
  201. package/src/tui/ui/dialog-confirm.tsx +108 -0
  202. package/src/tui/ui/dialog-export-options.tsx +217 -0
  203. package/src/tui/ui/dialog-help.tsx +40 -0
  204. package/src/tui/ui/dialog-prompt.tsx +101 -0
  205. package/src/tui/ui/dialog-select.tsx +553 -0
  206. package/src/tui/ui/dialog.tsx +211 -0
  207. package/src/tui/ui/link.tsx +34 -0
  208. package/src/tui/ui/spinner.ts +368 -0
  209. package/src/tui/ui/toast.tsx +111 -0
  210. package/src/tui/util/clipboard.ts +217 -0
  211. package/src/tui/util/editor.ts +37 -0
  212. package/src/tui/util/model.ts +23 -0
  213. package/src/tui/util/provider-origin.ts +7 -0
  214. package/src/tui/util/revert-diff.ts +18 -0
  215. package/src/tui/util/scroll.ts +25 -0
  216. package/src/tui/util/selection.ts +65 -0
  217. package/src/tui/util/signal.ts +41 -0
  218. package/src/tui/util/sound.ts +156 -0
  219. package/src/tui/util/transcript.ts +112 -0
  220. package/src/tui/validate-session.ts +29 -0
  221. package/src/tui/win32.ts +130 -0
  222. package/src/tui/worker.ts +104 -0
@@ -22,7 +22,43 @@ import { readIdentity } from "../core/identity.js";
22
22
 
23
23
  const MAX_TOOL_ITERS = 6;
24
24
 
25
- const DEFAULT_SYSTEM = `You are the **APX dispatcher** the daemon-level agent that runs above all APC projects.
25
// Acknowledgement-style tools. An iteration whose tool calls are ALL in this
// set (e.g. a Telegram "ya te escucho 🎧" ping back to the user) is not
// treated as real work: the tool loop keeps forcing a tool call on the next
// iteration so the actual task still gets executed instead of the model
// stopping right after the ack with an empty or "ok" reply.
const ACK_ONLY_TOOLS = new Set(["send_telegram"]);

// Upper bound on consecutive ack-only iterations that keep the tool call
// forced. Past this many, the loop stops forcing and lets the model finish,
// so it cannot ack-ack-ack forever.
const MAX_CONSECUTIVE_ACKS = 2;
39
+
40
+ export const DEFAULT_SYSTEM = `# Identity (override everything else)
41
+ You are **APX** — Manuel's personal assistant running on his Mac.
42
+ You are NOT a code analyzer, NOT a generic chatbot, NOT a tutor.
43
+ You are an **action agent**: you USE TOOLS to do real things on Manuel's system.
44
+
45
+ # Language — non-negotiable
46
+ ALWAYS reply in **Spanish (rioplatense, voseo when natural)** unless Manuel
47
+ explicitly writes to you in another language for that turn. The user is an
48
+ Argentinian developer; English replies feel broken to him. If you find
49
+ yourself writing English, stop and rewrite in Spanish before sending.
50
+ This rule beats every other formatting hint below.
51
+
52
+ # What you must NOT do
53
+ - Do NOT explain code or write essays about "the provided snippet".
54
+ - Do NOT describe what a tool *would* do — call it and report the result.
55
+ - Do NOT dump the tool catalog at the user.
56
+ - Do NOT respond with disclaimers ("as an AI…", "I'm just an assistant…").
57
+ - If a user message is short or ambiguous, ASK one short clarifying question
58
+ in Spanish — do not invent a topic.
59
+
60
+ # How you operate
61
+ You are the **APX dispatcher** — the daemon-level agent that runs above all APC projects.
26
62
 
27
63
  APX is a local daemon + CLI for APC projects. User-level runtime state lives under ~/.apx/:
28
64
  - ~/.apx/config.json: daemon config, engines, Telegram, super-agent settings
@@ -51,7 +87,7 @@ HARD RULES (do not deviate):
51
87
  3. NEVER answer "specify a project" — instead, just call the tool with no argument and you'll get the full picture.
52
88
  4. If a tool result has an error, retry with different arguments before falling back to asking the user.
53
89
  5. Respect permission mode. total = execute requested actions without confirmation. automatico = read/list/safe shell actions run directly; destructive, external, runtime, MCP calls, outbound messages, config, and filesystem mutations need explicit user confirmation. permiso = only allowed tools run directly; everything else needs confirmation.
54
- 6. Write in the user's language unless they request another language. The system prompt stays English. Plain text, no markdown formatting for Telegram.
90
+ 6. Write in **Spanish** by default (see "Language" section above). Plain text on Telegram no markdown tables, no code fences unless quoting code. Keep replies under 6 sentences unless the user asks for detail.
55
91
  7. Stay brief: under 6 sentences unless asked for detail.
56
92
  8. You DO see recent prior turns of this chat as previous messages when applicable. **Use them ONLY to disambiguate references** (e.g. "el primero" → first project mentioned earlier). For ANY factual data — agent details, MCP details, file contents, memory — RE-CALL the tool. Past turns are context, not a cache. Models change, agents change, files change.
57
93
  9. /reset or /new from the user means "forget previous turns and answer this one fresh" — if you see those prefixes the operator already cleared the context for you.
@@ -63,7 +99,7 @@ HARD RULES (do not deviate):
63
99
  15. NO-PENDING RULE: never say "give me a second", "I will do it", or "I will try later" as a final answer. Either call the tool in this same turn or say what blocks you.
64
100
  16. IDENTITY RULE: when the user asks you to change your name, call yourself something, or update your personality/language, call set_identity and persist the change. Then confirm with your new name.
65
101
  17. ROUTINES RULE: NEVER create a routine in the default project (id=0). Routines MUST be tied to a specific registered project. Before adding a routine, call list_projects to find the correct project id or name. Then pass --project <id|name> to apx routine add. If no project fits, ask the user which project to use. Creating routines in project 0/default mixes unrelated projects' schedules and corrupts state.
66
- 18. **NO EMPTY RESPONSES**: Never respond with only text when you have tools available and the user is asking you to DO something. Call the tool FIRST, then explain. Never say "I'll do X" without immediately calling the tool. Empty acknowledgments ("ok", "entendido", "dame un minuto", "voy", "checking", "stand by") without a tool call are invalid responses they will be re-prompted and waste a turn.
102
+ 18. **NO BARE ACKS AS FINAL ANSWER**: Empty acknowledgments ("ok", "entendido", "dame un minuto", "voy", "checking") are invalid as a FINAL response when a tool was needed they will be re-prompted. EXCEPTION: a short contextual ack sent via send_telegram BEFORE another tool call is encouraged on Telegram audio inputs and on tool calls that take more than a few seconds (browser_screenshot, web_search, run_shell, long file edits). The ack must be **contextual and varied** in Spanish — e.g. "Ya te escucho 🎧", "Dame un seg, transcribiendo…", "Buscando eso ahora", "Voy a revisar el repo…", "Un momento, ejecutando…". Never reuse the exact same ack twice in a row. The ack is the FIRST tool call in the turn; the actual work follows immediately in the SAME turn (do not return without doing the work).
67
103
  19. **CWD RULE**: When the channel context includes a "CWD: <path>" line, that is the user's current working directory. References to "este directorio", "este proyecto", "esta carpeta", "acá", "aquí", "this directory", "this project", "current dir/folder" all mean that exact CWD path. Use it as the path argument directly — DO NOT ask the user "what's the path?" when CWD is already given. Example: if user says "agregá este proyecto a la lista", call add_project({path: <CWD>}) immediately.
68
104
  20. **NO MANUAL SCAFFOLDING**: To register or scaffold a project, ALWAYS use add_project — it auto-creates AGENTS.md and .apc/project.json when missing (one call, atomic). NEVER write AGENTS.md, .apc/project.json, or any APC scaffold file by hand via run_shell / write_file / shell pipes. The schema must come from the official initApf scaffold, not improvised. If add_project errors, report the error to the user — don't try to work around it with shell hacks. Same for any other APC-managed file (.apc/agents/*, .apc/skills/*, etc.) — use the dedicated tool, never raw filesystem writes.
69
105
  21. **SKILLS — ON DEMAND**: The "# Available skills" section below lists every skill available to you (slug + description, NO body). When the user asks about specific APX/APC commands, project structure, agent runtimes, or anything where exact syntax or detailed behavior matches a skill description (in ANY language — match semantically, not by keyword), call load_skill({slug}) to fetch the full markdown body. If a CWD is in the contextNote, pass it as project_path so project-scoped skills resolve. If the user explicitly asks "what skills do you have?", you can either read the catalog below directly OR call list_skills to get a fresh enumeration. Do NOT load skills for trivial / unrelated questions — that wastes tokens. Don't guess CLI syntax when a skill can tell you; load it.
@@ -143,8 +179,32 @@ function looksLikeActionRequest(text) {
143
179
  return /\b(list|show|find|get|fetch|search|run|execute|create|add|make|start|stop|delete|update|send|check|read|write|look|tell me|dame|mostra|busca|ejecuta|crea|agrega|mandá|revisá|corré|borrá|arrancá)\b/.test(t);
144
180
  }
145
181
 
182
/**
 * Build the identity block injected into every super-agent system prompt.
 * Pure function — exported for unit tests.
 *
 * Each identity field is emitted only when it is truthy; the language
 * instruction is always the last line.
 *
 * @param {object|null} identity  result of readIdentity(), or a plain object for tests
 * @param {string} userLang       ISO 639-1 code from config.user.language (default "en").
 *                                null, non-string, empty or whitespace-only values
 *                                also fall back to "en".
 * @returns {string} newline-joined block starting with "# Identity"
 */
export function buildIdentityBlock(identity, userLang = "en") {
  // A default parameter only applies to `undefined`. Without this guard a
  // `null` or blank language would be interpolated verbatim and yield an
  // instruction like: reply in the language with ISO code "null".
  const lang =
    typeof userLang === "string" && userLang.trim() !== "" ? userLang.trim() : "en";

  const lines = ["# Identity"];
  if (identity?.agent_name) lines.push(`Your name is ${identity.agent_name}.`);
  if (identity?.personality) lines.push(`Your personality: ${identity.personality}.`);
  if (identity?.owner_name) lines.push(`Your owner is ${identity.owner_name}.`);
  if (identity?.owner_context) lines.push(`Owner context: ${identity.owner_context}`);
  lines.push(`Always reply in the language with ISO code "${lang}" unless the user explicitly switches.`);
  return lines.join("\n");
}
198
+
146
199
/**
 * Report whether the super-agent reply path is active for a config object.
 *
 * A configured model is sufficient. The legacy `super_agent.enabled` flag
 * only matters when it is explicitly `false`, so leaving it unset does not
 * disable the super-agent.
 *
 * @param {object|null|undefined} cfg  global config object
 * @returns {boolean} true when a model is set and `enabled` is not exactly `false`
 */
export function isSuperAgentEnabled(cfg) {
  const superAgent = cfg?.super_agent;
  const hasModel = Boolean(superAgent?.model);
  // Opt-out semantics: only a literal `false` turns the agent off.
  return hasModel && superAgent.enabled !== false;
}
149
209
 
150
210
  export async function runSuperAgent({
@@ -158,6 +218,7 @@ export async function runSuperAgent({
158
218
  overrideModel = null,
159
219
  onEvent = null,
160
220
  signal,
221
+ onToken = null,
161
222
  }) {
162
223
  if (!isSuperAgentEnabled(globalConfig)) {
163
224
  throw new Error("super-agent not enabled (set super_agent.enabled and .model in ~/.apx/config.json)");
@@ -165,6 +226,19 @@ export async function runSuperAgent({
165
226
  const sa = globalConfig.super_agent;
166
227
  const activeModel = overrideModel || sa.model;
167
228
 
229
+ // Engine toggle: if config.super_agent.engine === "langchain", delegate to
230
+ // the LangChain AgentExecutor adapter. Default stays "native" (this loop).
231
+ // The toggle exists so we can A/B the two paths on the user's actual chat
232
+ // without committing to a full migration. See super-agent-langchain.js.
233
+ if (sa.engine === "langchain") {
234
+ const { runSuperAgentLangChain } = await import("./super-agent-langchain.js");
235
+ return runSuperAgentLangChain({
236
+ globalConfig, projects, plugins, registries,
237
+ prompt, previousMessages, contextNote,
238
+ onEvent, onToken, signal,
239
+ });
240
+ }
241
+
168
242
  // Tiny project hint — JUST names + ids, no detail. The model is expected to
169
243
  // call list_agents / list_mcps / read_agent_memory / etc. for everything
170
244
  // else. Keeping this short forces actual tool use instead of letting the
@@ -206,15 +280,7 @@ export async function runSuperAgent({
206
280
  // Language comes from config.user.language (ISO 639-1) so it stays in sync with transcription.
207
281
  const identity = (() => { try { return readIdentity(); } catch { return null; } })();
208
282
  const userLang = globalConfig?.user?.language || "en";
209
- const identityBlock = (() => {
210
- const lines = ["# Identity"];
211
- if (identity?.agent_name) lines.push(`Your name is ${identity.agent_name}.`);
212
- if (identity?.personality) lines.push(`Your personality: ${identity.personality}.`);
213
- if (identity?.owner_name) lines.push(`Your owner is ${identity.owner_name}.`);
214
- if (identity?.owner_context) lines.push(`Owner context: ${identity.owner_context}`);
215
- lines.push(`Always reply in the language with ISO code "${userLang}" unless the user explicitly switches.`);
216
- return lines.join("\n");
217
- })();
283
+ const identityBlock = buildIdentityBlock(identity, userLang);
218
284
 
219
285
  const system = [
220
286
  sa.system || DEFAULT_SYSTEM,
@@ -246,14 +312,21 @@ export async function runSuperAgent({
246
312
  let totalUsage = { input_tokens: 0, output_tokens: 0 };
247
313
  let lastText = "";
248
314
  let usePseudoTools = false;
315
+ // Track how many consecutive iterations contained only ACK_ONLY tools.
316
+ // While this is > 0 we keep tool_choice="required" so the next iter has
317
+ // to do real work — otherwise gemma4-class models call send_telegram
318
+ // for the ack and then break out with empty text on iter N+1.
319
+ let ackOnlyStreak = 0;
249
320
 
250
321
  for (let iter = 0; iter < MAX_TOOL_ITERS; iter++) {
251
322
  await emitProgress(onEvent, { type: "model_start", iteration: iter + 1 });
252
- // On the first iteration, force a tool call. This prevents the model from
253
- // returning a bare acknowledgment ("ok", "dame un segundo") instead of
254
- // acting on an action request. On later iterations (after tool results
255
- // have been fed back) tool_choice is "auto" so the model can produce its
256
- // final text summary.
323
+ // Force a tool call on iter 0 (no bare "ok dame un segundo" reply), AND
324
+ // on any iteration that immediately follows an ack-only iter (so the
325
+ // model can't ack and then stop). After at most MAX_CONSECUTIVE_ACKS
326
+ // forced rounds we let it fall back to "auto" so the model can finish.
327
+ const forceTool =
328
+ iter === 0 ||
329
+ (ackOnlyStreak > 0 && ackOnlyStreak <= MAX_CONSECUTIVE_ACKS);
257
330
  let result;
258
331
  try {
259
332
  result = await callEngine({
@@ -262,9 +335,12 @@ export async function runSuperAgent({
262
335
  messages: conversation,
263
336
  config: globalConfig,
264
337
  tools: usePseudoTools ? null : TOOL_SCHEMAS,
265
- toolChoice: usePseudoTools ? null : (iter === 0 ? "required" : "auto"),
338
+ toolChoice: usePseudoTools ? null : (forceTool ? "required" : "auto"),
266
339
  maxTokens: 1024,
267
340
  signal,
341
+ // Only stream tokens on non-forced iterations — on forced iters the
342
+ // model MUST emit a tool_call, streaming text would confuse the user.
343
+ onToken: (!forceTool && onToken) ? onToken : null,
268
344
  });
269
345
  } catch (e) {
270
346
  if (usePseudoTools && /^ollama:/i.test(String(activeModel || "")) && /ollama\s+500/i.test(String(e?.message || "")) && trace.length > 0) {
@@ -284,6 +360,7 @@ export async function runSuperAgent({
284
360
  toolChoice: null,
285
361
  maxTokens: 1024,
286
362
  signal,
363
+ onToken: (iter > 0 && onToken) ? onToken : null,
287
364
  });
288
365
  }
289
366
  totalUsage.input_tokens += result.usage?.input_tokens || 0;
@@ -378,6 +455,25 @@ export async function runSuperAgent({
378
455
  content: JSON.stringify(toolResult),
379
456
  });
380
457
  }
458
+
459
+ // Did this iteration consist of ONLY ack-style tool calls? If so we'll
460
+ // keep tool_choice forced on the next iter (see top of loop). A turn
461
+ // that mixes send_telegram + e.g. browser_screenshot counts as "real
462
+ // work" and resets the streak.
463
+ const allAckOnly = toolCalls.every((tc) => {
464
+ const n = (tc.function?.name) || tc.name;
465
+ return ACK_ONLY_TOOLS.has(n);
466
+ });
467
+ if (allAckOnly) {
468
+ ackOnlyStreak += 1;
469
+ await emitProgress(onEvent, {
470
+ type: "ack_only_iter",
471
+ iteration: iter + 1,
472
+ streak: ackOnlyStreak,
473
+ });
474
+ } else {
475
+ ackOnlyStreak = 0;
476
+ }
381
477
  }
382
478
 
383
479
  return {
@@ -28,8 +28,9 @@
28
28
 
29
29
  import fs from "node:fs";
30
30
  import path from "node:path";
31
- import { spawn } from "node:child_process";
31
+ import { spawn, exec } from "node:child_process";
32
32
  import { fileURLToPath } from "node:url";
33
+ import { logInfo, logWarn, logError } from "../core/logging.js";
33
34
 
34
35
  const __filename = fileURLToPath(import.meta.url);
35
36
  const __dirname = path.dirname(__filename);
@@ -43,8 +44,32 @@ const DEFAULT_LOCAL = {
43
44
  language: "auto",
44
45
  beam_size: 5,
45
46
  idle_minutes: 10,
47
+ // Max time we wait for /transcribe to return. Long audio files (Telegram
48
+ // voice notes > 10 min) can take several minutes on CPU; the previous
49
+ // hard-coded 5-minute cap silently truncated them. 20 minutes covers a
50
+ // ~60-minute voice note on a small int8 model. Override with
51
+ // transcription.local.timeout_ms in ~/.apx/config.json if needed.
52
+ timeout_ms: 20 * 60_000,
46
53
  };
47
54
 
55
+ // ---------------------------------------------------------------------------
56
+ // Config helpers (pure — exported for tests)
57
+ // ---------------------------------------------------------------------------
58
+
59
/**
 * Resolve the effective transcription language.
 * Priority: explicit local config → config.user.language → "auto" (whisper detects).
 *
 * A missing config object (null/undefined) is treated the same as a config
 * with no explicit language, so the helper never throws on absent
 * configuration.
 *
 * @param {object|null|undefined} localCfg  merged transcription.local config
 * @param {string} userLang                 config.user.language ISO code (e.g. "es"), or ""
 * @returns {string} ISO code or "auto"
 */
export function resolveTranscriptionLanguage(localCfg, userLang) {
  // Null-safe read: dereferencing `.language` on a null/undefined config
  // would raise a TypeError.
  const explicit = localCfg?.language;
  if (explicit && explicit !== "auto") return explicit;
  if (userLang) return userLang;
  return "auto";
}
72
+
48
73
  // ---------------------------------------------------------------------------
49
74
  // Config
50
75
  // ---------------------------------------------------------------------------
@@ -59,9 +84,7 @@ async function getConfig() {
59
84
  // Explicit transcription.local.language always wins; "auto" means fall back to user.language.
60
85
  const userLang = cfg.user?.language || "";
61
86
  const localBase = { ...DEFAULT_LOCAL, ...(t.local || {}) };
62
- if ((!localBase.language || localBase.language === "auto") && userLang) {
63
- localBase.language = userLang;
64
- }
87
+ localBase.language = resolveTranscriptionLanguage(localBase, userLang);
65
88
  return {
66
89
  provider: t.provider || "auto",
67
90
  local: localBase,
@@ -98,6 +121,21 @@ async function _isServerHealthy() {
98
121
  }
99
122
  }
100
123
 
124
/**
 * Ask the local whisper-server which model it reports on its /health endpoint.
 *
 * @returns {Promise<string|null>} the `model` field of the health JSON, or
 *   null when the request fails, times out (800 ms), returns a non-OK status,
 *   or the field is missing or empty.
 */
async function _serverModelName() {
  try {
    const healthUrl = `http://127.0.0.1:${WHISPER_PORT}/health`;
    const response = await fetch(healthUrl, { signal: AbortSignal.timeout(800) });
    if (!response.ok) {
      return null;
    }
    const payload = await response.json();
    return payload?.model || null;
  } catch {
    // Unreachable server, timeout or unparsable body all mean "unknown".
    return null;
  }
}
138
+
101
139
  async function _waitForServer(maxMs = 15_000) {
102
140
  const deadline = Date.now() + maxMs;
103
141
  while (Date.now() < deadline) {
@@ -107,18 +145,62 @@ async function _waitForServer(maxMs = 15_000) {
107
145
  throw new Error(`whisper-server did not start within ${maxMs}ms`);
108
146
  }
109
147
 
148
// Resolve the PID of the process LISTENing on the whisper port, or null when
// lsof fails, prints nothing, or reports no usable PID other than our own.
// The -sTCP:LISTEN filter matters: without it lsof also lists clients that
// merely hold an open connection to the port (this daemon included).
async function _findListenerPid() {
  return new Promise((resolve) => {
    const command = `lsof -ti tcp:${WHISPER_PORT} -sTCP:LISTEN`;
    exec(command, (err, stdout) => {
      if (err || !stdout) {
        resolve(null);
        return;
      }
      // lsof -t prints one PID per line; take the first that is not us.
      const firstPid = stdout
        .trim()
        .split("\n")
        .map((entry) => parseInt(entry, 10))
        .find((pid) => Number.isFinite(pid) && pid !== process.pid);
      resolve(firstPid || null);
    });
  });
}
162
+
163
// Free the whisper port: ask the server to shut itself down, then signal
// whichever process is still listening. Our own pid is never signalled.
async function _killOrphanWhisper() {
  // Step 1: polite POST /shutdown. A failed request is ignored on purpose
  // (best effort); in that case the 600ms grace sleep is skipped as well.
  try {
    await fetch(`http://127.0.0.1:${WHISPER_PORT}/shutdown`, {
      method: "POST", signal: AbortSignal.timeout(1000),
    });
    await _sleep(600);
  } catch {}

  // Step 2: if something is still listening, escalate SIGTERM -> SIGKILL.
  const listenerPid = await _findListenerPid();
  if (!listenerPid || listenerPid === process.pid) return;

  try { process.kill(listenerPid, "SIGTERM"); } catch {}
  await _sleep(400);

  // Signal 0 delivers nothing; it only probes the pid and throws when the
  // process cannot be signalled (e.g. it has already exited).
  let stillSignallable = true;
  try { process.kill(listenerPid, 0); } catch { stillSignallable = false; }
  if (stillSignallable) {
    try { process.kill(listenerPid, "SIGKILL"); } catch {}
  }
  await _sleep(300);
}
180
+
110
181
  async function ensureWhisperServer(opts) {
111
182
  const model = opts.model || DEFAULT_LOCAL.model;
112
183
 
113
184
  // Already running with the right model — health-check to confirm still alive.
114
185
  if (_serverProcess && _serverModel === model) {
115
186
  if (await _isServerHealthy()) return;
116
- // Process died (idle shutdown). Fall through to restart.
117
187
  _serverProcess = null;
118
188
  _serverModel = null;
119
189
  }
120
190
 
121
- // Wrong model: kill old server and start fresh.
191
+ // Adopt an externally-running whisper-server (e.g. left over from prior daemon).
192
+ if (!_serverProcess) {
193
+ const existing = await _serverModelName();
194
+ if (existing === model) {
195
+ _serverModel = model;
196
+ return;
197
+ }
198
+ if (existing) {
199
+ // Wrong model: kick out the orphan so we can start the right one.
200
+ await _killOrphanWhisper();
201
+ }
202
+ }
203
+
122
204
  if (_serverProcess) {
123
205
  try { _serverProcess.kill(); } catch {}
124
206
  _serverProcess = null;
@@ -126,6 +208,10 @@ async function ensureWhisperServer(opts) {
126
208
  await _sleep(300);
127
209
  }
128
210
 
211
+ await _spawnWhisper(opts, model, /* retried */ false);
212
+ }
213
+
214
+ async function _spawnWhisper(opts, model, retried) {
129
215
  const args = [
130
216
  WHISPER_SERVER,
131
217
  "--port", String(WHISPER_PORT),
@@ -151,32 +237,44 @@ async function ensureWhisperServer(opts) {
151
237
  });
152
238
 
153
239
  // Wait for the "ready" line on stdout, then wait for HTTP to respond.
154
- await new Promise((resolve, reject) => {
155
- const timeout = setTimeout(
156
- () => reject(new Error("whisper-server startup timed out (15s)")),
157
- 15_000
158
- );
159
- let buf = "";
160
- proc.stdout.on("data", (chunk) => {
161
- buf += chunk.toString();
162
- const nl = buf.indexOf("\n");
163
- if (nl === -1) return;
164
- const line = buf.slice(0, nl).trim();
165
- buf = buf.slice(nl + 1);
166
- clearTimeout(timeout);
167
- try {
168
- const msg = JSON.parse(line);
169
- if (msg.status === "error") return reject(new Error(msg.error || "whisper-server error"));
170
- resolve(); // "ready"
171
- } catch {
172
- resolve(); // unexpected line but server is up
173
- }
174
- });
175
- proc.on("exit", (code) => {
176
- clearTimeout(timeout);
177
- reject(new Error(`whisper-server exited (code ${code}) before becoming ready`));
240
+ try {
241
+ await new Promise((resolve, reject) => {
242
+ const timeout = setTimeout(
243
+ () => reject(new Error("whisper-server startup timed out (15s)")),
244
+ 15_000
245
+ );
246
+ let buf = "";
247
+ proc.stdout.on("data", (chunk) => {
248
+ buf += chunk.toString();
249
+ const nl = buf.indexOf("\n");
250
+ if (nl === -1) return;
251
+ const line = buf.slice(0, nl).trim();
252
+ buf = buf.slice(nl + 1);
253
+ clearTimeout(timeout);
254
+ try {
255
+ const msg = JSON.parse(line);
256
+ if (msg.status === "error") return reject(new Error(msg.error || "whisper-server error"));
257
+ resolve(); // "ready"
258
+ } catch {
259
+ resolve(); // unexpected line but server is up
260
+ }
261
+ });
262
+ proc.on("exit", (code) => {
263
+ clearTimeout(timeout);
264
+ reject(new Error(`whisper-server exited (code ${code}) before becoming ready`));
265
+ });
178
266
  });
179
- });
267
+ } catch (e) {
268
+ // Self-heal: if the port was already in use, kill the orphan and retry once.
269
+ const msg = e.message || "";
270
+ if (!retried && /address already in use|errno 48|eaddrinuse/i.test(msg)) {
271
+ _serverProcess = null;
272
+ _serverModel = null;
273
+ await _killOrphanWhisper();
274
+ return _spawnWhisper(opts, model, /* retried */ true);
275
+ }
276
+ throw e;
277
+ }
180
278
  }
181
279
 
182
280
  // ---------------------------------------------------------------------------
@@ -190,30 +288,74 @@ async function transcribeLocal(filePath, opts) {
190
288
  ? null
191
289
  : (opts.language || null);
192
290
 
193
- const res = await fetch(`http://127.0.0.1:${WHISPER_PORT}/transcribe`, {
194
- method: "POST",
195
- headers: { "content-type": "application/json" },
196
- body: JSON.stringify({
197
- audio_path: filePath,
198
- language,
199
- beam_size: opts.beam_size || DEFAULT_LOCAL.beam_size,
200
- }),
201
- signal: AbortSignal.timeout(5 * 60_000),
202
- });
291
+ const timeoutMs = Number(opts.timeout_ms) > 0
292
+ ? Number(opts.timeout_ms)
293
+ : DEFAULT_LOCAL.timeout_ms;
203
294
 
204
- const json = await res.json();
205
- if (!json.ok) throw new Error(json.error || "transcription failed");
295
+ const body = JSON.stringify({
296
+ audio_path: filePath,
297
+ language,
298
+ beam_size: opts.beam_size || DEFAULT_LOCAL.beam_size,
299
+ });
206
300
 
207
- return {
208
- ok: true,
209
- backend: "local",
210
- text: json.text || "",
211
- language: json.language || null,
212
- language_probability: json.language_probability ?? null,
213
- duration: json.duration ?? null,
214
- model: json.model,
215
- compute_type: json.compute_type,
216
- };
301
+ // Long transcriptions on CPU (small int8, 1-minute voice note) can take
302
+ // 30-45s. Under undici (Node fetch) we occasionally see "fetch failed"
303
+ // from the inbound Telegram path even though the whisper-server completes
304
+ // the request successfully — a keep-alive socket gets reset somewhere
305
+ // between the long whisper-server response and the daemon's other
306
+ // concurrent traffic. We retry once on a generic "fetch failed" so the
307
+ // user actually gets a reply.
308
+ const maxAttempts = 2;
309
+ let lastErr = null;
310
+ for (let attempt = 1; attempt <= maxAttempts; attempt++) {
311
+ const t0 = Date.now();
312
+ try {
313
+ logInfo("whisper", `transcribeLocal attempt ${attempt}/${maxAttempts}`, {
314
+ file: path.basename(filePath),
315
+ language: language || "auto",
316
+ timeout_ms: timeoutMs,
317
+ });
318
+ const res = await fetch(`http://127.0.0.1:${WHISPER_PORT}/transcribe`, {
319
+ method: "POST",
320
+ headers: { "content-type": "application/json", "connection": "close" },
321
+ body,
322
+ signal: AbortSignal.timeout(timeoutMs),
323
+ });
324
+ const json = await res.json();
325
+ if (!json.ok) throw new Error(json.error || "transcription failed");
326
+ logInfo("whisper", `transcribeLocal ok in ${Date.now() - t0}ms`, {
327
+ chars: (json.text || "").length,
328
+ language: json.language,
329
+ duration: json.duration,
330
+ });
331
+ return {
332
+ ok: true,
333
+ backend: "local",
334
+ text: json.text || "",
335
+ language: json.language || null,
336
+ language_probability: json.language_probability ?? null,
337
+ duration: json.duration ?? null,
338
+ model: json.model,
339
+ compute_type: json.compute_type,
340
+ };
341
+ } catch (e) {
342
+ lastErr = e;
343
+ const isRetriable =
344
+ /fetch failed|ECONNRESET|socket hang up|terminated/i.test(e.message || "");
345
+ const dt = Date.now() - t0;
346
+ logWarn("whisper", `transcribeLocal attempt ${attempt} failed in ${dt}ms`, {
347
+ error: e.message,
348
+ retriable: isRetriable,
349
+ will_retry: isRetriable && attempt < maxAttempts,
350
+ });
351
+ if (!isRetriable || attempt >= maxAttempts) break;
352
+ // Brief backoff before retry — gives the whisper-server.py thread time
353
+ // to flush its pending response and release the model lock.
354
+ await _sleep(500);
355
+ }
356
+ }
357
+ logError("whisper", `transcribeLocal exhausted retries`, { error: lastErr?.message });
358
+ throw lastErr || new Error("local transcription failed");
217
359
  }
218
360
 
219
361
  // ---------------------------------------------------------------------------
@@ -280,19 +422,80 @@ export async function transcribe(filePath, overrides = {}) {
280
422
  return transcribeOpenAI(filePath, cfg.openaiKey);
281
423
  }
282
424
  if (provider === "local") {
425
+ // Explicit local-only: bubble up the real error, do not mention OpenAI.
283
426
  return transcribeLocal(filePath, localOpts);
284
427
  }
285
428
 
286
- // auto: local first, fall back to openai
429
+ // auto: local first, fall back to openai only if a key is configured
287
430
  try {
288
431
  return await transcribeLocal(filePath, localOpts);
289
432
  } catch (localErr) {
290
- if (!cfg.openaiKey) {
291
- throw new Error(
292
- `local transcription failed and no OpenAI fallback available: ${localErr.message}`
293
- );
433
+ if (cfg.openaiKey) {
434
+ return transcribeOpenAI(filePath, cfg.openaiKey);
294
435
  }
295
- return transcribeOpenAI(filePath, cfg.openaiKey);
436
+ // No OpenAI configured — surface the real local error verbatim.
437
+ throw new Error(`local transcription failed: ${localErr.message}`);
438
+ }
439
+ }
440
+
441
/**
 * Transcribe raw audio bytes (e.g. from a mic chunk or Telegram voice blob).
 * The bytes are written to a temporary file, handed to transcribe(), and the
 * file is removed afterwards whether or not transcription succeeded.
 *
 * @param {Buffer} buf        raw audio data; must be non-empty
 * @param {string} format     file extension hint: "webm" | "ogg" | "wav" | "mp3"
 *                            (default "webm"). A leading dot is ignored; any
 *                            value that is not purely alphanumeric falls back
 *                            to "webm".
 * @param {object} overrides  same as transcribe() overrides
 * @returns {Promise<object>} whatever transcribe() resolves to
 * @throws {Error} if buf is empty, the temp file cannot be written, or
 *                 transcribe() rejects
 */
export async function transcribeBuffer(buf, format = "webm", overrides = {}) {
  if (!buf || !buf.length) throw new Error("transcribeBuffer: empty buffer");

  // The hint ends up inside a file name, so it must never carry path
  // separators or "..": accept a plain alphanumeric extension only. This also
  // covers callers passing null or a non-string, which used to throw a
  // TypeError on .replace().
  const hint = typeof format === "string" ? format.replace(/^\./, "") : "";
  const ext = /^[A-Za-z0-9]+$/.test(hint) ? hint : "webm";

  const { tmpdir } = await import("node:os");
  const tmpFile = path.join(
    tmpdir(),
    `apx-audio-${Date.now()}-${Math.random().toString(36).slice(2)}.${ext}`
  );
  try {
    // 0o600: the temp directory may be shared, keep the audio private.
    fs.writeFileSync(tmpFile, buf, { mode: 0o600 });
    return await transcribe(tmpFile, overrides);
  } finally {
    // Best-effort cleanup; a missing file here is not an error worth raising.
    try { fs.unlinkSync(tmpFile); } catch {}
  }
}
463
+
464
+ // ---------------------------------------------------------------------------
465
+ // Lifecycle (preload on daemon start, shutdown on daemon stop)
466
+ // ---------------------------------------------------------------------------
467
+
468
/**
 * Warm up the local whisper server ahead of time so the first transcription
 * does not pay the startup cost. May be called repeatedly. Failures while
 * loading config or starting the server are caught and reported through
 * `log` rather than propagated.
 *
 * @param {(msg: string) => void} log  sink for progress messages (default console.log)
 */
export async function preloadWhisperServer(log = console.log) {
  try {
    const settings = await getConfig();
    const usesLocalBackend = settings.provider !== "openai";
    if (usesLocalBackend) {
      log(`whisper: preloading model "${settings.local.model}" on port ${WHISPER_PORT}…`);
      await ensureWhisperServer(settings.local);
      log(`whisper: ready on port ${WHISPER_PORT} (model: ${_serverModel})`);
    }
  } catch (e) {
    log(`whisper: preload failed — ${e.message} (will retry lazily on first request)`);
  }
}
483
+
484
/**
 * Stop the whisper server.
 *
 * If this daemon spawned the server, its child process is killed. Otherwise a
 * best-effort POST /shutdown is sent to whatever is listening on the whisper
 * port (e.g. a server adopted from an earlier daemon); a failed request is
 * ignored. The remembered model name is cleared on both paths so no stale
 * state outlives the server.
 *
 * @returns {Promise<void>}
 */
export async function shutdownWhisperServer() {
  const owned = _serverProcess;
  // Reset module state up front: an adopted server has no process handle but
  // still left _serverModel set, which previously survived shutdown.
  _serverProcess = null;
  _serverModel = null;

  if (owned) {
    try { owned.kill(); } catch {}
    return;
  }

  // No child of our own: try a graceful shutdown of an adopted server.
  try {
    await fetch(`http://127.0.0.1:${WHISPER_PORT}/shutdown`, {
      method: "POST", signal: AbortSignal.timeout(500),
    });
  } catch {}
}
298
501