@agentprojectcontext/apx 1.15.6 → 1.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +46 -5
- package/src/cli/commands/log.js +113 -0
- package/src/cli/commands/overlay.js +253 -0
- package/src/cli/commands/sys.js +88 -16
- package/src/cli/index.js +23 -1
- package/src/cli/terminal-chat/renderer.js +71 -56
- package/src/cli-ts/commands/agent.ts +173 -0
- package/src/cli-ts/commands/chat.ts +119 -0
- package/src/cli-ts/commands/daemon.ts +112 -0
- package/src/cli-ts/commands/exec.ts +109 -0
- package/src/cli-ts/commands/mcp.ts +235 -0
- package/src/cli-ts/commands/session.ts +224 -0
- package/src/cli-ts/commands/status.ts +61 -0
- package/src/cli-ts/http.ts +36 -0
- package/src/cli-ts/index.ts +73 -0
- package/src/cli-ts/ui.ts +107 -0
- package/src/core/logging.js +81 -0
- package/src/daemon/api.js +58 -0
- package/src/daemon/engines/anthropic.js +60 -1
- package/src/daemon/engines/index.js +2 -1
- package/src/daemon/engines/ollama.js +70 -3
- package/src/daemon/index.js +58 -0
- package/src/daemon/overlay-ws.js +40 -0
- package/src/daemon/plugins/index.js +2 -1
- package/src/daemon/plugins/overlay.js +177 -0
- package/src/daemon/plugins/telegram.js +15 -3
- package/src/daemon/super-agent-langchain.js +296 -0
- package/src/daemon/super-agent.js +115 -19
- package/src/daemon/transcription.js +262 -59
- package/src/daemon/whisper-server.py +57 -6
- package/src/overlay/index.html +44 -0
- package/src/overlay/main.js +480 -0
- package/src/overlay/package.json +3 -0
- package/src/overlay/preload.js +34 -0
- package/src/overlay/renderer.js +371 -0
- package/src/overlay/style.css +250 -0
- package/src/tui/_shims/cli-error.ts +6 -0
- package/src/tui/_shims/cli-logo.ts +18 -0
- package/src/tui/_shims/cli-ui.ts +1 -0
- package/src/tui/_shims/config-console-state.ts +7 -0
- package/src/tui/_shims/core-any.ts +30 -0
- package/src/tui/_shims/core-binary.ts +13 -0
- package/src/tui/_shims/core-flag.ts +3 -0
- package/src/tui/_shims/core-log.ts +14 -0
- package/src/tui/_shims/lsp-language.ts +1 -0
- package/src/tui/_shims/opencode-any.ts +135 -0
- package/src/tui/_shims/opencode-sdk-v2.ts +48 -0
- package/src/tui/_shims/plugin-tui.ts +13 -0
- package/src/tui/_shims/provider-provider.ts +10 -0
- package/src/tui/_shims/session-retry.ts +1 -0
- package/src/tui/_shims/session-schema.ts +15 -0
- package/src/tui/_shims/session-session.ts +3 -0
- package/src/tui/_shims/snapshot.ts +4 -0
- package/src/tui/_shims/tool-any.ts +18 -0
- package/src/tui/_shims/util-error.ts +7 -0
- package/src/tui/_shims/util-filesystem.ts +79 -0
- package/src/tui/_shims/util-format.ts +7 -0
- package/src/tui/_shims/util-iife.ts +3 -0
- package/src/tui/_shims/util-locale.ts +10 -0
- package/src/tui/_shims/util-process.ts +38 -0
- package/src/tui/app.tsx +783 -0
- package/src/tui/asset/charge.wav +0 -0
- package/src/tui/asset/pulse-a.wav +0 -0
- package/src/tui/asset/pulse-b.wav +0 -0
- package/src/tui/asset/pulse-c.wav +0 -0
- package/src/tui/attach.ts +100 -0
- package/src/tui/component/bg-pulse-render.ts +436 -0
- package/src/tui/component/bg-pulse.tsx +99 -0
- package/src/tui/component/border.tsx +21 -0
- package/src/tui/component/dialog-agent.tsx +31 -0
- package/src/tui/component/dialog-console-org.tsx +103 -0
- package/src/tui/component/dialog-mcp.tsx +85 -0
- package/src/tui/component/dialog-model.tsx +175 -0
- package/src/tui/component/dialog-provider.tsx +456 -0
- package/src/tui/component/dialog-retry-action.tsx +160 -0
- package/src/tui/component/dialog-session-delete-failed.tsx +99 -0
- package/src/tui/component/dialog-session-list.tsx +323 -0
- package/src/tui/component/dialog-session-rename.tsx +31 -0
- package/src/tui/component/dialog-skill.tsx +36 -0
- package/src/tui/component/dialog-stash.tsx +87 -0
- package/src/tui/component/dialog-status.tsx +168 -0
- package/src/tui/component/dialog-tag.tsx +44 -0
- package/src/tui/component/dialog-theme-list.tsx +50 -0
- package/src/tui/component/dialog-variant.tsx +39 -0
- package/src/tui/component/dialog-workspace-create.tsx +302 -0
- package/src/tui/component/dialog-workspace-file-changes.tsx +138 -0
- package/src/tui/component/dialog-workspace-unavailable.tsx +69 -0
- package/src/tui/component/error-component.tsx +92 -0
- package/src/tui/component/logo.tsx +896 -0
- package/src/tui/component/plugin-route-missing.tsx +14 -0
- package/src/tui/component/prompt/autocomplete.tsx +869 -0
- package/src/tui/component/prompt/cwd.ts +0 -0
- package/src/tui/component/prompt/frecency.tsx +90 -0
- package/src/tui/component/prompt/history.tsx +108 -0
- package/src/tui/component/prompt/index.tsx +1809 -0
- package/src/tui/component/prompt/part.ts +16 -0
- package/src/tui/component/prompt/stash.tsx +101 -0
- package/src/tui/component/prompt/traits.ts +35 -0
- package/src/tui/component/spinner.tsx +24 -0
- package/src/tui/component/startup-loading.tsx +63 -0
- package/src/tui/component/todo-item.tsx +32 -0
- package/src/tui/component/use-connected.tsx +9 -0
- package/src/tui/component/workspace-label.tsx +19 -0
- package/src/tui/config/cwd.ts +5 -0
- package/src/tui/config/keybind.ts +432 -0
- package/src/tui/config/tui-migrate.ts +154 -0
- package/src/tui/config/tui-schema.ts +34 -0
- package/src/tui/config/tui.ts +46 -0
- package/src/tui/context/aggregate-failures.ts +34 -0
- package/src/tui/context/args.tsx +15 -0
- package/src/tui/context/command-palette.tsx +163 -0
- package/src/tui/context/directory.ts +15 -0
- package/src/tui/context/editor-zed.ts +283 -0
- package/src/tui/context/editor.ts +468 -0
- package/src/tui/context/event-apx.ts +22 -0
- package/src/tui/context/event.ts +6 -0
- package/src/tui/context/exit.tsx +60 -0
- package/src/tui/context/helper.tsx +25 -0
- package/src/tui/context/kv.tsx +81 -0
- package/src/tui/context/local.tsx +608 -0
- package/src/tui/context/path-format.tsx +39 -0
- package/src/tui/context/project-apx.tsx +48 -0
- package/src/tui/context/project.tsx +7 -0
- package/src/tui/context/prompt.tsx +18 -0
- package/src/tui/context/route.tsx +52 -0
- package/src/tui/context/sdk-apx.tsx +185 -0
- package/src/tui/context/sdk.tsx +6 -0
- package/src/tui/context/sync-apx.tsx +178 -0
- package/src/tui/context/sync-v2.tsx +16 -0
- package/src/tui/context/sync.tsx +118 -0
- package/src/tui/context/theme/aura.json +69 -0
- package/src/tui/context/theme/ayu.json +80 -0
- package/src/tui/context/theme/carbonfox.json +248 -0
- package/src/tui/context/theme/catppuccin-frappe.json +230 -0
- package/src/tui/context/theme/catppuccin-macchiato.json +230 -0
- package/src/tui/context/theme/catppuccin.json +112 -0
- package/src/tui/context/theme/cobalt2.json +225 -0
- package/src/tui/context/theme/cursor.json +249 -0
- package/src/tui/context/theme/dracula.json +219 -0
- package/src/tui/context/theme/everforest.json +241 -0
- package/src/tui/context/theme/flexoki.json +237 -0
- package/src/tui/context/theme/github.json +233 -0
- package/src/tui/context/theme/gruvbox.json +242 -0
- package/src/tui/context/theme/kanagawa.json +77 -0
- package/src/tui/context/theme/lucent-orng.json +234 -0
- package/src/tui/context/theme/material.json +235 -0
- package/src/tui/context/theme/matrix.json +77 -0
- package/src/tui/context/theme/mercury.json +252 -0
- package/src/tui/context/theme/monokai.json +221 -0
- package/src/tui/context/theme/nightowl.json +221 -0
- package/src/tui/context/theme/nord.json +223 -0
- package/src/tui/context/theme/one-dark.json +84 -0
- package/src/tui/context/theme/opencode.json +245 -0
- package/src/tui/context/theme/orng.json +249 -0
- package/src/tui/context/theme/osaka-jade.json +93 -0
- package/src/tui/context/theme/palenight.json +222 -0
- package/src/tui/context/theme/rosepine.json +234 -0
- package/src/tui/context/theme/solarized.json +223 -0
- package/src/tui/context/theme/synthwave84.json +226 -0
- package/src/tui/context/theme/tokyonight.json +243 -0
- package/src/tui/context/theme/vercel.json +245 -0
- package/src/tui/context/theme/vesper.json +218 -0
- package/src/tui/context/theme/zenburn.json +223 -0
- package/src/tui/context/theme.tsx +1247 -0
- package/src/tui/context/tui-config.tsx +9 -0
- package/src/tui/event.ts +16 -0
- package/src/tui/feature-plugins/home/footer.tsx +94 -0
- package/src/tui/feature-plugins/home/tips-view.tsx +166 -0
- package/src/tui/feature-plugins/home/tips.tsx +59 -0
- package/src/tui/feature-plugins/sidebar/context.tsx +65 -0
- package/src/tui/feature-plugins/sidebar/files.tsx +63 -0
- package/src/tui/feature-plugins/sidebar/footer.tsx +94 -0
- package/src/tui/feature-plugins/sidebar/lsp.tsx +65 -0
- package/src/tui/feature-plugins/sidebar/mcp.tsx +97 -0
- package/src/tui/feature-plugins/sidebar/todo.tsx +49 -0
- package/src/tui/feature-plugins/system/plugins.tsx +269 -0
- package/src/tui/feature-plugins/system/session-v2.tsx +1143 -0
- package/src/tui/feature-plugins/system/which-key.tsx +608 -0
- package/src/tui/keymap.tsx +166 -0
- package/src/tui/layer.ts +6 -0
- package/src/tui/plugin/api.tsx +381 -0
- package/src/tui/plugin/command-shim.ts +109 -0
- package/src/tui/plugin/internal.ts +33 -0
- package/src/tui/plugin/runtime.ts +1069 -0
- package/src/tui/plugin/slots.tsx +60 -0
- package/src/tui/routes/home.tsx +96 -0
- package/src/tui/routes/session/dialog-fork-from-timeline.tsx +76 -0
- package/src/tui/routes/session/dialog-message.tsx +108 -0
- package/src/tui/routes/session/dialog-subagent.tsx +26 -0
- package/src/tui/routes/session/dialog-timeline.tsx +47 -0
- package/src/tui/routes/session/footer.tsx +91 -0
- package/src/tui/routes/session/index.tsx +188 -0
- package/src/tui/routes/session/permission.tsx +722 -0
- package/src/tui/routes/session/question.tsx +490 -0
- package/src/tui/routes/session/sidebar.tsx +102 -0
- package/src/tui/routes/session/subagent-footer.tsx +133 -0
- package/src/tui/run.ts +84 -0
- package/src/tui/thread.ts +261 -0
- package/src/tui/tsconfig.json +40 -0
- package/src/tui/ui/dialog-alert.tsx +66 -0
- package/src/tui/ui/dialog-confirm.tsx +108 -0
- package/src/tui/ui/dialog-export-options.tsx +217 -0
- package/src/tui/ui/dialog-help.tsx +40 -0
- package/src/tui/ui/dialog-prompt.tsx +101 -0
- package/src/tui/ui/dialog-select.tsx +553 -0
- package/src/tui/ui/dialog.tsx +211 -0
- package/src/tui/ui/link.tsx +34 -0
- package/src/tui/ui/spinner.ts +368 -0
- package/src/tui/ui/toast.tsx +111 -0
- package/src/tui/util/clipboard.ts +217 -0
- package/src/tui/util/editor.ts +37 -0
- package/src/tui/util/model.ts +23 -0
- package/src/tui/util/provider-origin.ts +7 -0
- package/src/tui/util/revert-diff.ts +18 -0
- package/src/tui/util/scroll.ts +25 -0
- package/src/tui/util/selection.ts +65 -0
- package/src/tui/util/signal.ts +41 -0
- package/src/tui/util/sound.ts +156 -0
- package/src/tui/util/transcript.ts +112 -0
- package/src/tui/validate-session.ts +29 -0
- package/src/tui/win32.ts +130 -0
- package/src/tui/worker.ts +104 -0
|
@@ -22,7 +22,43 @@ import { readIdentity } from "../core/identity.js";
|
|
|
22
22
|
|
|
23
23
|
const MAX_TOOL_ITERS = 6;
|
|
24
24
|
|
|
25
|
-
|
|
25
|
+
// Tools that, when they're the ONLY thing the model called in an iteration,
|
|
26
|
+
// don't count as "real work" — they're acknowledgements (telegram ping back
|
|
27
|
+
// to the user, log lines, etc). When the model emits an iteration that only
|
|
28
|
+
// contains acks, we DON'T let it leave the loop on iter N+1 with empty text:
|
|
29
|
+
// we force another required tool call so the actual task gets executed.
|
|
30
|
+
//
|
|
31
|
+
// This is the fix for the "agent sends 'ya te escucho 🎧' and then stops"
|
|
32
|
+
// bug. Without it, gemma4-class models sometimes consider the ack the
|
|
33
|
+
// complete reply on iter 0 and emit only "ok" on iter 1, breaking out.
|
|
34
|
+
const ACK_ONLY_TOOLS = new Set(["send_telegram"]);
|
|
35
|
+
// Hard cap so the model can't ack-ack-ack forever — after this many
|
|
36
|
+
// consecutive ack-only iterations we let the loop progress naturally
|
|
37
|
+
// (the model already had its chance to call a real tool).
|
|
38
|
+
const MAX_CONSECUTIVE_ACKS = 2;
|
|
39
|
+
|
|
40
|
+
export const DEFAULT_SYSTEM = `# Identity (override everything else)
|
|
41
|
+
You are **APX** — Manuel's personal assistant running on his Mac.
|
|
42
|
+
You are NOT a code analyzer, NOT a generic chatbot, NOT a tutor.
|
|
43
|
+
You are an **action agent**: you USE TOOLS to do real things on Manuel's system.
|
|
44
|
+
|
|
45
|
+
# Language — non-negotiable
|
|
46
|
+
ALWAYS reply in **Spanish (rioplatense, voseo when natural)** unless Manuel
|
|
47
|
+
explicitly writes to you in another language for that turn. The user is an
|
|
48
|
+
Argentinian developer; English replies feel broken to him. If you find
|
|
49
|
+
yourself writing English, stop and rewrite in Spanish before sending.
|
|
50
|
+
This rule beats every other formatting hint below.
|
|
51
|
+
|
|
52
|
+
# What you must NOT do
|
|
53
|
+
- Do NOT explain code or write essays about "the provided snippet".
|
|
54
|
+
- Do NOT describe what a tool *would* do — call it and report the result.
|
|
55
|
+
- Do NOT dump the tool catalog at the user.
|
|
56
|
+
- Do NOT respond with disclaimers ("as an AI…", "I'm just an assistant…").
|
|
57
|
+
- If a user message is short or ambiguous, ASK one short clarifying question
|
|
58
|
+
in Spanish — do not invent a topic.
|
|
59
|
+
|
|
60
|
+
# How you operate
|
|
61
|
+
You are the **APX dispatcher** — the daemon-level agent that runs above all APC projects.
|
|
26
62
|
|
|
27
63
|
APX is a local daemon + CLI for APC projects. User-level runtime state lives under ~/.apx/:
|
|
28
64
|
- ~/.apx/config.json: daemon config, engines, Telegram, super-agent settings
|
|
@@ -51,7 +87,7 @@ HARD RULES (do not deviate):
|
|
|
51
87
|
3. NEVER answer "specify a project" — instead, just call the tool with no argument and you'll get the full picture.
|
|
52
88
|
4. If a tool result has an error, retry with different arguments before falling back to asking the user.
|
|
53
89
|
5. Respect permission mode. total = execute requested actions without confirmation. automatico = read/list/safe shell actions run directly; destructive, external, runtime, MCP calls, outbound messages, config, and filesystem mutations need explicit user confirmation. permiso = only allowed tools run directly; everything else needs confirmation.
|
|
54
|
-
6. Write in
|
|
90
|
+
6. Write in **Spanish** by default (see "Language" section above). Plain text on Telegram — no markdown tables, no code fences unless quoting code. Keep replies under 6 sentences unless the user asks for detail.
|
|
55
91
|
7. Stay brief: under 6 sentences unless asked for detail.
|
|
56
92
|
8. You DO see recent prior turns of this chat as previous messages when applicable. **Use them ONLY to disambiguate references** (e.g. "el primero" → first project mentioned earlier). For ANY factual data — agent details, MCP details, file contents, memory — RE-CALL the tool. Past turns are context, not a cache. Models change, agents change, files change.
|
|
57
93
|
9. /reset or /new from the user means "forget previous turns and answer this one fresh" — if you see those prefixes the operator already cleared the context for you.
|
|
@@ -63,7 +99,7 @@ HARD RULES (do not deviate):
|
|
|
63
99
|
15. NO-PENDING RULE: never say "give me a second", "I will do it", or "I will try later" as a final answer. Either call the tool in this same turn or say what blocks you.
|
|
64
100
|
16. IDENTITY RULE: when the user asks you to change your name, call yourself something, or update your personality/language, call set_identity and persist the change. Then confirm with your new name.
|
|
65
101
|
17. ROUTINES RULE: NEVER create a routine in the default project (id=0). Routines MUST be tied to a specific registered project. Before adding a routine, call list_projects to find the correct project id or name. Then pass --project <id|name> to apx routine add. If no project fits, ask the user which project to use. Creating routines in project 0/default mixes unrelated projects' schedules and corrupts state.
|
|
66
|
-
18. **NO
|
|
102
|
+
18. **NO BARE ACKS AS FINAL ANSWER**: Empty acknowledgments ("ok", "entendido", "dame un minuto", "voy", "checking") are invalid as a FINAL response when a tool was needed — they will be re-prompted. EXCEPTION: a short contextual ack sent via send_telegram BEFORE another tool call is encouraged on Telegram audio inputs and on tool calls that take more than a few seconds (browser_screenshot, web_search, run_shell, long file edits). The ack must be **contextual and varied** in Spanish — e.g. "Ya te escucho 🎧", "Dame un seg, transcribiendo…", "Buscando eso ahora", "Voy a revisar el repo…", "Un momento, ejecutando…". Never reuse the exact same ack twice in a row. The ack is the FIRST tool call in the turn; the actual work follows immediately in the SAME turn (do not return without doing the work).
|
|
67
103
|
19. **CWD RULE**: When the channel context includes a "CWD: <path>" line, that is the user's current working directory. References to "este directorio", "este proyecto", "esta carpeta", "acá", "aquí", "this directory", "this project", "current dir/folder" all mean that exact CWD path. Use it as the path argument directly — DO NOT ask the user "what's the path?" when CWD is already given. Example: if user says "agregá este proyecto a la lista", call add_project({path: <CWD>}) immediately.
|
|
68
104
|
20. **NO MANUAL SCAFFOLDING**: To register or scaffold a project, ALWAYS use add_project — it auto-creates AGENTS.md and .apc/project.json when missing (one call, atomic). NEVER write AGENTS.md, .apc/project.json, or any APC scaffold file by hand via run_shell / write_file / shell pipes. The schema must come from the official initApf scaffold, not improvised. If add_project errors, report the error to the user — don't try to work around it with shell hacks. Same for any other APC-managed file (.apc/agents/*, .apc/skills/*, etc.) — use the dedicated tool, never raw filesystem writes.
|
|
69
105
|
21. **SKILLS — ON DEMAND**: The "# Available skills" section below lists every skill available to you (slug + description, NO body). When the user asks about specific APX/APC commands, project structure, agent runtimes, or anything where exact syntax or detailed behavior matches a skill description (in ANY language — match semantically, not by keyword), call load_skill({slug}) to fetch the full markdown body. If a CWD is in the contextNote, pass it as project_path so project-scoped skills resolve. If the user explicitly asks "what skills do you have?", you can either read the catalog below directly OR call list_skills to get a fresh enumeration. Do NOT load skills for trivial / unrelated questions — that wastes tokens. Don't guess CLI syntax when a skill can tell you; load it.
|
|
@@ -143,8 +179,32 @@ function looksLikeActionRequest(text) {
|
|
|
143
179
|
return /\b(list|show|find|get|fetch|search|run|execute|create|add|make|start|stop|delete|update|send|check|read|write|look|tell me|dame|mostra|busca|ejecuta|crea|agrega|mandá|revisá|corré|borrá|arrancá)\b/.test(t);
|
|
144
180
|
}
|
|
145
181
|
|
|
182
|
+
/**
 * Build the identity block injected into every super-agent system prompt.
 * Pure function — exported for unit tests.
 *
 * Only identity fields that are present (truthy) contribute a line; the
 * language directive is always appended as the last line.
 *
 * @param {object|null} identity result of readIdentity(), or a plain object for tests
 * @param {string} userLang ISO 639-1 code from config.user.language (default "en")
 * @returns {string} newline-joined block whose first line is "# Identity"
 */
export function buildIdentityBlock(identity, userLang = "en") {
  // A default parameter only replaces `undefined`. Also fall back for null,
  // empty, whitespace-only or non-string values so the prompt never tells the
  // model to reply in language "null" or "".
  const lang =
    typeof userLang === "string" && userLang.trim() !== "" ? userLang.trim() : "en";

  const lines = ["# Identity"];
  if (identity?.agent_name) lines.push(`Your name is ${identity.agent_name}.`);
  if (identity?.personality) lines.push(`Your personality: ${identity.personality}.`);
  if (identity?.owner_name) lines.push(`Your owner is ${identity.owner_name}.`);
  // Owner context is free text and is emitted verbatim (no trailing period added).
  if (identity?.owner_context) lines.push(`Owner context: ${identity.owner_context}`);
  lines.push(`Always reply in the language with ISO code "${lang}" unless the user explicitly switches.`);
  return lines.join("\n");
}
|
|
198
|
+
|
|
146
199
|
/**
 * Report whether the super-agent reply path is enabled for a given config.
 *
 * The super-agent is the system's default reply path. It is considered
 * enabled as soon as a model is configured — the legacy `.enabled` flag is
 * honoured only when explicitly set to `false`. This prevents the bot from
 * silently dropping Telegram messages just because someone forgot to set
 * super_agent.enabled = true.
 *
 * @param {object|null|undefined} cfg global config; only `cfg.super_agent` is read
 * @returns {boolean} false when there is no `super_agent` section or no model,
 *   or when `enabled` is exactly `false`; true otherwise
 */
export function isSuperAgentEnabled(cfg) {
  const sa = cfg && cfg.super_agent;
  // No section or no model configured: nothing to run.
  if (!sa || !sa.model) return false;
  // Strict comparison on purpose: undefined / missing `enabled` means "on".
  return sa.enabled !== false;
}
|
|
149
209
|
|
|
150
210
|
export async function runSuperAgent({
|
|
@@ -158,6 +218,7 @@ export async function runSuperAgent({
|
|
|
158
218
|
overrideModel = null,
|
|
159
219
|
onEvent = null,
|
|
160
220
|
signal,
|
|
221
|
+
onToken = null,
|
|
161
222
|
}) {
|
|
162
223
|
if (!isSuperAgentEnabled(globalConfig)) {
|
|
163
224
|
throw new Error("super-agent not enabled (set super_agent.enabled and .model in ~/.apx/config.json)");
|
|
@@ -165,6 +226,19 @@ export async function runSuperAgent({
|
|
|
165
226
|
const sa = globalConfig.super_agent;
|
|
166
227
|
const activeModel = overrideModel || sa.model;
|
|
167
228
|
|
|
229
|
+
// Engine toggle: if config.super_agent.engine === "langchain", delegate to
|
|
230
|
+
// the LangChain AgentExecutor adapter. Default stays "native" (this loop).
|
|
231
|
+
// The toggle exists so we can A/B the two paths on the user's actual chat
|
|
232
|
+
// without committing to a full migration. See super-agent-langchain.js.
|
|
233
|
+
if (sa.engine === "langchain") {
|
|
234
|
+
const { runSuperAgentLangChain } = await import("./super-agent-langchain.js");
|
|
235
|
+
return runSuperAgentLangChain({
|
|
236
|
+
globalConfig, projects, plugins, registries,
|
|
237
|
+
prompt, previousMessages, contextNote,
|
|
238
|
+
onEvent, onToken, signal,
|
|
239
|
+
});
|
|
240
|
+
}
|
|
241
|
+
|
|
168
242
|
// Tiny project hint — JUST names + ids, no detail. The model is expected to
|
|
169
243
|
// call list_agents / list_mcps / read_agent_memory / etc. for everything
|
|
170
244
|
// else. Keeping this short forces actual tool use instead of letting the
|
|
@@ -206,15 +280,7 @@ export async function runSuperAgent({
|
|
|
206
280
|
// Language comes from config.user.language (ISO 639-1) so it stays in sync with transcription.
|
|
207
281
|
const identity = (() => { try { return readIdentity(); } catch { return null; } })();
|
|
208
282
|
const userLang = globalConfig?.user?.language || "en";
|
|
209
|
-
const identityBlock = (
|
|
210
|
-
const lines = ["# Identity"];
|
|
211
|
-
if (identity?.agent_name) lines.push(`Your name is ${identity.agent_name}.`);
|
|
212
|
-
if (identity?.personality) lines.push(`Your personality: ${identity.personality}.`);
|
|
213
|
-
if (identity?.owner_name) lines.push(`Your owner is ${identity.owner_name}.`);
|
|
214
|
-
if (identity?.owner_context) lines.push(`Owner context: ${identity.owner_context}`);
|
|
215
|
-
lines.push(`Always reply in the language with ISO code "${userLang}" unless the user explicitly switches.`);
|
|
216
|
-
return lines.join("\n");
|
|
217
|
-
})();
|
|
283
|
+
const identityBlock = buildIdentityBlock(identity, userLang);
|
|
218
284
|
|
|
219
285
|
const system = [
|
|
220
286
|
sa.system || DEFAULT_SYSTEM,
|
|
@@ -246,14 +312,21 @@ export async function runSuperAgent({
|
|
|
246
312
|
let totalUsage = { input_tokens: 0, output_tokens: 0 };
|
|
247
313
|
let lastText = "";
|
|
248
314
|
let usePseudoTools = false;
|
|
315
|
+
// Track how many consecutive iterations contained only ACK_ONLY tools.
|
|
316
|
+
// While this is > 0 we keep tool_choice="required" so the next iter has
|
|
317
|
+
// to do real work — otherwise gemma4-class models call send_telegram
|
|
318
|
+
// for the ack and then break out with empty text on iter N+1.
|
|
319
|
+
let ackOnlyStreak = 0;
|
|
249
320
|
|
|
250
321
|
for (let iter = 0; iter < MAX_TOOL_ITERS; iter++) {
|
|
251
322
|
await emitProgress(onEvent, { type: "model_start", iteration: iter + 1 });
|
|
252
|
-
//
|
|
253
|
-
//
|
|
254
|
-
//
|
|
255
|
-
//
|
|
256
|
-
|
|
323
|
+
// Force a tool call on iter 0 (no bare "ok dame un segundo" reply), AND
|
|
324
|
+
// on any iteration that immediately follows an ack-only iter (so the
|
|
325
|
+
// model can't ack and then stop). After at most MAX_CONSECUTIVE_ACKS
|
|
326
|
+
// forced rounds we let it fall back to "auto" so the model can finish.
|
|
327
|
+
const forceTool =
|
|
328
|
+
iter === 0 ||
|
|
329
|
+
(ackOnlyStreak > 0 && ackOnlyStreak <= MAX_CONSECUTIVE_ACKS);
|
|
257
330
|
let result;
|
|
258
331
|
try {
|
|
259
332
|
result = await callEngine({
|
|
@@ -262,9 +335,12 @@ export async function runSuperAgent({
|
|
|
262
335
|
messages: conversation,
|
|
263
336
|
config: globalConfig,
|
|
264
337
|
tools: usePseudoTools ? null : TOOL_SCHEMAS,
|
|
265
|
-
toolChoice: usePseudoTools ? null : (
|
|
338
|
+
toolChoice: usePseudoTools ? null : (forceTool ? "required" : "auto"),
|
|
266
339
|
maxTokens: 1024,
|
|
267
340
|
signal,
|
|
341
|
+
// Only stream tokens on non-forced iterations — on forced iters the
|
|
342
|
+
// model MUST emit a tool_call, streaming text would confuse the user.
|
|
343
|
+
onToken: (!forceTool && onToken) ? onToken : null,
|
|
268
344
|
});
|
|
269
345
|
} catch (e) {
|
|
270
346
|
if (usePseudoTools && /^ollama:/i.test(String(activeModel || "")) && /ollama\s+500/i.test(String(e?.message || "")) && trace.length > 0) {
|
|
@@ -284,6 +360,7 @@ export async function runSuperAgent({
|
|
|
284
360
|
toolChoice: null,
|
|
285
361
|
maxTokens: 1024,
|
|
286
362
|
signal,
|
|
363
|
+
onToken: (iter > 0 && onToken) ? onToken : null,
|
|
287
364
|
});
|
|
288
365
|
}
|
|
289
366
|
totalUsage.input_tokens += result.usage?.input_tokens || 0;
|
|
@@ -378,6 +455,25 @@ export async function runSuperAgent({
|
|
|
378
455
|
content: JSON.stringify(toolResult),
|
|
379
456
|
});
|
|
380
457
|
}
|
|
458
|
+
|
|
459
|
+
// Did this iteration consist of ONLY ack-style tool calls? If so we'll
|
|
460
|
+
// keep tool_choice forced on the next iter (see top of loop). A turn
|
|
461
|
+
// that mixes send_telegram + e.g. browser_screenshot counts as "real
|
|
462
|
+
// work" and resets the streak.
|
|
463
|
+
const allAckOnly = toolCalls.every((tc) => {
|
|
464
|
+
const n = (tc.function?.name) || tc.name;
|
|
465
|
+
return ACK_ONLY_TOOLS.has(n);
|
|
466
|
+
});
|
|
467
|
+
if (allAckOnly) {
|
|
468
|
+
ackOnlyStreak += 1;
|
|
469
|
+
await emitProgress(onEvent, {
|
|
470
|
+
type: "ack_only_iter",
|
|
471
|
+
iteration: iter + 1,
|
|
472
|
+
streak: ackOnlyStreak,
|
|
473
|
+
});
|
|
474
|
+
} else {
|
|
475
|
+
ackOnlyStreak = 0;
|
|
476
|
+
}
|
|
381
477
|
}
|
|
382
478
|
|
|
383
479
|
return {
|
|
@@ -28,8 +28,9 @@
|
|
|
28
28
|
|
|
29
29
|
import fs from "node:fs";
|
|
30
30
|
import path from "node:path";
|
|
31
|
-
import { spawn } from "node:child_process";
|
|
31
|
+
import { spawn, exec } from "node:child_process";
|
|
32
32
|
import { fileURLToPath } from "node:url";
|
|
33
|
+
import { logInfo, logWarn, logError } from "../core/logging.js";
|
|
33
34
|
|
|
34
35
|
const __filename = fileURLToPath(import.meta.url);
|
|
35
36
|
const __dirname = path.dirname(__filename);
|
|
@@ -43,8 +44,32 @@ const DEFAULT_LOCAL = {
|
|
|
43
44
|
language: "auto",
|
|
44
45
|
beam_size: 5,
|
|
45
46
|
idle_minutes: 10,
|
|
47
|
+
// Max time we wait for /transcribe to return. Long audio files (Telegram
|
|
48
|
+
// voice notes > 10 min) can take several minutes on CPU; the previous
|
|
49
|
+
// hard-coded 5-minute cap silently truncated them. 20 minutes covers a
|
|
50
|
+
// ~60-minute voice note on a small int8 model. Override with
|
|
51
|
+
// transcription.local.timeout_ms in ~/.apx/config.json if needed.
|
|
52
|
+
timeout_ms: 20 * 60_000,
|
|
46
53
|
};
|
|
47
54
|
|
|
55
|
+
// ---------------------------------------------------------------------------
|
|
56
|
+
// Config helpers (pure — exported for tests)
|
|
57
|
+
// ---------------------------------------------------------------------------
|
|
58
|
+
|
|
59
|
+
/**
 * Resolve the effective transcription language.
 * Priority: explicit local config → config.user.language → "auto" (whisper detects).
 *
 * @param {object} localCfg merged transcription.local config; a missing
 *   (null/undefined) object is treated as having no explicit language
 * @param {string} userLang config.user.language ISO code (e.g. "es"), or ""
 * @returns {string} ISO code or "auto"
 */
export function resolveTranscriptionLanguage(localCfg, userLang) {
  // Optional chaining keeps this pure helper from throwing when called
  // without a local config object.
  const explicit = localCfg?.language;
  // "auto" in the local config is not a real choice: it means "defer".
  if (explicit && explicit !== "auto") return explicit;
  if (userLang) return userLang;
  return "auto";
}
|
|
72
|
+
|
|
48
73
|
// ---------------------------------------------------------------------------
|
|
49
74
|
// Config
|
|
50
75
|
// ---------------------------------------------------------------------------
|
|
@@ -59,9 +84,7 @@ async function getConfig() {
|
|
|
59
84
|
// Explicit transcription.local.language always wins; "auto" means fall back to user.language.
|
|
60
85
|
const userLang = cfg.user?.language || "";
|
|
61
86
|
const localBase = { ...DEFAULT_LOCAL, ...(t.local || {}) };
|
|
62
|
-
|
|
63
|
-
localBase.language = userLang;
|
|
64
|
-
}
|
|
87
|
+
localBase.language = resolveTranscriptionLanguage(localBase, userLang);
|
|
65
88
|
return {
|
|
66
89
|
provider: t.provider || "auto",
|
|
67
90
|
local: localBase,
|
|
@@ -98,6 +121,21 @@ async function _isServerHealthy() {
|
|
|
98
121
|
}
|
|
99
122
|
}
|
|
100
123
|
|
|
124
|
+
/**
 * Ask the whisper-server on the local WHISPER_PORT which model it is running.
 *
 * Issues a GET to /health with an 800 ms abort timeout and reads the `model`
 * field of the JSON body.
 *
 * @returns {Promise<string|null>} the reported model name, or null when the
 *   server is unreachable, answers with a non-OK status, returns invalid JSON,
 *   or reports no model
 */
async function _serverModelName() {
  try {
    const res = await fetch(`http://127.0.0.1:${WHISPER_PORT}/health`, {
      signal: AbortSignal.timeout(800),
    });
    if (!res.ok) return null;
    const body = await res.json();
    return body?.model || null;
  } catch {
    // Deliberate best-effort probe: connection refused, timeout and JSON
    // parse errors all mean "no usable server", which callers see as null.
    return null;
  }
}
|
|
138
|
+
|
|
101
139
|
async function _waitForServer(maxMs = 15_000) {
|
|
102
140
|
const deadline = Date.now() + maxMs;
|
|
103
141
|
while (Date.now() < deadline) {
|
|
@@ -107,18 +145,62 @@ async function _waitForServer(maxMs = 15_000) {
|
|
|
107
145
|
throw new Error(`whisper-server did not start within ${maxMs}ms`);
|
|
108
146
|
}
|
|
109
147
|
|
|
148
|
+
/**
 * Find the PID of the process LISTENing on the whisper port (server only,
 * not clients). Filtering by -sTCP:LISTEN is critical — without it, lsof
 * also returns clients with an open connection (including this daemon).
 *
 * Never rejects: any lsof failure or empty output resolves to null.
 *
 * @returns {Promise<number|null>} first listener PID that is not this
 *   process, or null when none is found
 */
async function _findListenerPid() {
  return new Promise((resolve) => {
    exec(`lsof -ti tcp:${WHISPER_PORT} -sTCP:LISTEN`, (err, stdout) => {
      if (err || !stdout) {
        resolve(null);
        return;
      }
      // `-t` prints one PID per line; drop anything unparsable and our own pid.
      const pids = stdout
        .trim()
        .split("\n")
        .map((line) => Number.parseInt(line, 10))
        .filter((pid) => Number.isFinite(pid) && pid !== process.pid);
      // `||` (not `??`) so an empty list and a parsed 0 both resolve to null.
      resolve(pids[0] || null);
    });
  });
}
|
|
162
|
+
|
|
163
|
+
/**
 * Stop a whisper-server that this daemon did not spawn and is still bound to
 * WHISPER_PORT.
 *
 * Escalates in three steps: POST /shutdown, then SIGTERM to the listener PID,
 * then SIGKILL if the process still exists. Every step is best-effort and the
 * function never throws on a failed signal or request.
 *
 * @returns {Promise<void>}
 */
async function _killOrphanWhisper() {
  // First try graceful /shutdown on the whisper server.
  try {
    await fetch(`http://127.0.0.1:${WHISPER_PORT}/shutdown`, {
      method: "POST",
      signal: AbortSignal.timeout(1000),
    });
    // Only wait when the request went through; a failed request skips the pause.
    await _sleep(600);
  } catch {
    // Unreachable or no /shutdown route: fall through to the signal path.
  }

  // If still bound, force-kill the LISTENER pid only (never our own pid).
  const pid = await _findListenerPid();
  if (!pid || pid === process.pid) return;

  try {
    process.kill(pid, "SIGTERM");
  } catch {
    // Already gone or not ours to signal; the probe below decides what's next.
  }
  await _sleep(400);

  // Signal 0 delivers nothing: it only throws when the pid cannot be signalled.
  let stillAlive = false;
  try {
    process.kill(pid, 0);
    stillAlive = true;
  } catch {
    // Probe failed, so there is nothing left to SIGKILL.
  }
  if (stillAlive) {
    try {
      process.kill(pid, "SIGKILL");
    } catch {
      // Lost the race with process exit; nothing more to do.
    }
  }
  await _sleep(300);
}
|
|
180
|
+
|
|
110
181
|
async function ensureWhisperServer(opts) {
|
|
111
182
|
const model = opts.model || DEFAULT_LOCAL.model;
|
|
112
183
|
|
|
113
184
|
// Already running with the right model — health-check to confirm still alive.
|
|
114
185
|
if (_serverProcess && _serverModel === model) {
|
|
115
186
|
if (await _isServerHealthy()) return;
|
|
116
|
-
// Process died (idle shutdown). Fall through to restart.
|
|
117
187
|
_serverProcess = null;
|
|
118
188
|
_serverModel = null;
|
|
119
189
|
}
|
|
120
190
|
|
|
121
|
-
//
|
|
191
|
+
// Adopt an externally-running whisper-server (e.g. left over from prior daemon).
|
|
192
|
+
if (!_serverProcess) {
|
|
193
|
+
const existing = await _serverModelName();
|
|
194
|
+
if (existing === model) {
|
|
195
|
+
_serverModel = model;
|
|
196
|
+
return;
|
|
197
|
+
}
|
|
198
|
+
if (existing) {
|
|
199
|
+
// Wrong model: kick out the orphan so we can start the right one.
|
|
200
|
+
await _killOrphanWhisper();
|
|
201
|
+
}
|
|
202
|
+
}
|
|
203
|
+
|
|
122
204
|
if (_serverProcess) {
|
|
123
205
|
try { _serverProcess.kill(); } catch {}
|
|
124
206
|
_serverProcess = null;
|
|
@@ -126,6 +208,10 @@ async function ensureWhisperServer(opts) {
|
|
|
126
208
|
await _sleep(300);
|
|
127
209
|
}
|
|
128
210
|
|
|
211
|
+
await _spawnWhisper(opts, model, /* retried */ false);
|
|
212
|
+
}
|
|
213
|
+
|
|
214
|
+
async function _spawnWhisper(opts, model, retried) {
|
|
129
215
|
const args = [
|
|
130
216
|
WHISPER_SERVER,
|
|
131
217
|
"--port", String(WHISPER_PORT),
|
|
@@ -151,32 +237,44 @@ async function ensureWhisperServer(opts) {
|
|
|
151
237
|
});
|
|
152
238
|
|
|
153
239
|
// Wait for the "ready" line on stdout, then wait for HTTP to respond.
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
240
|
+
try {
|
|
241
|
+
await new Promise((resolve, reject) => {
|
|
242
|
+
const timeout = setTimeout(
|
|
243
|
+
() => reject(new Error("whisper-server startup timed out (15s)")),
|
|
244
|
+
15_000
|
|
245
|
+
);
|
|
246
|
+
let buf = "";
|
|
247
|
+
proc.stdout.on("data", (chunk) => {
|
|
248
|
+
buf += chunk.toString();
|
|
249
|
+
const nl = buf.indexOf("\n");
|
|
250
|
+
if (nl === -1) return;
|
|
251
|
+
const line = buf.slice(0, nl).trim();
|
|
252
|
+
buf = buf.slice(nl + 1);
|
|
253
|
+
clearTimeout(timeout);
|
|
254
|
+
try {
|
|
255
|
+
const msg = JSON.parse(line);
|
|
256
|
+
if (msg.status === "error") return reject(new Error(msg.error || "whisper-server error"));
|
|
257
|
+
resolve(); // "ready"
|
|
258
|
+
} catch {
|
|
259
|
+
resolve(); // unexpected line but server is up
|
|
260
|
+
}
|
|
261
|
+
});
|
|
262
|
+
proc.on("exit", (code) => {
|
|
263
|
+
clearTimeout(timeout);
|
|
264
|
+
reject(new Error(`whisper-server exited (code ${code}) before becoming ready`));
|
|
265
|
+
});
|
|
178
266
|
});
|
|
179
|
-
})
|
|
267
|
+
} catch (e) {
|
|
268
|
+
// Self-heal: if the port was already in use, kill the orphan and retry once.
|
|
269
|
+
const msg = e.message || "";
|
|
270
|
+
if (!retried && /address already in use|errno 48|eaddrinuse/i.test(msg)) {
|
|
271
|
+
_serverProcess = null;
|
|
272
|
+
_serverModel = null;
|
|
273
|
+
await _killOrphanWhisper();
|
|
274
|
+
return _spawnWhisper(opts, model, /* retried */ true);
|
|
275
|
+
}
|
|
276
|
+
throw e;
|
|
277
|
+
}
|
|
180
278
|
}
|
|
181
279
|
|
|
182
280
|
// ---------------------------------------------------------------------------
|
|
@@ -190,30 +288,74 @@ async function transcribeLocal(filePath, opts) {
|
|
|
190
288
|
? null
|
|
191
289
|
: (opts.language || null);
|
|
192
290
|
|
|
193
|
-
const
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
body: JSON.stringify({
|
|
197
|
-
audio_path: filePath,
|
|
198
|
-
language,
|
|
199
|
-
beam_size: opts.beam_size || DEFAULT_LOCAL.beam_size,
|
|
200
|
-
}),
|
|
201
|
-
signal: AbortSignal.timeout(5 * 60_000),
|
|
202
|
-
});
|
|
291
|
+
const timeoutMs = Number(opts.timeout_ms) > 0
|
|
292
|
+
? Number(opts.timeout_ms)
|
|
293
|
+
: DEFAULT_LOCAL.timeout_ms;
|
|
203
294
|
|
|
204
|
-
const
|
|
205
|
-
|
|
295
|
+
const body = JSON.stringify({
|
|
296
|
+
audio_path: filePath,
|
|
297
|
+
language,
|
|
298
|
+
beam_size: opts.beam_size || DEFAULT_LOCAL.beam_size,
|
|
299
|
+
});
|
|
206
300
|
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
301
|
+
// Long transcriptions on CPU (small int8, 1-minute voice note) can take
|
|
302
|
+
// 30-45s. Under undici (Node fetch) we occasionally see "fetch failed"
|
|
303
|
+
// from the inbound Telegram path even though the whisper-server completes
|
|
304
|
+
// the request successfully — a keep-alive socket gets reset somewhere
|
|
305
|
+
// between the long whisper-server response and the daemon's other
|
|
306
|
+
// concurrent traffic. We retry once on a generic "fetch failed" so the
|
|
307
|
+
// user actually gets a reply.
|
|
308
|
+
const maxAttempts = 2;
|
|
309
|
+
let lastErr = null;
|
|
310
|
+
for (let attempt = 1; attempt <= maxAttempts; attempt++) {
|
|
311
|
+
const t0 = Date.now();
|
|
312
|
+
try {
|
|
313
|
+
logInfo("whisper", `transcribeLocal attempt ${attempt}/${maxAttempts}`, {
|
|
314
|
+
file: path.basename(filePath),
|
|
315
|
+
language: language || "auto",
|
|
316
|
+
timeout_ms: timeoutMs,
|
|
317
|
+
});
|
|
318
|
+
const res = await fetch(`http://127.0.0.1:${WHISPER_PORT}/transcribe`, {
|
|
319
|
+
method: "POST",
|
|
320
|
+
headers: { "content-type": "application/json", "connection": "close" },
|
|
321
|
+
body,
|
|
322
|
+
signal: AbortSignal.timeout(timeoutMs),
|
|
323
|
+
});
|
|
324
|
+
const json = await res.json();
|
|
325
|
+
if (!json.ok) throw new Error(json.error || "transcription failed");
|
|
326
|
+
logInfo("whisper", `transcribeLocal ok in ${Date.now() - t0}ms`, {
|
|
327
|
+
chars: (json.text || "").length,
|
|
328
|
+
language: json.language,
|
|
329
|
+
duration: json.duration,
|
|
330
|
+
});
|
|
331
|
+
return {
|
|
332
|
+
ok: true,
|
|
333
|
+
backend: "local",
|
|
334
|
+
text: json.text || "",
|
|
335
|
+
language: json.language || null,
|
|
336
|
+
language_probability: json.language_probability ?? null,
|
|
337
|
+
duration: json.duration ?? null,
|
|
338
|
+
model: json.model,
|
|
339
|
+
compute_type: json.compute_type,
|
|
340
|
+
};
|
|
341
|
+
} catch (e) {
|
|
342
|
+
lastErr = e;
|
|
343
|
+
const isRetriable =
|
|
344
|
+
/fetch failed|ECONNRESET|socket hang up|terminated/i.test(e.message || "");
|
|
345
|
+
const dt = Date.now() - t0;
|
|
346
|
+
logWarn("whisper", `transcribeLocal attempt ${attempt} failed in ${dt}ms`, {
|
|
347
|
+
error: e.message,
|
|
348
|
+
retriable: isRetriable,
|
|
349
|
+
will_retry: isRetriable && attempt < maxAttempts,
|
|
350
|
+
});
|
|
351
|
+
if (!isRetriable || attempt >= maxAttempts) break;
|
|
352
|
+
// Brief backoff before retry — gives the whisper-server.py thread time
|
|
353
|
+
// to flush its pending response and release the model lock.
|
|
354
|
+
await _sleep(500);
|
|
355
|
+
}
|
|
356
|
+
}
|
|
357
|
+
logError("whisper", `transcribeLocal exhausted retries`, { error: lastErr?.message });
|
|
358
|
+
throw lastErr || new Error("local transcription failed");
|
|
217
359
|
}
|
|
218
360
|
|
|
219
361
|
// ---------------------------------------------------------------------------
|
|
@@ -280,19 +422,80 @@ export async function transcribe(filePath, overrides = {}) {
|
|
|
280
422
|
return transcribeOpenAI(filePath, cfg.openaiKey);
|
|
281
423
|
}
|
|
282
424
|
if (provider === "local") {
|
|
425
|
+
// Explicit local-only: bubble up the real error, do not mention OpenAI.
|
|
283
426
|
return transcribeLocal(filePath, localOpts);
|
|
284
427
|
}
|
|
285
428
|
|
|
286
|
-
// auto: local first, fall back to openai
|
|
429
|
+
// auto: local first, fall back to openai only if a key is configured
|
|
287
430
|
try {
|
|
288
431
|
return await transcribeLocal(filePath, localOpts);
|
|
289
432
|
} catch (localErr) {
|
|
290
|
-
if (
|
|
291
|
-
|
|
292
|
-
`local transcription failed and no OpenAI fallback available: ${localErr.message}`
|
|
293
|
-
);
|
|
433
|
+
if (cfg.openaiKey) {
|
|
434
|
+
return transcribeOpenAI(filePath, cfg.openaiKey);
|
|
294
435
|
}
|
|
295
|
-
|
|
436
|
+
// No OpenAI configured — surface the real local error verbatim.
|
|
437
|
+
throw new Error(`local transcription failed: ${localErr.message}`);
|
|
438
|
+
}
|
|
439
|
+
}
|
|
440
|
+
|
|
441
|
+
/**
 * Transcribe raw audio bytes (e.g. from a mic chunk or Telegram voice blob).
 * Writes the bytes to a uniquely named file in the OS temp directory, runs
 * transcribe() on it, and always removes the file afterwards.
 *
 * @param {Buffer} buf        raw audio data; must be non-empty
 * @param {string} format     file extension hint: "webm" | "ogg" | "wav" | "mp3"
 *                            (default "webm"). A single leading dot is allowed.
 *                            Anything that is not a short alphanumeric
 *                            extension falls back to "webm".
 * @param {object} overrides  same as transcribe() overrides
 * @returns {Promise<object>} whatever transcribe() resolves to
 * @throws {Error} if `buf` is empty, or if writing/transcribing fails
 */
export async function transcribeBuffer(buf, format = "webm", overrides = {}) {
  if (!buf || !buf.length) throw new Error("transcribeBuffer: empty buffer");

  // `format` ends up inside a filesystem path. Path separators or ".."
  // segments would let path.join() resolve outside the temp directory, so
  // only a plain alphanumeric extension is accepted.
  const candidate = String(format ?? "").replace(/^\./, "");
  const ext = /^[A-Za-z0-9]{1,16}$/.test(candidate) ? candidate : "webm";

  const { tmpdir } = await import("node:os");
  const tmpFile = path.join(
    tmpdir(),
    `apx-audio-${Date.now()}-${Math.random().toString(36).slice(2)}.${ext}`
  );
  try {
    fs.writeFileSync(tmpFile, buf);
    return await transcribe(tmpFile, overrides);
  } finally {
    // Best-effort cleanup: the file may never have been created.
    try { fs.unlinkSync(tmpFile); } catch {}
  }
}
|
|
463
|
+
|
|
464
|
+
// ---------------------------------------------------------------------------
|
|
465
|
+
// Lifecycle (preload on daemon start, shutdown on daemon stop)
|
|
466
|
+
// ---------------------------------------------------------------------------
|
|
467
|
+
|
|
468
|
+
/**
 * Warm up the local whisper server ahead of the first transcription request.
 * Does nothing when the configured provider is "openai". Errors from the
 * config lookup or server start-up are reported through `log` instead of
 * being propagated.
 *
 * @param {(msg: string) => void} log  sink for progress messages (default console.log)
 */
export async function preloadWhisperServer(log = console.log) {
  try {
    const { provider, local } = await getConfig();
    if (provider !== "openai") {
      log(`whisper: preloading model "${local.model}" on port ${WHISPER_PORT}…`);
      await ensureWhisperServer(local);
      log(`whisper: ready on port ${WHISPER_PORT} (model: ${_serverModel})`);
    }
  } catch (e) {
    log(`whisper: preload failed — ${e.message} (will retry lazily on first request)`);
  }
}
|
|
483
|
+
|
|
484
|
+
/**
 * Stop the local whisper server.
 *
 * - If this daemon holds a server process handle (`_serverProcess`), that
 *   process is asked to exit via its `kill()` method.
 * - Otherwise a best-effort `POST /shutdown` (500 ms timeout) is sent to the
 *   whisper port, which covers a server that was adopted rather than spawned.
 *
 * In both cases the cached `_serverProcess` / `_serverModel` state is cleared,
 * so no stale model name is left behind. Errors from kill() or the HTTP
 * request are swallowed on purpose: shutdown is best-effort.
 */
export async function shutdownWhisperServer() {
  const owned = _serverProcess;
  _serverProcess = null;
  _serverModel = null;

  if (owned) {
    try { owned.kill(); } catch {}
    return;
  }

  // No process handle: try a graceful shutdown of an adopted server.
  try {
    await fetch(`http://127.0.0.1:${WHISPER_PORT}/shutdown`, {
      method: "POST", signal: AbortSignal.timeout(500),
    });
  } catch {}
}
|
|
298
501
|
|