@agentprojectcontext/apx 1.33.1 → 1.35.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (208) hide show
  1. package/package.json +1 -1
  2. package/skills/apx/SKILL.md +49 -61
  3. package/src/core/agent/a2a/reply.js +48 -0
  4. package/src/core/agent/build-agent-system.js +136 -59
  5. package/src/core/agent/channels/voice-context.js +98 -0
  6. package/src/core/agent/memory.js +2 -1
  7. package/src/core/agent/prompt-builder.js +178 -124
  8. package/src/core/agent/prompts/channels/code.md +12 -10
  9. package/src/core/agent/prompts/channels/desktop.md +5 -32
  10. package/src/core/agent/prompts/channels/telegram.md +4 -15
  11. package/src/core/agent/prompts/channels/web_code.md +11 -11
  12. package/src/core/agent/prompts/core/agent-base.md +24 -0
  13. package/src/core/agent/prompts/core/project-agent.md +11 -0
  14. package/src/core/agent/prompts/core/super-agent.md +21 -0
  15. package/src/core/agent/prompts/discipline/action.md +10 -0
  16. package/src/core/agent/prompts/discipline/single-segment.md +6 -0
  17. package/src/core/agent/prompts/discipline/two-segment.md +11 -0
  18. package/src/core/agent/prompts/modes/code-build.md +1 -0
  19. package/src/core/agent/prompts/modes/code-plan.md +1 -0
  20. package/src/core/agent/prompts/modes/index.js +28 -0
  21. package/src/core/agent/self-memory.js +43 -1
  22. package/src/core/agent/skills/index-store.js +307 -0
  23. package/src/core/agent/skills/index.js +15 -1
  24. package/src/core/agent/skills/inspector.js +317 -0
  25. package/src/core/agent/skills/loader.js +22 -18
  26. package/src/core/agent/stream/turn-accumulator.js +73 -0
  27. package/src/core/agent/suggestions.js +37 -0
  28. package/src/core/agent/super-agent.js +7 -1
  29. package/src/core/agent/tools/handlers/_git.js +50 -0
  30. package/src/core/agent/tools/handlers/add-project.js +5 -2
  31. package/src/core/agent/tools/handlers/call-runtime.js +3 -2
  32. package/src/core/agent/tools/handlers/git-diff.js +44 -0
  33. package/src/core/agent/tools/handlers/git-log.js +38 -0
  34. package/src/core/agent/tools/handlers/git-show.js +34 -0
  35. package/src/core/agent/tools/handlers/git-status.js +61 -0
  36. package/src/core/agent/tools/handlers/transcribe-audio.js +1 -1
  37. package/src/core/agent/tools/helpers.js +2 -2
  38. package/src/core/agent/tools/names.js +169 -0
  39. package/src/core/agent/tools/registry-bridge.js +6 -14
  40. package/src/core/agent/tools/registry.js +103 -69
  41. package/src/core/apc/context-copy.js +27 -0
  42. package/src/core/apc/notes.js +19 -0
  43. package/src/core/apc/parser.js +12 -5
  44. package/src/core/apc/paths.js +87 -0
  45. package/src/core/apc/scaffold.js +82 -76
  46. package/src/core/apc/skill-sync.js +10 -0
  47. package/src/{host/daemon/plugins → core/channels}/telegram/dispatch.js +38 -16
  48. package/src/core/config/index.js +24 -2
  49. package/src/core/config/redact.js +95 -0
  50. package/src/core/constants/channels.js +2 -0
  51. package/src/core/constants/code-modes.js +10 -0
  52. package/src/core/constants/index.js +1 -0
  53. package/src/core/deck/manifest.js +186 -0
  54. package/src/core/engines/catalog.js +83 -0
  55. package/src/core/{tools → http-tools}/browser.js +0 -1
  56. package/src/core/{tools → http-tools}/fetch.js +0 -1
  57. package/src/core/{tools → http-tools}/glob.js +0 -1
  58. package/src/core/{tools → http-tools}/grep.js +0 -1
  59. package/src/core/{tools → http-tools}/registry.js +0 -1
  60. package/src/core/{tools → http-tools}/search.js +0 -1
  61. package/src/core/i18n/en.js +9 -0
  62. package/src/core/i18n/es.js +12 -0
  63. package/src/core/i18n/index.js +54 -0
  64. package/src/core/i18n/pt.js +9 -0
  65. package/src/core/identity/telegram.js +2 -1
  66. package/src/core/mcp/runner.js +272 -14
  67. package/src/core/mcp/sources.js +3 -2
  68. package/src/core/routines/index.js +16 -0
  69. package/src/{host/daemon/routines.js → core/routines/runner.js} +36 -103
  70. package/src/core/runtime-skills/apc-context/SKILL.md +159 -0
  71. package/src/core/runtime-skills/apx/SKILL.md +83 -0
  72. package/src/core/runtime-skills/apx-agency-agents/SKILL.md +125 -0
  73. package/src/core/runtime-skills/apx-agent/SKILL.md +97 -0
  74. package/src/core/runtime-skills/apx-mcp/SKILL.md +111 -0
  75. package/src/core/runtime-skills/apx-mcp-builder/SKILL.md +169 -0
  76. package/{skills → src/core/runtime-skills}/apx-project/SKILL.md +20 -29
  77. package/src/core/runtime-skills/apx-routine/SKILL.md +127 -0
  78. package/src/core/runtime-skills/apx-runtime/SKILL.md +99 -0
  79. package/src/core/runtime-skills/apx-sessions/SKILL.md +232 -0
  80. package/src/core/runtime-skills/apx-skill-builder/SKILL.md +129 -0
  81. package/{skills → src/core/runtime-skills}/apx-task/SKILL.md +18 -21
  82. package/src/core/runtime-skills/apx-telegram/SKILL.md +120 -0
  83. package/src/core/runtime-skills/apx-voice/SKILL.md +117 -0
  84. package/src/core/runtime-skills/{claude-code.md → claude-code/SKILL.md} +1 -0
  85. package/src/core/runtime-skills/{codex-cli.md → codex-cli/SKILL.md} +1 -0
  86. package/src/core/runtime-skills/{opencode-cli.md → opencode-cli/SKILL.md} +1 -0
  87. package/src/core/runtime-skills/{openrouter.md → openrouter/SKILL.md} +1 -0
  88. package/src/{host/daemon/env-detect.js → core/runtimes/detect.js} +1 -1
  89. package/src/core/stores/code-sessions.js +50 -2
  90. package/src/core/stores/routine-memory.js +1 -1
  91. package/src/core/stores/sessions-search.js +121 -0
  92. package/src/core/stores/sessions.js +38 -0
  93. package/src/core/vars/index.js +14 -0
  94. package/src/core/vars/interpolate.js +86 -0
  95. package/src/core/vars/sources.js +151 -0
  96. package/src/core/voice/audio-decode.js +38 -0
  97. package/src/core/voice/transcription.js +225 -0
  98. package/src/host/daemon/api/admin-config.js +5 -82
  99. package/src/host/daemon/api/agents.js +5 -5
  100. package/src/host/daemon/api/code.js +17 -169
  101. package/src/host/daemon/api/config.js +3 -4
  102. package/src/host/daemon/api/conversations.js +8 -29
  103. package/src/host/daemon/api/deck.js +37 -404
  104. package/src/host/daemon/api/engines.js +1 -80
  105. package/src/host/daemon/api/exec.js +1 -1
  106. package/src/host/daemon/api/mcps.js +32 -0
  107. package/src/host/daemon/api/routines.js +1 -1
  108. package/src/host/daemon/api/runtimes.js +4 -3
  109. package/src/host/daemon/api/sessions-search.js +24 -140
  110. package/src/host/daemon/api/sessions.js +12 -30
  111. package/src/host/daemon/api/shared.js +2 -1
  112. package/src/host/daemon/api/skills.js +140 -6
  113. package/src/host/daemon/api/super-agent.js +56 -1
  114. package/src/host/daemon/api/telegram.js +1 -11
  115. package/src/host/daemon/api/tools.js +6 -6
  116. package/src/host/daemon/api/transcribe.js +2 -2
  117. package/src/host/daemon/api/vars.js +137 -0
  118. package/src/host/daemon/api/voice.js +13 -290
  119. package/src/host/daemon/api.js +2 -0
  120. package/src/host/daemon/db.js +6 -6
  121. package/src/host/daemon/deck-exec.js +148 -0
  122. package/src/host/daemon/index.js +20 -3
  123. package/src/host/daemon/plugins/telegram/index.js +9 -9
  124. package/src/host/daemon/routines-scheduler.js +64 -0
  125. package/src/host/daemon/smoke.js +3 -2
  126. package/src/host/daemon/whisper-server.js +225 -0
  127. package/src/interfaces/cli/branding.js +53 -0
  128. package/src/interfaces/cli/commands/agent.js +3 -2
  129. package/src/interfaces/cli/commands/command.js +2 -3
  130. package/src/interfaces/cli/commands/messages.js +6 -2
  131. package/src/interfaces/cli/commands/pair.js +5 -4
  132. package/src/interfaces/cli/commands/search.js +1 -1
  133. package/src/interfaces/cli/commands/sessions.js +3 -2
  134. package/src/interfaces/cli/commands/skills.js +290 -55
  135. package/src/interfaces/cli/index.js +84 -2
  136. package/src/interfaces/web/dist/assets/index-C0fm31dY.js +618 -0
  137. package/src/interfaces/web/dist/assets/index-C0fm31dY.js.map +1 -0
  138. package/src/interfaces/web/dist/assets/index-UcAqlBO6.css +1 -0
  139. package/src/interfaces/web/dist/index.html +2 -2
  140. package/src/interfaces/web/package-lock.json +182 -182
  141. package/src/interfaces/web/src/components/ModelCombobox.tsx +2 -1
  142. package/src/interfaces/web/src/components/TelegramChannelDialog.tsx +1 -1
  143. package/src/interfaces/web/src/components/chat/AskAnswersCard.tsx +76 -0
  144. package/src/interfaces/web/src/components/chat/MessageBubble.tsx +37 -4
  145. package/src/interfaces/web/src/components/chat/MessageList.tsx +23 -1
  146. package/src/interfaces/web/src/components/chat/ModelPicker.tsx +3 -1
  147. package/src/interfaces/web/src/components/code/CodeArtifactsTab.tsx +4 -4
  148. package/src/interfaces/web/src/components/code/CodeChangesTab.tsx +1 -1
  149. package/src/interfaces/web/src/components/code/CodeFileTree.tsx +3 -2
  150. package/src/interfaces/web/src/components/code/CodeFileViewer.tsx +3 -2
  151. package/src/interfaces/web/src/components/code/CodeTerminal.tsx +3 -2
  152. package/src/interfaces/web/src/components/config/GlobalConfigEditor.tsx +2 -1
  153. package/src/interfaces/web/src/components/deck/WidgetRow.tsx +2 -1
  154. package/src/interfaces/web/src/components/inputs/KeyValueList.tsx +93 -0
  155. package/src/interfaces/web/src/components/inputs/VarTokenInput.tsx +449 -0
  156. package/src/interfaces/web/src/components/settings/DefaultRouterCard.tsx +2 -1
  157. package/src/interfaces/web/src/components/settings/EnginesPanel.tsx +2 -2
  158. package/src/interfaces/web/src/components/settings/MemoryPanel.tsx +73 -4
  159. package/src/interfaces/web/src/components/settings/SkillsInspectorPanel.tsx +222 -0
  160. package/src/interfaces/web/src/components/settings/providers/ProviderCard.tsx +3 -2
  161. package/src/interfaces/web/src/components/settings/providers/ProviderModal.tsx +3 -2
  162. package/src/interfaces/web/src/components/ui/chat-input.tsx +5 -4
  163. package/src/interfaces/web/src/components/ui/sidebar.tsx +3 -2
  164. package/src/interfaces/web/src/components/voice/VoiceProviderModal.tsx +2 -1
  165. package/src/interfaces/web/src/constants/index.ts +1 -1
  166. package/src/interfaces/web/src/hooks/useChat.ts +19 -0
  167. package/src/interfaces/web/src/i18n/en.ts +175 -7
  168. package/src/interfaces/web/src/i18n/es.ts +180 -15
  169. package/src/interfaces/web/src/lib/api/mcps.ts +25 -0
  170. package/src/interfaces/web/src/lib/api/skills.ts +70 -0
  171. package/src/interfaces/web/src/lib/api/vars.ts +38 -0
  172. package/src/interfaces/web/src/lib/api.ts +1 -0
  173. package/src/interfaces/web/src/screens/ProjectScreen.tsx +8 -31
  174. package/src/interfaces/web/src/screens/SettingsScreen.tsx +6 -2
  175. package/src/interfaces/web/src/screens/modules/CodeScreen.tsx +1 -1
  176. package/src/interfaces/web/src/screens/modules/DeckScreen.tsx +4 -3
  177. package/src/interfaces/web/src/screens/modules/DesktopScreen.tsx +7 -6
  178. package/src/interfaces/web/src/screens/modules/VoiceScreen.tsx +4 -3
  179. package/src/interfaces/web/src/screens/project/AgentDetailScreen.tsx +1 -1
  180. package/src/interfaces/web/src/screens/project/ConfigTab.tsx +132 -1
  181. package/src/interfaces/web/src/screens/project/McpsTab.tsx +549 -104
  182. package/src/interfaces/web/src/screens/project/RoutinesTab.tsx +1 -1
  183. package/src/interfaces/web/src/screens/project/VarsTab.tsx +300 -0
  184. package/src/interfaces/web/src/types/daemon.ts +15 -0
  185. package/skills/apx-agency-agents/SKILL.md +0 -141
  186. package/skills/apx-agent/SKILL.md +0 -100
  187. package/skills/apx-mcp-builder/SKILL.md +0 -183
  188. package/skills/apx-routine/SKILL.md +0 -140
  189. package/skills/apx-runtime/SKILL.md +0 -117
  190. package/skills/apx-sessions/SKILL.md +0 -281
  191. package/skills/apx-skill-builder/SKILL.md +0 -153
  192. package/skills/apx-telegram/SKILL.md +0 -131
  193. package/skills/apx-voice/SKILL.md +0 -137
  194. package/src/core/agent/prompts/action-discipline.md +0 -24
  195. package/src/core/agent/prompts/super-agent-base.md +0 -42
  196. package/src/host/daemon/transcription.js +0 -538
  197. package/src/host/daemon/whisper-transcribe.py +0 -73
  198. package/src/interfaces/web/dist/assets/index-Aaiw8BZN.css +0 -1
  199. package/src/interfaces/web/dist/assets/index-DPqtjDjh.js +0 -602
  200. package/src/interfaces/web/dist/assets/index-DPqtjDjh.js.map +0 -1
  201. /package/src/{host/daemon → core/apc}/projects-helpers.js +0 -0
  202. /package/src/{host/daemon/plugins → core/channels}/telegram/ask.js +0 -0
  203. /package/src/{host/daemon/plugins → core/channels}/telegram/helpers.js +0 -0
  204. /package/src/{host/daemon/plugins → core/channels}/telegram/media.js +0 -0
  205. /package/src/core/{tools → http-tools}/index.js +0 -0
  206. /package/src/{host/daemon/compact.js → core/stores/conversations-compactor.js} +0 -0
  207. /package/src/{host/daemon → core/stores}/conversations.js +0 -0
  208. /package/src/{host/daemon → core/util}/thinking.js +0 -0
@@ -1,538 +0,0 @@
1
- // daemon/transcription.js
2
- // Audio transcription dispatcher. Two backends:
3
- //
4
- // - LOCAL (faster-whisper via persistent Python server) — the server loads
5
- // the model once on first use and keeps it in RAM. It auto-shuts down after
6
- // idle_minutes (default 10) of inactivity, then restarts lazily on the
7
- // next request. Requires `pip3 install faster-whisper` on the host.
8
- //
9
- // - OPENAI (Whisper-1 cloud API) — needs OPENAI_API_KEY or
10
- // engines.openai.api_key in config.
11
- //
12
- // Provider selection in ~/.apx/config.json:
13
- // "transcription": {
14
- // "provider": "auto" | "local" | "openai", // default "auto"
15
- // "local": {
16
- // "model": "small", // tiny | base | small | medium | large | large-v2 | large-v3
17
- // "device": "cpu", // cpu | cuda
18
- // "compute_type": "int8", // int8 | int8_float16 | float16 | float32
19
- // "language": "auto", // ISO 639-1 code (e.g. "es") or "auto"
20
- // "beam_size": 5,
21
- // "idle_minutes": 10 // auto-shutdown after N minutes idle
22
- // }
23
- // }
24
- //
25
- // "auto" tries local first; on failure it falls back to openai only when an OpenAI API key is configured.
26
- //
27
- // Spanish tip: set language: "es" for better accuracy with the small model.
28
-
29
- import fs from "node:fs";
30
- import path from "node:path";
31
- import { spawn, exec } from "node:child_process";
32
- import { fileURLToPath } from "node:url";
33
- import { logInfo, logWarn, logError } from "#core/logging.js";
34
-
35
// ESM has no built-in __filename/__dirname; derive both from import.meta.url.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// Loopback TCP port used for every request to the whisper server.
const WHISPER_PORT = 18765;
// Path of the Python server script, resolved next to this module.
const WHISPER_SERVER = path.join(__dirname, "whisper-server.py");
39
-
40
// Baseline options for the local faster-whisper backend. Values from
// transcription.local in ~/.apx/config.json are layered on top of these.
const DEFAULT_LOCAL = {
  model: "small",
  device: "cpu",
  compute_type: "int8",
  language: "auto",
  beam_size: 5,
  idle_minutes: 10,
  // Upper bound on how long a single /transcribe request may take. Long audio
  // (Telegram voice notes > 10 min) can need several minutes on CPU, and an
  // earlier hard-coded 5-minute cap silently truncated them. 20 minutes covers
  // a ~60-minute voice note on a small int8 model. Override with
  // transcription.local.timeout_ms in ~/.apx/config.json if needed.
  timeout_ms: 1_200_000, // 20 minutes
};
54
-
55
- // ---------------------------------------------------------------------------
56
- // Config helpers (pure — exported for tests)
57
- // ---------------------------------------------------------------------------
58
-
59
/**
 * Pick the language hint handed to the transcription backend.
 *
 * Order of precedence:
 *   1. `localCfg.language`, unless it is empty or the literal "auto";
 *   2. `userLang`, when non-empty;
 *   3. "auto" (the backend detects the language itself).
 *
 * @param {object} localCfg  merged transcription.local config
 * @param {string} userLang  config.user.language ISO code (e.g. "es"), or ""
 * @returns {string}         ISO code or "auto"
 */
export function resolveTranscriptionLanguage(localCfg, userLang) {
  const explicit = localCfg.language;
  const hasExplicit = Boolean(explicit) && explicit !== "auto";
  return hasExplicit ? explicit : userLang || "auto";
}
72
-
73
- // ---------------------------------------------------------------------------
74
- // Config
75
- // ---------------------------------------------------------------------------
76
-
77
/**
 * Load the effective transcription settings.
 *
 * Reads the APX config through `readConfig()` and returns:
 *   - provider:  transcription.provider, defaulting to "auto"
 *   - local:     DEFAULT_LOCAL overlaid with transcription.local, with
 *                `language` resolved via resolveTranscriptionLanguage()
 *   - openaiKey: engines.openai.api_key, else OPENAI_API_KEY, else ""
 *
 * Never throws: if the config module cannot be imported or read, the failure
 * is logged and built-in defaults are returned instead.
 *
 * @returns {Promise<{provider: string, local: object, openaiKey: string}>}
 */
async function getConfig() {
  try {
    const { readConfig } = await import("#core/config/index.js");
    const cfg = readConfig() || {};
    const t = cfg.transcription || {};
    const openaiKey = cfg.engines?.openai?.api_key || process.env.OPENAI_API_KEY || "";
    // An explicit transcription.local.language always wins; "auto" (or unset)
    // defers to user.language when that is configured.
    const userLang = cfg.user?.language || "";
    const localBase = { ...DEFAULT_LOCAL, ...(t.local || {}) };
    localBase.language = resolveTranscriptionLanguage(localBase, userLang);
    return {
      provider: t.provider || "auto",
      local: localBase,
      openaiKey,
    };
  } catch (e) {
    // Best-effort fallback, but leave a trace: a broken config would otherwise
    // silently revert every user setting to the defaults.
    logWarn("whisper", "failed to load transcription config; using defaults", {
      error: e?.message,
    });
    return {
      provider: "auto",
      local: { ...DEFAULT_LOCAL },
      openaiKey: process.env.OPENAI_API_KEY || "",
    };
  }
}
101
-
102
- // ---------------------------------------------------------------------------
103
- // Persistent server management
104
- // ---------------------------------------------------------------------------
105
-
106
// Model name associated with the current server: set when this module spawns
// a server or adopts one that is already running.
let _serverModel = null;
// ChildProcess handle of the server spawned by this module; stays null for an
// adopted (externally started) server.
let _serverProcess = null;
108
-
109
// Promise-based delay: settles (with no value) once `ms` milliseconds elapse.
function _sleep(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
112
-
113
// Probe GET /health on the local whisper port. Resolves to true only for a
// successful (res.ok) answer within 800 ms; any network error or timeout
// counts as "not healthy".
async function _isServerHealthy() {
  const url = `http://127.0.0.1:${WHISPER_PORT}/health`;
  let healthy = false;
  try {
    const response = await fetch(url, { signal: AbortSignal.timeout(800) });
    healthy = response.ok;
  } catch {
    healthy = false;
  }
  return healthy;
}
123
-
124
// Ask the running whisper-server which model it reports on /health.
// Resolves to the model name, or null when the server is unreachable, answers
// with a non-OK status, sends an unparsable body, or reports no model.
async function _serverModelName() {
  const url = `http://127.0.0.1:${WHISPER_PORT}/health`;
  try {
    const response = await fetch(url, { signal: AbortSignal.timeout(800) });
    if (response.ok) {
      const payload = await response.json();
      return payload?.model || null;
    }
    return null;
  } catch {
    return null;
  }
}
138
-
139
// Poll the health endpoint every 250 ms until it answers or `maxMs` elapses.
// Resolves as soon as the server is healthy; throws when the deadline passes.
async function _waitForServer(maxMs = 15_000) {
  const giveUpAt = Date.now() + maxMs;
  for (;;) {
    if (Date.now() >= giveUpAt) break;
    if (await _isServerHealthy()) return;
    await _sleep(250);
  }
  throw new Error(`whisper-server did not start within ${maxMs}ms`);
}
147
-
148
// Resolve to the PID of the process LISTENing on the whisper port, or null.
// The -sTCP:LISTEN filter matters: without it lsof also lists clients that
// merely hold a connection to the port (this daemon included). Our own PID is
// skipped as an extra safeguard. Any lsof failure or empty output yields null.
async function _findListenerPid() {
  const cmd = `lsof -ti tcp:${WHISPER_PORT} -sTCP:LISTEN`;
  return new Promise((resolve) => {
    exec(cmd, (err, stdout) => {
      if (err || !stdout) {
        resolve(null);
        return;
      }
      let found = null;
      for (const token of stdout.trim().split("\n")) {
        const pid = parseInt(token, 10);
        if (Number.isFinite(pid) && pid !== process.pid) {
          found = pid;
          break;
        }
      }
      resolve(found || null);
    });
  });
}
162
-
163
// Evict whatever whisper-server currently holds the port.
//   1. Ask politely via POST /shutdown and give it 600 ms to exit.
//   2. If a listener is still bound, send SIGTERM, wait, and escalate to
//      SIGKILL when the process is still alive.
// Every step is best-effort; failures are ignored. Our own PID is never
// signalled.
async function _killOrphanWhisper() {
  try {
    await fetch(`http://127.0.0.1:${WHISPER_PORT}/shutdown`, {
      method: "POST", signal: AbortSignal.timeout(1000),
    });
    await _sleep(600);
  } catch {}

  const listenerPid = await _findListenerPid();
  if (!listenerPid || listenerPid === process.pid) return;

  try { process.kill(listenerPid, "SIGTERM"); } catch {}
  await _sleep(400);

  // Signal 0 only tests for existence; it throws once the process is gone.
  let stillAlive = true;
  try {
    process.kill(listenerPid, 0);
  } catch {
    stillAlive = false;
  }
  if (stillAlive) {
    try { process.kill(listenerPid, "SIGKILL"); } catch {}
  }
  await _sleep(300);
}
180
-
181
// Make sure a whisper-server for the requested model is available.
//
// Resolution order:
//   - we own a child for this model and it answers /health -> nothing to do;
//   - no owned child, but a server on the port reports this model -> adopt it;
//   - a server on the port reports a different model -> evict it;
//   - we own a child for a different model -> kill it;
//   - finally spawn a fresh server.
async function ensureWhisperServer(opts) {
  const wanted = opts.model || DEFAULT_LOCAL.model;

  const ownsMatching = _serverProcess && _serverModel === wanted;
  if (ownsMatching) {
    const alive = await _isServerHealthy();
    if (alive) return;
    // The owned child stopped answering: drop the bookkeeping.
    _serverProcess = null;
    _serverModel = null;
  }

  if (!_serverProcess) {
    // Possibly a server left over from a previous daemon run.
    const running = await _serverModelName();
    if (running === wanted) {
      _serverModel = wanted;
      return;
    }
    if (running) {
      // Wrong model loaded: remove the orphan so the right one can start.
      await _killOrphanWhisper();
    }
  }

  if (_serverProcess) {
    try { _serverProcess.kill(); } catch {}
    _serverProcess = null;
    _serverModel = null;
    await _sleep(300);
  }

  await _spawnWhisper(opts, wanted, /* retried */ false);
}
213
-
214
/**
 * Spawn `python3 whisper-server.py` and wait for its first stdout line.
 *
 * The first line is expected to be JSON. `{status: "error"}` fails the start;
 * any other line (including non-JSON) is treated as "server is up". Only the
 * stdout line is awaited here; no HTTP health probe is performed.
 *
 * Failure handling:
 *   - a spawn failure (e.g. python3 not installed) is reported through the
 *     child's 'error' event; it is converted into a rejection instead of an
 *     unhandled event that would take the whole daemon down;
 *   - on any startup failure the child is killed so a hung process is not
 *     left behind;
 *   - if the error indicates the port is already in use, the orphan holding
 *     it is evicted and the spawn is retried exactly once.
 *
 * @param {object}  opts     local transcription options (device, compute_type, idle_minutes)
 * @param {string}  model    model name passed via --model
 * @param {boolean} retried  true when this call is already the retry
 * @throws {Error} when the server does not become ready
 */
async function _spawnWhisper(opts, model, retried) {
  const args = [
    WHISPER_SERVER,
    "--port", String(WHISPER_PORT),
    "--model", model,
    "--device", String(opts.device || DEFAULT_LOCAL.device),
    "--compute-type", String(opts.compute_type || DEFAULT_LOCAL.compute_type),
    "--idle-minutes", String(opts.idle_minutes ?? DEFAULT_LOCAL.idle_minutes),
  ];

  const proc = spawn("python3", args, {
    stdio: ["ignore", "pipe", "inherit"],
    detached: false,
  });

  _serverProcess = proc;
  _serverModel = model;

  // Clear the module state, but only if it still refers to this child.
  const forget = () => {
    if (_serverProcess === proc) {
      _serverProcess = null;
      _serverModel = null;
    }
  };
  proc.on("exit", forget);

  try {
    await new Promise((resolve, reject) => {
      const timeout = setTimeout(
        () => reject(new Error("whisper-server startup timed out (15s)")),
        15_000
      );
      const fail = (err) => {
        clearTimeout(timeout);
        reject(err);
      };

      let buf = "";
      let sawFirstLine = false;
      proc.stdout.on("data", (chunk) => {
        // Keep the listener attached so the pipe keeps draining, but only the
        // first complete line decides the outcome.
        if (sawFirstLine) return;
        buf += chunk.toString();
        const nl = buf.indexOf("\n");
        if (nl === -1) return;
        sawFirstLine = true;
        const line = buf.slice(0, nl).trim();
        buf = "";
        clearTimeout(timeout);

        let msg;
        try {
          msg = JSON.parse(line);
        } catch {
          resolve(); // unexpected line but server is up
          return;
        }
        if (msg?.status === "error") {
          reject(new Error(msg.error || "whisper-server error"));
          return;
        }
        resolve(); // "ready"
      });

      // Without this listener a failed spawn raises an uncaught exception.
      proc.once("error", (err) => {
        fail(new Error(`whisper-server failed to start: ${err.message}`));
      });
      proc.on("exit", (code) => {
        fail(new Error(`whisper-server exited (code ${code}) before becoming ready`));
      });
    });
  } catch (e) {
    // Never leave a half-started or hung child behind.
    try { proc.kill(); } catch {}

    // Self-heal: if the port was already in use, kill the orphan and retry once.
    const msg = e.message || "";
    if (!retried && /address already in use|errno 48|eaddrinuse/i.test(msg)) {
      _serverProcess = null;
      _serverModel = null;
      await _killOrphanWhisper();
      return _spawnWhisper(opts, model, /* retried */ true);
    }
    // 'exit' may never fire after a spawn failure, so clear the state here.
    forget();
    throw e;
  }
}
279
-
280
- // ---------------------------------------------------------------------------
281
- // Local backend (persistent whisper-server.py via HTTP)
282
- // ---------------------------------------------------------------------------
283
-
284
/**
 * Transcribe a file through the local whisper-server.
 *
 * Ensures the server is running, then POSTs the file path to /transcribe.
 * A language of "auto" (or none) is sent as null. The request is attempted at
 * most twice: a second try happens only for connection-level failures
 * ("fetch failed", ECONNRESET, "socket hang up", "terminated"), after a
 * 500 ms pause.
 *
 * @param {string} filePath  path of the audio file, sent to the server as-is
 * @param {object} opts      local options (language, beam_size, timeout_ms, ...)
 * @returns {Promise<object>} { ok, backend: "local", text, language,
 *                             language_probability, duration, model, compute_type }
 * @throws {Error} the last error once attempts are exhausted
 */
async function transcribeLocal(filePath, opts) {
  await ensureWhisperServer(opts);

  const requestedLang = opts.language || DEFAULT_LOCAL.language;
  const language = requestedLang === "auto" ? null : (opts.language || null);

  const parsedTimeout = Number(opts.timeout_ms);
  const timeoutMs = parsedTimeout > 0 ? parsedTimeout : DEFAULT_LOCAL.timeout_ms;

  const body = JSON.stringify({
    audio_path: filePath,
    language,
    beam_size: opts.beam_size || DEFAULT_LOCAL.beam_size,
  });
  const endpoint = `http://127.0.0.1:${WHISPER_PORT}/transcribe`;

  // CPU transcription of even a 1-minute voice note (small int8) can take
  // 30-45s. Under undici (Node fetch) the request occasionally surfaces as
  // "fetch failed" although whisper-server finished it: a keep-alive socket
  // is reset somewhere between the long response and the daemon's other
  // concurrent traffic. One retry on such generic failures makes sure the
  // user still gets a reply.
  const maxAttempts = 2;
  let lastErr = null;
  let attempt = 0;
  while (attempt < maxAttempts) {
    attempt += 1;
    const t0 = Date.now();
    try {
      logInfo("whisper", `transcribeLocal attempt ${attempt}/${maxAttempts}`, {
        file: path.basename(filePath),
        language: language || "auto",
        timeout_ms: timeoutMs,
      });
      const res = await fetch(endpoint, {
        method: "POST",
        headers: { "content-type": "application/json", "connection": "close" },
        body,
        signal: AbortSignal.timeout(timeoutMs),
      });
      const json = await res.json();
      if (!json.ok) throw new Error(json.error || "transcription failed");

      const elapsed = Date.now() - t0;
      logInfo("whisper", `transcribeLocal ok in ${elapsed}ms`, {
        chars: (json.text || "").length,
        language: json.language,
        duration: json.duration,
      });
      return {
        ok: true,
        backend: "local",
        text: json.text || "",
        language: json.language || null,
        language_probability: json.language_probability ?? null,
        duration: json.duration ?? null,
        model: json.model,
        compute_type: json.compute_type,
      };
    } catch (e) {
      lastErr = e;
      const isRetriable =
        /fetch failed|ECONNRESET|socket hang up|terminated/i.test(e.message || "");
      const dt = Date.now() - t0;
      const willRetry = isRetriable && attempt < maxAttempts;
      logWarn("whisper", `transcribeLocal attempt ${attempt} failed in ${dt}ms`, {
        error: e.message,
        retriable: isRetriable,
        will_retry: willRetry,
      });
      if (!willRetry) break;
      // Short pause before retrying so the whisper-server.py thread can flush
      // its pending response and release the model lock.
      await _sleep(500);
    }
  }
  logError("whisper", `transcribeLocal exhausted retries`, { error: lastErr?.message });
  throw lastErr || new Error("local transcription failed");
}
360
-
361
- // ---------------------------------------------------------------------------
362
- // OpenAI backend (Whisper-1 cloud)
363
- // ---------------------------------------------------------------------------
364
-
365
/**
 * Transcribe a file with the OpenAI audio transcription API (model whisper-1).
 *
 * The file is read asynchronously so a large recording does not block the
 * daemon's event loop while it is loaded. The upload MIME type is derived
 * from the file extension, defaulting to audio/ogg.
 *
 * @param {string} filePath  path of the audio file to upload
 * @param {string} apiKey    OpenAI API key
 * @returns {Promise<object>} { ok, backend: "openai", text, language: null,
 *                             language_probability: null, duration: null, model }
 * @throws {Error} when no key is given, the file cannot be read, or the API
 *                 answers with a non-OK status
 */
async function transcribeOpenAI(filePath, apiKey) {
  if (!apiKey) throw new Error("OPENAI_API_KEY not set (env or engines.openai.api_key)");

  const fileBuf = await fs.promises.readFile(filePath);
  const ext = path.extname(filePath).slice(1).toLowerCase() || "ogg";
  const mimeMap = {
    oga: "audio/ogg", ogg: "audio/ogg", opus: "audio/ogg",
    mp3: "audio/mpeg", m4a: "audio/mp4", mp4: "audio/mp4",
    wav: "audio/wav", webm: "audio/webm",
  };
  const blob = new Blob([fileBuf], { type: mimeMap[ext] || "audio/ogg" });

  const form = new FormData();
  form.append("file", blob, `audio.${ext}`);
  form.append("model", "whisper-1");

  const res = await fetch("https://api.openai.com/v1/audio/transcriptions", {
    method: "POST",
    headers: { Authorization: `Bearer ${apiKey}` },
    body: form,
  });
  if (!res.ok) {
    // Include at most 200 characters of the response body in the error.
    const err = await res.text().catch(() => "");
    throw new Error(`Whisper API ${res.status}: ${err.slice(0, 200)}`);
  }
  const json = await res.json();
  return {
    ok: true,
    backend: "openai",
    text: String(json.text || "").trim(),
    language: null,
    language_probability: null,
    duration: null,
    model: "whisper-1",
  };
}
401
-
402
- // ---------------------------------------------------------------------------
403
- // Public API
404
- // ---------------------------------------------------------------------------
405
-
406
/**
 * Transcribe an audio file using the configured backend.
 * Returns { ok, backend, text, language?, language_probability?, duration?, model? }.
 *
 * Provider selection (override wins over config):
 *   - "openai": cloud API only;
 *   - "local":  local whisper-server only; its error is surfaced unchanged;
 *   - anything else ("auto"): local first, then OpenAI if a key is configured.
 *     The local failure is logged before falling back so it is not lost.
 *
 * @param {string} filePath   absolute path to audio file
 * @param {object} overrides  optional: { provider, model, language, idle_minutes, ... }
 * @throws {Error} when the file does not exist or every eligible backend fails;
 *                 in auto mode without an OpenAI key the thrown error carries
 *                 the local error as `cause`
 */
export async function transcribe(filePath, overrides = {}) {
  if (!filePath || !fs.existsSync(filePath)) {
    throw new Error(`transcribe: file not found: ${filePath}`);
  }
  // The default parameter only covers `undefined`; tolerate an explicit null.
  const extra = overrides || {};
  const cfg = await getConfig();
  const provider = extra.provider || cfg.provider;
  const localOpts = { ...cfg.local, ...extra };

  if (provider === "openai") {
    return transcribeOpenAI(filePath, cfg.openaiKey);
  }
  if (provider === "local") {
    // Explicit local-only: bubble up the real error, do not mention OpenAI.
    return transcribeLocal(filePath, localOpts);
  }

  // auto: local first, fall back to openai only if a key is configured
  try {
    return await transcribeLocal(filePath, localOpts);
  } catch (localErr) {
    if (cfg.openaiKey) {
      logWarn("whisper", "local transcription failed; falling back to openai", {
        error: localErr?.message,
      });
      return transcribeOpenAI(filePath, cfg.openaiKey);
    }
    // No OpenAI configured — surface the real local error verbatim.
    throw new Error(`local transcription failed: ${localErr.message}`, { cause: localErr });
  }
}
440
-
441
/**
 * Transcribe raw audio bytes (e.g. from a mic chunk or Telegram voice blob).
 * Writes them to a temp file, transcribes it, and always removes the file.
 *
 * The format hint is reduced to its alphanumeric characters before it is used
 * as the temp file extension, so a hint containing path separators or ".."
 * cannot redirect the write outside the temp directory. An empty or missing
 * hint falls back to "webm".
 *
 * @param {Buffer} buf        raw audio data
 * @param {string} format     file extension hint: "webm" | "ogg" | "wav" | "mp3" (default "webm")
 * @param {object} overrides  same as transcribe() overrides
 * @throws {Error} when the buffer is empty, or transcription fails
 */
export async function transcribeBuffer(buf, format = "webm", overrides = {}) {
  if (!buf || !buf.length) throw new Error("transcribeBuffer: empty buffer");
  const ext = String(format ?? "")
    .replace(/^\./, "")
    .replace(/[^A-Za-z0-9]/g, "") || "webm";
  const tmpFile = path.join(
    (await import("node:os")).default.tmpdir(),
    `apx-audio-${Date.now()}-${Math.random().toString(36).slice(2)}.${ext}`
  );
  try {
    fs.writeFileSync(tmpFile, buf);
    return await transcribe(tmpFile, overrides);
  } finally {
    // Best-effort cleanup; the file may not exist if the write itself failed.
    try { fs.unlinkSync(tmpFile); } catch {}
  }
}
463
-
464
- // ---------------------------------------------------------------------------
465
- // Lifecycle (preload on daemon start, shutdown on daemon stop)
466
- // ---------------------------------------------------------------------------
467
-
468
/**
 * Start the whisper server ahead of time so the first transcription does not
 * pay the startup cost. Does nothing when the provider is "openai".
 * May be called repeatedly. Never throws: failures are reported through `log`.
 *
 * @param {(msg: string) => void} log  sink for progress messages (default console.log)
 */
export async function preloadWhisperServer(log = console.log) {
  try {
    const { provider, local } = await getConfig();
    if (provider === "openai") return; // local backend not used
    log(`whisper: preloading model "${local.model}" on port ${WHISPER_PORT}…`);
    await ensureWhisperServer(local);
    log(`whisper: ready on port ${WHISPER_PORT} (model: ${_serverModel})`);
  } catch (e) {
    log(`whisper: preload failed — ${e.message} (will retry lazily on first request)`);
  }
}
483
-
484
/**
 * Keep the local whisper server warm. Ensures the server is running, then
 * calls its /warmup endpoint (40 s timeout, to leave room for a cold model
 * load). Per the original notes, /warmup loads the model into RAM and resets
 * the server's idle timer, so a live session does not pay the cold-load cost
 * on the next utterance.
 *
 * Never throws. Returns:
 *   - { ok: true, provider: "openai", loaded: false } when the provider is openai;
 *   - { ok: true, provider: "local", model, loaded } otherwise, where `loaded`
 *     is false if the /warmup call itself failed;
 *   - { ok: false, error } when the config or server start failed.
 */
export async function warmupWhisper() {
  try {
    const { provider, local } = await getConfig();
    if (provider === "openai") return { ok: true, provider: "openai", loaded: false };
    await ensureWhisperServer(local);

    // A failed or unparsable /warmup response simply reports loaded: false.
    const loaded = await fetch(`http://127.0.0.1:${WHISPER_PORT}/warmup`, {
      signal: AbortSignal.timeout(40_000),
    })
      .then((r) => r.json().catch(() => ({})))
      .then((j) => !!j.loaded)
      .catch(() => false);

    return { ok: true, provider: "local", model: _serverModel, loaded };
  } catch (e) {
    return { ok: false, error: e.message };
  }
}
512
-
513
/**
 * Stop the whisper server.
 *
 *   - If this module spawned the server, its child process is killed.
 *   - Otherwise (e.g. an adopted, externally started server) a best-effort
 *     POST /shutdown is sent with a 500 ms timeout; errors are ignored.
 *
 * In both cases the remembered model name is cleared so later log lines and
 * status results do not report a server that is no longer running.
 * Never throws.
 */
export async function shutdownWhisperServer() {
  if (_serverProcess) {
    try { _serverProcess.kill(); } catch {}
    _serverProcess = null;
  } else {
    // Graceful shutdown request for a server this module did not spawn.
    try {
      await fetch(`http://127.0.0.1:${WHISPER_PORT}/shutdown`, {
        method: "POST", signal: AbortSignal.timeout(500),
      });
    } catch {}
  }
  _serverModel = null;
}
530
-
531
- // ---------------------------------------------------------------------------
532
- // Diagnostics
533
- // ---------------------------------------------------------------------------
534
-
535
// Diagnostics only: exposes the server script path and the port this module
// uses, so callers can report them.
export const TRANSCRIPTION_PATHS = {
  whisper_server: WHISPER_SERVER,
  port: WHISPER_PORT,
};
@@ -1,73 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- Local audio transcription via faster-whisper. Mirrors the implementation in
4
- the Panda project (transcription_service.py): same default model "medium",
5
- device cpu, compute_type int8, beam_size 5. Lazy singleton model cache.
6
-
7
- Invoked by APX daemon (Node) as a subprocess. Args:
8
- whisper-transcribe.py <audio_path> [--model medium] [--language auto] [--device cpu] [--compute-type int8] [--beam-size 5]
9
-
10
- Outputs JSON on stdout:
11
- { "ok": true, "text": "...", "language": "es", "language_probability": 0.98, "duration": 12.4 }
12
- { "ok": false, "error": "..." }
13
- """
14
- import argparse
15
- import json
16
- import os
17
- import sys
18
-
19
-
20
def _emit(payload) -> None:
    """Print *payload* as one JSON document on stdout for the parent process."""
    print(json.dumps(payload))


def main(argv=None) -> int:
    """Transcribe one audio file with faster-whisper and report the result as JSON.

    Exactly one JSON object is printed on stdout: ``{"ok": true, ...}`` on
    success or ``{"ok": false, "error": "..."}`` on failure.

    Args:
        argv: Command-line arguments excluding the program name. ``None``
            (the default) makes argparse read ``sys.argv[1:]``, which is the
            behaviour when the script is run as a subprocess.

    Returns:
        Process exit code: 0 when transcription succeeded, 1 otherwise.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("audio_path")
    parser.add_argument("--model", default="medium")
    parser.add_argument("--language", default="auto")
    parser.add_argument("--device", default="cpu")
    parser.add_argument("--compute-type", dest="compute_type", default="int8")
    parser.add_argument("--beam-size", dest="beam_size", type=int, default=5)
    args = parser.parse_args(argv)

    # Fail fast before the (slow) model load if the input is missing.
    if not os.path.exists(args.audio_path):
        _emit({"ok": False, "error": f"file not found: {args.audio_path}"})
        return 1

    # Imported lazily so a missing dependency is reported as JSON instead of
    # crashing with a traceback at import time.
    try:
        from faster_whisper import WhisperModel
    except ImportError as e:
        _emit({
            "ok": False,
            "error": "faster-whisper not installed. Run: pip3 install faster-whisper",
            "import_error": str(e),
        })
        return 1

    try:
        # os.cpu_count() may return None; fall back to 4 threads in that case.
        threads = os.cpu_count() or 4
        model = WhisperModel(
            args.model,
            device=args.device,
            compute_type=args.compute_type,
            cpu_threads=threads,
        )
    except Exception as e:
        _emit({"ok": False, "error": f"failed to load model '{args.model}': {e}"})
        return 1

    # "auto" is passed to the library as None.
    language = None if args.language == "auto" else args.language

    try:
        segments, info = model.transcribe(
            args.audio_path, beam_size=args.beam_size, language=language
        )
        # Consuming `segments` stays inside this try so that any error raised
        # while iterating is reported as a transcription failure.
        text = " ".join(seg.text.strip() for seg in segments).strip()
        _emit({
            "ok": True,
            "text": text,
            "language": info.language,
            "language_probability": round(info.language_probability, 4),
            "duration": round(info.duration, 2),
            "model": args.model,
            "compute_type": args.compute_type,
        })
        return 0
    except Exception as e:
        _emit({"ok": False, "error": f"transcription failed: {e}"})
        return 1
70
-
71
-
72
# Script entry point: propagate main()'s return value as the process exit code.
if __name__ == "__main__":
    raise SystemExit(main())