@agentprojectcontext/apx 1.15.5 → 1.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (222)
  1. package/package.json +40 -5
  2. package/src/cli/commands/log.js +113 -0
  3. package/src/cli/commands/overlay.js +253 -0
  4. package/src/cli/commands/sys.js +88 -16
  5. package/src/cli/index.js +23 -1
  6. package/src/cli/terminal-chat/renderer.js +71 -56
  7. package/src/cli-ts/commands/agent.ts +173 -0
  8. package/src/cli-ts/commands/chat.ts +119 -0
  9. package/src/cli-ts/commands/daemon.ts +112 -0
  10. package/src/cli-ts/commands/exec.ts +109 -0
  11. package/src/cli-ts/commands/mcp.ts +235 -0
  12. package/src/cli-ts/commands/session.ts +224 -0
  13. package/src/cli-ts/commands/status.ts +61 -0
  14. package/src/cli-ts/http.ts +36 -0
  15. package/src/cli-ts/index.ts +73 -0
  16. package/src/cli-ts/ui.ts +107 -0
  17. package/src/core/logging.js +81 -0
  18. package/src/daemon/api.js +58 -0
  19. package/src/daemon/engines/anthropic.js +60 -1
  20. package/src/daemon/engines/index.js +2 -1
  21. package/src/daemon/engines/ollama.js +70 -3
  22. package/src/daemon/index.js +58 -0
  23. package/src/daemon/overlay-ws.js +40 -0
  24. package/src/daemon/plugins/index.js +2 -1
  25. package/src/daemon/plugins/overlay.js +177 -0
  26. package/src/daemon/plugins/telegram.js +15 -3
  27. package/src/daemon/super-agent.js +102 -19
  28. package/src/daemon/transcription.js +262 -59
  29. package/src/daemon/wakeup.js +14 -19
  30. package/src/daemon/whisper-server.py +57 -6
  31. package/src/overlay/index.html +44 -0
  32. package/src/overlay/main.js +480 -0
  33. package/src/overlay/package.json +3 -0
  34. package/src/overlay/preload.js +34 -0
  35. package/src/overlay/renderer.js +371 -0
  36. package/src/overlay/style.css +250 -0
  37. package/src/tui/_shims/cli-error.ts +6 -0
  38. package/src/tui/_shims/cli-logo.ts +18 -0
  39. package/src/tui/_shims/cli-ui.ts +1 -0
  40. package/src/tui/_shims/config-console-state.ts +7 -0
  41. package/src/tui/_shims/core-any.ts +30 -0
  42. package/src/tui/_shims/core-binary.ts +13 -0
  43. package/src/tui/_shims/core-flag.ts +3 -0
  44. package/src/tui/_shims/core-log.ts +14 -0
  45. package/src/tui/_shims/lsp-language.ts +1 -0
  46. package/src/tui/_shims/opencode-any.ts +135 -0
  47. package/src/tui/_shims/opencode-sdk-v2.ts +48 -0
  48. package/src/tui/_shims/plugin-tui.ts +13 -0
  49. package/src/tui/_shims/provider-provider.ts +10 -0
  50. package/src/tui/_shims/session-retry.ts +1 -0
  51. package/src/tui/_shims/session-schema.ts +15 -0
  52. package/src/tui/_shims/session-session.ts +3 -0
  53. package/src/tui/_shims/snapshot.ts +4 -0
  54. package/src/tui/_shims/tool-any.ts +18 -0
  55. package/src/tui/_shims/util-error.ts +7 -0
  56. package/src/tui/_shims/util-filesystem.ts +79 -0
  57. package/src/tui/_shims/util-format.ts +7 -0
  58. package/src/tui/_shims/util-iife.ts +3 -0
  59. package/src/tui/_shims/util-locale.ts +10 -0
  60. package/src/tui/_shims/util-process.ts +38 -0
  61. package/src/tui/app.tsx +783 -0
  62. package/src/tui/asset/charge.wav +0 -0
  63. package/src/tui/asset/pulse-a.wav +0 -0
  64. package/src/tui/asset/pulse-b.wav +0 -0
  65. package/src/tui/asset/pulse-c.wav +0 -0
  66. package/src/tui/attach.ts +100 -0
  67. package/src/tui/component/bg-pulse-render.ts +436 -0
  68. package/src/tui/component/bg-pulse.tsx +99 -0
  69. package/src/tui/component/border.tsx +21 -0
  70. package/src/tui/component/dialog-agent.tsx +31 -0
  71. package/src/tui/component/dialog-console-org.tsx +103 -0
  72. package/src/tui/component/dialog-mcp.tsx +85 -0
  73. package/src/tui/component/dialog-model.tsx +175 -0
  74. package/src/tui/component/dialog-provider.tsx +456 -0
  75. package/src/tui/component/dialog-retry-action.tsx +160 -0
  76. package/src/tui/component/dialog-session-delete-failed.tsx +99 -0
  77. package/src/tui/component/dialog-session-list.tsx +323 -0
  78. package/src/tui/component/dialog-session-rename.tsx +31 -0
  79. package/src/tui/component/dialog-skill.tsx +36 -0
  80. package/src/tui/component/dialog-stash.tsx +87 -0
  81. package/src/tui/component/dialog-status.tsx +168 -0
  82. package/src/tui/component/dialog-tag.tsx +44 -0
  83. package/src/tui/component/dialog-theme-list.tsx +50 -0
  84. package/src/tui/component/dialog-variant.tsx +39 -0
  85. package/src/tui/component/dialog-workspace-create.tsx +302 -0
  86. package/src/tui/component/dialog-workspace-file-changes.tsx +138 -0
  87. package/src/tui/component/dialog-workspace-unavailable.tsx +69 -0
  88. package/src/tui/component/error-component.tsx +92 -0
  89. package/src/tui/component/logo.tsx +896 -0
  90. package/src/tui/component/plugin-route-missing.tsx +14 -0
  91. package/src/tui/component/prompt/autocomplete.tsx +869 -0
  92. package/src/tui/component/prompt/cwd.ts +0 -0
  93. package/src/tui/component/prompt/frecency.tsx +90 -0
  94. package/src/tui/component/prompt/history.tsx +108 -0
  95. package/src/tui/component/prompt/index.tsx +1809 -0
  96. package/src/tui/component/prompt/part.ts +16 -0
  97. package/src/tui/component/prompt/stash.tsx +101 -0
  98. package/src/tui/component/prompt/traits.ts +35 -0
  99. package/src/tui/component/spinner.tsx +24 -0
  100. package/src/tui/component/startup-loading.tsx +63 -0
  101. package/src/tui/component/todo-item.tsx +32 -0
  102. package/src/tui/component/use-connected.tsx +9 -0
  103. package/src/tui/component/workspace-label.tsx +19 -0
  104. package/src/tui/config/cwd.ts +5 -0
  105. package/src/tui/config/keybind.ts +432 -0
  106. package/src/tui/config/tui-migrate.ts +154 -0
  107. package/src/tui/config/tui-schema.ts +34 -0
  108. package/src/tui/config/tui.ts +46 -0
  109. package/src/tui/context/aggregate-failures.ts +34 -0
  110. package/src/tui/context/args.tsx +15 -0
  111. package/src/tui/context/command-palette.tsx +163 -0
  112. package/src/tui/context/directory.ts +15 -0
  113. package/src/tui/context/editor-zed.ts +283 -0
  114. package/src/tui/context/editor.ts +468 -0
  115. package/src/tui/context/event-apx.ts +22 -0
  116. package/src/tui/context/event.ts +6 -0
  117. package/src/tui/context/exit.tsx +60 -0
  118. package/src/tui/context/helper.tsx +25 -0
  119. package/src/tui/context/kv.tsx +81 -0
  120. package/src/tui/context/local.tsx +608 -0
  121. package/src/tui/context/path-format.tsx +39 -0
  122. package/src/tui/context/project-apx.tsx +48 -0
  123. package/src/tui/context/project.tsx +7 -0
  124. package/src/tui/context/prompt.tsx +18 -0
  125. package/src/tui/context/route.tsx +52 -0
  126. package/src/tui/context/sdk-apx.tsx +185 -0
  127. package/src/tui/context/sdk.tsx +6 -0
  128. package/src/tui/context/sync-apx.tsx +178 -0
  129. package/src/tui/context/sync-v2.tsx +16 -0
  130. package/src/tui/context/sync.tsx +118 -0
  131. package/src/tui/context/theme/aura.json +69 -0
  132. package/src/tui/context/theme/ayu.json +80 -0
  133. package/src/tui/context/theme/carbonfox.json +248 -0
  134. package/src/tui/context/theme/catppuccin-frappe.json +230 -0
  135. package/src/tui/context/theme/catppuccin-macchiato.json +230 -0
  136. package/src/tui/context/theme/catppuccin.json +112 -0
  137. package/src/tui/context/theme/cobalt2.json +225 -0
  138. package/src/tui/context/theme/cursor.json +249 -0
  139. package/src/tui/context/theme/dracula.json +219 -0
  140. package/src/tui/context/theme/everforest.json +241 -0
  141. package/src/tui/context/theme/flexoki.json +237 -0
  142. package/src/tui/context/theme/github.json +233 -0
  143. package/src/tui/context/theme/gruvbox.json +242 -0
  144. package/src/tui/context/theme/kanagawa.json +77 -0
  145. package/src/tui/context/theme/lucent-orng.json +234 -0
  146. package/src/tui/context/theme/material.json +235 -0
  147. package/src/tui/context/theme/matrix.json +77 -0
  148. package/src/tui/context/theme/mercury.json +252 -0
  149. package/src/tui/context/theme/monokai.json +221 -0
  150. package/src/tui/context/theme/nightowl.json +221 -0
  151. package/src/tui/context/theme/nord.json +223 -0
  152. package/src/tui/context/theme/one-dark.json +84 -0
  153. package/src/tui/context/theme/opencode.json +245 -0
  154. package/src/tui/context/theme/orng.json +249 -0
  155. package/src/tui/context/theme/osaka-jade.json +93 -0
  156. package/src/tui/context/theme/palenight.json +222 -0
  157. package/src/tui/context/theme/rosepine.json +234 -0
  158. package/src/tui/context/theme/solarized.json +223 -0
  159. package/src/tui/context/theme/synthwave84.json +226 -0
  160. package/src/tui/context/theme/tokyonight.json +243 -0
  161. package/src/tui/context/theme/vercel.json +245 -0
  162. package/src/tui/context/theme/vesper.json +218 -0
  163. package/src/tui/context/theme/zenburn.json +223 -0
  164. package/src/tui/context/theme.tsx +1247 -0
  165. package/src/tui/context/tui-config.tsx +9 -0
  166. package/src/tui/event.ts +16 -0
  167. package/src/tui/feature-plugins/home/footer.tsx +94 -0
  168. package/src/tui/feature-plugins/home/tips-view.tsx +166 -0
  169. package/src/tui/feature-plugins/home/tips.tsx +59 -0
  170. package/src/tui/feature-plugins/sidebar/context.tsx +65 -0
  171. package/src/tui/feature-plugins/sidebar/files.tsx +63 -0
  172. package/src/tui/feature-plugins/sidebar/footer.tsx +94 -0
  173. package/src/tui/feature-plugins/sidebar/lsp.tsx +65 -0
  174. package/src/tui/feature-plugins/sidebar/mcp.tsx +97 -0
  175. package/src/tui/feature-plugins/sidebar/todo.tsx +49 -0
  176. package/src/tui/feature-plugins/system/plugins.tsx +269 -0
  177. package/src/tui/feature-plugins/system/session-v2.tsx +1143 -0
  178. package/src/tui/feature-plugins/system/which-key.tsx +608 -0
  179. package/src/tui/keymap.tsx +166 -0
  180. package/src/tui/layer.ts +6 -0
  181. package/src/tui/plugin/api.tsx +381 -0
  182. package/src/tui/plugin/command-shim.ts +109 -0
  183. package/src/tui/plugin/internal.ts +33 -0
  184. package/src/tui/plugin/runtime.ts +1069 -0
  185. package/src/tui/plugin/slots.tsx +60 -0
  186. package/src/tui/routes/home.tsx +96 -0
  187. package/src/tui/routes/session/dialog-fork-from-timeline.tsx +76 -0
  188. package/src/tui/routes/session/dialog-message.tsx +108 -0
  189. package/src/tui/routes/session/dialog-subagent.tsx +26 -0
  190. package/src/tui/routes/session/dialog-timeline.tsx +47 -0
  191. package/src/tui/routes/session/footer.tsx +91 -0
  192. package/src/tui/routes/session/index.tsx +188 -0
  193. package/src/tui/routes/session/permission.tsx +722 -0
  194. package/src/tui/routes/session/question.tsx +490 -0
  195. package/src/tui/routes/session/sidebar.tsx +102 -0
  196. package/src/tui/routes/session/subagent-footer.tsx +133 -0
  197. package/src/tui/run.ts +84 -0
  198. package/src/tui/thread.ts +261 -0
  199. package/src/tui/tsconfig.json +40 -0
  200. package/src/tui/ui/dialog-alert.tsx +66 -0
  201. package/src/tui/ui/dialog-confirm.tsx +108 -0
  202. package/src/tui/ui/dialog-export-options.tsx +217 -0
  203. package/src/tui/ui/dialog-help.tsx +40 -0
  204. package/src/tui/ui/dialog-prompt.tsx +101 -0
  205. package/src/tui/ui/dialog-select.tsx +553 -0
  206. package/src/tui/ui/dialog.tsx +211 -0
  207. package/src/tui/ui/link.tsx +34 -0
  208. package/src/tui/ui/spinner.ts +368 -0
  209. package/src/tui/ui/toast.tsx +111 -0
  210. package/src/tui/util/clipboard.ts +217 -0
  211. package/src/tui/util/editor.ts +37 -0
  212. package/src/tui/util/model.ts +23 -0
  213. package/src/tui/util/provider-origin.ts +7 -0
  214. package/src/tui/util/revert-diff.ts +18 -0
  215. package/src/tui/util/scroll.ts +25 -0
  216. package/src/tui/util/selection.ts +65 -0
  217. package/src/tui/util/signal.ts +41 -0
  218. package/src/tui/util/sound.ts +156 -0
  219. package/src/tui/util/transcript.ts +112 -0
  220. package/src/tui/validate-session.ts +29 -0
  221. package/src/tui/win32.ts +130 -0
  222. package/src/tui/worker.ts +104 -0
@@ -28,8 +28,9 @@
28
28
 
29
29
  import fs from "node:fs";
30
30
  import path from "node:path";
31
- import { spawn } from "node:child_process";
31
+ import { spawn, exec } from "node:child_process";
32
32
  import { fileURLToPath } from "node:url";
33
+ import { logInfo, logWarn, logError } from "../core/logging.js";
33
34
 
34
35
  const __filename = fileURLToPath(import.meta.url);
35
36
  const __dirname = path.dirname(__filename);
@@ -43,8 +44,32 @@ const DEFAULT_LOCAL = {
43
44
  language: "auto",
44
45
  beam_size: 5,
45
46
  idle_minutes: 10,
47
+ // Max time we wait for /transcribe to return. Long audio files (Telegram
48
+ // voice notes > 10 min) can take several minutes on CPU; the previous
49
+ // hard-coded 5-minute cap silently truncated them. 20 minutes covers a
50
+ // ~60-minute voice note on a small int8 model. Override with
51
+ // transcription.local.timeout_ms in ~/.apx/config.json if needed.
52
+ timeout_ms: 20 * 60_000,
46
53
  };
47
54
 
55
+ // ---------------------------------------------------------------------------
56
+ // Config helpers (pure — exported for tests)
57
+ // ---------------------------------------------------------------------------
58
+
59
+ /**
60
+ * Resolve the effective transcription language.
61
+ * Priority: explicit local config → config.user.language → "auto" (whisper detects).
62
+ *
63
+ * @param {object} localCfg merged transcription.local config
64
+ * @param {string} userLang config.user.language ISO code (e.g. "es"), or ""
65
+ * @returns {string} ISO code or "auto"
66
+ */
67
+ export function resolveTranscriptionLanguage(localCfg, userLang) {
68
+ if (localCfg.language && localCfg.language !== "auto") return localCfg.language;
69
+ if (userLang) return userLang;
70
+ return "auto";
71
+ }
72
+
48
73
  // ---------------------------------------------------------------------------
49
74
  // Config
50
75
  // ---------------------------------------------------------------------------
@@ -59,9 +84,7 @@ async function getConfig() {
59
84
  // Explicit transcription.local.language always wins; "auto" means fall back to user.language.
60
85
  const userLang = cfg.user?.language || "";
61
86
  const localBase = { ...DEFAULT_LOCAL, ...(t.local || {}) };
62
- if ((!localBase.language || localBase.language === "auto") && userLang) {
63
- localBase.language = userLang;
64
- }
87
+ localBase.language = resolveTranscriptionLanguage(localBase, userLang);
65
88
  return {
66
89
  provider: t.provider || "auto",
67
90
  local: localBase,
@@ -98,6 +121,21 @@ async function _isServerHealthy() {
98
121
  }
99
122
  }
100
123
 
124
+ // Query the running whisper-server's /health endpoint for the model it reports.
125
+ // Returns the model name string, or null if not reachable.
126
+ async function _serverModelName() {
127
+ try {
128
+ const res = await fetch(`http://127.0.0.1:${WHISPER_PORT}/health`, {
129
+ signal: AbortSignal.timeout(800),
130
+ });
131
+ if (!res.ok) return null;
132
+ const j = await res.json();
133
+ return j?.model || null;
134
+ } catch {
135
+ return null;
136
+ }
137
+ }
138
+
101
139
  async function _waitForServer(maxMs = 15_000) {
102
140
  const deadline = Date.now() + maxMs;
103
141
  while (Date.now() < deadline) {
@@ -107,18 +145,62 @@ async function _waitForServer(maxMs = 15_000) {
107
145
  throw new Error(`whisper-server did not start within ${maxMs}ms`);
108
146
  }
109
147
 
148
+ // Find the PID of the process LISTENing on the whisper port (server only,
149
+ // not clients). Filtering by -sTCP:LISTEN is critical — without it, lsof
150
+ // also returns clients with an open connection (including this daemon).
151
+ async function _findListenerPid() {
152
+ return new Promise((resolve) => {
153
+ exec(`lsof -ti tcp:${WHISPER_PORT} -sTCP:LISTEN`, (err, stdout) => {
154
+ if (err || !stdout) return resolve(null);
155
+ const candidates = stdout.trim().split("\n")
156
+ .map(s => parseInt(s, 10))
157
+ .filter(n => Number.isFinite(n) && n !== process.pid);
158
+ resolve(candidates[0] || null);
159
+ });
160
+ });
161
+ }
162
+
163
+ async function _killOrphanWhisper() {
164
+ // First try graceful /shutdown on the whisper server.
165
+ try {
166
+ await fetch(`http://127.0.0.1:${WHISPER_PORT}/shutdown`, {
167
+ method: "POST", signal: AbortSignal.timeout(1000),
168
+ });
169
+ await _sleep(600);
170
+ } catch {}
171
+ // If still bound, force-kill the LISTENER pid only (never our own pid).
172
+ const pid = await _findListenerPid();
173
+ if (pid && pid !== process.pid) {
174
+ try { process.kill(pid, "SIGTERM"); } catch {}
175
+ await _sleep(400);
176
+ try { process.kill(pid, 0); try { process.kill(pid, "SIGKILL"); } catch {} } catch {}
177
+ await _sleep(300);
178
+ }
179
+ }
180
+
110
181
  async function ensureWhisperServer(opts) {
111
182
  const model = opts.model || DEFAULT_LOCAL.model;
112
183
 
113
184
  // Already running with the right model — health-check to confirm still alive.
114
185
  if (_serverProcess && _serverModel === model) {
115
186
  if (await _isServerHealthy()) return;
116
- // Process died (idle shutdown). Fall through to restart.
117
187
  _serverProcess = null;
118
188
  _serverModel = null;
119
189
  }
120
190
 
121
- // Wrong model: kill old server and start fresh.
191
+ // Adopt an externally-running whisper-server (e.g. left over from prior daemon).
192
+ if (!_serverProcess) {
193
+ const existing = await _serverModelName();
194
+ if (existing === model) {
195
+ _serverModel = model;
196
+ return;
197
+ }
198
+ if (existing) {
199
+ // Wrong model: kick out the orphan so we can start the right one.
200
+ await _killOrphanWhisper();
201
+ }
202
+ }
203
+
122
204
  if (_serverProcess) {
123
205
  try { _serverProcess.kill(); } catch {}
124
206
  _serverProcess = null;
@@ -126,6 +208,10 @@ async function ensureWhisperServer(opts) {
126
208
  await _sleep(300);
127
209
  }
128
210
 
211
+ await _spawnWhisper(opts, model, /* retried */ false);
212
+ }
213
+
214
+ async function _spawnWhisper(opts, model, retried) {
129
215
  const args = [
130
216
  WHISPER_SERVER,
131
217
  "--port", String(WHISPER_PORT),
@@ -151,32 +237,44 @@ async function ensureWhisperServer(opts) {
151
237
  });
152
238
 
153
239
  // Wait for the "ready" line on stdout, then wait for HTTP to respond.
154
- await new Promise((resolve, reject) => {
155
- const timeout = setTimeout(
156
- () => reject(new Error("whisper-server startup timed out (15s)")),
157
- 15_000
158
- );
159
- let buf = "";
160
- proc.stdout.on("data", (chunk) => {
161
- buf += chunk.toString();
162
- const nl = buf.indexOf("\n");
163
- if (nl === -1) return;
164
- const line = buf.slice(0, nl).trim();
165
- buf = buf.slice(nl + 1);
166
- clearTimeout(timeout);
167
- try {
168
- const msg = JSON.parse(line);
169
- if (msg.status === "error") return reject(new Error(msg.error || "whisper-server error"));
170
- resolve(); // "ready"
171
- } catch {
172
- resolve(); // unexpected line but server is up
173
- }
174
- });
175
- proc.on("exit", (code) => {
176
- clearTimeout(timeout);
177
- reject(new Error(`whisper-server exited (code ${code}) before becoming ready`));
240
+ try {
241
+ await new Promise((resolve, reject) => {
242
+ const timeout = setTimeout(
243
+ () => reject(new Error("whisper-server startup timed out (15s)")),
244
+ 15_000
245
+ );
246
+ let buf = "";
247
+ proc.stdout.on("data", (chunk) => {
248
+ buf += chunk.toString();
249
+ const nl = buf.indexOf("\n");
250
+ if (nl === -1) return;
251
+ const line = buf.slice(0, nl).trim();
252
+ buf = buf.slice(nl + 1);
253
+ clearTimeout(timeout);
254
+ try {
255
+ const msg = JSON.parse(line);
256
+ if (msg.status === "error") return reject(new Error(msg.error || "whisper-server error"));
257
+ resolve(); // "ready"
258
+ } catch {
259
+ resolve(); // unexpected line but server is up
260
+ }
261
+ });
262
+ proc.on("exit", (code) => {
263
+ clearTimeout(timeout);
264
+ reject(new Error(`whisper-server exited (code ${code}) before becoming ready`));
265
+ });
178
266
  });
179
- });
267
+ } catch (e) {
268
+ // Self-heal: if the port was already in use, kill the orphan and retry once.
269
+ const msg = e.message || "";
270
+ if (!retried && /address already in use|errno 48|eaddrinuse/i.test(msg)) {
271
+ _serverProcess = null;
272
+ _serverModel = null;
273
+ await _killOrphanWhisper();
274
+ return _spawnWhisper(opts, model, /* retried */ true);
275
+ }
276
+ throw e;
277
+ }
180
278
  }
181
279
 
182
280
  // ---------------------------------------------------------------------------
@@ -190,30 +288,74 @@ async function transcribeLocal(filePath, opts) {
190
288
  ? null
191
289
  : (opts.language || null);
192
290
 
193
- const res = await fetch(`http://127.0.0.1:${WHISPER_PORT}/transcribe`, {
194
- method: "POST",
195
- headers: { "content-type": "application/json" },
196
- body: JSON.stringify({
197
- audio_path: filePath,
198
- language,
199
- beam_size: opts.beam_size || DEFAULT_LOCAL.beam_size,
200
- }),
201
- signal: AbortSignal.timeout(5 * 60_000),
202
- });
291
+ const timeoutMs = Number(opts.timeout_ms) > 0
292
+ ? Number(opts.timeout_ms)
293
+ : DEFAULT_LOCAL.timeout_ms;
203
294
 
204
- const json = await res.json();
205
- if (!json.ok) throw new Error(json.error || "transcription failed");
295
+ const body = JSON.stringify({
296
+ audio_path: filePath,
297
+ language,
298
+ beam_size: opts.beam_size || DEFAULT_LOCAL.beam_size,
299
+ });
206
300
 
207
- return {
208
- ok: true,
209
- backend: "local",
210
- text: json.text || "",
211
- language: json.language || null,
212
- language_probability: json.language_probability ?? null,
213
- duration: json.duration ?? null,
214
- model: json.model,
215
- compute_type: json.compute_type,
216
- };
301
+ // Long transcriptions on CPU (small int8, 1-minute voice note) can take
302
+ // 30-45s. Under undici (Node fetch) we occasionally see "fetch failed"
303
+ // from the inbound Telegram path even though the whisper-server completes
304
+ // the request successfully — a keep-alive socket gets reset somewhere
305
+ // between the long whisper-server response and the daemon's other
306
+ // concurrent traffic. We retry once on a generic "fetch failed" so the
307
+ // user actually gets a reply.
308
+ const maxAttempts = 2;
309
+ let lastErr = null;
310
+ for (let attempt = 1; attempt <= maxAttempts; attempt++) {
311
+ const t0 = Date.now();
312
+ try {
313
+ logInfo("whisper", `transcribeLocal attempt ${attempt}/${maxAttempts}`, {
314
+ file: path.basename(filePath),
315
+ language: language || "auto",
316
+ timeout_ms: timeoutMs,
317
+ });
318
+ const res = await fetch(`http://127.0.0.1:${WHISPER_PORT}/transcribe`, {
319
+ method: "POST",
320
+ headers: { "content-type": "application/json", "connection": "close" },
321
+ body,
322
+ signal: AbortSignal.timeout(timeoutMs),
323
+ });
324
+ const json = await res.json();
325
+ if (!json.ok) throw new Error(json.error || "transcription failed");
326
+ logInfo("whisper", `transcribeLocal ok in ${Date.now() - t0}ms`, {
327
+ chars: (json.text || "").length,
328
+ language: json.language,
329
+ duration: json.duration,
330
+ });
331
+ return {
332
+ ok: true,
333
+ backend: "local",
334
+ text: json.text || "",
335
+ language: json.language || null,
336
+ language_probability: json.language_probability ?? null,
337
+ duration: json.duration ?? null,
338
+ model: json.model,
339
+ compute_type: json.compute_type,
340
+ };
341
+ } catch (e) {
342
+ lastErr = e;
343
+ const isRetriable =
344
+ /fetch failed|ECONNRESET|socket hang up|terminated/i.test(e.message || "");
345
+ const dt = Date.now() - t0;
346
+ logWarn("whisper", `transcribeLocal attempt ${attempt} failed in ${dt}ms`, {
347
+ error: e.message,
348
+ retriable: isRetriable,
349
+ will_retry: isRetriable && attempt < maxAttempts,
350
+ });
351
+ if (!isRetriable || attempt >= maxAttempts) break;
352
+ // Brief backoff before retry — gives the whisper-server.py thread time
353
+ // to flush its pending response and release the model lock.
354
+ await _sleep(500);
355
+ }
356
+ }
357
+ logError("whisper", `transcribeLocal exhausted retries`, { error: lastErr?.message });
358
+ throw lastErr || new Error("local transcription failed");
217
359
  }
218
360
 
219
361
  // ---------------------------------------------------------------------------
@@ -280,19 +422,80 @@ export async function transcribe(filePath, overrides = {}) {
280
422
  return transcribeOpenAI(filePath, cfg.openaiKey);
281
423
  }
282
424
  if (provider === "local") {
425
+ // Explicit local-only: bubble up the real error, do not mention OpenAI.
283
426
  return transcribeLocal(filePath, localOpts);
284
427
  }
285
428
 
286
- // auto: local first, fall back to openai
429
+ // auto: local first, fall back to openai only if a key is configured
287
430
  try {
288
431
  return await transcribeLocal(filePath, localOpts);
289
432
  } catch (localErr) {
290
- if (!cfg.openaiKey) {
291
- throw new Error(
292
- `local transcription failed and no OpenAI fallback available: ${localErr.message}`
293
- );
433
+ if (cfg.openaiKey) {
434
+ return transcribeOpenAI(filePath, cfg.openaiKey);
294
435
  }
295
- return transcribeOpenAI(filePath, cfg.openaiKey);
436
+ // No OpenAI configured — surface the real local error verbatim.
437
+ throw new Error(`local transcription failed: ${localErr.message}`);
438
+ }
439
+ }
440
+
441
+ /**
442
+ * Transcribe raw audio bytes (e.g. from a mic chunk or Telegram voice blob).
443
+ * Saves to a temp file, transcribes, cleans up.
444
+ *
445
+ * @param {Buffer} buf raw audio data
446
+ * @param {string} format file extension hint: "webm" | "ogg" | "wav" | "mp3" (default "webm")
447
+ * @param {object} overrides same as transcribe() overrides
448
+ */
449
+ export async function transcribeBuffer(buf, format = "webm", overrides = {}) {
450
+ if (!buf || !buf.length) throw new Error("transcribeBuffer: empty buffer");
451
+ const ext = format.replace(/^\./, "") || "webm";
452
+ const tmpFile = path.join(
453
+ (await import("node:os")).default.tmpdir(),
454
+ `apx-audio-${Date.now()}-${Math.random().toString(36).slice(2)}.${ext}`
455
+ );
456
+ try {
457
+ fs.writeFileSync(tmpFile, buf);
458
+ return await transcribe(tmpFile, overrides);
459
+ } finally {
460
+ try { fs.unlinkSync(tmpFile); } catch {}
461
+ }
462
+ }
463
+
464
+ // ---------------------------------------------------------------------------
465
+ // Lifecycle (preload on daemon start, shutdown on daemon stop)
466
+ // ---------------------------------------------------------------------------
467
+
468
+ /**
469
+ * Eagerly start the whisper server so the first transcription is fast.
470
+ * Safe to call multiple times. Never throws — logs and continues on failure.
471
+ */
472
+ export async function preloadWhisperServer(log = console.log) {
473
+ try {
474
+ const cfg = await getConfig();
475
+ if (cfg.provider === "openai") return; // local backend not used
476
+ log(`whisper: preloading model "${cfg.local.model}" on port ${WHISPER_PORT}…`);
477
+ await ensureWhisperServer(cfg.local);
478
+ log(`whisper: ready on port ${WHISPER_PORT} (model: ${_serverModel})`);
479
+ } catch (e) {
480
+ log(`whisper: preload failed — ${e.message} (will retry lazily on first request)`);
481
+ }
482
+ }
483
+
484
+ /**
485
+ * Stop the whisper server: kill the child process we own, or, if we adopted an external one, ask it to exit via POST /shutdown (best effort).
486
+ */
487
+ export async function shutdownWhisperServer() {
488
+ if (_serverProcess) {
489
+ try { _serverProcess.kill(); } catch {}
490
+ _serverProcess = null;
491
+ _serverModel = null;
492
+ } else {
493
+ // Try graceful shutdown of an adopted server
494
+ try {
495
+ await fetch(`http://127.0.0.1:${WHISPER_PORT}/shutdown`, {
496
+ method: "POST", signal: AbortSignal.timeout(500),
497
+ });
498
+ } catch {}
296
499
  }
297
500
  }
298
501
 
@@ -5,33 +5,28 @@ import { resolveProvider, getAdapter } from "./engines/index.js";
5
5
 
6
6
  const WAKEUP_COOLDOWN_MS = 30 * 60 * 1000; // 30 min
7
7
 
8
- // Detect preferred language from identity, then fall back to system LANG env.
9
- function detectLanguage(identity) {
10
- if (identity.language) return identity.language;
8
+ const ISO_TO_LANGUAGE = {
9
+ es: "Spanish", en: "English", fr: "French", pt: "Portuguese",
10
+ de: "German", it: "Italian", nl: "Dutch", ru: "Russian",
11
+ ja: "Japanese", zh: "Chinese", ko: "Korean", ar: "Arabic",
12
+ };
13
+
14
+ // Exported for unit testing.
15
+ // Priority: config.user.language (ISO 639-1) → identity.language (legacy) → system LANG env.
16
+ export function detectLanguage(identity, config) {
17
+ const cfgLang = config?.user?.language;
18
+ if (cfgLang) return ISO_TO_LANGUAGE[cfgLang.toLowerCase()] || cfgLang;
19
+ if (identity?.language) return identity.language;
11
20
  const lang = process.env.LANG || process.env.LC_MESSAGES || process.env.LC_ALL || "";
12
21
  const code = lang.split(/[_\.]/)[0].toLowerCase();
13
- const map = {
14
- es: "Spanish",
15
- en: "English",
16
- fr: "French",
17
- pt: "Portuguese",
18
- de: "German",
19
- it: "Italian",
20
- nl: "Dutch",
21
- ru: "Russian",
22
- ja: "Japanese",
23
- zh: "Chinese",
24
- ko: "Korean",
25
- ar: "Arabic",
26
- };
27
- return map[code] || "English";
22
+ return ISO_TO_LANGUAGE[code] || "English";
28
23
  }
29
24
 
30
25
  async function generateMessage(identity, engineConfig) {
31
26
  try {
32
27
  const { provider, model } = resolveProvider("ollama:qwen2.5:14b");
33
28
  const engine = getAdapter(provider);
34
- const language = detectLanguage(identity);
29
+ const language = detectLanguage(identity, engineConfig);
35
30
  const result = await engine.chat({
36
31
  system: `You are ${identity.agent_name}, an AI agent assistant. Your personality: ${identity.personality || "direct, curious, helpful"}. Your owner is ${identity.owner_name}. Context: ${identity.owner_context || "AI developer"}.`,
37
32
  messages: [
@@ -64,12 +64,18 @@ class _Handler(BaseHTTPRequestHandler):
64
64
  pass # suppress access log; APX daemon handles its own logging
65
65
 
66
66
  def _send_json(self, code, body):
67
- data = json.dumps(body).encode()
68
- self.send_response(code)
69
- self.send_header("Content-Type", "application/json")
70
- self.send_header("Content-Length", str(len(data)))
71
- self.end_headers()
72
- self.wfile.write(data)
67
+ # Swallow BrokenPipe / ConnectionReset — these happen when the daemon
68
+ # times out and aborts the request before we finish responding, and
69
+ # they used to fill the daemon log with multi-page Python tracebacks.
70
+ try:
71
+ data = json.dumps(body).encode()
72
+ self.send_response(code)
73
+ self.send_header("Content-Type", "application/json")
74
+ self.send_header("Content-Length", str(len(data)))
75
+ self.end_headers()
76
+ self.wfile.write(data)
77
+ except (BrokenPipeError, ConnectionResetError):
78
+ pass
73
79
 
74
80
  def _read_body(self):
75
81
  n = int(self.headers.get("Content-Length", 0))
@@ -92,6 +98,51 @@ class _Handler(BaseHTTPRequestHandler):
92
98
  self._send_json(404, {"ok": False, "error": "not found"})
93
99
 
94
100
  def do_POST(self):
101
+ # /transcribe_chunk reads raw bytes — must be handled BEFORE _read_body()
102
+ # which would consume rfile for JSON endpoints.
103
+ if self.path == "/transcribe_chunk":
104
+ _touch()
105
+ content_length = int(self.headers.get("Content-Length", 0))
106
+ if content_length <= 0:
107
+ self._send_json(400, {"ok": False, "error": "empty body"})
108
+ return
109
+ audio_bytes = self.rfile.read(content_length)
110
+ audio_format = (self.headers.get("X-Audio-Format") or "webm").strip().lstrip(".")
111
+ language_hdr = self.headers.get("X-Language") or None
112
+ language = language_hdr if language_hdr and language_hdr != "auto" else None
113
+ beam_size = int(self.headers.get("X-Beam-Size") or 3)
114
+
115
+ with _model_lock:
116
+ try:
117
+ m = _load_model_if_needed(_Handler.model_name, _Handler.device, _Handler.compute_type)
118
+ except ImportError:
119
+ self._send_json(500, {"ok": False, "error": "faster-whisper not installed"})
120
+ return
121
+ except Exception as e:
122
+ self._send_json(500, {"ok": False, "error": f"model load failed: {e}"})
123
+ return
124
+
125
+ import tempfile
126
+ tmp = tempfile.NamedTemporaryFile(suffix=f".{audio_format}", delete=False)
127
+ try:
128
+ tmp.write(audio_bytes)
129
+ tmp.close()
130
+ segments, info = m.transcribe(tmp.name, beam_size=beam_size, language=language)
131
+ text = " ".join(seg.text.strip() for seg in segments).strip()
132
+ self._send_json(200, {
133
+ "ok": True, "text": text,
134
+ "language": info.language,
135
+ "language_probability": round(info.language_probability, 4),
136
+ "duration": round(info.duration, 2) if hasattr(info, "duration") else None,
137
+ "model": _model_name,
138
+ })
139
+ except Exception as e:
140
+ self._send_json(500, {"ok": False, "error": f"chunk transcription failed: {e}"})
141
+ finally:
142
+ try: os.unlink(tmp.name)
143
+ except Exception: pass
144
+ return
145
+
95
146
  req = self._read_body()
96
147
 
97
148
  if self.path == "/transcribe":
@@ -0,0 +1,44 @@
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta http-equiv="Content-Security-Policy"
6
+ content="default-src 'self'; script-src 'self'; style-src 'self' 'unsafe-inline'; media-src *;">
7
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
8
+ <title>APX</title>
9
+ <link rel="stylesheet" href="style.css">
10
+ </head>
11
+ <body>
12
+ <div id="app">
13
+ <!-- Header -->
14
+ <div id="header">
15
+ <span class="logo">APX</span>
16
+ <span id="status-text" class="status">Ready</span>
17
+ <div id="conn-badge" title="Daemon disconnected"></div>
18
+ <button class="btn-close" id="btn-close" title="Close (Esc)">✕</button>
19
+ </div>
20
+
21
+ <!-- Messages -->
22
+ <div id="messages">
23
+ <div id="empty-state">
24
+ <div class="icon">🎙</div>
25
+ <div>Press <kbd id="empty-shortcut-hint">⌘G</kbd> to start speaking</div>
26
+ </div>
27
+ </div>
28
+
29
+ <!-- Live transcription bar -->
30
+ <div id="live-bar">
31
+ <div class="rec-dot"></div>
32
+ <div id="live-text">Listening…</div>
33
+ </div>
34
+
35
+ <!-- Hint bar -->
36
+ <div id="hint-bar">
37
+ <span class="hint"><kbd id="shortcut-hint">⌘⇧\</kbd> record</span>
38
+ <span class="hint"><kbd>Esc</kbd> cancel / close</span>
39
+ </div>
40
+ </div>
41
+
42
+ <script src="renderer.js"></script>
43
+ </body>
44
+ </html>