@agentprojectcontext/apx 1.42.1 → 1.43.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. package/package.json +1 -1
  2. package/src/core/channels/telegram/api.js +62 -0
  3. package/src/core/channels/telegram/ask-callbacks.js +238 -0
  4. package/src/core/config/index.js +2 -0
  5. package/src/core/config/redact.js +2 -0
  6. package/src/core/confirmation/adapters/telegram.js +20 -37
  7. package/src/core/desktop/process.js +126 -0
  8. package/src/core/voice/stt-hardware.js +87 -0
  9. package/src/core/voice/stt-models.js +97 -0
  10. package/src/core/voice/transcription.js +147 -16
  11. package/src/host/daemon/api/desktop.js +54 -8
  12. package/src/host/daemon/api/transcribe.js +40 -1
  13. package/src/host/daemon/plugins/desktop/index.js +6 -1
  14. package/src/host/daemon/plugins/telegram/index.js +61 -351
  15. package/src/host/daemon/whisper-server.js +18 -8
  16. package/src/host/daemon/whisper-server.py +71 -44
  17. package/src/interfaces/cli/commands/desktop.js +13 -68
  18. package/src/interfaces/desktop/main.js +32 -4
  19. package/src/interfaces/desktop/renderer.js +26 -5
  20. package/src/interfaces/web/dist/assets/index-B0nTYflm.js +651 -0
  21. package/src/interfaces/web/dist/assets/index-B0nTYflm.js.map +1 -0
  22. package/src/interfaces/web/dist/assets/index-C22PmKCD.css +1 -0
  23. package/src/interfaces/web/dist/index.html +2 -2
  24. package/src/interfaces/web/package-lock.json +3 -3
  25. package/src/interfaces/web/src/components/ShortcutInput.tsx +156 -0
  26. package/src/interfaces/web/src/components/voice/VoiceSttCard.tsx +101 -5
  27. package/src/interfaces/web/src/i18n/en.ts +28 -2
  28. package/src/interfaces/web/src/i18n/es.ts +28 -2
  29. package/src/interfaces/web/src/lib/api/desktop.ts +28 -0
  30. package/src/interfaces/web/src/lib/api/voice.ts +26 -2
  31. package/src/interfaces/web/src/screens/modules/DeckScreen.tsx +55 -3
  32. package/src/interfaces/web/src/screens/modules/DesktopScreen.tsx +98 -36
  33. package/src/interfaces/web/dist/assets/index-BReF4_xV.js +0 -646
  34. package/src/interfaces/web/dist/assets/index-BReF4_xV.js.map +0 -1
  35. package/src/interfaces/web/dist/assets/index-wrEbTJbc.css +0 -1
@@ -39,6 +39,9 @@ def _touch():
39
39
  _last_used = time.monotonic()
40
40
 
41
41
 
42
+ _mlx_loaded = False # mlx_whisper caches models internally; we just track readiness
43
+
44
+
42
45
  def _load_model_if_needed(model_name, device, compute_type):
43
46
  global _model, _model_name
44
47
  if _model is not None and _model_name == model_name:
@@ -51,11 +54,61 @@ def _load_model_if_needed(model_name, device, compute_type):
51
54
  return m
52
55
 
53
56
 
57
+ def _warmup_model():
58
+ """Eagerly load the active backend's model into RAM. Returns True if loaded."""
59
+ global _mlx_loaded
60
+ if _Handler.backend == "mlx":
61
+ import mlx_whisper # noqa: F401 (raises ImportError if the stack is missing)
62
+ try:
63
+ from mlx_whisper.load_models import load_model
64
+ load_model(_Handler.model_name)
65
+ _mlx_loaded = True
66
+ except Exception:
67
+ pass # first transcribe will load it lazily
68
+ return _mlx_loaded
69
+ _load_model_if_needed(_Handler.model_name, _Handler.device, _Handler.compute_type)
70
+ return _model is not None
71
+
72
+
73
+ def _transcribe_file(audio_path, language, beam_size):
74
+ """Backend-agnostic transcription → result dict. Raises on failure."""
75
+ global _mlx_loaded
76
+ if _Handler.backend == "mlx":
77
+ import mlx_whisper
78
+ kw = {"path_or_hf_repo": _Handler.model_name}
79
+ if language:
80
+ kw["language"] = language
81
+ r = mlx_whisper.transcribe(audio_path, **kw)
82
+ _mlx_loaded = True
83
+ return {
84
+ "ok": True,
85
+ "text": (r.get("text") or "").strip(),
86
+ "language": r.get("language"),
87
+ "language_probability": None,
88
+ "duration": None,
89
+ "model": _Handler.model_name,
90
+ "compute_type": "mlx-metal",
91
+ }
92
+ m = _load_model_if_needed(_Handler.model_name, _Handler.device, _Handler.compute_type)
93
+ segments, info = m.transcribe(audio_path, beam_size=beam_size, language=language)
94
+ text = " ".join(seg.text.strip() for seg in segments).strip()
95
+ return {
96
+ "ok": True,
97
+ "text": text,
98
+ "language": info.language,
99
+ "language_probability": round(info.language_probability, 4),
100
+ "duration": round(info.duration, 2) if hasattr(info, "duration") else None,
101
+ "model": _model_name,
102
+ "compute_type": _Handler.compute_type,
103
+ }
104
+
105
+
54
106
  # ---------------------------------------------------------------------------
55
107
  # HTTP handler
56
108
  # ---------------------------------------------------------------------------
57
109
 
58
110
  class _Handler(BaseHTTPRequestHandler):
111
+ backend = "faster" # "faster" (CTranslate2, CPU/CUDA) | "mlx" (Apple Metal)
59
112
  model_name = "small"
60
113
  device = "cpu"
61
114
  compute_type = "int8"
@@ -89,10 +142,12 @@ class _Handler(BaseHTTPRequestHandler):
89
142
  def do_GET(self):
90
143
  if self.path == "/health":
91
144
  _touch()
145
+ loaded = _mlx_loaded if _Handler.backend == "mlx" else (_model is not None)
92
146
  self._send_json(200, {
93
147
  "ok": True,
148
+ "backend": _Handler.backend,
94
149
  "model": _model_name or _Handler.model_name,
95
- "loaded": _model is not None,
150
+ "loaded": loaded,
96
151
  })
97
152
  elif self.path == "/warmup":
98
153
  # Eagerly load the model into RAM (no audio needed) and reset the
@@ -101,8 +156,10 @@ class _Handler(BaseHTTPRequestHandler):
101
156
  _touch()
102
157
  with _model_lock:
103
158
  try:
104
- _load_model_if_needed(_Handler.model_name, _Handler.device, _Handler.compute_type)
105
- self._send_json(200, {"ok": True, "loaded": _model is not None, "model": _model_name})
159
+ loaded = _warmup_model()
160
+ self._send_json(200, {"ok": True, "loaded": loaded, "model": _Handler.model_name, "backend": _Handler.backend})
161
+ except ImportError as e:
162
+ self._send_json(500, {"ok": False, "error": f"{_Handler.backend} backend not installed: {e}"})
106
163
  except Exception as e:
107
164
  self._send_json(500, {"ok": False, "error": f"model load failed: {e}"})
108
165
  else:
@@ -124,29 +181,14 @@ class _Handler(BaseHTTPRequestHandler):
124
181
  beam_size = int(self.headers.get("X-Beam-Size") or 3)
125
182
 
126
183
  with _model_lock:
127
- try:
128
- m = _load_model_if_needed(_Handler.model_name, _Handler.device, _Handler.compute_type)
129
- except ImportError:
130
- self._send_json(500, {"ok": False, "error": "faster-whisper not installed"})
131
- return
132
- except Exception as e:
133
- self._send_json(500, {"ok": False, "error": f"model load failed: {e}"})
134
- return
135
-
136
184
  import tempfile
137
185
  tmp = tempfile.NamedTemporaryFile(suffix=f".{audio_format}", delete=False)
138
186
  try:
139
187
  tmp.write(audio_bytes)
140
188
  tmp.close()
141
- segments, info = m.transcribe(tmp.name, beam_size=beam_size, language=language)
142
- text = " ".join(seg.text.strip() for seg in segments).strip()
143
- self._send_json(200, {
144
- "ok": True, "text": text,
145
- "language": info.language,
146
- "language_probability": round(info.language_probability, 4),
147
- "duration": round(info.duration, 2) if hasattr(info, "duration") else None,
148
- "model": _model_name,
149
- })
189
+ self._send_json(200, _transcribe_file(tmp.name, language, beam_size))
190
+ except ImportError as e:
191
+ self._send_json(500, {"ok": False, "error": f"{_Handler.backend} backend not installed: {e}"})
150
192
  except Exception as e:
151
193
  self._send_json(500, {"ok": False, "error": f"chunk transcription failed: {e}"})
152
194
  finally:
@@ -168,29 +210,11 @@ class _Handler(BaseHTTPRequestHandler):
168
210
 
169
211
  with _model_lock:
170
212
  try:
171
- m = _load_model_if_needed(_Handler.model_name, _Handler.device, _Handler.compute_type)
172
- except ImportError:
173
- self._send_json(500, {
174
- "ok": False,
175
- "error": "faster-whisper not installed — run: pip3 install faster-whisper",
176
- })
177
- return
178
- except Exception as e:
179
- self._send_json(500, {"ok": False, "error": f"model load failed: {e}"})
180
- return
181
-
182
- try:
183
- segments, info = m.transcribe(audio_path, beam_size=beam_size, language=language)
184
- text = " ".join(seg.text.strip() for seg in segments).strip()
185
- self._send_json(200, {
186
- "ok": True,
187
- "text": text,
188
- "language": info.language,
189
- "language_probability": round(info.language_probability, 4),
190
- "duration": round(info.duration, 2),
191
- "model": _model_name,
192
- "compute_type": _Handler.compute_type,
193
- })
213
+ self._send_json(200, _transcribe_file(audio_path, language, beam_size))
214
+ except ImportError as e:
215
+ hint = ("pip3 install faster-whisper" if _Handler.backend == "faster"
216
+ else "pip3 install mlx-whisper")
217
+ self._send_json(500, {"ok": False, "error": f"{_Handler.backend} backend not installed — run: {hint} ({e})"})
194
218
  except Exception as e:
195
219
  self._send_json(500, {"ok": False, "error": f"transcription failed: {e}"})
196
220
 
@@ -231,12 +255,14 @@ def main():
231
255
 
232
256
  parser = argparse.ArgumentParser(description="Persistent APX Whisper server")
233
257
  parser.add_argument("--port", type=int, default=18765)
258
+ parser.add_argument("--backend", default="faster", choices=["faster", "mlx"])
234
259
  parser.add_argument("--model", default="small")
235
260
  parser.add_argument("--device", default="cpu")
236
261
  parser.add_argument("--compute-type", dest="compute_type", default="int8")
237
262
  parser.add_argument("--idle-minutes", dest="idle_minutes", type=int, default=10)
238
263
  args = parser.parse_args()
239
264
 
265
+ _Handler.backend = args.backend
240
266
  _Handler.model_name = args.model
241
267
  _Handler.device = args.device
242
268
  _Handler.compute_type = args.compute_type
@@ -252,6 +278,7 @@ def main():
252
278
  print(json.dumps({
253
279
  "status": "ready",
254
280
  "port": args.port,
281
+ "backend": args.backend,
255
282
  "model": args.model,
256
283
  "idle_minutes": args.idle_minutes,
257
284
  }), flush=True)
@@ -16,19 +16,26 @@ import {
16
16
  WIN_RUN_KEY,
17
17
  WIN_RUN_NAME,
18
18
  } from "#core/desktop/autostart.js";
19
+ import {
20
+ DESKTOP_MAIN,
21
+ readPid, writePid, clearPid, pidAlive, isDesktopRunning,
22
+ findElectron as _findElectron,
23
+ buildElectronSpawn as _buildElectronSpawn,
24
+ startDesktopDetached,
25
+ stopDesktop,
26
+ } from "#core/desktop/process.js";
19
27
 
20
28
  // Re-exports — kept so existing tests (tests/desktop-autostart.test.js)
21
29
  // can still import these directly from the CLI module.
22
30
  export const getApxRunner = _getApxRunner;
23
31
  export const buildPlist = _buildPlist;
24
32
  export const autostartIsOn = _autostartIsOn;
33
+ export const findElectron = _findElectron;
34
+ export const buildElectronSpawn = _buildElectronSpawn;
25
35
 
26
36
  const __filename = fileURLToPath(import.meta.url);
27
37
  const __dirname = path.dirname(__filename);
28
38
 
29
- const DESKTOP_MAIN = path.resolve(__dirname, "../../desktop/main.js");
30
- const DESKTOP_PID = path.join(os.homedir(), ".apx", "desktop.pid");
31
-
32
39
  // ── ANSI ─────────────────────────────────────────────────────────────────────
33
40
  const c = { reset:"\x1b[0m", bold:"\x1b[1m", dim:"\x1b[2m", green:"\x1b[32m",
34
41
  red:"\x1b[31m", yellow:"\x1b[33m", cyan:"\x1b[36m", gray:"\x1b[90m" };
@@ -38,71 +45,9 @@ const fmt = {
38
45
  cyan:(s)=>`${c.cyan}${s}${c.reset}`, gray:(s)=>`${c.gray}${s}${c.reset}`,
39
46
  };
40
47
 
41
- // ── Helpers ───────────────────────────────────────────────────────────────────
42
-
43
- function readPid() {
44
- try { return parseInt(fs.readFileSync(DESKTOP_PID, "utf8").trim(), 10); } catch { return null; }
45
- }
46
- function writePid(pid) {
47
- fs.mkdirSync(path.dirname(DESKTOP_PID), { recursive: true });
48
- fs.writeFileSync(DESKTOP_PID, String(pid));
49
- }
50
- function clearPid() { try { fs.unlinkSync(DESKTOP_PID); } catch {} }
51
- function pidAlive(pid) {
52
- if (!pid) return false;
53
- try { process.kill(pid, 0); return true; } catch { return false; }
54
- }
55
-
56
- // Validate that an electron candidate actually runs (a pnpm shim can exist as a
57
- // file while its underlying package was never built — `--version` smokes that out).
58
- function electronRuns(cmd, argv) {
59
- try {
60
- execFileSync(cmd, argv, { stdio: "ignore", timeout: 5000 });
61
- return true;
62
- } catch { return false; }
63
- }
64
-
65
- // Returns a descriptor used by buildElectronSpawn():
66
- // absolute path to a real electron binary,
67
- // absolute path to electron's cli.js (".js" → run via node),
68
- // "npx" as a last-resort fallback (downloads/uses electron via npx).
69
- // Never returns null — npx is always attempted so the user gets a real error
70
- // from the spawn (and a one-time download) rather than a silent no-op.
71
- export function findElectron() {
72
- // commands/ is 4 levels under the project root: src/interfaces/cli/commands/
73
- const root = path.resolve(__dirname, "..", "..", "..", "..");
74
- const bin = path.join(root, "node_modules", ".bin", "electron");
75
- // The .bin shim is a shell wrapper that `exec node …`. Under launchd's
76
- // minimal PATH (`/usr/bin:/bin:/usr/sbin:/sbin`) `node` isn't found, so the
77
- // shim fails. We try it first (cheap, works for terminal use) and then fall
78
- // back to invoking electron's cli.js directly with process.execPath, which
79
- // is launchd-safe.
80
- if (fs.existsSync(bin) && electronRuns(bin, ["--version"])) return bin;
81
-
82
- const cli = path.join(root, "node_modules", "electron", "cli.js");
83
- if (fs.existsSync(cli) && electronRuns(process.execPath, [cli, "--version"])) return cli;
84
-
85
- // Global electron on PATH (works from terminal, usually not from launchd)
86
- try {
87
- const which = execFileSync("which", ["electron"], { stdio: ["ignore", "pipe", "ignore"] }).toString().trim();
88
- if (which && electronRuns(which, ["--version"])) return which;
89
- } catch {}
90
-
91
- // Last resort: npx (pulls electron if absent). Will ENOENT under launchd if
92
- // npx isn't on PATH — that's why we try cli.js BEFORE this.
93
- return "npx";
94
- }
95
-
96
- // Turn a findElectron() descriptor + the app entry into a { cmd, argv } pair.
97
- export function buildElectronSpawn(descriptor, mainPath, port) {
98
- if (descriptor === "npx") {
99
- return { cmd: "npx", argv: ["-y", "electron", mainPath, "--port", port] };
100
- }
101
- if (descriptor.endsWith(".js")) {
102
- return { cmd: process.execPath, argv: [descriptor, mainPath, "--port", port] };
103
- }
104
- return { cmd: descriptor, argv: [mainPath, "--port", port] };
105
- }
48
+ // PID + electron-resolution helpers live in #core/desktop/process.js (shared
49
+ // with the daemon's /desktop/{start,stop} endpoints). findElectron and
50
+ // buildElectronSpawn are re-exported above for the existing tests.
106
51
 
107
52
  // ── Commands ──────────────────────────────────────────────────────────────────
108
53
 
@@ -99,9 +99,12 @@ function getTheme() {
99
99
  try {
100
100
  const cfg = JSON.parse(fs.readFileSync(CONFIG_PATH, "utf8"));
101
101
  const t = cfg?.desktop?.theme;
102
- if (t === "light" || t === "dark") return t;
102
+ if (t === "light" || t === "dark" || t === "system") return t;
103
103
  } catch {}
104
- return "light";
104
+ // "system" follows the OS appearance (the renderer resolves it via
105
+ // prefers-color-scheme). It's the default so a fresh install matches the
106
+ // user's macOS/Windows light/dark setting out of the box.
107
+ return "system";
105
108
  }
106
109
 
107
110
  // Resolve the agent's display name from ~/.apx/identity.json + config.
@@ -316,6 +319,25 @@ function hideOverlay() {
316
319
  if (isRecording) stopRecording();
317
320
  }
318
321
 
322
+ // Soft-restart the floating window: re-read ~/.apx/config.json, move the window
323
+ // to the (possibly new) configured position, and reload the renderer so it
324
+ // re-applies theme/position/shortcut. Triggered by the web admin's Restart
325
+ // button via a "reload" WS event — far cheaper than killing + relaunching the
326
+ // Electron process (which would drop the tray + global shortcut). Recreates the
327
+ // window if it was closed.
328
+ function reloadDesktopWindow() {
329
+ try {
330
+ if (!mainWindow) { createWindow(); showOverlay(); return; }
331
+ const [, currentH] = mainWindow.getSize();
332
+ const origin = getWindowOrigin(currentH);
333
+ mainWindow.setPosition(origin.x, origin.y);
334
+ mainWindow.webContents.reload();
335
+ showOverlay();
336
+ } catch (e) {
337
+ console.warn("desktop: reload failed —", e.message);
338
+ }
339
+ }
340
+
319
341
  // ---------------------------------------------------------------------------
320
342
  // Global shortcut: Cmd/Ctrl+Shift+Space toggles recording
321
343
  // ---------------------------------------------------------------------------
@@ -527,8 +549,10 @@ function transcribeChunk(buf, format, language) {
527
549
  "Content-Length": buf.length,
528
550
  "X-Audio-Format": format,
529
551
  "X-Language": language,
530
- // Overlay is real-time local whisper only. Never fall back to OpenAI.
531
- "X-Provider": "local",
552
+ // No X-Provider override: the desktop honours the configured STT engine
553
+ // (transcription.provider in ~/.apx/config.json) — local faster-whisper,
554
+ // OpenAI cloud, or a custom OpenAI-compatible server (mlx-audio / a
555
+ // Radeon/NVIDIA box on the LAN). Set it in the web admin → /m/voice.
532
556
  ...(token ? { "Authorization": `Bearer ${token}` } : {}),
533
557
  },
534
558
  };
@@ -585,6 +609,10 @@ function connectDaemon() {
585
609
  wsConn.on("message", (raw) => {
586
610
  let msg;
587
611
  try { msg = JSON.parse(raw.toString()); } catch { return; }
612
+ // "reload" is a control event from the web admin's Restart button (POST
613
+ // /desktop/restart). Re-read config, reposition, and soft-reload the
614
+ // renderer so theme/position changes apply without killing the process.
615
+ if (msg && msg.type === "reload") { reloadDesktopWindow(); return; }
588
616
  // Forward all daemon events to the renderer
589
617
  mainWindow?.webContents.send("daemon-event", msg);
590
618
  });
@@ -84,7 +84,7 @@
84
84
  let turnWatchdog = null; // flushes the queue if a segment's TTS hangs
85
85
 
86
86
  let history = []; // [{role:'user'|'assistant', content}] sent to daemon for context
87
- let theme = "light";
87
+ let theme = "system"; // "light" | "dark" | "system" (config value, pre-resolution)
88
88
  let position = "right";
89
89
  let agentName = "Superagente"; // overwritten from config on first render
90
90
 
@@ -161,20 +161,20 @@
161
161
  // the agent name stays wrong until the user changes mode.
162
162
  let configReady = false;
163
163
  Promise.all([
164
- window.apx?.getTheme?.() ?? "light",
164
+ window.apx?.getTheme?.() ?? "system",
165
165
  window.apx?.getPosition?.() ?? "right",
166
166
  window.apx?.getShortcut?.() ?? "CommandOrControl+G",
167
167
  window.apx?.getAgentName?.() ?? "Superagente",
168
168
  window.apx?.getVoiceTiming?.() ?? null,
169
169
  ]).then(([th, pos, shortcut, name, timing]) => {
170
- theme = th || "light";
170
+ theme = th || "system";
171
171
  position = pos || "right";
172
172
  agentName = (name && String(name).trim()) || "Superagente";
173
173
  if (timing) {
174
174
  if (typeof timing.silence_ms === "number") SILENCE_MS = timing.silence_ms;
175
175
  if (typeof timing.voice_rms === "number") VOICE_RMS = timing.voice_rms;
176
176
  }
177
- document.documentElement.setAttribute("data-theme", theme);
177
+ applyTheme(theme);
178
178
  setPosition(position);
179
179
  captionShortcut = shortcut || "CommandOrControl+G";
180
180
  configReady = true;
@@ -186,13 +186,34 @@
186
186
  if (input) input.placeholder = `Hablá o escribí a ${agentName}…`;
187
187
  render();
188
188
  }).catch(() => {
189
- document.documentElement.setAttribute("data-theme", "light");
189
+ applyTheme("system");
190
190
  setPosition("right");
191
191
  captionShortcut = "CommandOrControl+G";
192
192
  configReady = true;
193
193
  render();
194
194
  });
195
195
 
196
+ // Resolve the configured theme to a concrete data-theme value. "system"
197
+ // follows the OS appearance via prefers-color-scheme; "light"/"dark" are
198
+ // used verbatim. We also subscribe to OS changes so a window left on
199
+ // "system" flips live when the user toggles macOS/Windows dark mode.
200
+ function prefersDark() {
201
+ try { return !!(window.matchMedia && window.matchMedia("(prefers-color-scheme: dark)").matches); }
202
+ catch { return false; }
203
+ }
204
+ function resolveTheme(pref) {
205
+ return pref === "system" ? (prefersDark() ? "dark" : "light") : (pref || "light");
206
+ }
207
+ function applyTheme(pref) {
208
+ theme = pref || "system";
209
+ document.documentElement.setAttribute("data-theme", resolveTheme(theme));
210
+ }
211
+ try {
212
+ window.matchMedia("(prefers-color-scheme: dark)").addEventListener("change", () => {
213
+ if (theme === "system") document.documentElement.setAttribute("data-theme", resolveTheme("system"));
214
+ });
215
+ } catch {}
216
+
196
217
  function setPosition(p) {
197
218
  $root.classList.remove("pos-left", "pos-center", "pos-right");
198
219
  $root.classList.add("pos-" + p);