npm - @agentprojectcontext/apx - Versions diffs - 1.42.1 → 1.43.0 - Mend

@agentprojectcontext/apx 1.42.1 → 1.43.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (35) hide show

package/package.json +1 -1
package/src/core/channels/telegram/api.js +62 -0
package/src/core/channels/telegram/ask-callbacks.js +238 -0
package/src/core/config/index.js +2 -0
package/src/core/config/redact.js +2 -0
package/src/core/confirmation/adapters/telegram.js +20 -37
package/src/core/desktop/process.js +126 -0
package/src/core/voice/stt-hardware.js +87 -0
package/src/core/voice/stt-models.js +97 -0
package/src/core/voice/transcription.js +147 -16
package/src/host/daemon/api/desktop.js +54 -8
package/src/host/daemon/api/transcribe.js +40 -1
package/src/host/daemon/plugins/desktop/index.js +6 -1
package/src/host/daemon/plugins/telegram/index.js +61 -351
package/src/host/daemon/whisper-server.js +18 -8
package/src/host/daemon/whisper-server.py +71 -44
package/src/interfaces/cli/commands/desktop.js +13 -68
package/src/interfaces/desktop/main.js +32 -4
package/src/interfaces/desktop/renderer.js +26 -5
package/src/interfaces/web/dist/assets/index-B0nTYflm.js +651 -0
package/src/interfaces/web/dist/assets/index-B0nTYflm.js.map +1 -0
package/src/interfaces/web/dist/assets/index-C22PmKCD.css +1 -0
package/src/interfaces/web/dist/index.html +2 -2
package/src/interfaces/web/package-lock.json +3 -3
package/src/interfaces/web/src/components/ShortcutInput.tsx +156 -0
package/src/interfaces/web/src/components/voice/VoiceSttCard.tsx +101 -5
package/src/interfaces/web/src/i18n/en.ts +28 -2
package/src/interfaces/web/src/i18n/es.ts +28 -2
package/src/interfaces/web/src/lib/api/desktop.ts +28 -0
package/src/interfaces/web/src/lib/api/voice.ts +26 -2
package/src/interfaces/web/src/screens/modules/DeckScreen.tsx +55 -3
package/src/interfaces/web/src/screens/modules/DesktopScreen.tsx +98 -36
package/src/interfaces/web/dist/assets/index-BReF4_xV.js +0 -646
package/src/interfaces/web/dist/assets/index-BReF4_xV.js.map +0 -1
package/src/interfaces/web/dist/assets/index-wrEbTJbc.css +0 -1

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@agentprojectcontext/apx",
-  "version": "1.42.1",
+  "version": "1.43.0",
   "description": "APX — unified CLI + daemon for the Agent Project Context (APC) standard.",
   "publishConfig": {
     "access": "public"

package/src/core/channels/telegram/api.js ADDED Viewed

@@ -0,0 +1,62 @@
+// Low-level Telegram Bot API client — the single place the raw JSON endpoints
+// are called. Higher layers (the poller's send/typing/keyboard methods, the
+// confirmation adapter, the ask flow) compose these instead of hand-rolling
+// fetch boilerplate, so each endpoint's quirks live in exactly one spot. These
+// used to be duplicated across the poller AND the confirm adapter.
+//
+// Every call is token-explicit (no channel/config coupling) so it's reusable
+// from any surface — poller, adapter, routines, tests. Media uploads (multipart
+// FormData) stay in ./media.js; this module owns the JSON endpoints.
+import { API_BASE } from "./media.js";
+/**
+ * Invoke a Bot API method by POSTing `body` as JSON.
+ *
+ * Resolves with the `result` field of Telegram's reply. Rejects when fetch
+ * itself fails or when the reply is not flagged `ok` (an unparseable reply
+ * counts as not ok). The error message is Telegram's `description` when there
+ * is one, otherwise "<method> failed (<http status>)". Callers that only need
+ * best-effort delivery are expected to catch the rejection themselves.
+ */
+async function apiCall(token, method, body) {
+  const endpoint = `${API_BASE}/bot${token}/${method}`;
+  const response = await fetch(endpoint, {
+    method: "POST",
+    headers: { "content-type": "application/json" },
+    body: JSON.stringify(body),
+  });
+  // A body that is not JSON is mapped to {} so it lands in the failure path.
+  const payload = await response.json().catch(() => ({}));
+  if (payload.ok) return payload.result;
+  throw new Error(payload.description || `${method} failed (${response.status})`);
+}
+/**
+ * sendMessage: post a text message to a chat. `reply_markup` (e.g. an inline
+ * keyboard) and `parse_mode` are only included in the request when truthy.
+ */
+export function sendMessage(token, chatId, { text, reply_markup, parse_mode } = {}) {
+  const payload = {
+    chat_id: chatId,
+    text,
+    ...(reply_markup ? { reply_markup } : {}),
+    ...(parse_mode ? { parse_mode } : {}),
+  };
+  return apiCall(token, "sendMessage", payload);
+}
+/**
+ * sendChatAction: show a chat action indicator; defaults to "typing".
+ * Telegram clears the indicator on its own after a few seconds.
+ */
+export function sendChatAction(token, chatId, action = "typing") {
+  const payload = { chat_id: chatId, action };
+  return apiCall(token, "sendChatAction", payload);
+}
+/**
+ * editMessageReplyMarkup: replace the inline keyboard of an already-sent
+ * message. A falsy `reply_markup` is left out of the request entirely.
+ */
+export function editMessageReplyMarkup(token, chatId, messageId, reply_markup) {
+  const payload = {
+    chat_id: chatId,
+    message_id: messageId,
+    ...(reply_markup ? { reply_markup } : {}),
+  };
+  return apiCall(token, "editMessageReplyMarkup", payload);
+}
+/**
+ * answerCallbackQuery: acknowledge a tapped inline button so the client stops
+ * showing its spinner. A truthy `text` is sent along as the toast message.
+ */
+export function answerCallbackQuery(token, callbackQueryId, text) {
+  const payload = {
+    callback_query_id: callbackQueryId,
+    ...(text ? { text } : {}),
+  };
+  return apiCall(token, "answerCallbackQuery", payload);
+}
+/**
+ * getUpdates: long-poll Telegram for inbound updates starting at `offset`,
+ * waiting up to `timeout` seconds. Resolves with the update array (empty when
+ * Telegram sends no `result`). Rejects on a non-2xx HTTP status or on a reply
+ * that is not flagged `ok`.
+ */
+export async function getUpdates(token, { offset = 0, timeout = 25 } = {}) {
+  const query = `timeout=${timeout}&offset=${offset}`;
+  const response = await fetch(`${API_BASE}/bot${token}/getUpdates?${query}`);
+  if (!response.ok) throw new Error(`getUpdates ${response.status}`);
+  const payload = await response.json();
+  if (payload.ok) return payload.result || [];
+  throw new Error(payload.description || "telegram error");
+}

package/src/core/channels/telegram/ask-callbacks.js ADDED Viewed

@@ -0,0 +1,238 @@
+// ask_questions flow orchestration for Telegram, extracted from the host poller
+// so that file stays focused on process lifecycle. Like dispatch.js, every
+// function takes the poller instance (`self`) and reaches its I/O surface
+// (self._send / _editKeyboard / _answerCallback / _startTyping) and config
+// through it. The flow's own state machine lives in ./ask.js; this is the glue
+// that turns its decisions into Telegram messages and re-enters the reply path.
+import * as askFlow from "./ask.js";
+import { resolveBotToken } from "./helpers.js";
+import { buildStreamHandler, runTelegramSuperAgent, telegramErrorText, sendFinalReply } from "./reply.js";
+import { createTelegramConfirmAdapter } from "#core/confirmation/adapters/telegram.js";
+import { getConfirmationStore as getConfirmStore } from "#core/confirmation/pending-store.js";
+import { getRecentTelegramTurnsFromFs, appendGlobalMessage } from "#core/stores/messages.js";
+import { CHANNELS } from "#core/constants/channels.js";
+import { SUPERAGENT_ACTOR_ID } from "#core/identity/index.js";
+/**
+ * Dispatch an inbound callback_query to its owner. Presses whose data starts
+ * with `apx:ask:` belong to the ask_questions flow; every other press is
+ * offered to a confirmation adapter built for the originating chat. A press
+ * the adapter does not claim is only logged.
+ */
+export async function handleCallbackQuery(self, callbackQuery) {
+  const payload = callbackQuery.data || "";
+  const isAskPress = payload.startsWith("apx:ask:");
+  if (isAskPress) {
+    await handleAskCallback(self, callbackQuery);
+    return;
+  }
+  const token = resolveBotToken(self.channel);
+  const chatId = callbackQuery.message?.chat?.id;
+  const pendingStore = getConfirmStore();
+  const confirmAdapter = createTelegramConfirmAdapter({ token, chatId, pendingStore });
+  const wasHandled = await confirmAdapter.handleCallbackQuery(callbackQuery);
+  if (wasHandled) return;
+  self.log(`telegram[${self.channel.name}] unhandled callback_query: ${callbackQuery.data}`);
+}
+/**
+ * Post the flow's current question as a new message with its inline keyboard.
+ * If an earlier question message is tracked, its keyboard is emptied first
+ * (best-effort) so only the newest question stays tappable. The id of the new
+ * message is stored on the state, which is then persisted.
+ */
+export async function renderQuestion(self, state) {
+  const questionText = askFlow.formatQuestionText(state);
+  const keyboard = askFlow.buildKeyboard(state);
+  const previousMessageId = state.messageId;
+  if (previousMessageId) {
+    const emptied = {
+      chat_id: state.chatId,
+      message_id: previousMessageId,
+      reply_markup: { inline_keyboard: [] },
+    };
+    try {
+      await self._editKeyboard(emptied);
+    } catch { /* best-effort */ }
+  }
+  const outgoing = {
+    chat_id: state.chatId,
+    text: questionText,
+    reply_markup: keyboard,
+    parse_mode: "Markdown",
+  };
+  const delivered = await self._send(outgoing);
+  state.messageId = delivered?.message_id || null;
+  askFlow.saveState(state.chatId, state);
+}
+/**
+ * Begin a new ask flow for the questions the super-agent requested and show
+ * the first one. The `resume` closure keeps the whole per-turn `ctx`, so once
+ * the answers are compiled a follow-up turn runs with the same inputs plus
+ * `compiled`.
+ */
+export async function startAskFlow(self, ctx) {
+  const { chat_id: chatId, projectId, authorId, questions } = ctx;
+  const resume = async (compiled) => {
+    await runResumedTurn(self, { ...ctx, compiled });
+  };
+  const flowState = askFlow.startFlow({ chatId, projectId, authorId, questions, resume });
+  await renderQuestion(self, flowState);
+}
+/** Best-effort removal of the inline keyboard from an ask-flow message. */
+async function clearAskKeyboard(self, chatId, messageId) {
+  try {
+    await self._editKeyboard({
+      chat_id: chatId,
+      message_id: messageId,
+      reply_markup: { inline_keyboard: [] },
+    });
+  } catch { /* best-effort */ }
+}
+/**
+ * Apply an inline-keyboard press to the ask flow, then react to the outcome:
+ *   - "redraw":  refresh the keyboard on the same message (multi-select toggle)
+ *   - "advance": post the next question
+ *   - "cancel":  clear the keyboard and tell the user the question was cancelled
+ *   - "done":    clear the keyboard and hand the compiled answer to `resume`
+ *
+ * Presses without a chat id are ignored. Telegram-side failures (ack, keyboard
+ * edits, the cancel notice) are best-effort and never abort the flow; errors
+ * from rendering the next question or from `resume` still propagate.
+ */
+export async function handleAskCallback(self, callbackQuery) {
+  const chatId = callbackQuery.message?.chat?.id;
+  if (!chatId) return;
+  const messageId = callbackQuery.message?.message_id;
+  const result = askFlow.applyCallback(chatId, callbackQuery.data || "");
+  // Ack the press regardless — keeps the spinner from hanging client-side. The
+  // flow state above is already updated, so a failed ack must not stop the UI
+  // from catching up with it.
+  try {
+    await self._answerCallback({ callback_query_id: callbackQuery.id });
+  } catch (e) {
+    self.log(`telegram[${self.channel.name}] callback ack failed: ${e.message}`);
+  }
+  if (!result) return; // stale or unknown press — acked above, nothing to update.
+  switch (result.action) {
+    case "redraw":
+      // Multi-select toggle: refresh the keyboard on the SAME message.
+      try {
+        await self._editKeyboard({
+          chat_id: chatId,
+          message_id: messageId,
+          reply_markup: askFlow.buildKeyboard(result.state),
+        });
+      } catch (e) {
+        self.log(`telegram[${self.channel.name}] redraw failed: ${e.message}`);
+      }
+      return;
+    case "advance":
+      await renderQuestion(self, result.state);
+      return;
+    case "cancel":
+      await clearAskKeyboard(self, chatId, messageId);
+      // Sent on its own so a failed keyboard edit cannot swallow the notice.
+      try {
+        await self._send({ chat_id: chatId, text: "Pregunta cancelada." });
+      } catch (e) {
+        self.log(`telegram[${self.channel.name}] cancel notice failed: ${e.message}`);
+      }
+      return;
+    case "done":
+      await clearAskKeyboard(self, chatId, messageId);
+      // Feed the compiled answer back as a synthetic user turn.
+      if (typeof result.state.resume === "function") {
+        await result.state.resume(result.compiled);
+      }
+      return;
+    default:
+      return;
+  }
+}
+/**
+ * Treat a plain-text message as the answer to a pending free-text question.
+ * Resolves to true when the ask flow consumed the message (the caller should
+ * then skip the normal super-agent path) and false when there was nothing
+ * pending or the answer was not accepted.
+ */
+export async function maybeConsumeAskTextAnswer(self, { chat_id, text }) {
+  const hasInput = Boolean(chat_id && text);
+  if (!hasInput || !askFlow.hasPendingFreeText(chat_id)) return false;
+  const answered = askFlow.applyTextAnswer(chat_id, text);
+  if (!answered) return false;
+  // Step past the answered question by replaying a synthetic "next" press.
+  const outcome = askFlow.applyCallback(chat_id, `apx:ask:${answered.correlationId}:next`);
+  switch (outcome?.action) {
+    case "advance":
+      await renderQuestion(self, outcome.state);
+      break;
+    case "done":
+      if (typeof outcome.state.resume === "function") {
+        await outcome.state.resume(outcome.compiled);
+      }
+      break;
+    default:
+      break;
+  }
+  return true;
+}
+/**
+ * Run a follow-up super-agent turn with the compiled answers as the user prompt.
+ * Shares the exact reply path as a normal inbound turn (./reply.js) — only the
+ * photo/audio/reset preamble is skipped. Re-enters the ask flow if the model
+ * decides to ask again.
+ *
+ * `ctx` carries the per-turn inputs captured when the flow started, plus
+ * `compiled` (the prompt for this turn). Does nothing when `ctx.chat_id` is
+ * missing. A failure of the super-agent run (or of restarting the ask flow) is
+ * logged and replaced by an error reply rather than rethrown.
+ */
+export async function runResumedTurn(self, ctx) {
+  const { chat_id, compiled, target, relationshipBlock, allowedTools, author, agentDisplay, update_id, sender, authorId } = ctx;
+  if (!chat_id) return;
+  // Log the synthetic user message so getRecentTelegramTurnsFromFs picks it up
+  // on the NEXT inbound. Mirrors how a normal text reply would be recorded.
+  appendGlobalMessage({
+    channel: CHANNELS.TELEGRAM,
+    direction: "in",
+    type: "user",
+    actor_id: authorId ? String(authorId) : (author || "ask_flow"),
+    // Timestamp-based id: there is no real Telegram message behind this turn.
+    external_id: `ask-${Date.now()}`,
+    author: author || "user",
+    body: compiled,
+    meta: { chat_id, user_id: authorId || null, tg_channel: self.channel.name, ask_flow: true },
+  });
+  // History is read after the append above.
+  // NOTE(review): whether it already contains the synthetic message depends on
+  // the store — confirm the prompt is not duplicated in the model's context.
+  const previousMessages = getRecentTelegramTurnsFromFs({ chat_id, keepRecent: 40, max_age_hours: 24 });
+  const { onEvent, state } = buildStreamHandler(self, { chat_id, update_id, agentDisplay });
+  // _startTyping hands back the function that stops the indicator.
+  const stopTyping = self._startTyping(chat_id);
+  let replyText;
+  let replyAuthor;
+  let saUsage = null;
+  try {
+    const sa = await runTelegramSuperAgent(self, {
+      chat_id,
+      prompt: compiled,
+      previousMessages,
+      target,
+      author,
+      relationshipBlock,
+      allowedTools,
+      onEvent,
+    });
+    // Did the model ask again? Restart the flow instead of replying.
+    const followupAsk = askFlow.extractAskQuestionsFromTrace(sa.trace);
+    if (followupAsk) {
+      stopTyping();
+      await startAskFlow(self, {
+        chat_id,
+        projectId: target?.id,
+        authorId,
+        questions: followupAsk,
+        author,
+        agentDisplay,
+        relationshipBlock,
+        allowedTools,
+        target,
+        sender,
+        update_id,
+      });
+      // The new question is the response for this turn — no final reply.
+      return;
+    }
+    replyText = sa.text;
+    replyAuthor = sa.name || agentDisplay;
+    saUsage = sa.usage;
+  } catch (e) {
+    self.log(`telegram[${self.channel.name}] ask resume failed: ${e.message}`);
+    replyText = telegramErrorText(self, e);
+    replyAuthor = agentDisplay;
+  }
+  // Reached on success and after the catch. If startAskFlow threw, the
+  // indicator was already stopped above, so this is a second call —
+  // presumably harmless; confirm _startTyping's stop function is idempotent.
+  stopTyping();
+  await sendFinalReply(self, {
+    chat_id,
+    update_id,
+    replyText,
+    replyAuthor,
+    replyActorId: SUPERAGENT_ACTOR_ID,
+    replyKind: "superagent",
+    saUsage,
+    // Stream progress collected by the handler built above.
+    streamedCount: state.streamedCount,
+    lastStreamedText: state.lastStreamedText,
+    agentDisplay,
+    extraMeta: { ask_resume: true },
+  });
+}

package/src/core/config/index.js CHANGED Viewed

@@ -190,6 +190,8 @@ const CREDENTIAL_PATHS = [
   ["voice", "tts", "elevenlabs", "api_key"],
   ["voice", "tts", "openai", "api_key"],
   ["voice", "tts", "gemini", "api_key"],
+  ["transcription", "openai", "api_key"],
+  ["transcription", "custom", "api_key"],
   ["memory", "embeddings", "openai", "api_key"],
   ["memory", "embeddings", "gemini", "api_key"],
   ["telegram", "channels"], // entire array — losing it is also a regression

package/src/core/config/redact.js CHANGED Viewed

@@ -18,6 +18,8 @@ export const SECRET_PATHS = [
   "voice.tts.elevenlabs.api_key",
   "voice.tts.openai.api_key",
   "voice.tts.gemini.api_key",
+  "transcription.openai.api_key",
+  "transcription.custom.api_key",
   "memory.embeddings.openai.api_key",
   "memory.embeddings.gemini.api_key",
   // Telegram bot tokens live inside an array — handled separately in redact()

package/src/core/confirmation/adapters/telegram.js CHANGED Viewed

@@ -24,7 +24,10 @@
 // keyboard but before the user tapped, pendingStore.wasKnown() detects the
 // SQLite row with no memory entry and we show "Expirado" instead of an error.
-const API_BASE = "https://api.telegram.org";
+// Raw Bot API calls go through the shared client so endpoint boilerplate lives
+// in one place (these used to be hand-rolled fetch calls duplicated here).
+import { sendMessage, answerCallbackQuery as apiAnswerCallbackQuery, editMessageReplyMarkup } from "#core/channels/telegram/api.js";
 const TIMEOUT_MS = 60_000; // 60 s — long enough for a human, short enough to not block forever
 /**
@@ -81,51 +84,31 @@ export function createTelegramConfirmAdapter({ token, chatId, pendingStore }) {
 async function sendConfirmKeyboard(token, chatId, description, correlationId, timeoutMs) {
   const timeoutSec = Math.round(timeoutMs / 1000);
-  await fetch(`${API_BASE}/bot${token}/sendMessage`, {
-    method: "POST",
-    headers: { "content-type": "application/json" },
-    body: JSON.stringify({
-      chat_id: chatId,
-      text:
-        `⚠️ *Confirm action*\n\n${escapeMarkdown(description)}\n\n` +
-        `_Expires in ${timeoutSec}s. No response → cancelled._`,
-      parse_mode: "Markdown",
-      reply_markup: {
-        inline_keyboard: [[
-          { text: "✅ Yes", callback_data: `apx:confirm:${correlationId}:yes` },
-          { text: "❌ No",  callback_data: `apx:confirm:${correlationId}:no` },
-        ]],
-      },
-    }),
+  await sendMessage(token, chatId, {
+    text:
+      `⚠️ *Confirm action*\n\n${escapeMarkdown(description)}\n\n` +
+      `_Expires in ${timeoutSec}s. No response → cancelled._`,
+    parse_mode: "Markdown",
+    reply_markup: {
+      inline_keyboard: [[
+        { text: "✅ Yes", callback_data: `apx:confirm:${correlationId}:yes` },
+        { text: "❌ No",  callback_data: `apx:confirm:${correlationId}:no` },
+      ]],
+    },
   });
 }
+// best-effort — Telegram gives only ~30s to answer; after that it's already cleared
 async function answerCallbackQuery(token, callbackQueryId, text) {
   try {
-    await fetch(`${API_BASE}/bot${token}/answerCallbackQuery`, {
-      method: "POST",
-      headers: { "content-type": "application/json" },
-      body: JSON.stringify({ callback_query_id: callbackQueryId, text }),
-    });
-  } catch {
-    // best-effort — Telegram gives only 30s to answer; after that it's already cleared
-  }
+    await apiAnswerCallbackQuery(token, callbackQueryId, text);
+  } catch { /* best-effort */ }
 }
 async function editMessageButtons(token, chatId, messageId, inlineKeyboard) {
   try {
-    await fetch(`${API_BASE}/bot${token}/editMessageReplyMarkup`, {
-      method: "POST",
-      headers: { "content-type": "application/json" },
-      body: JSON.stringify({
-        chat_id: chatId,
-        message_id: messageId,
-        reply_markup: { inline_keyboard: inlineKeyboard },
-      }),
-    });
-  } catch {
-    // best-effort
-  }
+    await editMessageReplyMarkup(token, chatId, messageId, { inline_keyboard: inlineKeyboard });
+  } catch { /* best-effort */ }
 }
 // Escape Markdown special chars so description text doesn't break Telegram markup.

package/src/core/desktop/process.js ADDED Viewed

@@ -0,0 +1,126 @@
+// Desktop (Electron floating window) process control — shared by the CLI
+// (`apx desktop start/stop/restart`) and the daemon's /desktop/{start,stop}
+// HTTP endpoints, so both spawn/kill the window the exact same way.
+//
+// The window is a detached Electron process (it must survive the spawner so a
+// LaunchAgent / a short-lived CLI invocation doesn't take it down). State is
+// tracked via ~/.apx/desktop.pid.
+"use strict";
+import fs from "node:fs";
+import os from "node:os";
+import path from "node:path";
+import { spawn, execFileSync } from "node:child_process";
+import { fileURLToPath } from "node:url";
+const __filename = fileURLToPath(import.meta.url);
+const __dirname = path.dirname(__filename);
+// src/core/desktop/ → repo root is three levels up.
+const ROOT = path.resolve(__dirname, "..", "..", "..");
+export const DESKTOP_MAIN = path.resolve(__dirname, "..", "..", "interfaces", "desktop", "main.js");
+export const DESKTOP_PID = path.join(os.homedir(), ".apx", "desktop.pid");
+const DESKTOP_LOG = path.join(os.homedir(), ".apx", "desktop.log");
+// ── PID file ────────────────────────────────────────────────────────────────
+// Read the tracked desktop PID. Returns a positive integer, or null when the
+// PID file is missing, unreadable, or does not hold a valid PID. Values <= 0
+// are rejected on purpose: kill() treats zero/negative PIDs as "a process
+// group" or "every process", so a corrupted file must never end up in a
+// process.kill() call.
+export function readPid() {
+  try {
+    const pid = Number.parseInt(fs.readFileSync(DESKTOP_PID, "utf8").trim(), 10);
+    return Number.isInteger(pid) && pid > 0 ? pid : null;
+  } catch {
+    return null;
+  }
+}
+// Persist `pid` to the PID file, creating its parent directory when needed.
+export function writePid(pid) {
+  const pidDir = path.dirname(DESKTOP_PID);
+  fs.mkdirSync(pidDir, { recursive: true });
+  const contents = String(pid);
+  fs.writeFileSync(DESKTOP_PID, contents);
+}
+// Remove the PID file; any failure (typically: it is already gone) is ignored.
+export function clearPid() {
+  try {
+    fs.unlinkSync(DESKTOP_PID);
+  } catch {
+    // nothing to clean up
+  }
+}
+// Liveness probe: true when signal 0 can be delivered to `pid`. Only positive
+// integer PIDs are probed — zero/negative values address process groups (or
+// every process) in kill(), so they would report "alive" for something that
+// is not the desktop window. A process we are not allowed to signal counts as
+// not alive, since it cannot be a window this user spawned.
+export function pidAlive(pid) {
+  const target = Number(pid);
+  if (!Number.isInteger(target) || target <= 0) return false;
+  try {
+    process.kill(target, 0);
+    return true;
+  } catch {
+    return false;
+  }
+}
+// True when the PID recorded in the PID file belongs to a live process.
+export function isDesktopRunning() {
+  const trackedPid = readPid();
+  return pidAlive(trackedPid);
+}
+// ── Electron resolution ───────────────────────────────────────────────────
+// Smoke-test a candidate: run it synchronously (5s cap, output discarded) and
+// report whether it exited cleanly. Needed because a pnpm shim can exist on
+// disk while the package behind it was never built.
+function electronRuns(cmd, argv) {
+  const probeOptions = { stdio: "ignore", timeout: 5000 };
+  try {
+    execFileSync(cmd, argv, probeOptions);
+  } catch {
+    return false;
+  }
+  return true;
+}
+// Pick how to launch Electron, in order of preference:
+//   1. the package-local bin shim,
+//   2. electron's own cli.js (a ".js" descriptor → run through node),
+//   3. an `electron` found on PATH via `which`,
+//   4. the literal "npx" as a last resort.
+// Candidates 1-3 must pass a `--version` smoke test. The result is a descriptor
+// for buildElectronSpawn() and is never null.
+export function findElectron() {
+  const versionProbe = ["--version"];
+  const localBin = path.join(ROOT, "node_modules", ".bin", "electron");
+  if (fs.existsSync(localBin) && electronRuns(localBin, versionProbe)) return localBin;
+  const cliScript = path.join(ROOT, "node_modules", "electron", "cli.js");
+  if (fs.existsSync(cliScript) && electronRuns(process.execPath, [cliScript, ...versionProbe])) {
+    return cliScript;
+  }
+  let globalBin = "";
+  try {
+    const located = execFileSync("which", ["electron"], { stdio: ["ignore", "pipe", "ignore"] });
+    globalBin = located.toString().trim();
+  } catch {
+    globalBin = "";
+  }
+  if (globalBin && electronRuns(globalBin, versionProbe)) return globalBin;
+  return "npx";
+}
+// Translate a findElectron() descriptor into the { cmd, argv } to spawn:
+//   "npx"        → npx -y electron <main> --port <port>
+//   "*.js" path  → <node> <descriptor> <main> --port <port>
+//   anything else is taken as an executable: <descriptor> <main> --port <port>
+export function buildElectronSpawn(descriptor, mainPath, port) {
+  const appArgs = [mainPath, "--port", port];
+  if (descriptor === "npx") {
+    return { cmd: "npx", argv: ["-y", "electron", ...appArgs] };
+  }
+  const runsThroughNode = descriptor.endsWith(".js");
+  return runsThroughNode
+    ? { cmd: process.execPath, argv: [descriptor, ...appArgs] }
+    : { cmd: descriptor, argv: appArgs };
+}
+// ── Lifecycle ───────────────────────────────────────────────────────────────
+// Spawn the window detached (survives the spawner). No console output — callers
+// format their own UX. Returns { ok, pid, already? } | { ok:false, error }.
+// detached:true gives the child its own session so a LaunchAgent / short-lived
+// CLI doesn't drag it down on exit; we unref() after a 1.5s fail-fast window.
+//
+// `port` defaults to $APX_PORT, then "7430", and is passed to the app as a
+// string. Spawn failures that Node reports asynchronously (the child's 'error'
+// event, e.g. ENOENT for a missing binary) are turned into { ok:false, error }
+// instead of surfacing as an unhandled 'error' event in the spawner.
+export async function startDesktopDetached({ port = process.env.APX_PORT || "7430" } = {}) {
+  if (isDesktopRunning()) return { ok: true, pid: readPid(), already: true };
+  clearPid(); // drop a stale PID left behind by a dead window
+  if (!fs.existsSync(DESKTOP_MAIN)) return { ok: false, error: `desktop app not found at ${DESKTOP_MAIN}` };
+  const { cmd, argv } = buildElectronSpawn(findElectron(), DESKTOP_MAIN, String(port));
+  // Append child stdout/stderr to the desktop log; fall back to discarding
+  // them when the log cannot be opened.
+  let logFd;
+  try { logFd = fs.openSync(DESKTOP_LOG, "a"); } catch { logFd = "ignore"; }
+  let child;
+  try {
+    child = spawn(cmd, argv, {
+      detached: true,
+      stdio: ["ignore", logFd, logFd],
+      env: { ...process.env, ELECTRON_ENABLE_LOGGING: "1" },
+    });
+  } catch (e) {
+    return { ok: false, error: e.message };
+  } finally {
+    // Our copy of the descriptor is no longer needed once spawn() has returned
+    // (or thrown).
+    if (typeof logFd === "number") { try { fs.closeSync(logFd); } catch {} }
+  }
+  // Fail-fast window: an 'error' or 'exit' within 1.5s decides the outcome;
+  // otherwise the child is considered up and is released with unref().
+  const res = await new Promise((resolve) => {
+    let timer;
+    let settled = false;
+    const settle = (outcome) => {
+      if (settled) return;
+      settled = true;
+      clearTimeout(timer); // don't keep the spawner alive for the full window
+      resolve(outcome);
+    };
+    // Kept for the child's lifetime (not `once`) so a late 'error' can never
+    // go unhandled.
+    child.on("error", (e) => settle({ ok: false, error: e.message }));
+    child.once("exit", (code) => settle({ ok: code === 0, code }));
+    timer = setTimeout(() => { child.unref(); settle({ ok: true }); }, 1500);
+  });
+  if (!res.ok) return { ok: false, error: res.error || `desktop exited with code ${res.code}` };
+  if (child.pid) writePid(child.pid);
+  return { ok: true, pid: child.pid };
+}
+// Send SIGTERM to the tracked window. Returns { ok:true, stopped:false } when
+// nothing was running (the PID file is cleared anyway), { ok:true,
+// stopped:true, pid } after signalling, or { ok:false, error } when the signal
+// could not be sent — in which case the PID file is kept.
+export function stopDesktop() {
+  const pid = readPid();
+  const running = pidAlive(pid);
+  if (!running) {
+    clearPid();
+    return { ok: true, stopped: false };
+  }
+  try {
+    process.kill(pid, "SIGTERM");
+  } catch (e) {
+    return { ok: false, error: e.message };
+  }
+  clearPid();
+  return { ok: true, stopped: true, pid };
+}

package/src/core/voice/stt-hardware.js ADDED Viewed

@@ -0,0 +1,87 @@
+// Hardware probe + STT engine recommendation.
+//
+// The transcription backend should adapt to the machine instead of making the
+// user understand CTranslate2 vs MLX vs whisper.cpp:
+//
+//   Apple Silicon (Metal)  → mlx-whisper, large-v3-turbo   (GPU/ANE accelerated)
+//   NVIDIA (CUDA)          → faster-whisper cuda, large-v3  (GPU accelerated)
+//   AMD / Radeon           → faster-whisper cpu (limited)   (no ROCm in CT2)
+//   CPU only               → faster-whisper cpu, small      (safe + light)
+//
+// Detection is dependency-free and best-effort: short-timeout probes of
+// nvidia-smi / rocminfo, plus os.platform()/os.arch(). Anything uncertain
+// degrades to the CPU recommendation.
+import os from "node:os";
+import { spawnSync } from "node:child_process";
+// Run `cmd` synchronously (1.5s cap, output discarded) and report whether it
+// exited with status 0. A missing binary, a timeout or a thrown error all
+// count as "not ok".
+function cmdOk(cmd, args = []) {
+  const probeOptions = { timeout: 1500, stdio: "ignore" };
+  let outcome;
+  try {
+    outcome = spawnSync(cmd, args, probeOptions);
+  } catch {
+    return false;
+  }
+  return outcome.status === 0;
+}
+/**
+ * Inspect the host and classify its accelerator. Apple Silicon (darwin +
+ * arm64) is reported as "metal" without probing further; otherwise a working
+ * `nvidia-smi -L` means "cuda", a working `rocminfo` on Linux means "rocm",
+ * and everything else is "none".
+ * @returns {{platform:string, arch:string, appleSilicon:boolean, gpu:"metal"|"cuda"|"rocm"|"none", gpuName?:string}}
+ */
+export function detectHardware() {
+  const platform = os.platform();           // "darwin" | "linux" | "win32"
+  const arch = os.arch();                    // "arm64" | "x64" | ...
+  const host = { platform, arch };
+  if (platform === "darwin" && arch === "arm64") {
+    return { ...host, appleSilicon: true, gpu: "metal", gpuName: cpuBrand() };
+  }
+  let gpu = "none";
+  if (cmdOk("nvidia-smi", ["-L"])) {
+    // nvidia-smi exits 0 when a CUDA GPU + driver are present.
+    gpu = "cuda";
+  } else if (platform === "linux" && cmdOk("rocminfo")) {
+    // rocminfo (ROCm stack) is the clearest AMD/Radeon signal on Linux.
+    gpu = "rocm";
+  }
+  return { ...host, appleSilicon: false, gpu };
+}
+// Model string of the first CPU as reported by the OS, trimmed; undefined when
+// it is empty or cannot be read.
+function cpuBrand() {
+  try {
+    const rawModel = os.cpus()?.[0]?.model || "";
+    const brand = rawModel.trim();
+    return brand || undefined;
+  } catch {
+    return undefined;
+  }
+}
+// Map a hardware probe to the STT setup to use. `backend` names the local
+// engine, `model` is the model id in that engine's own format, `tier` says
+// whether it runs on GPU or CPU. ROCm machines get the CPU setup flagged as
+// `limited`; any GPU value other than metal/cuda/rocm gets the plain CPU setup.
+// Probes the current machine when no `hw` is passed.
+export function recommendStt(hw = detectHardware()) {
+  const cpuFallback = { backend: "faster", device: "cpu", compute_type: "int8", model: "small" };
+  switch (hw.gpu) {
+    case "metal":
+      return {
+        backend: "mlx",
+        device: "metal",
+        model: "mlx-community/whisper-large-v3-turbo",
+        reason: "Apple Silicon: MLX corre en la GPU/Neural Engine (Metal).",
+        tier: "gpu",
+      };
+    case "cuda":
+      return {
+        backend: "faster",
+        device: "cuda",
+        compute_type: "float16",
+        model: "large-v3",
+        reason: "GPU NVIDIA: faster-whisper en CUDA soporta modelos grandes rápido.",
+        tier: "gpu",
+      };
+    case "rocm":
+      return {
+        ...cpuFallback,
+        reason: "Radeon/ROCm no está soportado por CTranslate2 — se usa CPU. (whisper.cpp Vulkan es una mejora futura.)",
+        tier: "cpu",
+        limited: true,
+      };
+    default:
+      return {
+        ...cpuFallback,
+        reason: "Sin GPU acelerada: faster-whisper en CPU con un modelo liviano.",
+        tier: "cpu",
+      };
+  }
+}