@agentprojectcontext/apx 1.42.1 → 1.43.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. package/package.json +1 -1
  2. package/src/core/channels/telegram/api.js +62 -0
  3. package/src/core/channels/telegram/ask-callbacks.js +238 -0
  4. package/src/core/config/index.js +2 -0
  5. package/src/core/config/redact.js +2 -0
  6. package/src/core/confirmation/adapters/telegram.js +20 -37
  7. package/src/core/desktop/process.js +126 -0
  8. package/src/core/voice/stt-hardware.js +87 -0
  9. package/src/core/voice/stt-models.js +97 -0
  10. package/src/core/voice/transcription.js +147 -16
  11. package/src/host/daemon/api/desktop.js +54 -8
  12. package/src/host/daemon/api/transcribe.js +40 -1
  13. package/src/host/daemon/plugins/desktop/index.js +6 -1
  14. package/src/host/daemon/plugins/telegram/index.js +61 -351
  15. package/src/host/daemon/whisper-server.js +18 -8
  16. package/src/host/daemon/whisper-server.py +71 -44
  17. package/src/interfaces/cli/commands/desktop.js +13 -68
  18. package/src/interfaces/desktop/main.js +32 -4
  19. package/src/interfaces/desktop/renderer.js +26 -5
  20. package/src/interfaces/web/dist/assets/index-B0nTYflm.js +651 -0
  21. package/src/interfaces/web/dist/assets/index-B0nTYflm.js.map +1 -0
  22. package/src/interfaces/web/dist/assets/index-C22PmKCD.css +1 -0
  23. package/src/interfaces/web/dist/index.html +2 -2
  24. package/src/interfaces/web/package-lock.json +3 -3
  25. package/src/interfaces/web/src/components/ShortcutInput.tsx +156 -0
  26. package/src/interfaces/web/src/components/voice/VoiceSttCard.tsx +101 -5
  27. package/src/interfaces/web/src/i18n/en.ts +28 -2
  28. package/src/interfaces/web/src/i18n/es.ts +28 -2
  29. package/src/interfaces/web/src/lib/api/desktop.ts +28 -0
  30. package/src/interfaces/web/src/lib/api/voice.ts +26 -2
  31. package/src/interfaces/web/src/screens/modules/DeckScreen.tsx +55 -3
  32. package/src/interfaces/web/src/screens/modules/DesktopScreen.tsx +98 -36
  33. package/src/interfaces/web/dist/assets/index-BReF4_xV.js +0 -646
  34. package/src/interfaces/web/dist/assets/index-BReF4_xV.js.map +0 -1
  35. package/src/interfaces/web/dist/assets/index-wrEbTJbc.css +0 -1
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@agentprojectcontext/apx",
3
- "version": "1.42.1",
3
+ "version": "1.43.0",
4
4
  "description": "APX — unified CLI + daemon for the Agent Project Context (APC) standard.",
5
5
  "publishConfig": {
6
6
  "access": "public"
@@ -0,0 +1,62 @@
1
+ // Low-level Telegram Bot API client — the single place the raw JSON endpoints
2
+ // are called. Higher layers (the poller's send/typing/keyboard methods, the
3
+ // confirmation adapter, the ask flow) compose these instead of hand-rolling
4
+ // fetch boilerplate, so each endpoint's quirks live in exactly one spot. These
5
+ // used to be duplicated across the poller AND the confirm adapter.
6
+ //
7
+ // Every call is token-explicit (no channel/config coupling) so it's reusable
8
+ // from any surface — poller, adapter, routines, tests. Media uploads (multipart
9
+ // FormData) stay in ./media.js; this module owns the JSON endpoints.
10
+ import { API_BASE } from "./media.js";
11
+
12
/**
 * POST a JSON body to a Bot API method and unwrap the response envelope.
 *
 * Best-effort callers (typing, keyboard edits, callback acks) wrap this in
 * their own try/catch.
 *
 * @param {string} token - Bot token, interpolated into the request URL.
 * @param {string} method - Bot API method name, e.g. "sendMessage".
 * @param {object} body - Request payload; serialized with JSON.stringify.
 * @returns {Promise<*>} The `result` field of the parsed response.
 * @throws {Error} When fetch rejects, or when the response is not an
 *   `{ ok: true }` envelope. The message is the response's `description` if
 *   present, otherwise "<method> failed (<http status>)".
 */
async function apiCall(token, method, body) {
  const res = await fetch(`${API_BASE}/bot${token}/${method}`, {
    method: "POST",
    headers: { "content-type": "application/json" },
    body: JSON.stringify(body),
  });
  // The body may be unparseable (proxy error page, empty reply) or parse to a
  // non-object such as `null`. Both collapse to an empty envelope so the
  // caller gets the descriptive error below instead of a TypeError.
  const parsed = await res.json().catch(() => null);
  const json = parsed !== null && typeof parsed === "object" ? parsed : {};
  if (!json.ok) throw new Error(json.description || `${method} failed (${res.status})`);
  return json.result;
}
27
+
28
/**
 * sendMessage: deliver a plain text reply. `reply_markup` (inline keyboard)
 * and `parse_mode` are only added to the payload when truthy.
 */
export function sendMessage(token, chatId, { text, reply_markup, parse_mode } = {}) {
  const payload = {
    chat_id: chatId,
    text,
    ...(reply_markup ? { reply_markup } : {}),
    ...(parse_mode ? { parse_mode } : {}),
  };
  return apiCall(token, "sendMessage", payload);
}
35
+
36
/** sendChatAction: show a chat status, "typing" by default (auto-clears after ~5s). */
export function sendChatAction(token, chatId, action = "typing") {
  const payload = { chat_id: chatId, action };
  return apiCall(token, "sendChatAction", payload);
}
40
+
41
/**
 * editMessageReplyMarkup: swap or clear the inline keyboard of a message that
 * was already sent. `reply_markup` is omitted from the payload when falsy.
 */
export function editMessageReplyMarkup(token, chatId, messageId, reply_markup) {
  const target = { chat_id: chatId, message_id: messageId };
  const payload = reply_markup ? { ...target, reply_markup } : target;
  return apiCall(token, "editMessageReplyMarkup", payload);
}
47
+
48
/**
 * answerCallbackQuery: clear the spinner on a tapped inline button. A truthy
 * `text` is forwarded as well (shown as a toast).
 */
export function answerCallbackQuery(token, callbackQueryId, text) {
  const ack = { callback_query_id: callbackQueryId };
  const payload = text ? { ...ack, text } : ack;
  return apiCall(token, "answerCallbackQuery", payload);
}
54
+
55
/**
 * getUpdates: long-poll for inbound updates starting at `offset`.
 * Resolves to the response's `result` (an empty array when it is falsy).
 * Throws on a non-2xx HTTP status or a response whose `ok` flag is falsy.
 */
export async function getUpdates(token, { offset = 0, timeout = 25 } = {}) {
  const query = `timeout=${timeout}&offset=${offset}`;
  const response = await fetch(`${API_BASE}/bot${token}/getUpdates?${query}`);
  if (!response.ok) {
    throw new Error(`getUpdates ${response.status}`);
  }

  const payload = await response.json();
  if (payload.ok) {
    return payload.result || [];
  }
  throw new Error(payload.description || "telegram error");
}
@@ -0,0 +1,238 @@
1
+ // ask_questions flow orchestration for Telegram, extracted from the host poller
2
+ // so that file stays focused on process lifecycle. Like dispatch.js, every
3
+ // function takes the poller instance (`self`) and reaches its I/O surface
4
+ // (self._send / _editKeyboard / _answerCallback / _startTyping) and config
5
+ // through it. The flow's own state machine lives in ./ask.js; this is the glue
6
+ // that turns its decisions into Telegram messages and re-enters the reply path.
7
+ import * as askFlow from "./ask.js";
8
+ import { resolveBotToken } from "./helpers.js";
9
+ import { buildStreamHandler, runTelegramSuperAgent, telegramErrorText, sendFinalReply } from "./reply.js";
10
+ import { createTelegramConfirmAdapter } from "#core/confirmation/adapters/telegram.js";
11
+ import { getConfirmationStore as getConfirmStore } from "#core/confirmation/pending-store.js";
12
+ import { getRecentTelegramTurnsFromFs, appendGlobalMessage } from "#core/stores/messages.js";
13
+ import { CHANNELS } from "#core/constants/channels.js";
14
+ import { SUPERAGENT_ACTOR_ID } from "#core/identity/index.js";
15
+
16
/**
 * Dispatch an inbound callback_query.
 *
 * Presses whose data starts with "apx:ask:" belong to the ask_questions flow
 * and go to handleAskCallback. Anything else is offered to a confirmation
 * adapter built for this chat; if the adapter does not handle it either, the
 * press is logged as unhandled.
 */
export async function handleCallbackQuery(self, callbackQuery) {
  const payload = callbackQuery.data || "";
  const isAskPress = payload.startsWith("apx:ask:");
  if (isAskPress) {
    await handleAskCallback(self, callbackQuery);
    return;
  }

  const confirmAdapter = createTelegramConfirmAdapter({
    token: resolveBotToken(self.channel),
    chatId: callbackQuery.message?.chat?.id,
    pendingStore: getConfirmStore(),
  });
  const wasHandled = await confirmAdapter.handleCallbackQuery(callbackQuery);
  if (wasHandled) return;
  self.log(`telegram[${self.channel.name}] unhandled callback_query: ${callbackQuery.data}`);
}
37
+
38
/**
 * Post the current question as a new message with its inline keyboard.
 *
 * If the state remembers a previous question message, that message's keyboard
 * is blanked first (best-effort) so only the newest question is tappable. The
 * id of the freshly sent message (or null) is stored on the state, which is
 * then persisted through askFlow.saveState.
 */
export async function renderQuestion(self, state) {
  const questionText = askFlow.formatQuestionText(state);
  const keyboard = askFlow.buildKeyboard(state);

  if (state.messageId) {
    const wipePrevious = {
      chat_id: state.chatId,
      message_id: state.messageId,
      reply_markup: { inline_keyboard: [] },
    };
    try {
      await self._editKeyboard(wipePrevious);
    } catch {
      // best-effort: the old keyboard staying visible is not fatal
    }
  }

  const outgoing = {
    chat_id: state.chatId,
    text: questionText,
    reply_markup: keyboard,
    parse_mode: "Markdown",
  };
  const delivered = await self._send(outgoing);
  state.messageId = delivered?.message_id || null;
  askFlow.saveState(state.chatId, state);
}
58
+
59
/**
 * Begin a new ask flow for the questions carried in `ctx` and draw the first
 * question.
 *
 * The flow is given a `resume` callback that closes over the whole per-turn
 * context: once the compiled answer is available it runs runResumedTurn with
 * that context plus `compiled`.
 */
export async function startAskFlow(self, ctx) {
  const { chat_id: chatId, projectId, authorId, questions } = ctx;

  const resume = async (compiled) => {
    await runResumedTurn(self, { ...ctx, compiled });
  };

  const state = askFlow.startFlow({ chatId, projectId, authorId, questions, resume });
  await renderQuestion(self, state);
}
76
+
77
/**
 * Blank the inline keyboard on one message. Best-effort: a failed edit is
 * swallowed so the caller's follow-up work still runs.
 */
async function clearAskKeyboard(self, chatId, messageId) {
  try {
    await self._editKeyboard({
      chat_id: chatId,
      message_id: messageId,
      reply_markup: { inline_keyboard: [] },
    });
  } catch { /* best-effort */ }
}

/**
 * Apply an inline-keyboard press to the ask flow, then react to the outcome.
 *
 * The press is always acknowledged. What happens next depends on the action
 * returned by askFlow.applyCallback:
 *   - "redraw":  refresh the keyboard on the pressed message (failure logged).
 *   - "advance": draw the next question as a new message.
 *   - "cancel":  clear the keyboard, then send the cancellation notice.
 *   - "done":    clear the keyboard, then call the flow's `resume` callback
 *                with the compiled answers.
 * Presses without a chat id, and presses the flow does not recognise, do
 * nothing further.
 *
 * @param {object} self - Poller instance; supplies _answerCallback,
 *   _editKeyboard, _send, log and channel.name.
 * @param {object} callbackQuery - Telegram callback_query update payload.
 * @returns {Promise<void>}
 */
export async function handleAskCallback(self, callbackQuery) {
  const chatId = callbackQuery.message?.chat?.id;
  if (!chatId) return;
  const messageId = callbackQuery.message?.message_id;

  const result = askFlow.applyCallback(chatId, callbackQuery.data || "");
  // Ack the press regardless — keeps the spinner from hanging client-side.
  await self._answerCallback({ callback_query_id: callbackQuery.id });
  if (!result) return; // stale or unknown press — it was acked just above.

  switch (result.action) {
    case "redraw":
      // Multi-select toggle: refresh the keyboard on the SAME message.
      try {
        await self._editKeyboard({
          chat_id: chatId,
          message_id: messageId,
          reply_markup: askFlow.buildKeyboard(result.state),
        });
      } catch (e) {
        self.log(`telegram[${self.channel.name}] redraw failed: ${e.message}`);
      }
      return;

    case "advance":
      await renderQuestion(self, result.state);
      return;

    case "cancel":
      // The keyboard edit and the notice are independent: a message that can
      // no longer be edited must not suppress the cancellation notice.
      await clearAskKeyboard(self, chatId, messageId);
      try {
        await self._send({ chat_id: chatId, text: "Pregunta cancelada." });
      } catch (e) {
        self.log(`telegram[${self.channel.name}] cancel notice failed: ${e.message}`);
      }
      return;

    case "done":
      await clearAskKeyboard(self, chatId, messageId);
      // Feed the compiled answer back as a synthetic user turn.
      if (typeof result.state.resume === "function") {
        await result.state.resume(result.compiled);
      }
      return;

    default:
      return;
  }
}
128
+
129
/**
 * Offer a free-text user message to the ask flow.
 *
 * When the chat has a pending free-text question, the text is recorded as its
 * answer and a synthetic "next" press moves the flow on: either the next
 * question is drawn, or the flow finishes and its `resume` callback runs.
 *
 * @returns {Promise<boolean>} true iff the ask flow consumed the message, in
 *   which case the normal super-agent path should be skipped for this update.
 */
export async function maybeConsumeAskTextAnswer(self, { chat_id, text }) {
  const applicable = Boolean(chat_id) && Boolean(text) && askFlow.hasPendingFreeText(chat_id);
  if (!applicable) return false;

  const answered = askFlow.applyTextAnswer(chat_id, text);
  if (!answered) return false;

  // Advance: emit a synthetic "next" to move past this question.
  const outcome = askFlow.applyCallback(chat_id, `apx:ask:${answered.correlationId}:next`);
  switch (outcome?.action) {
    case "advance":
      await renderQuestion(self, outcome.state);
      break;
    case "done":
      if (typeof outcome.state.resume === "function") {
        await outcome.state.resume(outcome.compiled);
      }
      break;
    default:
      break;
  }
  return true;
}
154
+
155
/**
 * Run a follow-up super-agent turn whose user prompt is the compiled answers
 * of a finished ask flow.
 *
 * Steps, in order:
 *   1. Record the compiled answers as an inbound user message.
 *   2. Load recent turns for the chat and start the typing indicator.
 *   3. Run the super-agent with the compiled answers as prompt.
 *   4. If the trace contains another ask_questions request, start a new ask
 *      flow and stop there; otherwise send the final reply. A failure in
 *      step 3/4 is logged and turned into an error reply.
 *
 * Does nothing when `ctx.chat_id` is falsy.
 */
export async function runResumedTurn(self, ctx) {
  const {
    chat_id,
    compiled,
    target,
    relationshipBlock,
    allowedTools,
    author,
    agentDisplay,
    update_id,
    sender,
    authorId,
  } = ctx;
  if (!chat_id) return;

  // Log the synthetic user message so getRecentTelegramTurnsFromFs picks it up
  // on the NEXT inbound. Mirrors how a normal text reply would be recorded.
  const actorId = authorId ? String(authorId) : (author || "ask_flow");
  appendGlobalMessage({
    channel: CHANNELS.TELEGRAM,
    direction: "in",
    type: "user",
    actor_id: actorId,
    external_id: `ask-${Date.now()}`,
    author: author || "user",
    body: compiled,
    meta: { chat_id, user_id: authorId || null, tg_channel: self.channel.name, ask_flow: true },
  });

  const previousMessages = getRecentTelegramTurnsFromFs({ chat_id, keepRecent: 40, max_age_hours: 24 });
  const { onEvent, state } = buildStreamHandler(self, { chat_id, update_id, agentDisplay });
  const stopTyping = self._startTyping(chat_id);

  const reply = { text: undefined, author: undefined, usage: null };
  try {
    const sa = await runTelegramSuperAgent(self, {
      chat_id,
      prompt: compiled,
      previousMessages,
      target,
      author,
      relationshipBlock,
      allowedTools,
      onEvent,
    });

    // Did the model ask again? Restart the flow instead of replying.
    const followupAsk = askFlow.extractAskQuestionsFromTrace(sa.trace);
    if (followupAsk) {
      stopTyping();
      await startAskFlow(self, {
        chat_id,
        projectId: target?.id,
        authorId,
        questions: followupAsk,
        author,
        agentDisplay,
        relationshipBlock,
        allowedTools,
        target,
        sender,
        update_id,
      });
      return;
    }

    reply.text = sa.text;
    reply.author = sa.name || agentDisplay;
    reply.usage = sa.usage;
  } catch (e) {
    self.log(`telegram[${self.channel.name}] ask resume failed: ${e.message}`);
    reply.text = telegramErrorText(self, e);
    reply.author = agentDisplay;
  }

  stopTyping();
  await sendFinalReply(self, {
    chat_id,
    update_id,
    replyText: reply.text,
    replyAuthor: reply.author,
    replyActorId: SUPERAGENT_ACTOR_ID,
    replyKind: "superagent",
    saUsage: reply.usage,
    streamedCount: state.streamedCount,
    lastStreamedText: state.lastStreamedText,
    agentDisplay,
    extraMeta: { ask_resume: true },
  });
}
@@ -190,6 +190,8 @@ const CREDENTIAL_PATHS = [
190
190
  ["voice", "tts", "elevenlabs", "api_key"],
191
191
  ["voice", "tts", "openai", "api_key"],
192
192
  ["voice", "tts", "gemini", "api_key"],
193
+ ["transcription", "openai", "api_key"],
194
+ ["transcription", "custom", "api_key"],
193
195
  ["memory", "embeddings", "openai", "api_key"],
194
196
  ["memory", "embeddings", "gemini", "api_key"],
195
197
  ["telegram", "channels"], // entire array — losing it is also a regression
@@ -18,6 +18,8 @@ export const SECRET_PATHS = [
18
18
  "voice.tts.elevenlabs.api_key",
19
19
  "voice.tts.openai.api_key",
20
20
  "voice.tts.gemini.api_key",
21
+ "transcription.openai.api_key",
22
+ "transcription.custom.api_key",
21
23
  "memory.embeddings.openai.api_key",
22
24
  "memory.embeddings.gemini.api_key",
23
25
  // Telegram bot tokens live inside an array — handled separately in redact()
@@ -24,7 +24,10 @@
24
24
  // keyboard but before the user tapped, pendingStore.wasKnown() detects the
25
25
  // SQLite row with no memory entry and we show "Expirado" instead of an error.
26
26
 
27
- const API_BASE = "https://api.telegram.org";
27
+ // Raw Bot API calls go through the shared client so endpoint boilerplate lives
28
+ // in one place (these used to be hand-rolled fetch calls duplicated here).
29
+ import { sendMessage, answerCallbackQuery as apiAnswerCallbackQuery, editMessageReplyMarkup } from "#core/channels/telegram/api.js";
30
+
28
31
  const TIMEOUT_MS = 60_000; // 60 s — long enough for a human, short enough to not block forever
29
32
 
30
33
  /**
@@ -81,51 +84,31 @@ export function createTelegramConfirmAdapter({ token, chatId, pendingStore }) {
81
84
 
82
85
  async function sendConfirmKeyboard(token, chatId, description, correlationId, timeoutMs) {
83
86
  const timeoutSec = Math.round(timeoutMs / 1000);
84
- await fetch(`${API_BASE}/bot${token}/sendMessage`, {
85
- method: "POST",
86
- headers: { "content-type": "application/json" },
87
- body: JSON.stringify({
88
- chat_id: chatId,
89
- text:
90
- `⚠️ *Confirm action*\n\n${escapeMarkdown(description)}\n\n` +
91
- `_Expires in ${timeoutSec}s. No response cancelled._`,
92
- parse_mode: "Markdown",
93
- reply_markup: {
94
- inline_keyboard: [[
95
- { text: "✅ Yes", callback_data: `apx:confirm:${correlationId}:yes` },
96
- { text: "❌ No", callback_data: `apx:confirm:${correlationId}:no` },
97
- ]],
98
- },
99
- }),
87
+ await sendMessage(token, chatId, {
88
+ text:
89
+ `⚠️ *Confirm action*\n\n${escapeMarkdown(description)}\n\n` +
90
+ `_Expires in ${timeoutSec}s. No response → cancelled._`,
91
+ parse_mode: "Markdown",
92
+ reply_markup: {
93
+ inline_keyboard: [[
94
+ { text: "✅ Yes", callback_data: `apx:confirm:${correlationId}:yes` },
95
+ { text: "❌ No", callback_data: `apx:confirm:${correlationId}:no` },
96
+ ]],
97
+ },
100
98
  });
101
99
  }
102
100
 
101
+ // best-effort — Telegram gives only ~30s to answer; after that it's already cleared
103
102
  async function answerCallbackQuery(token, callbackQueryId, text) {
104
103
  try {
105
- await fetch(`${API_BASE}/bot${token}/answerCallbackQuery`, {
106
- method: "POST",
107
- headers: { "content-type": "application/json" },
108
- body: JSON.stringify({ callback_query_id: callbackQueryId, text }),
109
- });
110
- } catch {
111
- // best-effort — Telegram gives only 30s to answer; after that it's already cleared
112
- }
104
+ await apiAnswerCallbackQuery(token, callbackQueryId, text);
105
+ } catch { /* best-effort */ }
113
106
  }
114
107
 
115
108
  async function editMessageButtons(token, chatId, messageId, inlineKeyboard) {
116
109
  try {
117
- await fetch(`${API_BASE}/bot${token}/editMessageReplyMarkup`, {
118
- method: "POST",
119
- headers: { "content-type": "application/json" },
120
- body: JSON.stringify({
121
- chat_id: chatId,
122
- message_id: messageId,
123
- reply_markup: { inline_keyboard: inlineKeyboard },
124
- }),
125
- });
126
- } catch {
127
- // best-effort
128
- }
110
+ await editMessageReplyMarkup(token, chatId, messageId, { inline_keyboard: inlineKeyboard });
111
+ } catch { /* best-effort */ }
129
112
  }
130
113
 
131
114
  // Escape Markdown special chars so description text doesn't break Telegram markup.
@@ -0,0 +1,126 @@
1
+ // Desktop (Electron floating window) process control — shared by the CLI
2
+ // (`apx desktop start/stop/restart`) and the daemon's /desktop/{start,stop}
3
+ // HTTP endpoints, so both spawn/kill the window the exact same way.
4
+ //
5
+ // The window is a detached Electron process (it must survive the spawner so a
6
+ // LaunchAgent / a short-lived CLI invocation doesn't take it down). State is
7
+ // tracked via ~/.apx/desktop.pid.
8
+
9
+ "use strict";
10
+ import fs from "node:fs";
11
+ import os from "node:os";
12
+ import path from "node:path";
13
+ import { spawn, execFileSync } from "node:child_process";
14
+ import { fileURLToPath } from "node:url";
15
+
16
+ const __filename = fileURLToPath(import.meta.url);
17
+ const __dirname = path.dirname(__filename);
18
+
19
+ // src/core/desktop/ → repo root is three levels up.
20
+ const ROOT = path.resolve(__dirname, "..", "..", "..");
21
+ export const DESKTOP_MAIN = path.resolve(__dirname, "..", "..", "interfaces", "desktop", "main.js");
22
+ export const DESKTOP_PID = path.join(os.homedir(), ".apx", "desktop.pid");
23
+ const DESKTOP_LOG = path.join(os.homedir(), ".apx", "desktop.log");
24
+
25
+ // ── PID file ────────────────────────────────────────────────────────────────
26
/**
 * Read the recorded desktop pid from DESKTOP_PID.
 *
 * @returns {number|null} A positive integer pid, or null when the file is
 *   missing, unreadable, or does not contain a positive integer. Rejecting
 *   zero and negative values matters because callers hand the result to
 *   process.kill(), where such values address process groups, not one process.
 */
export function readPid() {
  let raw;
  try {
    raw = fs.readFileSync(DESKTOP_PID, "utf8");
  } catch {
    return null;
  }
  const pid = Number.parseInt(raw.trim(), 10);
  return Number.isInteger(pid) && pid > 0 ? pid : null;
}
29
/** Persist `pid` (stringified) to DESKTOP_PID, creating the parent directory if needed. */
export function writePid(pid) {
  const pidDir = path.dirname(DESKTOP_PID);
  fs.mkdirSync(pidDir, { recursive: true });
  const contents = String(pid);
  fs.writeFileSync(DESKTOP_PID, contents);
}
33
/** Delete the pid file. Any failure (for example the file not existing) is ignored. */
export function clearPid() {
  try {
    fs.unlinkSync(DESKTOP_PID);
  } catch {
    // Nothing to remove, or not removable — ignored on purpose.
  }
}
34
/**
 * Report whether a process with the given pid can be signalled.
 *
 * @param {number|null|undefined} pid - Candidate pid.
 * @returns {boolean} true when process.kill(pid, 0) succeeds. false for
 *   anything that is not a positive integer, and for any error raised by the
 *   probe.
 */
export function pidAlive(pid) {
  const target = Number(pid);
  // Zero and negative values make kill() address a process group rather than
  // a single process, so they must never reach the probe (or a later SIGTERM).
  if (!Number.isInteger(target) || target <= 0) return false;
  try {
    process.kill(target, 0); // signal 0: existence/permission check only
    return true;
  } catch {
    return false;
  }
}
38
/** True when the pid recorded in the pid file belongs to a live process. */
export function isDesktopRunning() {
  const recordedPid = readPid();
  return pidAlive(recordedPid);
}
39
+
40
+ // ── Electron resolution ───────────────────────────────────────────────────
41
// Smoke-test a candidate: run `cmd argv` with output discarded and a 5s cap.
// A pnpm shim can exist as a file while its underlying package was never
// built — actually executing it (callers pass `--version`) smokes that out.
// Returns true only when the command completes without execFileSync throwing.
function electronRuns(cmd, argv) {
  const probeOptions = { stdio: "ignore", timeout: 5000 };
  try {
    execFileSync(cmd, argv, probeOptions);
  } catch {
    return false;
  }
  return true;
}
46
+
47
// Pick how to launch Electron. The result is a descriptor understood by
// buildElectronSpawn(), tried in this order:
//   1. the project-local node_modules/.bin/electron shim,
//   2. electron's cli.js (a ".js" path → run via node),
//   3. whatever `which electron` finds on PATH,
//   4. the literal "npx" as a last resort.
// A candidate only counts if it actually runs `--version`. Never returns null.
export function findElectron() {
  const localBin = path.join(ROOT, "node_modules", ".bin", "electron");
  const localCli = path.join(ROOT, "node_modules", "electron", "cli.js");

  const probes = [
    () => (fs.existsSync(localBin) && electronRuns(localBin, ["--version"]) ? localBin : null),
    () => (fs.existsSync(localCli) && electronRuns(process.execPath, [localCli, "--version"]) ? localCli : null),
    () => {
      try {
        const onPath = execFileSync("which", ["electron"], { stdio: ["ignore", "pipe", "ignore"] })
          .toString()
          .trim();
        return onPath && electronRuns(onPath, ["--version"]) ? onPath : null;
      } catch {
        return null;
      }
    },
  ];

  // Probes are lazy: later ones only run when the earlier ones came up empty.
  for (const probe of probes) {
    const found = probe();
    if (found) return found;
  }
  return "npx";
}
64
+
65
// Translate a findElectron() descriptor plus the app entry into the
// { cmd, argv } pair to spawn:
//   "npx"       → npx -y electron <main> --port <port>
//   "*.js" path → <node> <descriptor> <main> --port <port>
//   other       → <descriptor> <main> --port <port>
export function buildElectronSpawn(descriptor, mainPath, port) {
  const appArgs = [mainPath, "--port", port];
  if (descriptor === "npx") {
    return { cmd: "npx", argv: ["-y", "electron", ...appArgs] };
  }
  const runViaNode = descriptor.endsWith(".js");
  return runViaNode
    ? { cmd: process.execPath, argv: [descriptor, ...appArgs] }
    : { cmd: descriptor, argv: appArgs };
}
75
+
76
+ // ── Lifecycle ───────────────────────────────────────────────────────────────
77
// Spawn the desktop window detached, so it survives the spawner (a LaunchAgent
// or a short-lived CLI invocation must not take it down on exit). No console
// output — callers format their own UX.
//
// Resolves to one of:
//   { ok: true, pid, already: true }  a live window was already recorded
//   { ok: true, pid }                 spawned, and either still running after
//                                     the 1.5s fail-fast window or exited 0
//   { ok: false, error }              app entry missing, spawn failed, or the
//                                     child died inside the fail-fast window
//
// `port` defaults to APX_PORT, then "7430". Child stdout/stderr are appended
// to DESKTOP_LOG when that file can be opened, otherwise discarded.
export async function startDesktopDetached({ port = process.env.APX_PORT || "7430" } = {}) {
  if (isDesktopRunning()) return { ok: true, pid: readPid(), already: true };
  clearPid();
  if (!fs.existsSync(DESKTOP_MAIN)) return { ok: false, error: `desktop app not found at ${DESKTOP_MAIN}` };

  const { cmd, argv } = buildElectronSpawn(findElectron(), DESKTOP_MAIN, String(port));
  let logFd;
  try { logFd = fs.openSync(DESKTOP_LOG, "a"); } catch { logFd = "ignore"; }

  let child;
  try {
    child = spawn(cmd, argv, {
      detached: true,
      stdio: ["ignore", logFd, logFd],
      env: { ...process.env, ELECTRON_ENABLE_LOGGING: "1" },
    });
  } catch (e) {
    return { ok: false, error: e.message };
  } finally {
    // The child holds its own copy of the descriptor; ours is no longer needed.
    if (typeof logFd === "number") { try { fs.closeSync(logFd); } catch {} }
  }

  const res = await new Promise((resolve) => {
    let settled = false;
    let timer = null;
    const settle = (outcome) => {
      if (settled) return;
      settled = true;
      clearTimeout(timer); // don't hold the event loop open after an early result
      resolve(outcome);
    };
    // Spawn failures such as ENOENT/EACCES are delivered asynchronously as an
    // 'error' event, not thrown by spawn(). Without a listener that event is
    // an uncaught exception in the host process. The listener stays attached
    // so a late 'error' is ignored rather than fatal.
    child.on("error", (err) => settle({ ok: false, error: err.message }));
    child.on("exit", (code, signal) => {
      if (code === 0) { settle({ ok: true }); return; }
      const why = code === null && signal ? `on signal ${signal}` : `with code ${code}`;
      settle({ ok: false, error: `desktop exited ${why}` });
    });
    timer = setTimeout(() => {
      // Still running after the fail-fast window: let it outlive this process.
      child.unref();
      settle({ ok: true });
    }, 1500);
  });
  if (!res.ok) return { ok: false, error: res.error };

  if (child.pid) writePid(child.pid);
  return { ok: true, pid: child.pid };
}
113
+
114
// Stop the running window by sending SIGTERM to the recorded pid.
//
// Returns:
//   { ok: true, stopped: true, pid }  the signal was delivered
//   { ok: true, stopped: false }      nothing was running (the pid file is
//                                     cleared either way)
//   { ok: false, error }              the signal could not be delivered
export function stopDesktop() {
  const pid = readPid();
  if (!pidAlive(pid)) { clearPid(); return { ok: true, stopped: false }; }
  try {
    process.kill(pid, "SIGTERM");
  } catch (e) {
    // The process can exit between the liveness probe and the signal. That is
    // "nothing was running", not a failure — and the pid file is stale now.
    if (e?.code === "ESRCH") { clearPid(); return { ok: true, stopped: false }; }
    return { ok: false, error: e.message };
  }
  clearPid();
  return { ok: true, stopped: true, pid };
}
@@ -0,0 +1,87 @@
1
+ // Hardware probe + STT engine recommendation.
2
+ //
3
+ // The transcription backend should adapt to the machine instead of making the
4
+ // user understand CTranslate2 vs MLX vs whisper.cpp:
5
+ //
6
+ // Apple Silicon (Metal) → mlx-whisper, large-v3-turbo (GPU/ANE accelerated)
7
+ // NVIDIA (CUDA) → faster-whisper cuda, large-v3 (GPU accelerated)
8
+ // AMD / Radeon → faster-whisper cpu (limited) (no ROCm in CT2)
9
+ // CPU only → faster-whisper cpu, small (safe + light)
10
+ //
11
+ // Detection is dependency-free and best-effort: short-timeout probes of
12
+ // nvidia-smi / rocminfo, plus os.platform()/os.arch(). Anything uncertain
13
+ // degrades to the CPU recommendation.
14
+ import os from "node:os";
15
+ import { spawnSync } from "node:child_process";
16
+
17
// Run `cmd args` with output discarded and a 1.5s cap; true only when it
// exits with status 0. Anything else — including a throw — counts as a miss.
function cmdOk(cmd, args = []) {
  let outcome;
  try {
    outcome = spawnSync(cmd, args, { timeout: 1500, stdio: "ignore" });
  } catch {
    return false;
  }
  return outcome.status === 0;
}
25
+
26
/**
 * Probe the machine and classify its accelerator. Checks run in this order:
 * Apple Silicon (darwin + arm64) → "metal"; `nvidia-smi -L` succeeding →
 * "cuda"; on Linux, `rocminfo` succeeding → "rocm"; otherwise "none".
 * `gpuName` is only set on the Apple Silicon branch.
 * @returns {{platform:string, arch:string, appleSilicon:boolean, gpu:"metal"|"cuda"|"rocm"|"none", gpuName?:string}}
 */
export function detectHardware() {
  const platform = os.platform(); // "darwin" | "linux" | "win32"
  const arch = os.arch(); // "arm64" | "x64" | ...
  const base = { platform, arch };

  if (platform === "darwin" && arch === "arm64") {
    return { ...base, appleSilicon: true, gpu: "metal", gpuName: cpuBrand() };
  }

  let gpu = "none";
  if (cmdOk("nvidia-smi", ["-L"])) {
    // NVIDIA: nvidia-smi exits 0 when a CUDA GPU + driver are present.
    gpu = "cuda";
  } else if (platform === "linux" && cmdOk("rocminfo")) {
    // AMD/Radeon: rocminfo (ROCm stack) is the clearest signal on Linux.
    gpu = "rocm";
  }
  return { ...base, appleSilicon: false, gpu };
}
48
+
49
// Trimmed model string of the first CPU reported by os.cpus(), or undefined
// when it is missing, blank, or the lookup throws.
function cpuBrand() {
  try {
    const model = os.cpus()?.[0]?.model || "";
    const trimmed = model.trim();
    return trimmed || undefined;
  } catch {
    return undefined;
  }
}
52
+
53
// Map a hardware probe to the recommended STT backend + model. `backend` names
// a local engine implementation; `model` is the repo id in that engine's
// format. Only `hw.gpu` is consulted: "metal" and "cuda" get GPU-tier picks,
// "rocm" gets the CPU pick flagged `limited`, anything else the plain CPU pick.
export function recommendStt(hw = detectHardware()) {
  // Both CPU-tier answers share everything except the reason text.
  const cpuPick = (reason) => ({
    backend: "faster", device: "cpu", compute_type: "int8",
    model: "small",
    reason,
    tier: "cpu",
  });

  switch (hw.gpu) {
    case "metal":
      return {
        backend: "mlx", device: "metal",
        model: "mlx-community/whisper-large-v3-turbo",
        reason: "Apple Silicon: MLX corre en la GPU/Neural Engine (Metal).",
        tier: "gpu",
      };
    case "cuda":
      return {
        backend: "faster", device: "cuda", compute_type: "float16",
        model: "large-v3",
        reason: "GPU NVIDIA: faster-whisper en CUDA soporta modelos grandes rápido.",
        tier: "gpu",
      };
    case "rocm":
      return {
        ...cpuPick("Radeon/ROCm no está soportado por CTranslate2 — se usa CPU. (whisper.cpp Vulkan es una mejora futura.)"),
        limited: true,
      };
    default:
      return cpuPick("Sin GPU acelerada: faster-whisper en CPU con un modelo liviano.");
  }
}