@agentprojectcontext/apx 1.42.1 → 1.43.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. package/package.json +1 -1
  2. package/src/core/channels/telegram/api.js +62 -0
  3. package/src/core/channels/telegram/ask-callbacks.js +238 -0
  4. package/src/core/config/index.js +2 -0
  5. package/src/core/config/redact.js +2 -0
  6. package/src/core/confirmation/adapters/telegram.js +20 -37
  7. package/src/core/desktop/process.js +126 -0
  8. package/src/core/voice/stt-hardware.js +87 -0
  9. package/src/core/voice/stt-models.js +97 -0
  10. package/src/core/voice/transcription.js +147 -16
  11. package/src/host/daemon/api/desktop.js +54 -8
  12. package/src/host/daemon/api/transcribe.js +40 -1
  13. package/src/host/daemon/plugins/desktop/index.js +6 -1
  14. package/src/host/daemon/plugins/telegram/index.js +61 -351
  15. package/src/host/daemon/whisper-server.js +18 -8
  16. package/src/host/daemon/whisper-server.py +71 -44
  17. package/src/interfaces/cli/commands/desktop.js +13 -68
  18. package/src/interfaces/desktop/main.js +32 -4
  19. package/src/interfaces/desktop/renderer.js +26 -5
  20. package/src/interfaces/web/dist/assets/index-B0nTYflm.js +651 -0
  21. package/src/interfaces/web/dist/assets/index-B0nTYflm.js.map +1 -0
  22. package/src/interfaces/web/dist/assets/index-C22PmKCD.css +1 -0
  23. package/src/interfaces/web/dist/index.html +2 -2
  24. package/src/interfaces/web/package-lock.json +3 -3
  25. package/src/interfaces/web/src/components/ShortcutInput.tsx +156 -0
  26. package/src/interfaces/web/src/components/voice/VoiceSttCard.tsx +101 -5
  27. package/src/interfaces/web/src/i18n/en.ts +28 -2
  28. package/src/interfaces/web/src/i18n/es.ts +28 -2
  29. package/src/interfaces/web/src/lib/api/desktop.ts +28 -0
  30. package/src/interfaces/web/src/lib/api/voice.ts +26 -2
  31. package/src/interfaces/web/src/screens/modules/DeckScreen.tsx +55 -3
  32. package/src/interfaces/web/src/screens/modules/DesktopScreen.tsx +98 -36
  33. package/src/interfaces/web/dist/assets/index-BReF4_xV.js +0 -646
  34. package/src/interfaces/web/dist/assets/index-BReF4_xV.js.map +0 -1
  35. package/src/interfaces/web/dist/assets/index-wrEbTJbc.css +0 -1
@@ -28,22 +28,14 @@
28
28
  // }
29
29
 
30
30
  // This poller is intentionally thin: per-update logic lives in core/channels/
31
- // telegram/ (dispatch + reply + ask + inbound). It keeps only what the *running
32
- // process* needs lifecycle, the poll loop, offset state and the inline-keyboard
33
- // callbacks. The earlier dispatch extraction left a pile of now-dead imports
34
- // here; only what's actually referenced below remains.
35
- import { getRecentTelegramTurnsFromFs, appendGlobalMessage } from "#core/stores/messages.js";
31
+ // telegram/ — dispatch (inbound routing), reply (the super-agent turn),
32
+ // ask-callbacks (the ask_questions flow), inbound/ (media), and the raw Bot API
33
+ // in api.js + media.js. The poller keeps only what the *running process* needs:
34
+ // lifecycle, the poll loop, offset state, and the thin I/O surface (self._send
35
+ // etc.) that the extracted core logic calls back into through `self`.
36
+ import { appendGlobalMessage } from "#core/stores/messages.js";
36
37
  import { resolveAgentName, SUPERAGENT_ACTOR_ID } from "#core/identity/index.js";
37
- import { getConfirmationStore as getConfirmStore } from "#core/confirmation/pending-store.js";
38
38
  import { CHANNELS } from "#core/constants/channels.js";
39
- import { createTelegramConfirmAdapter } from "#core/confirmation/adapters/telegram.js";
40
- import * as askFlow from "#core/channels/telegram/ask.js";
41
-
42
- // API_BASE re-imported from #core/channels/telegram/media.js below
43
- const nowIso = () => new Date().toISOString().replace(/\.\d{3}Z$/, "Z");
44
-
45
- // All non-class-bound channel logic lives in core/channels/telegram/ — this
46
- // file stays focused on the poller class + plugin lifecycle wiring.
47
39
  import {
48
40
  loadState,
49
41
  saveState,
@@ -54,12 +46,13 @@ import {
54
46
  sleep,
55
47
  } from "#core/channels/telegram/helpers.js";
56
48
  import { handleUpdate } from "#core/channels/telegram/dispatch.js";
57
- import { buildStreamHandler, runTelegramSuperAgent, telegramErrorText, sendFinalReply } from "#core/channels/telegram/reply.js";
58
-
59
- // ---------- media sending helpers (re-exports) ------------------------------
60
- import { sendPhoto, sendVoice, sendDocument, sendAudio, API_BASE } from "#core/channels/telegram/media.js";
49
+ import { handleCallbackQuery, startAskFlow, maybeConsumeAskTextAnswer } from "#core/channels/telegram/ask-callbacks.js";
50
+ import { sendMessage, sendChatAction, editMessageReplyMarkup, answerCallbackQuery, getUpdates } from "#core/channels/telegram/api.js";
51
+ import { sendPhoto, sendVoice, sendDocument, sendAudio } from "#core/channels/telegram/media.js";
61
52
  export { sendPhoto, sendVoice, sendDocument, sendAudio };
62
53
 
54
+ const nowIso = () => new Date().toISOString().replace(/\.\d{3}Z$/, "Z");
55
+
63
56
  // ---------- per-channel poller ----------------------------------------------
64
57
 
65
58
  class ChannelPoller {
@@ -157,13 +150,7 @@ class ChannelPoller {
157
150
  }
158
151
 
159
152
  async _getUpdates() {
160
- const token = resolveBotToken(this.channel);
161
- const url = `${API_BASE}/bot${token}/getUpdates?timeout=25&offset=${this.offset}`;
162
- const res = await fetch(url);
163
- if (!res.ok) throw new Error(`getUpdates ${res.status}`);
164
- const json = await res.json();
165
- if (!json.ok) throw new Error(json.description || "telegram error");
166
- return json.result || [];
153
+ return getUpdates(resolveBotToken(this.channel), { offset: this.offset });
167
154
  }
168
155
 
169
156
  // Method body lives in ./dispatch.js as `handleUpdate(self, u)` so this file
@@ -173,270 +160,42 @@ class ChannelPoller {
173
160
  return handleUpdate(this, u);
174
161
  }
175
162
 
163
+ // ── ask_questions flow ──────────────────────────────────────────────────
164
+ // Orchestration lives in ./ask-callbacks.js (state machine in ./ask.js). These
165
+ // are thin delegates: dispatch.js reaches _startAskFlow / _maybeConsumeAsk...
166
+ // through `self`, and inbound callback_query routes through _handleCallbackQuery.
167
+ // The core functions call back into this poller's I/O surface (_send etc.).
176
168
  async _handleCallbackQuery(callbackQuery) {
177
- // Route ask_questions button presses before the confirmation adapter —
178
- // both use `apx:<verb>:...` namespacing but ask owns its own state.
179
- const data = callbackQuery.data || "";
180
- if (data.startsWith("apx:ask:")) {
181
- await this._handleAskCallback(callbackQuery);
182
- return;
183
- }
184
-
185
- const adapter = createTelegramConfirmAdapter({
186
- token: resolveBotToken(this.channel),
187
- chatId: callbackQuery.message?.chat?.id,
188
- pendingStore: getConfirmStore(),
189
- });
190
- const handled = await adapter.handleCallbackQuery(callbackQuery);
191
- if (!handled) {
192
- this.log(`telegram[${this.channel.name}] unhandled callback_query: ${callbackQuery.data}`);
193
- }
194
- }
195
-
196
- // ── ask_questions: state-machine helpers ───────────────────────────────
197
- // The flow lives in telegram-ask.js; this class owns the I/O (sending
198
- // messages, editing keyboards, re-entering the super-agent loop with the
199
- // compiled answer once the flow finishes).
200
-
201
- async _renderQuestion(state) {
202
- const text = askFlow.formatQuestionText(state);
203
- const reply_markup = askFlow.buildKeyboard(state);
204
- // If we already have a message for the previous question, leave its
205
- // keyboard wiped — we draw a fresh message per question for clearer
206
- // history in the chat (the question text stays as a record).
207
- if (state.messageId) {
208
- try {
209
- await this._editKeyboard({
210
- chat_id: state.chatId,
211
- message_id: state.messageId,
212
- reply_markup: { inline_keyboard: [] },
213
- });
214
- } catch { /* best-effort */ }
215
- }
216
- const sent = await this._send({
217
- chat_id: state.chatId,
218
- text,
219
- reply_markup,
220
- parse_mode: "Markdown",
221
- });
222
- state.messageId = sent?.message_id || null;
223
- askFlow.saveState(state.chatId, state);
169
+ return handleCallbackQuery(this, callbackQuery);
224
170
  }
225
171
 
226
- // Kick off a brand-new ask flow after the super-agent called ask_questions.
227
- // The flow's `resume` callback captures the per-turn context (sender,
228
- // relationship, project) so when the compiled answer arrives we can run
229
- // another super-agent turn without retyping all the inputs.
230
172
  async _startAskFlow(ctx) {
231
- const state = askFlow.startFlow({
232
- chatId: ctx.chat_id,
233
- projectId: ctx.projectId,
234
- authorId: ctx.authorId,
235
- questions: ctx.questions,
236
- resume: async (compiled) => {
237
- await this._runResumedTurn({ ...ctx, compiled });
238
- },
239
- });
240
- await this._renderQuestion(state);
173
+ return startAskFlow(this, ctx);
241
174
  }
242
175
 
243
- // Apply an inline-keyboard press, then react: redraw, advance, or finish.
244
- async _handleAskCallback(callbackQuery) {
245
- const chatId = callbackQuery.message?.chat?.id;
246
- if (!chatId) return;
247
- const result = askFlow.applyCallback(chatId, callbackQuery.data || "");
248
- // Ack the press regardless — keeps the spinner from hanging client-side.
249
- await this._answerCallback({ callback_query_id: callbackQuery.id });
250
- if (!result) return; // stale or unknown — adapter already ack'd.
251
-
252
- if (result.action === "redraw") {
253
- // Multi-select toggle: just refresh the keyboard on the SAME message.
254
- try {
255
- await this._editKeyboard({
256
- chat_id: chatId,
257
- message_id: callbackQuery.message?.message_id,
258
- reply_markup: askFlow.buildKeyboard(result.state),
259
- });
260
- } catch (e) {
261
- this.log(`telegram[${this.channel.name}] redraw failed: ${e.message}`);
262
- }
263
- return;
264
- }
265
- if (result.action === "advance") {
266
- await this._renderQuestion(result.state);
267
- return;
268
- }
269
- if (result.action === "cancel") {
270
- try {
271
- await this._editKeyboard({
272
- chat_id: chatId,
273
- message_id: callbackQuery.message?.message_id,
274
- reply_markup: { inline_keyboard: [] },
275
- });
276
- await this._send({ chat_id: chatId, text: "Pregunta cancelada." });
277
- } catch { /* best-effort */ }
278
- return;
279
- }
280
- if (result.action === "done") {
281
- try {
282
- await this._editKeyboard({
283
- chat_id: chatId,
284
- message_id: callbackQuery.message?.message_id,
285
- reply_markup: { inline_keyboard: [] },
286
- });
287
- } catch { /* best-effort */ }
288
- // Feed the compiled answer back as a synthetic user turn.
289
- if (typeof result.state.resume === "function") {
290
- await result.state.resume(result.compiled);
291
- }
292
- }
293
- }
294
-
295
- // Apply a free-text user reply when there's a pending free-text question.
296
- // Returns true iff the message was consumed by the ask flow (so the normal
297
- // super-agent path should be skipped for this update).
298
- async _maybeConsumeAskTextAnswer({ chat_id, text }) {
299
- if (!chat_id || !text) return false;
300
- if (!askFlow.hasPendingFreeText(chat_id)) return false;
301
- const state = askFlow.applyTextAnswer(chat_id, text);
302
- if (!state) return false;
303
- // Advance: emit a synthetic "next" to move past this question.
304
- const next = askFlow.applyCallback(
305
- chat_id,
306
- `apx:ask:${state.correlationId}:next`,
307
- );
308
- if (!next) return true;
309
- if (next.action === "advance") {
310
- await this._renderQuestion(next.state);
311
- return true;
312
- }
313
- if (next.action === "done") {
314
- if (typeof next.state.resume === "function") {
315
- await next.state.resume(next.compiled);
316
- }
317
- return true;
318
- }
319
- return true;
176
+ async _maybeConsumeAskTextAnswer(args) {
177
+ return maybeConsumeAskTextAnswer(this, args);
320
178
  }
321
179
 
322
- // Run a follow-up super-agent turn with the compiled answers as the user
323
- // prompt. Shares the exact reply path as a normal inbound turn (core/channels/
324
- // telegram/reply.js) — only the photo/audio/reset preamble is skipped.
325
- // Re-enters the ask flow if the model decides to ask again.
326
- async _runResumedTurn(ctx) {
327
- const { chat_id, compiled, target, relationshipBlock, allowedTools, author, agentDisplay, update_id, sender, authorId } = ctx;
328
- if (!chat_id) return;
329
- // Log the synthetic user message so getRecentTelegramTurnsFromFs picks
330
- // it up on the NEXT inbound. Mirrors how a normal text reply would be
331
- // recorded.
332
- appendGlobalMessage({
333
- channel: CHANNELS.TELEGRAM,
334
- direction: "in",
335
- type: "user",
336
- actor_id: authorId ? String(authorId) : (author || "ask_flow"),
337
- external_id: `ask-${Date.now()}`,
338
- author: author || "user",
339
- body: compiled,
340
- meta: {
341
- chat_id,
342
- user_id: authorId || null,
343
- tg_channel: this.channel.name,
344
- ask_flow: true,
345
- },
346
- });
347
-
348
- const previousMessages = getRecentTelegramTurnsFromFs({
349
- chat_id,
350
- keepRecent: 40,
351
- max_age_hours: 24,
352
- });
353
-
354
- // Drive the resume through the SAME shared reply path as a normal inbound
355
- // turn (see core/channels/telegram/reply.js): streaming, the autonomy budget
356
- // (maxIters), the never-silent floor, localized errors and rich channelMeta.
357
- // This used to be a hand-rolled copy that silently lagged behind the main
358
- // path — now there's one source of truth.
359
- const { onEvent, state } = buildStreamHandler(this, { chat_id, update_id, agentDisplay });
360
- const stopTyping = this._startTyping(chat_id);
361
- let replyText;
362
- let replyAuthor;
363
- let saUsage = null;
364
- try {
365
- const sa = await runTelegramSuperAgent(this, {
366
- chat_id,
367
- prompt: compiled,
368
- previousMessages,
369
- target,
370
- author,
371
- relationshipBlock,
372
- allowedTools,
373
- onEvent,
374
- });
375
-
376
- // Did the model ask again? Restart the flow instead of replying.
377
- const followupAsk = askFlow.extractAskQuestionsFromTrace(sa.trace);
378
- if (followupAsk) {
379
- stopTyping();
380
- await this._startAskFlow({
381
- chat_id,
382
- projectId: target?.id,
383
- authorId,
384
- questions: followupAsk,
385
- author,
386
- agentDisplay,
387
- relationshipBlock,
388
- allowedTools,
389
- target,
390
- sender,
391
- update_id,
392
- });
393
- return;
394
- }
395
- replyText = sa.text;
396
- replyAuthor = sa.name || agentDisplay;
397
- saUsage = sa.usage;
398
- } catch (e) {
399
- this.log(`telegram[${this.channel.name}] ask resume failed: ${e.message}`);
400
- replyText = telegramErrorText(this, e);
401
- replyAuthor = agentDisplay;
402
- }
403
-
404
- stopTyping();
405
- await sendFinalReply(this, {
406
- chat_id,
407
- update_id,
408
- replyText,
409
- replyAuthor,
410
- replyActorId: SUPERAGENT_ACTOR_ID,
411
- replyKind: "superagent",
412
- saUsage,
413
- streamedCount: state.streamedCount,
414
- lastStreamedText: state.lastStreamedText,
415
- agentDisplay,
416
- extraMeta: { ask_resume: true },
417
- });
180
+ // Resolve the bot token + outbound chat for this channel — the single place
181
+ // the "no token / no chat" guards live, shared by every send method.
182
+ _resolve(chat_id) {
183
+ const token = resolveBotToken(this.channel);
184
+ if (!token) throw new Error(`channel ${this.channel.name}: no bot_token`);
185
+ const target = chat_id || resolveChatId(this.channel);
186
+ if (!target) throw new Error(`channel ${this.channel.name}: no chat_id`);
187
+ return { token, target };
418
188
  }
419
189
 
420
- // Show "typing..." indicator in the chat. Telegram clears it automatically
421
- // after 5 seconds, so call this every ~4s while a long operation is going.
190
+ // Show "typing..." indicator. Telegram clears it after ~5s; _startTyping
191
+ // re-pings every 4s. Best-effort failures aren't worth surfacing.
422
192
  async _typing(chat_id) {
423
- try {
424
- const token = resolveBotToken(this.channel);
425
- if (!token || !chat_id) return;
426
- const url = `${API_BASE}/bot${token}/sendChatAction`;
427
- await fetch(url, {
428
- method: "POST",
429
- headers: { "content-type": "application/json" },
430
- body: JSON.stringify({ chat_id, action: "typing" }),
431
- });
432
- } catch {
433
- // best-effort; failures here aren't worth surfacing
434
- }
193
+ const token = resolveBotToken(this.channel);
194
+ if (!token || !chat_id) return;
195
+ try { await sendChatAction(token, chat_id); } catch { /* best-effort */ }
435
196
  }
436
197
 
437
- // Returns a function that pings sendChatAction every 4s until called as
438
- // stop(). Used to wrap the engine round-trip in a "typing" loop so the
439
- // user sees feedback while qwen thinks.
198
+ // Returns a stop() fn; pings the typing indicator every 4s until called.
440
199
  _startTyping(chat_id) {
441
200
  if (!chat_id) return () => {};
442
201
  let stopped = false;
@@ -450,58 +209,27 @@ class ChannelPoller {
450
209
  }
451
210
 
452
211
  async _send({ chat_id, text, reply_markup, parse_mode }) {
453
- const token = resolveBotToken(this.channel);
454
- if (!token) throw new Error(`channel ${this.channel.name}: no bot_token`);
455
- const target = chat_id || resolveChatId(this.channel);
456
- if (!target) throw new Error(`channel ${this.channel.name}: no chat_id`);
457
- const url = `${API_BASE}/bot${token}/sendMessage`;
458
- const body = { chat_id: target, text };
459
- if (reply_markup) body.reply_markup = reply_markup;
460
- if (parse_mode) body.parse_mode = parse_mode;
461
- const res = await fetch(url, {
462
- method: "POST",
463
- headers: { "content-type": "application/json" },
464
- body: JSON.stringify(body),
465
- });
466
- const json = await res.json();
467
- if (!json.ok) throw new Error(json.description || `send failed (${res.status})`);
468
- return json.result;
212
+ const { token, target } = this._resolve(chat_id);
213
+ return sendMessage(token, target, { text, reply_markup, parse_mode });
469
214
  }
470
215
 
471
- // Replace just the inline keyboard on a previously-sent message (used to
472
- // refresh after a multi-select toggle, or to wipe buttons once the flow
473
- // has moved on). Best-effort: failures are logged but don't break the flow.
216
+ // Replace/clear the inline keyboard on a sent message. Best-effort: logged.
474
217
  async _editKeyboard({ chat_id, message_id, reply_markup }) {
475
218
  const token = resolveBotToken(this.channel);
476
219
  if (!token) return;
477
220
  try {
478
- const url = `${API_BASE}/bot${token}/editMessageReplyMarkup`;
479
- const body = { chat_id, message_id };
480
- if (reply_markup) body.reply_markup = reply_markup;
481
- await fetch(url, {
482
- method: "POST",
483
- headers: { "content-type": "application/json" },
484
- body: JSON.stringify(body),
485
- });
221
+ await editMessageReplyMarkup(token, chat_id, message_id, reply_markup);
486
222
  } catch (e) {
487
223
  this.log(`telegram[${this.channel.name}] editMessageReplyMarkup failed: ${e.message}`);
488
224
  }
489
225
  }
490
226
 
491
- // Acknowledge a callback button press so the user's Telegram client clears
492
- // the spinner on the tapped button. Optional `text` shows a small toast.
227
+ // Ack a callback button press so the client clears the spinner (+ optional toast).
493
228
  async _answerCallback({ callback_query_id, text }) {
494
229
  const token = resolveBotToken(this.channel);
495
230
  if (!token) return;
496
231
  try {
497
- const url = `${API_BASE}/bot${token}/answerCallbackQuery`;
498
- const body = { callback_query_id };
499
- if (text) body.text = text;
500
- await fetch(url, {
501
- method: "POST",
502
- headers: { "content-type": "application/json" },
503
- body: JSON.stringify(body),
504
- });
232
+ await answerCallbackQuery(token, callback_query_id, text);
505
233
  } catch (e) {
506
234
  this.log(`telegram[${this.channel.name}] answerCallbackQuery failed: ${e.message}`);
507
235
  }
@@ -509,40 +237,42 @@ class ChannelPoller {
509
237
 
510
238
  /** Send a photo via this channel */
511
239
  async _sendPhoto({ chat_id, photo, caption, parse_mode }) {
512
- const token = resolveBotToken(this.channel);
513
- if (!token) throw new Error(`channel ${this.channel.name}: no bot_token`);
514
- const target = chat_id || resolveChatId(this.channel);
515
- if (!target) throw new Error(`channel ${this.channel.name}: no chat_id`);
240
+ const { token, target } = this._resolve(chat_id);
516
241
  return sendPhoto(token, target, photo, { caption, parse_mode });
517
242
  }
518
243
 
519
244
  /** Send a voice message via this channel */
520
245
  async _sendVoice({ chat_id, audio, caption, duration }) {
521
- const token = resolveBotToken(this.channel);
522
- if (!token) throw new Error(`channel ${this.channel.name}: no bot_token`);
523
- const target = chat_id || resolveChatId(this.channel);
246
+ const { token, target } = this._resolve(chat_id);
524
247
  return sendVoice(token, target, audio, { caption, duration });
525
248
  }
526
249
 
527
250
  /** Send a document (PDF, zip, etc) via this channel */
528
251
  async _sendDocument({ chat_id, document, caption, filename, mime_type }) {
529
- const token = resolveBotToken(this.channel);
530
- if (!token) throw new Error(`channel ${this.channel.name}: no bot_token`);
531
- const target = chat_id || resolveChatId(this.channel);
252
+ const { token, target } = this._resolve(chat_id);
532
253
  return sendDocument(token, target, document, { caption, filename, mime_type });
533
254
  }
534
255
 
535
256
  /** Send an audio file via this channel */
536
257
  async _sendAudio({ chat_id, audio, caption, title, performer }) {
537
- const token = resolveBotToken(this.channel);
538
- if (!token) throw new Error(`channel ${this.channel.name}: no bot_token`);
539
- const target = chat_id || resolveChatId(this.channel);
258
+ const { token, target } = this._resolve(chat_id);
540
259
  return sendAudio(token, target, audio, { caption, title, performer });
541
260
  }
542
261
  }
543
262
 
544
263
  // ---------- plugin export ---------------------------------------------------
545
264
 
265
+ // Pick the poller to send through: the named channel if given, else the first
266
+ // channel with a usable bot token. Shared by every outbound helper below.
267
+ function pickPoller(pollers, channelName) {
268
+ const p =
269
+ (channelName && pollers.find((pp) => pp.channel.name === channelName)) ||
270
+ pollers.find((pp) => resolveBotToken(pp.channel)) ||
271
+ null;
272
+ if (!p) throw new Error("no telegram channel available");
273
+ return p;
274
+ }
275
+
546
276
  export default {
547
277
  id: "telegram",
548
278
 
@@ -582,11 +312,7 @@ export default {
582
312
  // the outbound on `messages` of the channel's target project so audit
583
313
  // trails are complete.
584
314
  async send({ channel: channelName, chat_id, text, author = resolveAgentName(config), project }) {
585
- const p =
586
- (channelName && pollers.find((pp) => pp.channel.name === channelName)) ||
587
- pollers.find((pp) => resolveBotToken(pp.channel)) ||
588
- null;
589
- if (!p) throw new Error("no telegram channel available");
315
+ const p = pickPoller(pollers, channelName);
590
316
  const result = await p._send({ chat_id, text });
591
317
  appendGlobalMessage({
592
318
  channel: CHANNELS.TELEGRAM,
@@ -612,11 +338,7 @@ export default {
612
338
  * opts: { caption, parse_mode, channel, author }
613
339
  */
614
340
  async sendPhoto({ channel: channelName, chat_id, photo, caption, parse_mode, author = resolveAgentName(config) }) {
615
- const p =
616
- (channelName && pollers.find((pp) => pp.channel.name === channelName)) ||
617
- pollers.find((pp) => resolveBotToken(pp.channel)) ||
618
- null;
619
- if (!p) throw new Error("no telegram channel available");
341
+ const p = pickPoller(pollers, channelName);
620
342
  const result = await p._sendPhoto({ chat_id, photo, caption, parse_mode });
621
343
  appendGlobalMessage({
622
344
  channel: CHANNELS.TELEGRAM,
@@ -636,11 +358,7 @@ export default {
636
358
  * audio: local file path or Buffer
637
359
  */
638
360
  async sendVoice({ channel: channelName, chat_id, audio, caption, duration, author = resolveAgentName(config) }) {
639
- const p =
640
- (channelName && pollers.find((pp) => pp.channel.name === channelName)) ||
641
- pollers.find((pp) => resolveBotToken(pp.channel)) ||
642
- null;
643
- if (!p) throw new Error("no telegram channel available");
361
+ const p = pickPoller(pollers, channelName);
644
362
  const result = await p._sendVoice({ chat_id, audio, caption, duration });
645
363
  appendGlobalMessage({
646
364
  channel: CHANNELS.TELEGRAM,
@@ -660,11 +378,7 @@ export default {
660
378
  * document: local file path, Buffer, or public https URL.
661
379
  */
662
380
  async sendDocument({ channel: channelName, chat_id, document, caption, filename, mime_type, author = resolveAgentName(config) }) {
663
- const p =
664
- (channelName && pollers.find((pp) => pp.channel.name === channelName)) ||
665
- pollers.find((pp) => resolveBotToken(pp.channel)) ||
666
- null;
667
- if (!p) throw new Error("no telegram channel available");
381
+ const p = pickPoller(pollers, channelName);
668
382
  const result = await p._sendDocument({ chat_id, document, caption, filename, mime_type });
669
383
  appendGlobalMessage({
670
384
  channel: CHANNELS.TELEGRAM,
@@ -684,11 +398,7 @@ export default {
684
398
  * audio: local file path or Buffer
685
399
  */
686
400
  async sendAudio({ channel: channelName, chat_id, audio, caption, title, performer, author = resolveAgentName(config) }) {
687
- const p =
688
- (channelName && pollers.find((pp) => pp.channel.name === channelName)) ||
689
- pollers.find((pp) => resolveBotToken(pp.channel)) ||
690
- null;
691
- if (!p) throw new Error("no telegram channel available");
401
+ const p = pickPoller(pollers, channelName);
692
402
  const result = await p._sendAudio({ chat_id, audio, caption, title, performer });
693
403
  appendGlobalMessage({
694
404
  channel: CHANNELS.TELEGRAM,
@@ -23,6 +23,7 @@ const WHISPER_SERVER = path.join(__dirname, "whisper-server.py");
23
23
 
24
24
  let _serverProcess = null;
25
25
  let _serverModel = null;
26
+ let _serverBackend = null; // "faster" | "mlx" — restart when this changes too
26
27
 
27
28
  function _sleep(ms) {
28
29
  return new Promise((r) => setTimeout(r, ms));
@@ -39,14 +40,14 @@ async function _isServerHealthy() {
39
40
  }
40
41
  }
41
42
 
42
- async function _serverModelName() {
43
+ async function _serverHealthInfo() {
43
44
  try {
44
45
  const res = await fetch(`http://127.0.0.1:${WHISPER_LOCAL_PORT}/health`, {
45
46
  signal: AbortSignal.timeout(800),
46
47
  });
47
48
  if (!res.ok) return null;
48
49
  const j = await res.json();
49
- return j?.model || null;
50
+ return { model: j?.model || null, backend: j?.backend || "faster" };
50
51
  } catch {
51
52
  return null;
52
53
  }
@@ -82,17 +83,20 @@ async function _killOrphanWhisper() {
82
83
 
83
84
  export async function ensureWhisperServer(opts) {
84
85
  const model = opts.model || DEFAULT_LOCAL.model;
86
+ const backend = opts.backend || "faster";
85
87
 
86
- if (_serverProcess && _serverModel === model) {
88
+ if (_serverProcess && _serverModel === model && _serverBackend === backend) {
87
89
  if (await _isServerHealthy()) return;
88
90
  _serverProcess = null;
89
91
  _serverModel = null;
92
+ _serverBackend = null;
90
93
  }
91
94
 
92
95
  if (!_serverProcess) {
93
- const existing = await _serverModelName();
94
- if (existing === model) {
96
+ const existing = await _serverHealthInfo();
97
+ if (existing && existing.model === model && existing.backend === backend) {
95
98
  _serverModel = model;
99
+ _serverBackend = backend;
96
100
  return;
97
101
  }
98
102
  if (existing) {
@@ -104,16 +108,18 @@ export async function ensureWhisperServer(opts) {
104
108
  try { _serverProcess.kill(); } catch {}
105
109
  _serverProcess = null;
106
110
  _serverModel = null;
111
+ _serverBackend = null;
107
112
  await _sleep(300);
108
113
  }
109
114
 
110
- await _spawnWhisper(opts, model, /* retried */ false);
115
+ await _spawnWhisper(opts, model, backend, /* retried */ false);
111
116
  }
112
117
 
113
- async function _spawnWhisper(opts, model, retried) {
118
+ async function _spawnWhisper(opts, model, backend, retried) {
114
119
  const args = [
115
120
  WHISPER_SERVER,
116
121
  "--port", String(WHISPER_LOCAL_PORT),
122
+ "--backend", String(backend || "faster"),
117
123
  "--model", model,
118
124
  "--device", String(opts.device || DEFAULT_LOCAL.device),
119
125
  "--compute-type", String(opts.compute_type || DEFAULT_LOCAL.compute_type),
@@ -127,11 +133,13 @@ async function _spawnWhisper(opts, model, retried) {
127
133
 
128
134
  _serverProcess = proc;
129
135
  _serverModel = model;
136
+ _serverBackend = backend;
130
137
 
131
138
  proc.on("exit", () => {
132
139
  if (_serverProcess === proc) {
133
140
  _serverProcess = null;
134
141
  _serverModel = null;
142
+ _serverBackend = null;
135
143
  }
136
144
  });
137
145
 
@@ -167,8 +175,9 @@ async function _spawnWhisper(opts, model, retried) {
167
175
  if (!retried && /address already in use|errno 48|eaddrinuse/i.test(msg)) {
168
176
  _serverProcess = null;
169
177
  _serverModel = null;
178
+ _serverBackend = null;
170
179
  await _killOrphanWhisper();
171
- return _spawnWhisper(opts, model, /* retried */ true);
180
+ return _spawnWhisper(opts, model, backend, /* retried */ true);
172
181
  }
173
182
  throw e;
174
183
  }
@@ -210,6 +219,7 @@ export async function shutdownWhisperServer() {
210
219
  try { _serverProcess.kill(); } catch {}
211
220
  _serverProcess = null;
212
221
  _serverModel = null;
222
+ _serverBackend = null;
213
223
  } else {
214
224
  try {
215
225
  await fetch(`http://127.0.0.1:${WHISPER_LOCAL_PORT}/shutdown`, {