npm - @agentprojectcontext/apx - Versions diffs - 1.48.1 → 1.48.2 - Mend

@agentprojectcontext/apx 1.48.1 → 1.48.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

package/src/interfaces/web/src/components/voice/VoiceSttCard.tsx CHANGED Viewed

@@ -1,9 +1,36 @@
+import { useEffect, useState } from "react";
 import { Field, Input } from "../ui";
 import { UiSelect } from "../UiSelect";
-import { WHISPER_MODELS, type TranscriptionConfig } from "../../lib/api/voice";
+import { Voice, WHISPER_MODELS, type TranscriptionConfig, type SttHardwareResponse, type SttModelEntry } from "../../lib/api/voice";
 import { isSecretMarker, secretSuffix } from "../../lib/secrets";
 import { t } from "../../i18n";
+// Acceleration badge styles, keyed by the compute backend id the local engine
+// runs on (Metal on Apple Silicon, CUDA on NVIDIA, Vulkan/ROCm on AMD, plain
+// CPU otherwise). Each backend gets its own colour so it reads at a glance.
+const ACCEL: Record<string, { label: string; cls: string }> = {
+  metal: { label: "Metal",         cls: "text-emerald-400 border-emerald-500/40 bg-emerald-500/10" },
+  cuda:  { label: "CUDA",          cls: "text-lime-400 border-lime-500/40 bg-lime-500/10" },
+  rocm:  { label: "Vulkan / ROCm", cls: "text-orange-400 border-orange-500/40 bg-orange-500/10" },
+  none:  { label: "CPU",           cls: "text-muted-fg border-border bg-muted" }, // also the fallback for unrecognised ids
+};
+function AccelBadge({ gpu }: { gpu: string }) { // small coloured pill naming the compute backend
+  const a = ACCEL[gpu] ?? ACCEL.none; // unknown ids render as the CPU badge
+  return (
+    <span className={`inline-flex items-center rounded-md border px-1.5 py-0.5 text-[11px] font-medium ${a.cls}`}>
+      {a.label}
+    </span>
+  );
+}
+// Human label for the recommended backend: "<where it runs> · <engine>".
+function backendLabel(rec: SttHardwareResponse["recommended"]): string {
+  if (rec.backend === "mlx") return "Metal · mlx-whisper";
+  if (rec.backend === "faster") return (rec.device === "cuda" ? "CUDA" : "CPU") + " · faster-whisper"; // any non-cuda device is shown as CPU
+  return rec.backend; // unrecognised backends are shown verbatim
+}
 // STT (speech-to-text) configuration. Persisted under config.transcription.
 // The actual capture happens in the desktop window / Telegram / CLI; here the
 // owner picks the engine and configures it:
@@ -36,6 +63,13 @@ const langOptions = () => [
 ];
 export function VoiceSttCard({ config, onPatch, busy }: Props) {
+  const [hw, setHw] = useState<SttHardwareResponse | null>(null);
+  useEffect(() => {
+    let alive = true;
+    Voice.sttHardware().then((r) => { if (alive) setHw(r); }).catch(() => {});
+    return () => { alive = false; };
+  }, []);
   const provider = config.provider || "auto";
   const local = config.local || {};
   const openai = config.openai || {};
@@ -63,8 +97,63 @@ export function VoiceSttCard({ config, onPatch, busy }: Props) {
       ? t("voice_ui.api_key_set", { suffix: secretSuffix(marker) ?? "" })
       : t("voice_ui.api_key_label");
+  // ── Local engine: acceleration backend + model (hardware-adaptive) ─────────
+  const localBackend = local.backend || "auto";
+  const accel = hw?.hardware.gpu || "none";
+  // What "auto" actually resolves to on this machine (mlx on Metal, faster else).
+  const effectiveBackend = localBackend === "auto" ? (hw?.recommended.backend || "faster") : localBackend;
+  const isMlx = effectiveBackend === "mlx";
+  // The accel a chosen backend runs on — drives the badge next to the selector.
+  const selectedAccel = isMlx ? "metal" : (effectiveBackend === "faster" && accel === "cuda" ? "cuda" : "none");
+  const backendOptions = () => {
+    const opts = [{ value: "auto", label: t("voice_ui.stt_backend_auto") }];
+    if (accel === "metal") opts.push({ value: "mlx", label: "Metal — mlx-whisper" });
+    opts.push({ value: "faster", label: accel === "cuda" ? "CUDA — faster-whisper" : "CPU — faster-whisper" });
+    return opts;
+  };
+  // Model list for the effective backend, with on-disk status in the label.
+  const [models, setModels] = useState<SttModelEntry[]>([]);
+  useEffect(() => {
+    let alive = true;
+    Voice.sttModels(effectiveBackend).then((r) => { if (alive) setModels(r.models); }).catch(() => { if (alive) setModels([]); });
+    return () => { alive = false; };
+  }, [effectiveBackend]);
+  const fmtModel = (m: SttModelEntry) => `${m.id} · ${m.downloaded ? "✓ " + m.size : m.size}`;
+  const modelOptions = () =>
+    models.length
+      ? models.map((m) => ({ value: isMlx ? m.repo : m.id, label: fmtModel(m) }))
+      : WHISPER_MODELS.map((m) => ({ value: m, label: m }));
+  const modelValue = isMlx ? (local.mlx_model || hw?.recommended.model || "") : model;
+  const modelPatchKey = isMlx ? "transcription.local.mlx_model" : "transcription.local.model";
+  const selectedModel = models.find((m) => (isMlx ? m.repo : m.id) === modelValue);
+  const needsDownload = !!selectedModel && !selectedModel.downloaded;
   return (
     <div className="space-y-3">
+      {hw && (
+        <div className="rounded-lg border border-border bg-muted px-3 py-2 text-sm">
+          <div className="flex flex-wrap items-center gap-2">
+            <span className="text-muted-fg">{t("voice_ui.stt_hw_label")}:</span>
+            <AccelBadge gpu={hw.hardware.gpu} />
+            <span className="font-medium text-fg">{hw.hardware.gpuName || hw.hardware.platform}</span>
+            {hw.hardware.mem_gb ? (
+              <span className="text-muted-fg">
+                · {hw.hardware.mem_gb} GB{hw.hardware.unified_memory ? " unified" : ""}
+              </span>
+            ) : null}
+          </div>
+          <div className="mt-1 text-xs text-muted-fg">
+            {t("voice_ui.stt_hw_recommended")}:{" "}
+            <span className="text-fg">{hw.recommended.model}</span>
+            {" "}({backendLabel(hw.recommended)})
+            {hw.recommended.limited ? ` — ${t("voice_ui.stt_hw_limited")}` : ""}
+          </div>
+        </div>
+      )}
       <Field label={t("voice_ui.stt_engine_label")} hint={t("voice_ui.stt_engine_hint")}>
         <UiSelect
           value={provider}
@@ -76,23 +165,40 @@ export function VoiceSttCard({ config, onPatch, busy }: Props) {
       </Field>
       {showLocal && (
-        <div className="grid grid-cols-1 gap-3 sm:grid-cols-2">
-          <Field label={t("voice_ui.stt_model_label")} hint={t("voice_ui.stt_model_hint")}>
-            <UiSelect
-              value={model}
-              onChange={(v) => onPatch({ "transcription.local.model": v })}
-              options={WHISPER_MODELS.map((m) => ({ value: m, label: m }))}
-              disabled={busy}
-            />
-          </Field>
-          <Field label={t("voice_ui.stt_language_label")} hint={t("voice_ui.stt_language_hint")}>
-            <UiSelect
-              value={language}
-              onChange={(v) => onPatch({ "transcription.local.language": v })}
-              options={langOptions()}
-              disabled={busy}
-            />
+        <div className="space-y-3">
+          <Field label={t("voice_ui.stt_backend_label")} hint={t("voice_ui.stt_backend_hint")}>
+            <div className="flex items-center gap-2">
+              <UiSelect
+                value={localBackend}
+                onChange={(v) => onPatch({ "transcription.local.backend": v })}
+                options={backendOptions()}
+                disabled={busy}
+                className="max-w-xs"
+              />
+              <AccelBadge gpu={selectedAccel} />
+            </div>
           </Field>
+          <div className="grid grid-cols-1 gap-3 sm:grid-cols-2">
+            <Field
+              label={t("voice_ui.stt_model_label")}
+              hint={needsDownload ? t("voice_ui.stt_model_needs_download", { size: selectedModel!.size }) : t("voice_ui.stt_model_hint")}
+            >
+              <UiSelect
+                value={modelValue}
+                onChange={(v) => onPatch({ [modelPatchKey]: v })}
+                options={modelOptions()}
+                disabled={busy}
+              />
+            </Field>
+            <Field label={t("voice_ui.stt_language_label")} hint={t("voice_ui.stt_language_hint")}>
+              <UiSelect
+                value={language}
+                onChange={(v) => onPatch({ "transcription.local.language": v })}
+                options={langOptions()}
+                disabled={busy}
+              />
+            </Field>
+          </div>
         </div>
       )}

package/src/interfaces/web/src/i18n/en.ts CHANGED Viewed

@@ -1171,6 +1171,13 @@ export const en = {
     stt_custom_model_label:   "Model",
     stt_custom_model_hint:    "e.g. mlx-community/whisper-large-v3-turbo or large-v3.",
     stt_custom_key_hint:      "Optional — most local servers need no key.",
+    stt_hw_label:             "Detected hardware",
+    stt_hw_recommended:       "Recommended",
+    stt_hw_limited:           "limited GPU acceleration, using CPU",
+    stt_backend_label:        "Acceleration / Engine",
+    stt_backend_hint:         "Auto adapts to your hardware. Metal runs on the GPU (mlx); CPU uses faster-whisper.",
+    stt_backend_auto:         "Automatic (recommended)",
+    stt_model_needs_download: "Not downloaded ({size}). The model must be downloaded to use this engine.", // {size} already carries its own "~" for models not yet downloaded
     lang_auto:            "Auto-detect",
     lang_es:              "Spanish",
     lang_en:              "English",

package/src/interfaces/web/src/i18n/es.ts CHANGED Viewed

@@ -1169,6 +1169,13 @@ export const es = {
     stt_custom_model_label:   "Modelo",
     stt_custom_model_hint:    "Ej: mlx-community/whisper-large-v3-turbo o large-v3.",
     stt_custom_key_hint:      "Opcional — la mayoría de los servers locales no requieren key.",
+    stt_hw_label:             "Hardware detectado",
+    stt_hw_recommended:       "Recomendado",
+    stt_hw_limited:           "aceleración GPU limitada, se usa CPU",
+    stt_backend_label:        "Aceleración / Motor",
+    stt_backend_hint:         "Auto elige según tu hardware. Metal corre en la GPU (mlx); CPU usa faster-whisper.",
+    stt_backend_auto:         "Automático (recomendado)",
+    stt_model_needs_download: "Falta descargar ({size}). Hay que bajar el modelo para usar este motor.", // {size} already carries its own "~" for models not yet downloaded
     lang_auto:            "Detección automática",
     lang_es:              "Español",
     lang_en:              "Inglés",

package/src/interfaces/web/src/lib/api/voice.ts CHANGED Viewed

@@ -92,7 +92,9 @@ export interface VoiceTtsConfig {
 }
 export interface TranscriptionLocalConfig {
-  model?: string;        // tiny | base | small | medium | large | large-v2 | large-v3
+  backend?: string;      // auto | faster | mlx  (auto adapts to the hardware)
+  model?: string;        // faster-whisper model id (tiny | base | small | …)
+  mlx_model?: string;    // mlx repo (e.g. mlx-community/whisper-large-v3-turbo)
   device?: string;       // cpu | cuda
   compute_type?: string; // int8 | int8_float16 | float16 | float32
   language?: string;     // ISO code or "auto"
@@ -117,6 +119,34 @@ export interface TranscriptionConfig {
   custom?: TranscriptionCustomConfig;
 }
+/** Detected machine, as reported inside the GET /transcribe/hardware response. */
+export interface SttHardware {
+  platform: string; // shown in the hardware row when gpuName is absent
+  arch: string;
+  appleSilicon: boolean;
+  gpu: "metal" | "cuda" | "rocm" | "none"; // acceleration id; selects the badge colour and label
+  gpuName?: string; // preferred display name for the hardware row
+  mem_gb?: number; // rendered as "<n> GB"; the memory text is omitted when falsy
+  unified_memory?: boolean; // adds the " unified" suffix after the memory size
+}
+export interface SttHardwareResponse { // payload of GET /transcribe/hardware
+  hardware: SttHardware;
+  recommended: { backend: string; device?: string; model: string; reason?: string; tier?: string; limited?: boolean }; // the UI labels backend "mlx"/"faster" and device "cuda"; `limited` appends the limited-acceleration note
+}
+/** One model row from GET /transcribe/models. */
+export interface SttModelEntry {
+  id: string; // option value when the effective backend is not mlx; always shown in the option label
+  repo: string; // option value when the effective backend is mlx
+  downloaded: boolean; // true adds "✓" to the label; false triggers the needs-download hint
+  size: string;        // "1.6 GB" when present, "~1.6 GB" when not yet downloaded
+  size_bytes: number; // presumably the same size in bytes — TODO confirm against the server
+}
+export interface SttModelsResponse { // payload of GET /transcribe/models
+  backend: string; // presumably echoes the requested backend — TODO confirm
+  models: SttModelEntry[];
+}
 /** One STT engine entry as reported by GET /transcribe/providers. */
 export interface SttProviderEntry {
   id: string;             // "local" | "openai" | "custom"
@@ -169,6 +199,12 @@ export const Voice = {
   /** List TTS engines + availability + the configured default provider. */
   providers: () => http.get<TtsProvidersResponse>("/tts/providers"),
+  /** GET /transcribe/hardware — detected hardware + the recommended local STT backend (Metal/CUDA/CPU). */
+  sttHardware: () => http.get<SttHardwareResponse>("/transcribe/hardware"),
+  /** Model catalog + on-disk status for a local backend ("faster" | "mlx"); the id is URL-encoded into the query string. */
+  sttModels: (backend: string) => http.get<SttModelsResponse>(`/transcribe/models?backend=${encodeURIComponent(backend)}`),
   /**
    * Synthesize speech. Returns the audio file path (server-side); the web
    * fetches it via fetchTtsAudioUrl() to play it in the browser. `no_play`