npm - @agentprojectcontext/apx - Versions diffs - 1.48.0 → 1.48.2 - Mend

@agentprojectcontext/apx 1.48.0 → 1.48.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18)

package/package.json +1 -1
package/src/core/voice/stt-hardware.js +13 -4
package/src/host/daemon/api/sessions.js +6 -10
package/src/host/daemon/api/shared.js +24 -0
package/src/host/daemon/api/tasks.js +8 -14
package/src/host/daemon/api/transcribe.js +5 -1
package/src/interfaces/web/dist/assets/{index-C9O1GTZ_.js → index-Bgu-xy_L.js} +134 -134
package/src/interfaces/web/dist/assets/{index-C9O1GTZ_.js.map → index-Bgu-xy_L.js.map} +1 -1
package/src/interfaces/web/dist/assets/index-C53eJujd.css +1 -0
package/src/interfaces/web/dist/index.html +2 -2
package/src/interfaces/web/src/components/voice/VoiceSttCard.tsx +123 -17
package/src/interfaces/web/src/i18n/en.ts +7 -0
package/src/interfaces/web/src/i18n/es.ts +7 -0
package/src/interfaces/web/src/lib/api/sessions.ts +6 -6
package/src/interfaces/web/src/lib/api/tasks.ts +6 -9
package/src/interfaces/web/src/lib/api/voice.ts +37 -1
package/src/interfaces/web/src/lib/http.ts +25 -27
package/src/interfaces/web/dist/assets/index-CilEtMjV.css +0 -1

package/src/interfaces/web/src/components/voice/VoiceSttCard.tsx CHANGED Viewed

@@ -1,9 +1,36 @@
+import { useEffect, useState } from "react";
 import { Field, Input } from "../ui";
 import { UiSelect } from "../UiSelect";
-import { WHISPER_MODELS, type TranscriptionConfig } from "../../lib/api/voice";
+import { Voice, WHISPER_MODELS, type TranscriptionConfig, type SttHardwareResponse, type SttModelEntry } from "../../lib/api/voice";
 import { isSecretMarker, secretSuffix } from "../../lib/secrets";
 import { t } from "../../i18n";
+// Acceleration badge styles, keyed by the same ids as SttHardware["gpu"]. Each compute backend
+// gets its own colour so the user can tell at a glance what the local engine runs on (Metal on
+// Apple Silicon, CUDA on NVIDIA, Vulkan/ROCm on AMD); `none` is plain CPU and doubles as the fallback entry.
+const ACCEL: Record<string, { label: string; cls: string }> = {
+  metal: { label: "Metal",         cls: "text-emerald-400 border-emerald-500/40 bg-emerald-500/10" },
+  cuda:  { label: "CUDA",          cls: "text-lime-400 border-lime-500/40 bg-lime-500/10" },
+  rocm:  { label: "Vulkan / ROCm", cls: "text-orange-400 border-orange-500/40 bg-orange-500/10" },
+  none:  { label: "CPU",           cls: "text-muted-fg border-border bg-muted" },
+};
+function AccelBadge({ gpu }: { gpu: string }) { // small coloured pill naming the compute backend for a gpu id
+  const a = ACCEL[gpu] ?? ACCEL.none; // ids missing from ACCEL fall back to the CPU ("none") styling
+  return (
+    <span className={`inline-flex items-center rounded-md border px-1.5 py-0.5 text-[11px] font-medium ${a.cls}`}>
+      {a.label}
+    </span>
+  );
+}
+// Human label for the recommended backend (engine + where it runs), e.g. "CUDA · faster-whisper".
+function backendLabel(rec: SttHardwareResponse["recommended"]): string {
+  if (rec.backend === "mlx") return "Metal · mlx-whisper"; // mlx is always labelled as running on Metal
+  if (rec.backend === "faster") return (rec.device === "cuda" ? "CUDA" : "CPU") + " · faster-whisper"; // any device other than "cuda" (or none) reads as CPU
+  return rec.backend; // unrecognised backend: show the raw id rather than guess a label
+}
 // STT (speech-to-text) configuration. Persisted under config.transcription.
 // The actual capture happens in the desktop window / Telegram / CLI; here the
 // owner picks the engine and configures it:
@@ -36,6 +63,13 @@ const langOptions = () => [
 ];
 export function VoiceSttCard({ config, onPatch, busy }: Props) {
+  const [hw, setHw] = useState<SttHardwareResponse | null>(null);
+  useEffect(() => {
+    let alive = true;
+    Voice.sttHardware().then((r) => { if (alive) setHw(r); }).catch(() => {});
+    return () => { alive = false; };
+  }, []);
   const provider = config.provider || "auto";
   const local = config.local || {};
   const openai = config.openai || {};
@@ -63,8 +97,63 @@ export function VoiceSttCard({ config, onPatch, busy }: Props) {
       ? t("voice_ui.api_key_set", { suffix: secretSuffix(marker) ?? "" })
       : t("voice_ui.api_key_label");
+  // ── Local engine: acceleration backend + model (hardware-adaptive) ─────────
+  const localBackend = local.backend || "auto";
+  const accel = hw?.hardware.gpu || "none";
+  // What "auto" actually resolves to on this machine (mlx on Metal, faster else).
+  const effectiveBackend = localBackend === "auto" ? (hw?.recommended.backend || "faster") : localBackend;
+  const isMlx = effectiveBackend === "mlx";
+  // The accel a chosen backend runs on — drives the badge next to the selector.
+  const selectedAccel = isMlx ? "metal" : (effectiveBackend === "faster" && accel === "cuda" ? "cuda" : "none");
+  const backendOptions = () => {
+    const opts = [{ value: "auto", label: t("voice_ui.stt_backend_auto") }];
+    if (accel === "metal") opts.push({ value: "mlx", label: "Metal — mlx-whisper" });
+    opts.push({ value: "faster", label: accel === "cuda" ? "CUDA — faster-whisper" : "CPU — faster-whisper" });
+    return opts;
+  };
+  // Model list for the effective backend, with on-disk status in the label.
+  const [models, setModels] = useState<SttModelEntry[]>([]);
+  useEffect(() => {
+    let alive = true;
+    Voice.sttModels(effectiveBackend).then((r) => { if (alive) setModels(r.models); }).catch(() => { if (alive) setModels([]); });
+    return () => { alive = false; };
+  }, [effectiveBackend]);
+  const fmtModel = (m: SttModelEntry) => `${m.id} · ${m.downloaded ? "✓ " + m.size : m.size}`;
+  const modelOptions = () =>
+    models.length
+      ? models.map((m) => ({ value: isMlx ? m.repo : m.id, label: fmtModel(m) }))
+      : WHISPER_MODELS.map((m) => ({ value: m, label: m }));
+  const modelValue = isMlx ? (local.mlx_model || hw?.recommended.model || "") : model;
+  const modelPatchKey = isMlx ? "transcription.local.mlx_model" : "transcription.local.model";
+  const selectedModel = models.find((m) => (isMlx ? m.repo : m.id) === modelValue);
+  const needsDownload = !!selectedModel && !selectedModel.downloaded;
   return (
     <div className="space-y-3">
+      {hw && (
+        <div className="rounded-lg border border-border bg-muted px-3 py-2 text-sm">
+          <div className="flex flex-wrap items-center gap-2">
+            <span className="text-muted-fg">{t("voice_ui.stt_hw_label")}:</span>
+            <AccelBadge gpu={hw.hardware.gpu} />
+            <span className="font-medium text-fg">{hw.hardware.gpuName || hw.hardware.platform}</span>
+            {hw.hardware.mem_gb ? (
+              <span className="text-muted-fg">
+                · {hw.hardware.mem_gb} GB{hw.hardware.unified_memory ? " unified" : ""}
+              </span>
+            ) : null}
+          </div>
+          <div className="mt-1 text-xs text-muted-fg">
+            {t("voice_ui.stt_hw_recommended")}:{" "}
+            <span className="text-fg">{hw.recommended.model}</span>
+            {" "}({backendLabel(hw.recommended)})
+            {hw.recommended.limited ? ` — ${t("voice_ui.stt_hw_limited")}` : ""}
+          </div>
+        </div>
+      )}
       <Field label={t("voice_ui.stt_engine_label")} hint={t("voice_ui.stt_engine_hint")}>
         <UiSelect
           value={provider}
@@ -76,23 +165,40 @@ export function VoiceSttCard({ config, onPatch, busy }: Props) {
       </Field>
       {showLocal && (
-        <div className="grid grid-cols-1 gap-3 sm:grid-cols-2">
-          <Field label={t("voice_ui.stt_model_label")} hint={t("voice_ui.stt_model_hint")}>
-            <UiSelect
-              value={model}
-              onChange={(v) => onPatch({ "transcription.local.model": v })}
-              options={WHISPER_MODELS.map((m) => ({ value: m, label: m }))}
-              disabled={busy}
-            />
-          </Field>
-          <Field label={t("voice_ui.stt_language_label")} hint={t("voice_ui.stt_language_hint")}>
-            <UiSelect
-              value={language}
-              onChange={(v) => onPatch({ "transcription.local.language": v })}
-              options={langOptions()}
-              disabled={busy}
-            />
+        <div className="space-y-3">
+          <Field label={t("voice_ui.stt_backend_label")} hint={t("voice_ui.stt_backend_hint")}>
+            <div className="flex items-center gap-2">
+              <UiSelect
+                value={localBackend}
+                onChange={(v) => onPatch({ "transcription.local.backend": v })}
+                options={backendOptions()}
+                disabled={busy}
+                className="max-w-xs"
+              />
+              <AccelBadge gpu={selectedAccel} />
+            </div>
           </Field>
+          <div className="grid grid-cols-1 gap-3 sm:grid-cols-2">
+            <Field
+              label={t("voice_ui.stt_model_label")}
+              hint={needsDownload ? t("voice_ui.stt_model_needs_download", { size: selectedModel!.size }) : t("voice_ui.stt_model_hint")}
+            >
+              <UiSelect
+                value={modelValue}
+                onChange={(v) => onPatch({ [modelPatchKey]: v })}
+                options={modelOptions()}
+                disabled={busy}
+              />
+            </Field>
+            <Field label={t("voice_ui.stt_language_label")} hint={t("voice_ui.stt_language_hint")}>
+              <UiSelect
+                value={language}
+                onChange={(v) => onPatch({ "transcription.local.language": v })}
+                options={langOptions()}
+                disabled={busy}
+              />
+            </Field>
+          </div>
         </div>
       )}

package/src/interfaces/web/src/i18n/en.ts CHANGED Viewed

@@ -1171,6 +1171,13 @@ export const en = {
     stt_custom_model_label:   "Model",
     stt_custom_model_hint:    "e.g. mlx-community/whisper-large-v3-turbo or large-v3.",
     stt_custom_key_hint:      "Optional — most local servers need no key.",
+    stt_hw_label:             "Detected hardware",
+    stt_hw_recommended:       "Recommended",
+    stt_hw_limited:           "limited GPU acceleration, using CPU",
+    stt_backend_label:        "Acceleration / Engine",
+    stt_backend_hint:         "Auto adapts to your hardware. Metal runs on the GPU (mlx); CPU uses faster-whisper.",
+    stt_backend_auto:         "Automatic (recommended)",
+    stt_model_needs_download: "Not downloaded ({size}). The model must be downloaded to use this engine.", // no "~" here: {size} already arrives as "~1.6 GB" for models not yet on disk
     lang_auto:            "Auto-detect",
     lang_es:              "Spanish",
     lang_en:              "English",

package/src/interfaces/web/src/i18n/es.ts CHANGED Viewed

@@ -1169,6 +1169,13 @@ export const es = {
     stt_custom_model_label:   "Modelo",
     stt_custom_model_hint:    "Ej: mlx-community/whisper-large-v3-turbo o large-v3.",
     stt_custom_key_hint:      "Opcional — la mayoría de los servers locales no requieren key.",
+    stt_hw_label:             "Hardware detectado",
+    stt_hw_recommended:       "Recomendado",
+    stt_hw_limited:           "aceleración GPU limitada, se usa CPU",
+    stt_backend_label:        "Aceleración / Motor",
+    stt_backend_hint:         "Auto elige según tu hardware. Metal corre en la GPU (mlx); CPU usa faster-whisper.",
+    stt_backend_auto:         "Automático (recomendado)",
+    stt_model_needs_download: "Falta descargar ({size}). Hay que bajar el modelo para usar este motor.", // no "~" here: {size} already arrives as "~1.6 GB" for models not yet on disk
     lang_auto:            "Detección automática",
     lang_es:              "Español",
     lang_en:              "Inglés",

package/src/interfaces/web/src/lib/api/sessions.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import { http } from "../http";
+import { http, unwrapPage } from "../http";
 export interface SessionRow {
   engine: string;
@@ -10,15 +10,15 @@ export interface SessionRow {
 }
 export const Sessions = {
-  // Cross-engine sessions (apx · claude · codex), newest first.
+  // Cross-engine sessions (apx · claude · codex), newest first — full set.
   global: (engine?: string) =>
-    http.get<{ sessions: SessionRow[] }>(`/sessions${engine ? `?engine=${encodeURIComponent(engine)}` : ""}`),
+    http
+      .get<unknown>(`/sessions${engine ? `?engine=${encodeURIComponent(engine)}` : ""}`)
+      .then((b) => ({ sessions: unwrapPage<SessionRow>(b).items })), // re-wrap so callers still receive the { sessions } shape
   // Server-paginated page: returns the requested window plus the full total.
   page: ({ engine, limit, offset }: { engine?: string; limit: number; offset: number }) => {
     const q = new URLSearchParams({ limit: String(limit), offset: String(offset) });
     if (engine) q.set("engine", engine);
-    return http
-      .getWithTotal<{ sessions: SessionRow[] }>(`/sessions?${q.toString()}`)
-      .then((r) => ({ items: r.data.sessions, total: r.total }));
+    return http.get<unknown>(`/sessions?${q.toString()}`).then((b) => unwrapPage<SessionRow>(b)); // total is now taken from the response body instead of the X-Total-Count header
   },
 };

package/src/interfaces/web/src/lib/api/tasks.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import { http } from "../http";
+import { http, unwrapPage } from "../http";
 import type { TaskEntry } from "../../types/daemon";
 export interface GlobalTaskEntry extends TaskEntry {
@@ -7,20 +7,17 @@ export interface GlobalTaskEntry extends TaskEntry {
 }
 export const Tasks = {
+  // Full sets (no pagination) — unwrapped to plain arrays for non-paged callers.
   list:   (pid: string, state: TaskEntry["state"] | "all" = "open") =>
-    http.get<TaskEntry[]>(`/projects/${pid}/tasks?state=${state}`),
+    http.get<unknown>(`/projects/${pid}/tasks?state=${state}`).then((b) => unwrapPage<TaskEntry>(b).items),
   global: (state: TaskEntry["state"] | "all" = "open") =>
-    http.get<GlobalTaskEntry[]>(`/tasks?state=${state}`),
+    http.get<unknown>(`/tasks?state=${state}`).then((b) => unwrapPage<GlobalTaskEntry>(b).items),
   // Server-paginated variants: one project (listPage) or all projects
   // (globalPage). Each returns the requested window plus the full total.
   listPage: (pid: string, { state, limit, offset }: { state: TaskEntry["state"] | "all"; limit: number; offset: number }) =>
-    http
-      .getWithTotal<TaskEntry[]>(`/projects/${pid}/tasks?state=${state}&limit=${limit}&offset=${offset}`)
-      .then((r) => ({ items: r.data, total: r.total })),
+    http.get<unknown>(`/projects/${pid}/tasks?state=${state}&limit=${limit}&offset=${offset}`).then((b) => unwrapPage<TaskEntry>(b)),
   globalPage: ({ state, limit, offset }: { state: TaskEntry["state"] | "all"; limit: number; offset: number }) =>
-    http
-      .getWithTotal<GlobalTaskEntry[]>(`/tasks?state=${state}&limit=${limit}&offset=${offset}`)
-      .then((r) => ({ items: r.data, total: r.total })),
+    http.get<unknown>(`/tasks?state=${state}&limit=${limit}&offset=${offset}`).then((b) => unwrapPage<GlobalTaskEntry>(b)),
   add:    (pid: string, body: Partial<TaskEntry>) =>
     http.post<TaskEntry>(`/projects/${pid}/tasks`, body),
   done:   (pid: string, id: string) => http.post<TaskEntry>(`/projects/${pid}/tasks/${id}/done`),

package/src/interfaces/web/src/lib/api/voice.ts CHANGED Viewed

@@ -92,7 +92,9 @@ export interface VoiceTtsConfig {
 }
 export interface TranscriptionLocalConfig {
-  model?: string;        // tiny | base | small | medium | large | large-v2 | large-v3
+  backend?: string;      // auto | faster | mlx  (auto adapts to the hardware)
+  model?: string;        // faster-whisper model id (tiny | base | small | …)
+  mlx_model?: string;    // mlx repo (e.g. mlx-community/whisper-large-v3-turbo)
   device?: string;       // cpu | cuda
   compute_type?: string; // int8 | int8_float16 | float16 | float32
   language?: string;     // ISO code or "auto"
@@ -117,6 +119,34 @@ export interface TranscriptionConfig {
   custom?: TranscriptionCustomConfig;
 }
+/** Detected machine description: the `hardware` half of the GET /transcribe/hardware response. */
+export interface SttHardware {
+  platform: string;         // shown in the hardware row when gpuName is absent
+  arch: string;             // NOTE(review): presumably the CPU architecture string — confirm against the daemon
+  appleSilicon: boolean;    // NOTE(review): not read by the visible UI code; presumably true on Apple Silicon Macs — confirm
+  gpu: "metal" | "cuda" | "rocm" | "none"; // acceleration kind; selects the AccelBadge label/colour and the offered backends
+  gpuName?: string;         // human-readable GPU name; preferred over platform in the UI
+  mem_gb?: number;          // memory size in GB; the UI omits the memory text when missing or 0
+  unified_memory?: boolean; // when true the UI appends " unified" after the memory size
+}
+export interface SttHardwareResponse { // payload returned by Voice.sttHardware()
+  hardware: SttHardware; // the detected machine
+  recommended: { backend: string; device?: string; model: string; reason?: string; tier?: string; limited?: boolean }; // UI compares backend to "mlx"/"faster" and device to "cuda"; limited adds the "limited GPU acceleration" note; reason/tier are not read by the visible UI
+}
+/** One model row from GET /transcribe/models. */
+export interface SttModelEntry {
+  id: string;          // short model id; shown in the option label and used as the select value for non-mlx backends
+  repo: string;        // repository name; used as the select value when the effective backend is mlx
+  downloaded: boolean; // on-disk status; the UI prefixes the size with "✓" when true and shows a download hint when false
+  size: string;        // "1.6 GB" when present, "~1.6 GB" when not yet downloaded
+  size_bytes: number;  // NOTE(review): presumably the same size in raw bytes — confirm against the daemon
+}
+export interface SttModelsResponse { // payload returned by Voice.sttModels()
+  backend: string;         // NOTE(review): presumably echoes the backend the catalog was requested for — confirm
+  models: SttModelEntry[]; // catalog rows; the UI falls back to the static WHISPER_MODELS list when this is empty
+}
 /** One STT engine entry as reported by GET /transcribe/providers. */
 export interface SttProviderEntry {
   id: string;             // "local" | "openai" | "custom"
@@ -169,6 +199,12 @@ export const Voice = {
   /** List TTS engines + availability + the configured default provider. */
   providers: () => http.get<TtsProvidersResponse>("/tts/providers"),
+  /** Detected hardware + the recommended local STT backend (Metal/CUDA/CPU). VoiceSttCard treats a failure as "hardware unknown" and hides the hardware panel. */
+  sttHardware: () => http.get<SttHardwareResponse>("/transcribe/hardware"),
+  /** Model catalog + on-disk status for a local backend ("faster" | "mlx"). The id is URL-encoded because it can come straight from persisted config rather than a fixed list. */
+  sttModels: (backend: string) => http.get<SttModelsResponse>(`/transcribe/models?backend=${encodeURIComponent(backend)}`),
   /**
    * Synthesize speech. Returns the audio file path (server-side); the web
    * fetches it via fetchTtsAudioUrl() to play it in the browser. `no_play`

package/src/interfaces/web/src/lib/http.ts CHANGED Viewed

@@ -52,38 +52,36 @@ async function request<T>(
   return (await res.json()) as T;
 }
-// GET that also surfaces the total-row count for server-side pagination. The
-// daemon returns the full count in the X-Total-Count header (the body keeps its
-// normal shape); we fall back to the payload length when the header is absent
-// (e.g. an older daemon) so pagination degrades gracefully instead of breaking.
-async function getWithTotal<T>(path: string): Promise<{ data: T; total: number }> {
-  const headers: Record<string, string> = token ? { authorization: `Bearer ${token}` } : {};
-  const res = await fetch(path, { method: "GET", headers });
-  if (!res.ok) {
-    let detail = "";
-    let parsed: unknown = null;
-    try {
-      parsed = await res.json();
-      detail = (parsed as { error?: string })?.error || JSON.stringify(parsed);
-    } catch {
-      detail = await res.text();
-    }
-    throw new HttpError(res.status, `GET ${path} → ${res.status}: ${detail}`, parsed);
+// Pagination metadata returned by list endpoints in the { meta, data } envelope.
+export interface PageMeta {
+  total: number;        // full row count across all pages; the only meta field unwrapPage reads
+  offset: number;       // NOTE(review): presumably echoes the request's offset — confirm against the daemon
+  limit: number | null; // NOTE(review): presumably null when the request set no limit — confirm
+  pageSize: number;     // NOTE(review): presumably the number of rows per page — confirm
+  page: number;         // NOTE(review): confirm whether this is 0- or 1-based
+  pageCount: number;    // NOTE(review): presumably the total number of pages — confirm
+}
+// Normalize any list response into { items, total }. Accepts the { meta, data }
+// envelope (current daemon), a bare array, or the legacy { sessions } object, so
+// the UI keeps working across a daemon that hasn't been restarted yet (it just
+// degrades to a single page when no meta.total is present). Elements are cast to T unchecked.
+export function unwrapPage<T>(body: unknown): { items: T[]; total: number } {
+  const b = body as { data?: unknown; meta?: { total?: number }; sessions?: unknown }; // loose view of body; each field is re-checked before use
+  if (Array.isArray(body)) return { items: body as T[], total: body.length }; // bare array: total is just the array length
+  if (b && Array.isArray(b.data)) { // { meta, data } envelope
+    const items = b.data as T[];
+    return { items, total: typeof b.meta?.total === "number" ? b.meta.total : items.length }; // use meta.total only when it is a number
+  }
+  if (b && Array.isArray(b.sessions)) { // legacy { sessions } object: total is the array length
+    const items = b.sessions as T[];
+    return { items, total: items.length };
   }
-  const data = (await res.json()) as T;
-  const header = res.headers.get("X-Total-Count");
-  const total =
-    header != null && header !== ""
-      ? parseInt(header, 10)
-      : Array.isArray(data)
-        ? data.length
-        : 0;
-  return { data, total };
+  return { items: [], total: 0 }; // unrecognised shape (including null/undefined): empty page instead of throwing
 }
 export const http = {
   get:   <T>(p: string)              => request<T>("GET", p),
-  getWithTotal,
   post:  <T>(p: string, b?: unknown) => request<T>("POST", p, b),
   put:   <T>(p: string, b?: unknown) => request<T>("PUT", p, b),
   patch: <T>(p: string, b?: unknown) => request<T>("PATCH", p, b),