@agentprojectcontext/apx 1.48.0 → 1.48.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,9 +1,36 @@
1
+ import { useEffect, useState } from "react";
1
2
  import { Field, Input } from "../ui";
2
3
  import { UiSelect } from "../UiSelect";
3
- import { WHISPER_MODELS, type TranscriptionConfig } from "../../lib/api/voice";
4
+ import { Voice, WHISPER_MODELS, type TranscriptionConfig, type SttHardwareResponse, type SttModelEntry } from "../../lib/api/voice";
4
5
  import { isSecretMarker, secretSuffix } from "../../lib/secrets";
5
6
  import { t } from "../../i18n";
6
7
 
8
+ // Acceleration badge — each compute backend gets its own colour so the user can
9
+ // tell at a glance what the local engine runs on (Metal on Apple Silicon, CUDA
10
+ // on NVIDIA, Vulkan/ROCm on AMD, plain CPU otherwise).
11
+ const ACCEL: Record<string, { label: string; cls: string }> = {
12
+ metal: { label: "Metal", cls: "text-emerald-400 border-emerald-500/40 bg-emerald-500/10" },
13
+ cuda: { label: "CUDA", cls: "text-lime-400 border-lime-500/40 bg-lime-500/10" },
14
+ rocm: { label: "Vulkan / ROCm", cls: "text-orange-400 border-orange-500/40 bg-orange-500/10" },
15
+ none: { label: "CPU", cls: "text-muted-fg border-border bg-muted" },
16
+ };
17
+
18
+ function AccelBadge({ gpu }: { gpu: string }) { // Small pill that names the compute backend identified by `gpu`.
19
+ const a = ACCEL[gpu] ?? ACCEL.none; // keys missing from ACCEL fall back to the "CPU" entry
20
+ return (
21
+ <span className={`inline-flex items-center rounded-md border px-1.5 py-0.5 text-[11px] font-medium ${a.cls}`}>
22
+ {a.label}
23
+ </span>
24
+ );
25
+ }
26
+
27
+ // Human label for the recommended backend (engine + where it runs).
28
+ function backendLabel(rec: SttHardwareResponse["recommended"]): string { // Returns "<accelerator> · <engine>" for the two known backends.
29
+ if (rec.backend === "mlx") return "Metal · mlx-whisper"; // mlx is always labelled Metal, regardless of rec.device
30
+ if (rec.backend === "faster") return (rec.device === "cuda" ? "CUDA" : "CPU") + " · faster-whisper"; // any device other than "cuda" (including undefined) is labelled CPU
31
+ return rec.backend; // any other backend is shown by its raw id
32
+ }
33
+
7
34
  // STT (speech-to-text) configuration. Persisted under config.transcription.
8
35
  // The actual capture happens in the desktop window / Telegram / CLI; here the
9
36
  // owner picks the engine and configures it:
@@ -36,6 +63,13 @@ const langOptions = () => [
36
63
  ];
37
64
 
38
65
  export function VoiceSttCard({ config, onPatch, busy }: Props) {
66
+ const [hw, setHw] = useState<SttHardwareResponse | null>(null);
67
+ useEffect(() => {
68
+ let alive = true;
69
+ Voice.sttHardware().then((r) => { if (alive) setHw(r); }).catch(() => {});
70
+ return () => { alive = false; };
71
+ }, []);
72
+
39
73
  const provider = config.provider || "auto";
40
74
  const local = config.local || {};
41
75
  const openai = config.openai || {};
@@ -63,8 +97,63 @@ export function VoiceSttCard({ config, onPatch, busy }: Props) {
63
97
  ? t("voice_ui.api_key_set", { suffix: secretSuffix(marker) ?? "" })
64
98
  : t("voice_ui.api_key_label");
65
99
 
100
+ // ── Local engine: acceleration backend + model (hardware-adaptive) ─────────
101
+ const localBackend = local.backend || "auto";
102
+ const accel = hw?.hardware.gpu || "none";
103
+ // What "auto" actually resolves to on this machine (mlx on Metal, faster else).
104
+ const effectiveBackend = localBackend === "auto" ? (hw?.recommended.backend || "faster") : localBackend;
105
+ const isMlx = effectiveBackend === "mlx";
106
+ // The accel a chosen backend runs on — drives the badge next to the selector.
107
+ const selectedAccel = isMlx ? "metal" : (effectiveBackend === "faster" && accel === "cuda" ? "cuda" : "none");
108
+
109
+ const backendOptions = () => {
110
+ const opts = [{ value: "auto", label: t("voice_ui.stt_backend_auto") }];
111
+ if (accel === "metal") opts.push({ value: "mlx", label: "Metal — mlx-whisper" });
112
+ opts.push({ value: "faster", label: accel === "cuda" ? "CUDA — faster-whisper" : "CPU — faster-whisper" });
113
+ return opts;
114
+ };
115
+
116
+ // Model list for the effective backend, with on-disk status in the label.
117
+ const [models, setModels] = useState<SttModelEntry[]>([]);
118
+ useEffect(() => {
119
+ let alive = true;
120
+ Voice.sttModels(effectiveBackend).then((r) => { if (alive) setModels(r.models); }).catch(() => { if (alive) setModels([]); });
121
+ return () => { alive = false; };
122
+ }, [effectiveBackend]);
123
+
124
+ const fmtModel = (m: SttModelEntry) => `${m.id} · ${m.downloaded ? "✓ " + m.size : m.size}`;
125
+ const modelOptions = () =>
126
+ models.length
127
+ ? models.map((m) => ({ value: isMlx ? m.repo : m.id, label: fmtModel(m) }))
128
+ : WHISPER_MODELS.map((m) => ({ value: m, label: m }));
129
+ const modelValue = isMlx ? (local.mlx_model || hw?.recommended.model || "") : model;
130
+ const modelPatchKey = isMlx ? "transcription.local.mlx_model" : "transcription.local.model";
131
+ const selectedModel = models.find((m) => (isMlx ? m.repo : m.id) === modelValue);
132
+ const needsDownload = !!selectedModel && !selectedModel.downloaded;
133
+
66
134
  return (
67
135
  <div className="space-y-3">
136
+ {hw && (
137
+ <div className="rounded-lg border border-border bg-muted px-3 py-2 text-sm">
138
+ <div className="flex flex-wrap items-center gap-2">
139
+ <span className="text-muted-fg">{t("voice_ui.stt_hw_label")}:</span>
140
+ <AccelBadge gpu={hw.hardware.gpu} />
141
+ <span className="font-medium text-fg">{hw.hardware.gpuName || hw.hardware.platform}</span>
142
+ {hw.hardware.mem_gb ? (
143
+ <span className="text-muted-fg">
144
+ · {hw.hardware.mem_gb} GB{hw.hardware.unified_memory ? " unified" : ""}
145
+ </span>
146
+ ) : null}
147
+ </div>
148
+ <div className="mt-1 text-xs text-muted-fg">
149
+ {t("voice_ui.stt_hw_recommended")}:{" "}
150
+ <span className="text-fg">{hw.recommended.model}</span>
151
+ {" "}({backendLabel(hw.recommended)})
152
+ {hw.recommended.limited ? ` — ${t("voice_ui.stt_hw_limited")}` : ""}
153
+ </div>
154
+ </div>
155
+ )}
156
+
68
157
  <Field label={t("voice_ui.stt_engine_label")} hint={t("voice_ui.stt_engine_hint")}>
69
158
  <UiSelect
70
159
  value={provider}
@@ -76,23 +165,40 @@ export function VoiceSttCard({ config, onPatch, busy }: Props) {
76
165
  </Field>
77
166
 
78
167
  {showLocal && (
79
- <div className="grid grid-cols-1 gap-3 sm:grid-cols-2">
80
- <Field label={t("voice_ui.stt_model_label")} hint={t("voice_ui.stt_model_hint")}>
81
- <UiSelect
82
- value={model}
83
- onChange={(v) => onPatch({ "transcription.local.model": v })}
84
- options={WHISPER_MODELS.map((m) => ({ value: m, label: m }))}
85
- disabled={busy}
86
- />
87
- </Field>
88
- <Field label={t("voice_ui.stt_language_label")} hint={t("voice_ui.stt_language_hint")}>
89
- <UiSelect
90
- value={language}
91
- onChange={(v) => onPatch({ "transcription.local.language": v })}
92
- options={langOptions()}
93
- disabled={busy}
94
- />
168
+ <div className="space-y-3">
169
+ <Field label={t("voice_ui.stt_backend_label")} hint={t("voice_ui.stt_backend_hint")}>
170
+ <div className="flex items-center gap-2">
171
+ <UiSelect
172
+ value={localBackend}
173
+ onChange={(v) => onPatch({ "transcription.local.backend": v })}
174
+ options={backendOptions()}
175
+ disabled={busy}
176
+ className="max-w-xs"
177
+ />
178
+ <AccelBadge gpu={selectedAccel} />
179
+ </div>
95
180
  </Field>
181
+ <div className="grid grid-cols-1 gap-3 sm:grid-cols-2">
182
+ <Field
183
+ label={t("voice_ui.stt_model_label")}
184
+ hint={needsDownload ? t("voice_ui.stt_model_needs_download", { size: selectedModel!.size.replace(/^~\s*/, "") /* size already starts with "~" for models not yet downloaded and the message adds its own, so drop it here to avoid "~~" */ }) : t("voice_ui.stt_model_hint")}
185
+ >
186
+ <UiSelect
187
+ value={modelValue}
188
+ onChange={(v) => onPatch({ [modelPatchKey]: v })}
189
+ options={modelOptions()}
190
+ disabled={busy}
191
+ />
192
+ </Field>
193
+ <Field label={t("voice_ui.stt_language_label")} hint={t("voice_ui.stt_language_hint")}>
194
+ <UiSelect
195
+ value={language}
196
+ onChange={(v) => onPatch({ "transcription.local.language": v })}
197
+ options={langOptions()}
198
+ disabled={busy}
199
+ />
200
+ </Field>
201
+ </div>
96
202
  </div>
97
203
  )}
98
204
 
@@ -1171,6 +1171,13 @@ export const en = {
1171
1171
  stt_custom_model_label: "Model",
1172
1172
  stt_custom_model_hint: "e.g. mlx-community/whisper-large-v3-turbo or large-v3.",
1173
1173
  stt_custom_key_hint: "Optional — most local servers need no key.",
1174
+ stt_hw_label: "Detected hardware",
1175
+ stt_hw_recommended: "Recommended",
1176
+ stt_hw_limited: "limited GPU acceleration, using CPU",
1177
+ stt_backend_label: "Acceleration / Engine",
1178
+ stt_backend_hint: "Auto adapts to your hardware. Metal runs on the GPU (mlx); CPU uses faster-whisper.",
1179
+ stt_backend_auto: "Automatic (recommended)",
1180
+ stt_model_needs_download: "Not downloaded (~{size}). The model must be downloaded to use this engine.",
1174
1181
  lang_auto: "Auto-detect",
1175
1182
  lang_es: "Spanish",
1176
1183
  lang_en: "English",
@@ -1169,6 +1169,13 @@ export const es = {
1169
1169
  stt_custom_model_label: "Modelo",
1170
1170
  stt_custom_model_hint: "Ej: mlx-community/whisper-large-v3-turbo o large-v3.",
1171
1171
  stt_custom_key_hint: "Opcional — la mayoría de los servers locales no requieren key.",
1172
+ stt_hw_label: "Hardware detectado",
1173
+ stt_hw_recommended: "Recomendado",
1174
+ stt_hw_limited: "aceleración GPU limitada, se usa CPU",
1175
+ stt_backend_label: "Aceleración / Motor",
1176
+ stt_backend_hint: "Auto elige según tu hardware. Metal corre en la GPU (mlx); CPU usa faster-whisper.",
1177
+ stt_backend_auto: "Automático (recomendado)",
1178
+ stt_model_needs_download: "Falta descargar (~{size}). Hay que bajar el modelo para usar este motor.",
1172
1179
  lang_auto: "Detección automática",
1173
1180
  lang_es: "Español",
1174
1181
  lang_en: "Inglés",
@@ -1,4 +1,4 @@
1
- import { http } from "../http";
1
+ import { http, unwrapPage } from "../http";
2
2
 
3
3
  export interface SessionRow {
4
4
  engine: string;
@@ -10,15 +10,15 @@ export interface SessionRow {
10
10
  }
11
11
 
12
12
  export const Sessions = {
13
- // Cross-engine sessions (apx · claude · codex), newest first.
13
+ // Cross-engine sessions (apx · claude · codex), newest first — full set.
14
14
  global: (engine?: string) =>
15
- http.get<{ sessions: SessionRow[] }>(`/sessions${engine ? `?engine=${encodeURIComponent(engine)}` : ""}`),
15
+ http
16
+ .get<unknown>(`/sessions${engine ? `?engine=${encodeURIComponent(engine)}` : ""}`)
17
+ .then((b) => ({ sessions: unwrapPage<SessionRow>(b).items })),
16
18
  // Server-paginated page: returns the requested window plus the full total.
17
19
  page: ({ engine, limit, offset }: { engine?: string; limit: number; offset: number }) => {
18
20
  const q = new URLSearchParams({ limit: String(limit), offset: String(offset) });
19
21
  if (engine) q.set("engine", engine);
20
- return http
21
- .getWithTotal<{ sessions: SessionRow[] }>(`/sessions?${q.toString()}`)
22
- .then((r) => ({ items: r.data.sessions, total: r.total }));
22
+ return http.get<unknown>(`/sessions?${q.toString()}`).then((b) => unwrapPage<SessionRow>(b));
23
23
  },
24
24
  };
@@ -1,4 +1,4 @@
1
- import { http } from "../http";
1
+ import { http, unwrapPage } from "../http";
2
2
  import type { TaskEntry } from "../../types/daemon";
3
3
 
4
4
  export interface GlobalTaskEntry extends TaskEntry {
@@ -7,20 +7,17 @@ export interface GlobalTaskEntry extends TaskEntry {
7
7
  }
8
8
 
9
9
  export const Tasks = {
10
+ // Full sets (no pagination) — unwrapped to plain arrays for non-paged callers.
10
11
  list: (pid: string, state: TaskEntry["state"] | "all" = "open") =>
11
- http.get<TaskEntry[]>(`/projects/${pid}/tasks?state=${state}`),
12
+ http.get<unknown>(`/projects/${pid}/tasks?state=${state}`).then((b) => unwrapPage<TaskEntry>(b).items),
12
13
  global: (state: TaskEntry["state"] | "all" = "open") =>
13
- http.get<GlobalTaskEntry[]>(`/tasks?state=${state}`),
14
+ http.get<unknown>(`/tasks?state=${state}`).then((b) => unwrapPage<GlobalTaskEntry>(b).items),
14
15
  // Server-paginated variants: one project (listPage) or all projects
15
16
  // (globalPage). Each returns the requested window plus the full total.
16
17
  listPage: (pid: string, { state, limit, offset }: { state: TaskEntry["state"] | "all"; limit: number; offset: number }) =>
17
- http
18
- .getWithTotal<TaskEntry[]>(`/projects/${pid}/tasks?state=${state}&limit=${limit}&offset=${offset}`)
19
- .then((r) => ({ items: r.data, total: r.total })),
18
+ http.get<unknown>(`/projects/${pid}/tasks?state=${state}&limit=${limit}&offset=${offset}`).then((b) => unwrapPage<TaskEntry>(b)),
20
19
  globalPage: ({ state, limit, offset }: { state: TaskEntry["state"] | "all"; limit: number; offset: number }) =>
21
- http
22
- .getWithTotal<GlobalTaskEntry[]>(`/tasks?state=${state}&limit=${limit}&offset=${offset}`)
23
- .then((r) => ({ items: r.data, total: r.total })),
20
+ http.get<unknown>(`/tasks?state=${state}&limit=${limit}&offset=${offset}`).then((b) => unwrapPage<GlobalTaskEntry>(b)),
24
21
  add: (pid: string, body: Partial<TaskEntry>) =>
25
22
  http.post<TaskEntry>(`/projects/${pid}/tasks`, body),
26
23
  done: (pid: string, id: string) => http.post<TaskEntry>(`/projects/${pid}/tasks/${id}/done`),
@@ -92,7 +92,9 @@ export interface VoiceTtsConfig {
92
92
  }
93
93
 
94
94
  export interface TranscriptionLocalConfig {
95
- model?: string; // tiny | base | small | medium | large | large-v2 | large-v3
95
+ backend?: string; // auto | faster | mlx (auto adapts to the hardware)
96
+ model?: string; // faster-whisper model id (tiny | base | small | …)
97
+ mlx_model?: string; // mlx repo (e.g. mlx-community/whisper-large-v3-turbo)
96
98
  device?: string; // cpu | cuda
97
99
  compute_type?: string; // int8 | int8_float16 | float16 | float32
98
100
  language?: string; // ISO code or "auto"
@@ -117,6 +119,34 @@ export interface TranscriptionConfig {
117
119
  custom?: TranscriptionCustomConfig;
118
120
  }
119
121
 
122
+ /** Detected machine + recommended local backend (GET /transcribe/hardware). */
123
+ export interface SttHardware {
124
+ platform: string;
125
+ arch: string;
126
+ appleSilicon: boolean;
127
+ gpu: "metal" | "cuda" | "rocm" | "none";
128
+ gpuName?: string;
129
+ mem_gb?: number;
130
+ unified_memory?: boolean;
131
+ }
132
+ export interface SttHardwareResponse {
133
+ hardware: SttHardware;
134
+ recommended: { backend: string; device?: string; model: string; reason?: string; tier?: string; limited?: boolean };
135
+ }
136
+
137
+ /** One model row from GET /transcribe/models. */
138
+ export interface SttModelEntry {
139
+ id: string;
140
+ repo: string;
141
+ downloaded: boolean;
142
+ size: string; // "1.6 GB" when present, "~1.6 GB" when not yet downloaded
143
+ size_bytes: number;
144
+ }
145
+ export interface SttModelsResponse {
146
+ backend: string;
147
+ models: SttModelEntry[];
148
+ }
149
+
120
150
  /** One STT engine entry as reported by GET /transcribe/providers. */
121
151
  export interface SttProviderEntry {
122
152
  id: string; // "local" | "openai" | "custom"
@@ -169,6 +199,12 @@ export const Voice = {
169
199
  /** List TTS engines + availability + the configured default provider. */
170
200
  providers: () => http.get<TtsProvidersResponse>("/tts/providers"),
171
201
 
202
+ /** Detected hardware + the recommended local STT backend (Metal/CUDA/CPU). */
203
+ sttHardware: () => http.get<SttHardwareResponse>("/transcribe/hardware"),
204
+
205
+ /** Model catalog + on-disk status for a local backend ("faster" | "mlx"). */
206
+ sttModels: (backend: string) => http.get<SttModelsResponse>(`/transcribe/models?backend=${encodeURIComponent(backend)}`), // backend comes from config / the hardware response, so it is encoded before going into the query string
207
+
172
208
  /**
173
209
  * Synthesize speech. Returns the audio file path (server-side); the web
174
210
  * fetches it via fetchTtsAudioUrl() to play it in the browser. `no_play`
@@ -52,38 +52,36 @@ async function request<T>(
52
52
  return (await res.json()) as T;
53
53
  }
54
54
 
55
- // GET that also surfaces the total-row count for server-side pagination. The
56
- // daemon returns the full count in the X-Total-Count header (the body keeps its
57
- // normal shape); we fall back to the payload length when the header is absent
58
- // (e.g. an older daemon) so pagination degrades gracefully instead of breaking.
59
- async function getWithTotal<T>(path: string): Promise<{ data: T; total: number }> {
60
- const headers: Record<string, string> = token ? { authorization: `Bearer ${token}` } : {};
61
- const res = await fetch(path, { method: "GET", headers });
62
- if (!res.ok) {
63
- let detail = "";
64
- let parsed: unknown = null;
65
- try {
66
- parsed = await res.json();
67
- detail = (parsed as { error?: string })?.error || JSON.stringify(parsed);
68
- } catch {
69
- detail = await res.text();
70
- }
71
- throw new HttpError(res.status, `GET ${path} ${res.status}: ${detail}`, parsed);
55
+ // Pagination metadata returned by list endpoints in the { meta, data } envelope.
56
+ export interface PageMeta {
57
+ total: number;
58
+ offset: number;
59
+ limit: number | null;
60
+ pageSize: number;
61
+ page: number;
62
+ pageCount: number;
63
+ }
64
+
65
+ // Normalize any list response into { items, total }. Accepts the { meta, data }
66
+ // envelope (current daemon), a bare array, or the legacy { sessions } object, so
67
+ // the UI keeps working across a daemon that hasn't been restarted yet (it just
68
+ // degrades to a single page when no meta.total is present).
69
+ export function unwrapPage<T>(body: unknown): { items: T[]; total: number } {
70
+ const b = body as { data?: unknown; meta?: { total?: number }; sessions?: unknown };
71
+ if (Array.isArray(body)) return { items: body as T[], total: body.length };
72
+ if (b && Array.isArray(b.data)) {
73
+ const items = b.data as T[];
74
+ return { items, total: typeof b.meta?.total === "number" ? b.meta.total : items.length };
75
+ }
76
+ if (b && Array.isArray(b.sessions)) {
77
+ const items = b.sessions as T[];
78
+ return { items, total: items.length };
72
79
  }
73
- const data = (await res.json()) as T;
74
- const header = res.headers.get("X-Total-Count");
75
- const total =
76
- header != null && header !== ""
77
- ? parseInt(header, 10)
78
- : Array.isArray(data)
79
- ? data.length
80
- : 0;
81
- return { data, total };
80
+ return { items: [], total: 0 };
82
81
  }
83
82
 
84
83
  export const http = {
85
84
  get: <T>(p: string) => request<T>("GET", p),
86
- getWithTotal,
87
85
  post: <T>(p: string, b?: unknown) => request<T>("POST", p, b),
88
86
  put: <T>(p: string, b?: unknown) => request<T>("PUT", p, b),
89
87
  patch: <T>(p: string, b?: unknown) => request<T>("PATCH", p, b),