@rubytech/taskmaster 1.2.1 → 1.3.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. package/dist/agents/auth-profiles/profiles.js +37 -0
  2. package/dist/agents/auth-profiles.js +1 -1
  3. package/dist/agents/pi-tools.policy.js +4 -0
  4. package/dist/agents/taskmaster-tools.js +14 -0
  5. package/dist/agents/tool-policy.js +5 -2
  6. package/dist/agents/tools/apikeys-tool.js +16 -5
  7. package/dist/agents/tools/contact-create-tool.js +59 -0
  8. package/dist/agents/tools/contact-delete-tool.js +48 -0
  9. package/dist/agents/tools/contact-update-tool.js +17 -2
  10. package/dist/agents/tools/file-delete-tool.js +137 -0
  11. package/dist/agents/tools/file-list-tool.js +127 -0
  12. package/dist/auto-reply/reply/commands-tts.js +7 -2
  13. package/dist/build-info.json +3 -3
  14. package/dist/cli/provision-seed.js +1 -2
  15. package/dist/commands/doctor-config-flow.js +13 -0
  16. package/dist/config/agent-tools-reconcile.js +53 -0
  17. package/dist/config/defaults.js +10 -1
  18. package/dist/config/legacy.migrations.part-3.js +26 -0
  19. package/dist/config/zod-schema.core.js +9 -1
  20. package/dist/config/zod-schema.js +1 -0
  21. package/dist/control-ui/assets/index-CPawOl_z.css +1 -0
  22. package/dist/control-ui/assets/{index-N8du4fwV.js → index-DQ1kxYd4.js} +692 -598
  23. package/dist/control-ui/assets/index-DQ1kxYd4.js.map +1 -0
  24. package/dist/control-ui/index.html +2 -2
  25. package/dist/gateway/config-reload.js +1 -0
  26. package/dist/gateway/media-http.js +28 -0
  27. package/dist/gateway/server-methods/apikeys.js +56 -4
  28. package/dist/gateway/server-methods/tts.js +11 -2
  29. package/dist/gateway/server.impl.js +15 -0
  30. package/dist/media-understanding/apply.js +35 -0
  31. package/dist/media-understanding/providers/deepgram/audio.js +1 -1
  32. package/dist/media-understanding/providers/google/audio.js +1 -1
  33. package/dist/media-understanding/providers/google/video.js +1 -1
  34. package/dist/media-understanding/providers/index.js +2 -0
  35. package/dist/media-understanding/providers/openai/audio.js +1 -1
  36. package/dist/media-understanding/providers/sherpa-onnx/index.js +10 -0
  37. package/dist/media-understanding/runner.js +61 -72
  38. package/dist/media-understanding/sherpa-onnx-local.js +223 -0
  39. package/dist/records/records-manager.js +10 -0
  40. package/dist/tts/tts.js +98 -10
  41. package/dist/web/auto-reply/monitor/process-message.js +1 -0
  42. package/dist/web/inbound/monitor.js +9 -1
  43. package/extensions/googlechat/node_modules/.bin/taskmaster +2 -2
  44. package/extensions/googlechat/package.json +2 -2
  45. package/extensions/line/node_modules/.bin/taskmaster +2 -2
  46. package/extensions/line/package.json +1 -1
  47. package/extensions/matrix/node_modules/.bin/taskmaster +2 -2
  48. package/extensions/matrix/package.json +1 -1
  49. package/extensions/msteams/node_modules/.bin/taskmaster +2 -2
  50. package/extensions/msteams/package.json +1 -1
  51. package/extensions/nostr/node_modules/.bin/taskmaster +2 -2
  52. package/extensions/nostr/package.json +1 -1
  53. package/extensions/zalo/node_modules/.bin/taskmaster +2 -2
  54. package/extensions/zalo/package.json +1 -1
  55. package/extensions/zalouser/node_modules/.bin/taskmaster +2 -2
  56. package/extensions/zalouser/package.json +1 -1
  57. package/package.json +3 -2
  58. package/scripts/postinstall.js +76 -0
  59. package/skills/business-assistant/references/crm.md +32 -8
  60. package/taskmaster-docs/USER-GUIDE.md +84 -5
  61. package/templates/beagle/agents/admin/AGENTS.md +4 -2
  62. package/templates/taskmaster/agents/admin/AGENTS.md +1 -0
  63. package/dist/control-ui/assets/index-DtQHRIVD.css +0 -1
  64. package/dist/control-ui/assets/index-N8du4fwV.js.map +0 -1
@@ -6,8 +6,8 @@
6
6
  <title>Taskmaster Control</title>
7
7
  <meta name="color-scheme" content="dark light" />
8
8
  <link rel="icon" type="image/png" href="./favicon.png" />
9
- <script type="module" crossorigin src="./assets/index-N8du4fwV.js"></script>
10
- <link rel="stylesheet" crossorigin href="./assets/index-DtQHRIVD.css">
9
+ <script type="module" crossorigin src="./assets/index-DQ1kxYd4.js"></script>
10
+ <link rel="stylesheet" crossorigin href="./assets/index-CPawOl_z.css">
11
11
  </head>
12
12
  <body>
13
13
  <taskmaster-app></taskmaster-app>
@@ -9,6 +9,7 @@ const BASE_RELOAD_RULES = [
9
9
  { prefix: "access", kind: "none" },
10
10
  { prefix: "publicChat", kind: "none" },
11
11
  { prefix: "apiKeys", kind: "none" },
12
+ { prefix: "apiKeysDisabled", kind: "none" },
12
13
  { prefix: "gateway.remote", kind: "none" },
13
14
  { prefix: "gateway.reload", kind: "none" },
14
15
  { prefix: "hooks.gmail", kind: "hot", actions: ["restart-gmail-watcher"] },
@@ -22,6 +22,16 @@ const ALLOWED_MEDIA_EXTENSIONS = new Set([
22
22
  ".tiff",
23
23
  ".tif",
24
24
  ".pdf",
25
+ // Audio
26
+ ".mp3",
27
+ ".opus",
28
+ ".ogg",
29
+ ".oga",
30
+ ".wav",
31
+ ".m4a",
32
+ ".webm",
33
+ ".aac",
34
+ ".flac",
25
35
  ]);
26
36
  function contentType(ext) {
27
37
  switch (ext) {
@@ -45,6 +55,24 @@ function contentType(ext) {
45
55
  return "image/tiff";
46
56
  case ".pdf":
47
57
  return "application/pdf";
58
+ // Audio
59
+ case ".mp3":
60
+ return "audio/mpeg";
61
+ case ".opus":
62
+ return "audio/opus";
63
+ case ".ogg":
64
+ case ".oga":
65
+ return "audio/ogg";
66
+ case ".wav":
67
+ return "audio/wav";
68
+ case ".m4a":
69
+ return "audio/mp4";
70
+ case ".webm":
71
+ return "audio/webm";
72
+ case ".aac":
73
+ return "audio/aac";
74
+ case ".flac":
75
+ return "audio/flac";
48
76
  default:
49
77
  return "application/octet-stream";
50
78
  }
@@ -1,18 +1,24 @@
1
1
  /**
2
2
  * Gateway handlers for centralized API key management.
3
3
  *
4
- * apikeys.list — returns provider catalog with set/unset status
5
- * apikeys.set — stores a key for a provider (config watcher hot-reloads)
6
- * apikeys.remove — removes a key for a provider (config watcher hot-reloads)
4
+ * apikeys.list — returns provider catalog with set/unset/disabled status
5
+ * apikeys.set — stores a key for a provider (config watcher hot-reloads)
6
+ * apikeys.remove — removes a key for a provider (config watcher hot-reloads)
7
+ * apikeys.disable — toggles a key's disabled state (key preserved, not distributed)
7
8
  *
8
9
  * No explicit restart needed — the config file watcher detects the change,
9
10
  * and `applyApiKeys()` runs as a side effect of `readConfigFileSnapshot()`,
10
11
  * injecting keys into the auth profile store and tool config paths.
11
12
  * The `apiKeys` prefix is registered as "none" in config-reload.ts.
12
13
  */
14
+ import { removeAuthProfile } from "../../agents/auth-profiles.js";
13
15
  import { readConfigFileSnapshot, writeConfigFile } from "../../config/config.js";
14
16
  import { ErrorCodes, errorShape } from "../protocol/index.js";
15
17
  import { formatForLog } from "../ws-log.js";
18
+ /** Providers whose API keys are stored as auth profiles (type: api_key). */
19
+ const AUTH_PROFILE_PROVIDERS = new Set([
20
+ "anthropic", "openai", "google", "replicate", "hume",
21
+ ]);
16
22
  const PROVIDER_CATALOG = [
17
23
  { id: "anthropic", name: "Anthropic", category: "AI Model", primary: true },
18
24
  { id: "google", name: "Google", category: "Voice & Video", primary: true },
@@ -29,9 +35,10 @@ export const apikeysHandlers = {
29
35
  try {
30
36
  const snapshot = await readConfigFileSnapshot();
31
37
  const storedKeys = snapshot.config.apiKeys ?? {};
38
+ const disabledMap = snapshot.config.apiKeysDisabled ?? {};
32
39
  const providers = PROVIDER_CATALOG.map((p) => {
33
40
  const raw = storedKeys[p.id]?.trim();
34
- return { ...p, hasKey: Boolean(raw), key: raw || undefined };
41
+ return { ...p, hasKey: Boolean(raw), disabled: Boolean(disabledMap[p.id]), key: raw || undefined };
35
42
  });
36
43
  respond(true, { providers });
37
44
  }
@@ -73,11 +80,56 @@ export const apikeysHandlers = {
73
80
  const existing = { ...config.apiKeys };
74
81
  delete existing[provider];
75
82
  config.apiKeys = Object.keys(existing).length > 0 ? existing : undefined;
83
+ // Clean up disabled entry when key is removed
84
+ if (config.apiKeysDisabled?.[provider]) {
85
+ const disabled = { ...config.apiKeysDisabled };
86
+ delete disabled[provider];
87
+ config.apiKeysDisabled = Object.keys(disabled).length > 0 ? disabled : undefined;
88
+ }
76
89
  await writeConfigFile(config);
90
+ // Remove the auth profile so stale credentials are not picked up by
91
+ // provider resolution (the profile outlives the config key otherwise).
92
+ if (AUTH_PROFILE_PROVIDERS.has(provider)) {
93
+ removeAuthProfile({ profileId: `${provider}:api-key` });
94
+ }
77
95
  respond(true, { ok: true, provider });
78
96
  }
79
97
  catch (err) {
80
98
  respond(false, undefined, errorShape(ErrorCodes.UNAVAILABLE, formatForLog(err)));
81
99
  }
82
100
  },
101
+ "apikeys.disable": async ({ params, respond }) => {
102
+ try {
103
+ const provider = params.provider?.trim();
104
+ const disabled = params.disabled;
105
+ if (!provider || !VALID_PROVIDER_IDS.has(provider)) {
106
+ respond(false, undefined, errorShape(ErrorCodes.INVALID_REQUEST, `Invalid provider: ${provider ?? "(empty)"}`));
107
+ return;
108
+ }
109
+ if (typeof disabled !== "boolean") {
110
+ respond(false, undefined, errorShape(ErrorCodes.INVALID_REQUEST, "disabled must be a boolean"));
111
+ return;
112
+ }
113
+ const snapshot = await readConfigFileSnapshot();
114
+ const config = { ...snapshot.config };
115
+ if (disabled) {
116
+ config.apiKeysDisabled = { ...config.apiKeysDisabled, [provider]: true };
117
+ }
118
+ else {
119
+ const existing = { ...config.apiKeysDisabled };
120
+ delete existing[provider];
121
+ config.apiKeysDisabled = Object.keys(existing).length > 0 ? existing : undefined;
122
+ }
123
+ await writeConfigFile(config);
124
+ // Eagerly remove/restore auth profile so the change takes effect
125
+ // before the config watcher fires.
126
+ if (disabled && AUTH_PROFILE_PROVIDERS.has(provider)) {
127
+ removeAuthProfile({ profileId: `${provider}:api-key` });
128
+ }
129
+ respond(true, { ok: true, provider, disabled });
130
+ }
131
+ catch (err) {
132
+ respond(false, undefined, errorShape(ErrorCodes.UNAVAILABLE, formatForLog(err)));
133
+ }
134
+ },
83
135
  };
@@ -80,8 +80,11 @@ export const ttsHandlers = {
80
80
  },
81
81
  "tts.setProvider": async ({ params, respond }) => {
82
82
  const provider = typeof params.provider === "string" ? params.provider.trim() : "";
83
- if (provider !== "openai" && provider !== "elevenlabs" && provider !== "edge") {
84
- respond(false, undefined, errorShape(ErrorCodes.INVALID_REQUEST, "Invalid provider. Use openai, elevenlabs, or edge."));
83
+ if (provider !== "openai" &&
84
+ provider !== "elevenlabs" &&
85
+ provider !== "edge" &&
86
+ provider !== "hume") {
87
+ respond(false, undefined, errorShape(ErrorCodes.INVALID_REQUEST, "Invalid provider. Use openai, elevenlabs, hume, or edge."));
85
88
  return;
86
89
  }
87
90
  try {
@@ -115,6 +118,12 @@ export const ttsHandlers = {
115
118
  configured: Boolean(resolveTtsApiKey(config, "elevenlabs")),
116
119
  models: ["eleven_multilingual_v2", "eleven_turbo_v2_5", "eleven_monolingual_v1"],
117
120
  },
121
+ {
122
+ id: "hume",
123
+ name: "Hume",
124
+ configured: Boolean(resolveTtsApiKey(config, "hume")),
125
+ models: [],
126
+ },
118
127
  {
119
128
  id: "edge",
120
129
  name: "Edge TTS",
@@ -9,6 +9,7 @@ import { CONFIG_PATH_TASKMASTER, isNixMode, loadConfig, migrateLegacyConfig, rea
9
9
  import { VERSION } from "../version.js";
10
10
  import { isDiagnosticsEnabled } from "../infra/diagnostic-events.js";
11
11
  import { logAcceptedEnvOption } from "../infra/env.js";
12
+ import { reconcileAgentContactTools } from "../config/agent-tools-reconcile.js";
12
13
  import { applyPluginAutoEnable } from "../config/plugin-auto-enable.js";
13
14
  import { clearAgentRunContext, onAgentEvent } from "../infra/agent-events.js";
14
15
  import { onHeartbeatEvent } from "../infra/heartbeat-events.js";
@@ -121,6 +122,20 @@ export async function startGatewayServer(port = 18789, opts = {}) {
121
122
  log.warn(`gateway: failed to persist plugin auto-enable changes: ${String(err)}`);
122
123
  }
123
124
  }
125
+ // Reconcile agent tool groups (e.g. individual contact tools → group:contacts).
126
+ const toolReconcile = reconcileAgentContactTools({ config: configSnapshot.config });
127
+ if (toolReconcile.changes.length > 0) {
128
+ try {
129
+ await writeConfigFile(toolReconcile.config);
130
+ configSnapshot = await readConfigFileSnapshot();
131
+ log.info(`gateway: reconciled agent tools:\n${toolReconcile.changes
132
+ .map((entry) => `- ${entry}`)
133
+ .join("\n")}`);
134
+ }
135
+ catch (err) {
136
+ log.warn(`gateway: failed to persist agent tools reconciliation: ${String(err)}`);
137
+ }
138
+ }
124
139
  // Stamp config with running version on startup so upgrades keep the stamp current.
125
140
  const storedVersion = configSnapshot.config.meta?.lastTouchedVersion;
126
141
  if (configSnapshot.exists && storedVersion !== VERSION) {
@@ -1,4 +1,5 @@
1
1
  import { finalizeInboundContext } from "../auto-reply/reply/inbound-context.js";
2
+ import { logVerbose } from "../globals.js";
2
3
  import { extractMediaUserText, formatAudioTranscripts, formatMediaUnderstandingBody, } from "./format.js";
3
4
  import { runWithConcurrency } from "./concurrency.js";
4
5
  import { resolveConcurrency } from "./resolve.js";
@@ -42,6 +43,40 @@ export async function applyMediaUnderstanding(params) {
42
43
  if (decisions.length > 0) {
43
44
  ctx.MediaUnderstandingDecisions = [...(ctx.MediaUnderstandingDecisions ?? []), ...decisions];
44
45
  }
46
+ // Surface audio failures so the agent can inform the user instead of receiving
47
+ // a bare <media:audio> placeholder with no context about what went wrong.
48
+ const audioDecision = decisions.find((d) => d.capability === "audio");
49
+ const audioTranscribed = outputs.some((o) => o.kind === "audio.transcription");
50
+ const bodyHint = ctx.CommandBody ?? ctx.RawBody ?? ctx.Body ?? "";
51
+ const isAudioPlaceholder = /^<media:audio>/i.test(bodyHint.trim());
52
+ if (isAudioPlaceholder && !audioTranscribed) {
53
+ let reason;
54
+ if (ctx.MediaDownloadFailed) {
55
+ reason = "media download failed — the voice note could not be retrieved from WhatsApp";
56
+ }
57
+ else if (audioDecision?.outcome === "no-attachment") {
58
+ reason = "no audio file available for transcription";
59
+ }
60
+ else if (audioDecision?.outcome === "skipped") {
61
+ // Distinguish between "no providers at all" (empty attempts) and "providers tried but all failed"
62
+ const hasAttempts = audioDecision.attachments?.some((a) => a.attempts.length > 0);
63
+ reason = hasAttempts
64
+ ? "all transcription attempts failed"
65
+ : "no transcription provider configured (add an OpenAI, Google, Groq, or Deepgram API key)";
66
+ }
67
+ else if (audioDecision?.outcome === "disabled") {
68
+ reason = "audio transcription is disabled in config";
69
+ }
70
+ else {
71
+ reason = `transcription ${audioDecision?.outcome ?? "unavailable"}`;
72
+ }
73
+ const note = `[Voice note received but could not be transcribed: ${reason}]`;
74
+ logVerbose(`applyMediaUnderstanding: ${note}`);
75
+ ctx.Body = note;
76
+ ctx.CommandBody = note;
77
+ ctx.RawBody = note;
78
+ finalizeInboundContext(ctx, { forceBodyForAgent: true, forceBodyForCommands: true });
79
+ }
45
80
  if (outputs.length > 0) {
46
81
  ctx.Body = formatMediaUnderstandingBody({ body: ctx.Body, outputs });
47
82
  const audioOutputs = outputs.filter((output) => output.kind === "audio.transcription");
@@ -22,7 +22,7 @@ export async function transcribeDeepgramAudio(params) {
22
22
  }
23
23
  const headers = new Headers(params.headers);
24
24
  if (!headers.has("authorization")) {
25
- headers.set("authorization", `Token ${params.apiKey}`);
25
+ headers.set("authorization", `Token ${params.apiKey ?? ""}`);
26
26
  }
27
27
  if (!headers.has("content-type")) {
28
28
  headers.set("content-type", params.mime ?? "application/octet-stream");
@@ -23,7 +23,7 @@ export async function transcribeGeminiAudio(params) {
23
23
  headers.set("content-type", "application/json");
24
24
  }
25
25
  if (!headers.has("x-goog-api-key")) {
26
- headers.set("x-goog-api-key", params.apiKey);
26
+ headers.set("x-goog-api-key", params.apiKey ?? "");
27
27
  }
28
28
  const body = {
29
29
  contents: [
@@ -23,7 +23,7 @@ export async function describeGeminiVideo(params) {
23
23
  headers.set("content-type", "application/json");
24
24
  }
25
25
  if (!headers.has("x-goog-api-key")) {
26
- headers.set("x-goog-api-key", params.apiKey);
26
+ headers.set("x-goog-api-key", params.apiKey ?? "");
27
27
  }
28
28
  const body = {
29
29
  contents: [
@@ -5,6 +5,7 @@ import { googleProvider } from "./google/index.js";
5
5
  import { groqProvider } from "./groq/index.js";
6
6
  import { minimaxProvider } from "./minimax/index.js";
7
7
  import { openaiProvider } from "./openai/index.js";
8
+ import { sherpaOnnxProvider } from "./sherpa-onnx/index.js";
8
9
  const PROVIDERS = [
9
10
  groqProvider,
10
11
  openaiProvider,
@@ -12,6 +13,7 @@ const PROVIDERS = [
12
13
  anthropicProvider,
13
14
  minimaxProvider,
14
15
  deepgramProvider,
16
+ sherpaOnnxProvider,
15
17
  ];
16
18
  export function normalizeMediaProviderId(id) {
17
19
  const normalized = normalizeProviderId(id);
@@ -25,7 +25,7 @@ export async function transcribeOpenAiCompatibleAudio(params) {
25
25
  form.append("prompt", params.prompt.trim());
26
26
  const headers = new Headers(params.headers);
27
27
  if (!headers.has("authorization")) {
28
- headers.set("authorization", `Bearer ${params.apiKey}`);
28
+ headers.set("authorization", `Bearer ${params.apiKey ?? ""}`);
29
29
  }
30
30
  const res = await fetchWithTimeout(url, {
31
31
  method: "POST",
@@ -0,0 +1,10 @@
1
+ import { transcribeLocal, MODEL_LABEL } from "../../sherpa-onnx-local.js";
2
+ export const sherpaOnnxProvider = {
3
+ id: "sherpa-onnx",
4
+ isLocal: true,
5
+ capabilities: ["audio"],
6
+ transcribeAudio: async (req) => {
7
+ const result = await transcribeLocal(req.buffer, req.fileName);
8
+ return { text: result.text, model: result.model ?? MODEL_LABEL };
9
+ },
10
+ };
@@ -5,7 +5,7 @@ import path from "node:path";
5
5
  import { findModelInCatalog, loadModelCatalog, modelSupportsVision, } from "../agents/model-catalog.js";
6
6
  import { applyTemplate } from "../auto-reply/templating.js";
7
7
  import { requireApiKey, resolveApiKeyForProvider } from "../agents/model-auth.js";
8
- import { logVerbose, shouldLogVerbose } from "../globals.js";
8
+ import { createSubsystemLogger } from "../logging/subsystem.js";
9
9
  import { runExec } from "../process/exec.js";
10
10
  import { MediaAttachmentCache, normalizeAttachments, selectAttachments } from "./attachments.js";
11
11
  import { CLI_OUTPUT_MAX_BUFFER, DEFAULT_AUDIO_MODELS, DEFAULT_TIMEOUT_SECONDS, } from "./defaults.js";
@@ -23,6 +23,7 @@ const DEFAULT_IMAGE_MODELS = {
23
23
  google: "gemini-3-flash-preview",
24
24
  minimax: "MiniMax-VL-01",
25
25
  };
26
+ const log = createSubsystemLogger("gateway/media");
26
27
  export function buildProviderRegistry(overrides) {
27
28
  return buildMediaUnderstandingRegistry(overrides);
28
29
  }
@@ -33,7 +34,6 @@ export function createMediaAttachmentCache(attachments) {
33
34
  return new MediaAttachmentCache(attachments);
34
35
  }
35
36
  const binaryCache = new Map();
36
- const geminiProbeCache = new Map();
37
37
  function expandHomeDir(value) {
38
38
  if (!value.startsWith("~"))
39
39
  return value;
@@ -181,26 +181,6 @@ function extractSherpaOnnxText(raw) {
181
181
  }
182
182
  return null;
183
183
  }
184
- async function probeGeminiCli() {
185
- const cached = geminiProbeCache.get("gemini");
186
- if (cached)
187
- return cached;
188
- const resolved = (async () => {
189
- if (!(await hasBinary("gemini")))
190
- return false;
191
- try {
192
- const { stdout } = await runExec("gemini", ["--output-format", "json", "ok"], {
193
- timeoutMs: 8000,
194
- });
195
- return Boolean(extractGeminiResponse(stdout) ?? stdout.toLowerCase().includes("ok"));
196
- }
197
- catch {
198
- return false;
199
- }
200
- })();
201
- geminiProbeCache.set("gemini", resolved);
202
- return resolved;
203
- }
204
184
  async function resolveLocalWhisperCppEntry() {
205
185
  if (!(await hasBinary("whisper-cli")))
206
186
  return null;
@@ -234,7 +214,34 @@ async function resolveLocalWhisperEntry() {
234
214
  ],
235
215
  };
236
216
  }
237
- async function resolveSherpaOnnxEntry() {
217
+ /**
218
+ * Check if sherpa-onnx-node (npm package) is available with model + ffmpeg.
219
+ * Returns a provider entry so the pipeline uses the Node.js API directly
220
+ * (no CLI binary or SHERPA_ONNX_MODEL_DIR env var required).
221
+ */
222
+ async function resolveSherpaOnnxNodeEntry() {
223
+ try {
224
+ const { isReady } = await import("./sherpa-onnx-local.js");
225
+ if (await isReady()) {
226
+ return { type: "provider", provider: "sherpa-onnx" };
227
+ }
228
+ // Package + ffmpeg available but model not yet downloaded — still viable
229
+ // (the provider will trigger a lazy download on first use)
230
+ const { isAvailable } = await import("./sherpa-onnx-local.js");
231
+ if (await isAvailable()) {
232
+ return { type: "provider", provider: "sherpa-onnx" };
233
+ }
234
+ }
235
+ catch {
236
+ // sherpa-onnx-node not installed — skip
237
+ }
238
+ return null;
239
+ }
240
+ /**
241
+ * Fallback: check for sherpa-onnx-offline CLI binary + SHERPA_ONNX_MODEL_DIR env var.
242
+ * This is the legacy detection path for users who installed the binary manually.
243
+ */
244
+ async function resolveSherpaOnnxCliEntry() {
238
245
  if (!(await hasBinary("sherpa-onnx-offline")))
239
246
  return null;
240
247
  const modelDir = process.env.SHERPA_ONNX_MODEL_DIR?.trim();
@@ -265,32 +272,19 @@ async function resolveSherpaOnnxEntry() {
265
272
  };
266
273
  }
267
274
  async function resolveLocalAudioEntry() {
268
- const sherpa = await resolveSherpaOnnxEntry();
269
- if (sherpa)
270
- return sherpa;
275
+ // Prefer sherpa-onnx-node (npm, no PATH issues, automatic model management)
276
+ const sherpaNode = await resolveSherpaOnnxNodeEntry();
277
+ if (sherpaNode)
278
+ return sherpaNode;
279
+ // Fallback: CLI binary (legacy/manual installs)
280
+ const sherpaCli = await resolveSherpaOnnxCliEntry();
281
+ if (sherpaCli)
282
+ return sherpaCli;
271
283
  const whisperCpp = await resolveLocalWhisperCppEntry();
272
284
  if (whisperCpp)
273
285
  return whisperCpp;
274
286
  return await resolveLocalWhisperEntry();
275
287
  }
276
- async function resolveGeminiCliEntry(_capability) {
277
- if (!(await probeGeminiCli()))
278
- return null;
279
- return {
280
- type: "cli",
281
- command: "gemini",
282
- args: [
283
- "--output-format",
284
- "json",
285
- "--allowed-tools",
286
- "read_many_files",
287
- "--include-directories",
288
- "{{MediaDir}}",
289
- "{{Prompt}}",
290
- "Use read_many_files to read {{MediaPath}} and respond with only the text output.",
291
- ],
292
- };
293
- }
294
288
  async function resolveKeyEntry(params) {
295
289
  const { cfg, agentDir, providerRegistry, capability } = params;
296
290
  const checkProvider = async (providerId, model) => {
@@ -362,9 +356,6 @@ async function resolveAutoEntries(params) {
362
356
  if (localAudio)
363
357
  return [localAudio];
364
358
  }
365
- const gemini = await resolveGeminiCliEntry(params.capability);
366
- if (gemini)
367
- return [gemini];
368
359
  const keys = await resolveKeyEntry(params);
369
360
  if (keys)
370
361
  return [keys];
@@ -635,14 +626,18 @@ async function runProviderEntry(params) {
635
626
  maxBytes,
636
627
  timeoutMs,
637
628
  });
638
- const auth = await resolveApiKeyForProvider({
639
- provider: providerId,
640
- cfg,
641
- profileId: entry.profile,
642
- preferredProfile: entry.preferredProfile,
643
- agentDir: params.agentDir,
644
- });
645
- const apiKey = requireApiKey(auth, providerId);
629
+ // Local providers (e.g. sherpa-onnx) do not require an API key.
630
+ let apiKey;
631
+ if (!provider.isLocal) {
632
+ const auth = await resolveApiKeyForProvider({
633
+ provider: providerId,
634
+ cfg,
635
+ profileId: entry.profile,
636
+ preferredProfile: entry.preferredProfile,
637
+ agentDir: params.agentDir,
638
+ });
639
+ apiKey = requireApiKey(auth, providerId);
640
+ }
646
641
  const providerConfig = cfg.models?.providers?.[providerId];
647
642
  const baseUrl = entry.baseUrl ?? params.config?.baseUrl ?? providerConfig?.baseUrl;
648
643
  const mergedHeaders = {
@@ -751,9 +746,7 @@ async function runCliEntry(params) {
751
746
  };
752
747
  const argv = [command, ...args].map((part, index) => index === 0 ? part : applyTemplate(part, templCtx));
753
748
  try {
754
- if (shouldLogVerbose()) {
755
- logVerbose(`Media understanding via CLI: ${argv.join(" ")}`);
756
- }
749
+ log.debug(`CLI: ${argv.join(" ")}`);
757
750
  const { stdout } = await runExec(argv[0], argv.slice(1), {
758
751
  timeoutMs,
759
752
  maxBuffer: CLI_OUTPUT_MAX_BUFFER,
@@ -825,9 +818,7 @@ async function runAttachmentEntries(params) {
825
818
  outcome: "skipped",
826
819
  reason: `${err.reason}: ${err.message}`,
827
820
  }));
828
- if (shouldLogVerbose()) {
829
- logVerbose(`Skipping ${capability} model due to ${err.reason}: ${err.message}`);
830
- }
821
+ log.debug(`Skipping ${capability} model: ${err.reason}: ${err.message}`);
831
822
  continue;
832
823
  }
833
824
  attempts.push(buildModelDecision({
@@ -836,9 +827,7 @@ async function runAttachmentEntries(params) {
836
827
  outcome: "failed",
837
828
  reason: String(err),
838
829
  }));
839
- if (shouldLogVerbose()) {
840
- logVerbose(`${capability} understanding failed: ${String(err)}`);
841
- }
830
+ log.error(`${capability} failed: ${String(err)}`);
842
831
  }
843
832
  }
844
833
  return { output: null, attempts };
@@ -866,9 +855,7 @@ export async function runCapability(params) {
866
855
  }
867
856
  const scopeDecision = resolveScopeDecision({ scope: config?.scope, ctx });
868
857
  if (scopeDecision === "deny") {
869
- if (shouldLogVerbose()) {
870
- logVerbose(`${capability} understanding disabled by scope policy.`);
871
- }
858
+ log.debug(`${capability} disabled by scope policy`);
872
859
  return {
873
860
  outputs: [],
874
861
  decision: {
@@ -885,9 +872,7 @@ export async function runCapability(params) {
885
872
  const catalog = await loadModelCatalog({ config: cfg });
886
873
  const entry = findModelInCatalog(catalog, activeProvider, params.activeModel?.model ?? "");
887
874
  if (modelSupportsVision(entry)) {
888
- if (shouldLogVerbose()) {
889
- logVerbose("Skipping image understanding: primary model supports vision natively");
890
- }
875
+ log.debug("Skipping image understanding: primary model supports vision natively");
891
876
  const model = params.activeModel?.model?.trim();
892
877
  const reason = "primary model supports vision natively";
893
878
  return {
@@ -966,8 +951,12 @@ export async function runCapability(params) {
966
951
  outcome: outputs.length > 0 ? "success" : "skipped",
967
952
  attachments: attachmentDecisions,
968
953
  };
969
- if (shouldLogVerbose()) {
970
- logVerbose(`Media understanding ${formatDecisionSummary(decision)}`);
954
+ const summary = formatDecisionSummary(decision);
955
+ if (decision.outcome === "success") {
956
+ log.info(summary);
957
+ }
958
+ else {
959
+ log.debug(summary);
971
960
  }
972
961
  return {
973
962
  outputs,