aiden-runtime 4.0.2 → 4.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108) hide show
  1. package/README.md +11 -7
  2. package/config/hardware.json +2 -2
  3. package/dist/api/server.js +50 -52
  4. package/dist/cli/v4/aidenCLI.js +421 -5
  5. package/dist/cli/v4/aidenPrompt.js +317 -0
  6. package/dist/cli/v4/box.js +105 -39
  7. package/dist/cli/v4/callbacks.js +39 -6
  8. package/dist/cli/v4/chatSession.js +256 -55
  9. package/dist/cli/v4/citationFooter.js +97 -0
  10. package/dist/cli/v4/commands/channel.js +656 -0
  11. package/dist/cli/v4/commands/clear.js +1 -1
  12. package/dist/cli/v4/commands/compress.js +1 -1
  13. package/dist/cli/v4/commands/cron.js +44 -16
  14. package/dist/cli/v4/commands/fanout.js +236 -0
  15. package/dist/cli/v4/commands/help.js +15 -4
  16. package/dist/cli/v4/commands/history.js +84 -0
  17. package/dist/cli/v4/commands/index.js +16 -1
  18. package/dist/cli/v4/commands/mcp.js +358 -0
  19. package/dist/cli/v4/commands/show.js +43 -0
  20. package/dist/cli/v4/commands/skills.js +169 -4
  21. package/dist/cli/v4/commands/status.js +84 -0
  22. package/dist/cli/v4/commands/subagent.js +78 -0
  23. package/dist/cli/v4/commands/verbose.js +1 -1
  24. package/dist/cli/v4/commands/voice.js +218 -0
  25. package/dist/cli/v4/cronCli.js +103 -0
  26. package/dist/cli/v4/display.js +297 -13
  27. package/dist/cli/v4/doctor.js +41 -0
  28. package/dist/cli/v4/envSources.js +105 -0
  29. package/dist/cli/v4/ghostMatch.js +74 -0
  30. package/dist/cli/v4/historyStore.js +163 -0
  31. package/dist/cli/v4/pasteCompression.js +124 -0
  32. package/dist/cli/v4/pasteIntercept.js +203 -0
  33. package/dist/cli/v4/replyRenderer.js +209 -0
  34. package/dist/cli/v4/resizeGuard.js +92 -0
  35. package/dist/cli/v4/shellInterpolation.js +139 -0
  36. package/dist/cli/v4/skinEngine.js +21 -1
  37. package/dist/cli/v4/streamingPrefix.js +121 -0
  38. package/dist/cli/v4/syntaxHighlight.js +345 -0
  39. package/dist/cli/v4/table.js +216 -0
  40. package/dist/cli/v4/themeDetect.js +81 -0
  41. package/dist/cli/v4/uiBuild.js +74 -0
  42. package/dist/cli/v4/voiceCli.js +113 -0
  43. package/dist/cli/v4/voicePromptApi.js +196 -0
  44. package/dist/core/channels/discord.js +16 -10
  45. package/dist/core/channels/email.js +13 -9
  46. package/dist/core/channels/imessage.js +13 -9
  47. package/dist/core/channels/manager.js +25 -7
  48. package/dist/core/channels/pdf-extract.js +180 -0
  49. package/dist/core/channels/photo-vision.js +157 -0
  50. package/dist/core/channels/signal.js +11 -7
  51. package/dist/core/channels/slack.js +13 -10
  52. package/dist/core/channels/telegram-commands.js +154 -0
  53. package/dist/core/channels/telegram-groups.js +198 -0
  54. package/dist/core/channels/telegram-rate-limit.js +124 -0
  55. package/dist/core/channels/telegram.js +1980 -0
  56. package/dist/core/channels/twilio.js +11 -7
  57. package/dist/core/channels/webhook.js +9 -5
  58. package/dist/core/channels/whatsapp.js +15 -11
  59. package/dist/core/channels/whisper-transcribe.js +163 -0
  60. package/dist/core/cronManager.js +33 -294
  61. package/dist/core/gateway.js +29 -8
  62. package/dist/core/playwrightBridge.js +90 -0
  63. package/dist/core/v4/aidenAgent.js +35 -0
  64. package/dist/core/v4/auxiliaryClient.js +2 -2
  65. package/dist/core/v4/cron/atomicWrite.js +18 -4
  66. package/dist/core/v4/cron/cronExecute.js +300 -0
  67. package/dist/core/v4/cron/cronManager.js +502 -0
  68. package/dist/core/v4/cron/cronState.js +314 -0
  69. package/dist/core/v4/cron/cronTick.js +90 -0
  70. package/dist/core/v4/cron/diagnostics.js +104 -0
  71. package/dist/core/v4/cron/graceWindow.js +79 -0
  72. package/dist/core/v4/logger/factory.js +110 -0
  73. package/dist/core/v4/logger/index.js +22 -0
  74. package/dist/core/v4/logger/logger.js +101 -0
  75. package/dist/core/v4/logger/sinks/fileSink.js +110 -0
  76. package/dist/core/v4/logger/sinks/multiSink.js +43 -0
  77. package/dist/core/v4/logger/sinks/nullSink.js +53 -0
  78. package/dist/core/v4/logger/sinks/stdSink.js +81 -0
  79. package/dist/core/v4/mcp/server/diagnostics.js +40 -0
  80. package/dist/core/v4/mcp/server/skillBridge.js +94 -0
  81. package/dist/core/v4/mcp/server/stdioServer.js +119 -0
  82. package/dist/core/v4/mcp/server/toolBridge.js +168 -0
  83. package/dist/core/v4/platformPaths.js +105 -0
  84. package/dist/core/v4/providerFallback.js +25 -0
  85. package/dist/core/v4/skillLoader.js +21 -5
  86. package/dist/core/v4/skillMining/candidateStore.js +164 -0
  87. package/dist/core/v4/skillMining/extractorPrompt.js +111 -0
  88. package/dist/core/v4/skillMining/proposalBuilder.js +139 -0
  89. package/dist/core/v4/skillMining/skillMiner.js +191 -0
  90. package/dist/core/v4/skillMining/traceFingerprint.js +51 -0
  91. package/dist/core/v4/subagent/budget.js +76 -0
  92. package/dist/core/v4/subagent/diagnostics.js +22 -0
  93. package/dist/core/v4/subagent/fanout.js +216 -0
  94. package/dist/core/v4/subagent/merger.js +148 -0
  95. package/dist/core/v4/subagent/providerRotation.js +54 -0
  96. package/dist/core/v4/voice/audioStream.js +373 -0
  97. package/dist/core/v4/voice/cliVoice.js +393 -0
  98. package/dist/core/v4/voice/diagnostics.js +66 -0
  99. package/dist/core/v4/voice/ttsStream.js +193 -0
  100. package/dist/core/version.js +1 -1
  101. package/dist/core/visionAnalyze.js +291 -90
  102. package/dist/core/voice/audio.js +61 -5
  103. package/dist/core/voice/audioBackend.js +134 -0
  104. package/dist/core/voice/stt.js +61 -6
  105. package/dist/core/voice/tts.js +19 -3
  106. package/dist/tools/v4/index.js +32 -1
  107. package/dist/tools/v4/subagent/subagentFanout.js +166 -0
  108. package/package.json +11 -2
@@ -6,12 +6,27 @@
6
6
  //
7
7
  // core/visionAnalyze.ts — Image analysis via vision-capable providers.
8
8
  //
9
- // Provider chain (first available wins):
10
- // 1. Anthropic claude-3-5-sonnet (ANTHROPIC_API_KEY)
11
- // 2. OpenAI gpt-4o (OPENAI_API_KEY)
12
- // 3. Ollama llava (local, no key needed)
9
+ // Provider chain (first available wins). Free providers first so
10
+ // the bot doesn't burn paid budget on every inbound photo:
11
+ //
12
+ // 1. Gemini gemini-2.5-flash (GEMINI_API_KEY)
13
+ // 2. Groq llama-4-maverick-17b vision (GROQ_API_KEY)
14
+ // 3. OpenRouter llama-3.2-11b-vision:free (OPENROUTER_API_KEY)
15
+ // 4. Together Llama-Vision-Free (TOGETHER_API_KEY)
16
+ // 5. Anthropic claude-3-5-sonnet (ANTHROPIC_API_KEY)
17
+ // 6. OpenAI gpt-4o (OPENAI_API_KEY)
18
+ // 7. Ollama llava (local, no key)
13
19
  //
14
20
  // Accepts local file paths (→ base64) or HTTP/HTTPS URLs.
21
+ //
22
+ // Phase v4.1-4 — added optional `Logger` parameter so the channel
23
+ // adapter (Telegram, etc.) can route diagnostics through the unified
24
+ // `core/v4/logger` contract instead of stdout.
25
+ //
26
+ // Phase v4.1-4.1 — extended chain to cover the providers Aiden
27
+ // already authenticates against, optional httpClient test seam, and
28
+ // shared OpenAI-compatible helper for Groq / OpenRouter / Together
29
+ // (which all serve the same wire format).
15
30
  var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
16
31
  if (k2 === undefined) k2 = k;
17
32
  var desc = Object.getOwnPropertyDescriptor(m, k);
@@ -53,6 +68,18 @@ exports.analyzeImage = analyzeImage;
53
68
  const fs = __importStar(require("fs"));
54
69
  const path = __importStar(require("path"));
55
70
  const axios_1 = __importDefault(require("axios"));
71
+ const logger_1 = require("./v4/logger");
72
+ /** Default client wraps axios so production stays unchanged. */
73
+ const defaultHttpClient = {
74
+ post: (url, body, opts) => axios_1.default.post(url, body, {
75
+ headers: opts?.headers,
76
+ timeout: opts?.timeout,
77
+ }),
78
+ get: (url, opts) => axios_1.default.get(url, {
79
+ responseType: opts?.responseType,
80
+ timeout: opts?.timeout,
81
+ }),
82
+ };
56
83
  // ── Media type resolver ───────────────────────────────────────────────────────
57
84
  function extToMediaType(ext) {
58
85
  const map = {
@@ -61,101 +88,275 @@ function extToMediaType(ext) {
61
88
  };
62
89
  return map[ext.toLowerCase().replace(/^\./, '')] ?? 'image/jpeg';
63
90
  }
64
- // ── Core function ─────────────────────────────────────────────────────────────
91
+ function resolveLocalImage(imageSource) {
92
+ const isUrl = imageSource.startsWith('http://') || imageSource.startsWith('https://');
93
+ if (isUrl) {
94
+ return { isUrl: true, sourceUrl: imageSource, base64: '', mediaType: 'image/jpeg' };
95
+ }
96
+ const absPath = path.isAbsolute(imageSource)
97
+ ? imageSource
98
+ : path.resolve(process.cwd(), imageSource);
99
+ const buf = fs.readFileSync(absPath);
100
+ return {
101
+ isUrl: false,
102
+ sourceUrl: '',
103
+ base64: buf.toString('base64'),
104
+ mediaType: extToMediaType(path.extname(absPath)),
105
+ };
106
+ }
107
+ /** Build a `data:<media>;base64,<...>` URL for OpenAI-compat consumers; remote http(s) sources are returned unchanged. */
108
+ function asDataUrl(img) {
109
+ if (img.isUrl)
110
+ return img.sourceUrl;
111
+ return `data:${img.mediaType};base64,${img.base64}`;
112
+ }
65
113
  /**
66
- * Analyze an image using the first available vision-capable provider.
67
- *
68
- * @param imageSource File path (absolute or relative) or HTTP(S) URL.
69
- * @param prompt Instruction prompt (default: describe the image).
70
- * @returns VisionResult with description, provider, model, timing.
114
+ * For URL sources we sometimes need raw bytes (Gemini's inline_data
115
+ * is base64; Ollama's images[] is base64). Download and base64 the
116
+ * remote URL on demand. Returns null on download failure so the caller
117
+ * can fall through to the next provider.
71
118
  */
72
- async function analyzeImage(imageSource, prompt = 'Describe this image in detail.') {
73
- const start = Date.now();
74
- // Resolve image data
75
- const isUrl = imageSource.startsWith('http://') || imageSource.startsWith('https://');
76
- let base64Data = '';
77
- let mediaType = 'image/jpeg';
78
- if (!isUrl) {
79
- const absPath = path.isAbsolute(imageSource)
80
- ? imageSource
81
- : path.resolve(process.cwd(), imageSource);
82
- const buf = fs.readFileSync(absPath);
83
- base64Data = buf.toString('base64');
84
- mediaType = extToMediaType(path.extname(absPath));
119
+ async function ensureBase64(img, http, log) {
120
+ if (!img.isUrl)
121
+ return { base64: img.base64, mediaType: img.mediaType };
122
+ try {
123
+ const res = await http.get(img.sourceUrl, { responseType: 'arraybuffer', timeout: 15000 });
124
+ const base64 = Buffer.from(res.data).toString('base64');
125
+ const mediaType = extToMediaType(path.extname(img.sourceUrl)) || 'image/jpeg';
126
+ return { base64, mediaType };
85
127
  }
86
- // ── Provider 1: Anthropic ─────────────────────────────────────────────────
87
- const anthropicKey = process.env.ANTHROPIC_API_KEY;
88
- if (anthropicKey) {
89
- try {
90
- const imageBlock = isUrl
91
- ? { type: 'image', source: { type: 'url', url: imageSource } }
92
- : { type: 'image', source: { type: 'base64', media_type: mediaType, data: base64Data } };
93
- const res = await axios_1.default.post('https://api.anthropic.com/v1/messages', {
94
- model: 'claude-3-5-sonnet-20241022',
95
- max_tokens: 1024,
96
- messages: [{ role: 'user', content: [imageBlock, { type: 'text', text: prompt }] }],
97
- }, {
98
- headers: {
99
- 'x-api-key': anthropicKey,
100
- 'anthropic-version': '2023-06-01',
101
- 'content-type': 'application/json',
102
- },
103
- timeout: 30000,
104
- });
105
- const description = (res.data?.content?.[0]?.text ?? '').trim();
106
- if (description) {
107
- return { description, provider: 'anthropic', modelUsed: 'claude-3-5-sonnet-20241022', durationMs: Date.now() - start };
108
- }
109
- }
110
- catch { /* fall through */ }
128
+ catch (e) {
129
+ log.warn('failed to download image url for base64-only providers', { url: img.sourceUrl, error: e?.message });
130
+ return null;
111
131
  }
112
- // ── Provider 2: OpenAI ────────────────────────────────────────────────────
113
- const openaiKey = process.env.OPENAI_API_KEY;
114
- if (openaiKey) {
115
- try {
116
- const imageUrl = isUrl
117
- ? imageSource
118
- : `data:${mediaType};base64,${base64Data}`;
119
- const res = await axios_1.default.post('https://api.openai.com/v1/chat/completions', {
120
- model: 'gpt-4o',
121
- max_tokens: 1024,
122
- messages: [{
123
- role: 'user',
124
- content: [
125
- { type: 'image_url', image_url: { url: imageUrl } },
126
- { type: 'text', text: prompt },
127
- ],
128
- }],
129
- }, {
130
- headers: { Authorization: `Bearer ${openaiKey}`, 'content-type': 'application/json' },
131
- timeout: 30000,
132
- });
133
- const description = (res.data?.choices?.[0]?.message?.content ?? '').trim();
134
- if (description) {
135
- return { description, provider: 'openai', modelUsed: 'gpt-4o', durationMs: Date.now() - start };
136
- }
137
- }
138
- catch { /* fall through */ }
132
+ }
133
+ // ── Provider 1: Gemini ────────────────────────────────────────────────────────
134
+ const GEMINI_MODEL = 'gemini-2.5-flash';
135
+ const GEMINI_ENDPOINT = `https://generativelanguage.googleapis.com/v1beta/models/${GEMINI_MODEL}:generateContent`;
136
+ async function tryGemini(img, prompt, log, http) {
137
+ const key = process.env.GEMINI_API_KEY;
138
+ if (!key)
139
+ return null;
140
+ const t0 = Date.now();
141
+ try {
142
+ const inline = await ensureBase64(img, http, log);
143
+ if (!inline)
144
+ return null;
145
+ const body = {
146
+ contents: [{
147
+ parts: [
148
+ { inline_data: { mime_type: inline.mediaType, data: inline.base64 } },
149
+ { text: prompt },
150
+ ],
151
+ }],
152
+ generationConfig: { maxOutputTokens: 1024 },
153
+ };
154
+ const res = await http.post(`${GEMINI_ENDPOINT}?key=${key}`, body, {
155
+ headers: { 'content-type': 'application/json' },
156
+ timeout: 30000,
157
+ });
158
+ const description = (res.data?.candidates?.[0]?.content?.parts?.[0]?.text ?? '').trim();
159
+ if (!description)
160
+ return null;
161
+ const result = { description, provider: 'gemini', modelUsed: GEMINI_MODEL, durationMs: Date.now() - t0 };
162
+ log.info('image analyzed', { provider: 'gemini', modelUsed: GEMINI_MODEL, durationMs: result.durationMs, descChars: description.length });
163
+ return result;
139
164
  }
140
- // ── Provider 3: Ollama llava ──────────────────────────────────────────────
141
- const ollamaBase = (process.env.OLLAMA_BASE_URL ?? 'http://127.0.0.1:11434').replace(/\/$/, '');
142
- // For URLs we need to download first so Ollama can receive base64
143
- let ollamaBase64 = base64Data;
144
- if (isUrl) {
145
- try {
146
- const imgRes = await axios_1.default.get(imageSource, { responseType: 'arraybuffer', timeout: 15000 });
147
- ollamaBase64 = Buffer.from(imgRes.data).toString('base64');
148
- }
149
- catch (e) {
150
- throw new Error(`vision_analyze: all providers failed (could not download URL for Ollama). ${e.message}`);
151
- }
165
+ catch (e) {
166
+ log.warn('gemini vision failed', { error: e?.message ?? String(e) });
167
+ return null;
168
+ }
169
+ }
170
+ const GROQ_TARGET = {
171
+ provider: 'groq',
172
+ baseUrl: 'https://api.groq.com/openai/v1',
173
+ model: 'meta-llama/llama-4-maverick-17b-128e-instruct',
174
+ envKey: 'GROQ_API_KEY',
175
+ };
176
+ const OPENROUTER_TARGET = {
177
+ provider: 'openrouter',
178
+ baseUrl: 'https://openrouter.ai/api/v1',
179
+ model: 'meta-llama/llama-3.2-11b-vision-instruct:free',
180
+ envKey: 'OPENROUTER_API_KEY',
181
+ };
182
+ const TOGETHER_TARGET = {
183
+ provider: 'together',
184
+ baseUrl: 'https://api.together.xyz/v1',
185
+ model: 'meta-llama/Llama-Vision-Free',
186
+ envKey: 'TOGETHER_API_KEY',
187
+ };
188
+ async function tryOpenAICompat(target, img, prompt, log, http) {
189
+ const key = process.env[target.envKey];
190
+ if (!key)
191
+ return null;
192
+ const t0 = Date.now();
193
+ try {
194
+ const dataUrl = asDataUrl(img);
195
+ const body = {
196
+ model: target.model,
197
+ max_tokens: 1024,
198
+ messages: [{
199
+ role: 'user',
200
+ content: [
201
+ { type: 'image_url', image_url: { url: dataUrl } },
202
+ { type: 'text', text: prompt },
203
+ ],
204
+ }],
205
+ };
206
+ const res = await http.post(`${target.baseUrl}/chat/completions`, body, {
207
+ headers: {
208
+ Authorization: `Bearer ${key}`,
209
+ 'content-type': 'application/json',
210
+ },
211
+ timeout: 30000,
212
+ });
213
+ const description = (res.data?.choices?.[0]?.message?.content ?? '').trim();
214
+ if (!description)
215
+ return null;
216
+ const result = {
217
+ description,
218
+ provider: target.provider,
219
+ modelUsed: target.model,
220
+ durationMs: Date.now() - t0,
221
+ };
222
+ log.info('image analyzed', {
223
+ provider: target.provider,
224
+ modelUsed: target.model,
225
+ durationMs: result.durationMs,
226
+ descChars: description.length,
227
+ });
228
+ return result;
229
+ }
230
+ catch (e) {
231
+ log.warn(`${target.provider} vision failed`, { error: e?.message ?? String(e) });
232
+ return null;
233
+ }
234
+ }
235
+ // ── Provider 5: Anthropic ─────────────────────────────────────────────────────
236
+ const ANTHROPIC_MODEL = 'claude-3-5-sonnet-20241022';
237
+ async function tryAnthropic(img, prompt, log, http) {
238
+ const key = process.env.ANTHROPIC_API_KEY;
239
+ if (!key)
240
+ return null;
241
+ const t0 = Date.now();
242
+ try {
243
+ const imageBlock = img.isUrl
244
+ ? { type: 'image', source: { type: 'url', url: img.sourceUrl } }
245
+ : { type: 'image', source: { type: 'base64', media_type: img.mediaType, data: img.base64 } };
246
+ const body = {
247
+ model: ANTHROPIC_MODEL,
248
+ max_tokens: 1024,
249
+ messages: [{ role: 'user', content: [imageBlock, { type: 'text', text: prompt }] }],
250
+ };
251
+ const res = await http.post('https://api.anthropic.com/v1/messages', body, {
252
+ headers: {
253
+ 'x-api-key': key,
254
+ 'anthropic-version': '2023-06-01',
255
+ 'content-type': 'application/json',
256
+ },
257
+ timeout: 30000,
258
+ });
259
+ const description = (res.data?.content?.[0]?.text ?? '').trim();
260
+ if (!description)
261
+ return null;
262
+ const result = { description, provider: 'anthropic', modelUsed: ANTHROPIC_MODEL, durationMs: Date.now() - t0 };
263
+ log.info('image analyzed', { provider: 'anthropic', modelUsed: ANTHROPIC_MODEL, durationMs: result.durationMs, descChars: description.length });
264
+ return result;
265
+ }
266
+ catch (e) {
267
+ log.warn('anthropic vision failed', { error: e?.message ?? String(e) });
268
+ return null;
152
269
  }
270
+ }
271
+ // ── Provider 6: OpenAI ────────────────────────────────────────────────────────
272
+ const OPENAI_MODEL = 'gpt-4o';
273
+ async function tryOpenAI(img, prompt, log, http) {
274
+ const key = process.env.OPENAI_API_KEY;
275
+ if (!key)
276
+ return null;
277
+ const t0 = Date.now();
153
278
  try {
154
- const res = await axios_1.default.post(`${ollamaBase}/api/generate`, { model: 'llava', prompt, images: [ollamaBase64], stream: false }, { timeout: 60000 });
279
+ const body = {
280
+ model: OPENAI_MODEL,
281
+ max_tokens: 1024,
282
+ messages: [{
283
+ role: 'user',
284
+ content: [
285
+ { type: 'image_url', image_url: { url: asDataUrl(img) } },
286
+ { type: 'text', text: prompt },
287
+ ],
288
+ }],
289
+ };
290
+ const res = await http.post('https://api.openai.com/v1/chat/completions', body, {
291
+ headers: { Authorization: `Bearer ${key}`, 'content-type': 'application/json' },
292
+ timeout: 30000,
293
+ });
294
+ const description = (res.data?.choices?.[0]?.message?.content ?? '').trim();
295
+ if (!description)
296
+ return null;
297
+ const result = { description, provider: 'openai', modelUsed: OPENAI_MODEL, durationMs: Date.now() - t0 };
298
+ log.info('image analyzed', { provider: 'openai', modelUsed: OPENAI_MODEL, durationMs: result.durationMs, descChars: description.length });
299
+ return result;
300
+ }
301
+ catch (e) {
302
+ log.warn('openai vision failed', { error: e?.message ?? String(e) });
303
+ return null;
304
+ }
305
+ }
306
+ // ── Provider 7: Ollama llava ──────────────────────────────────────────────────
307
+ async function tryOllama(img, prompt, log, http) {
308
+ const ollamaBase = (process.env.OLLAMA_BASE_URL ?? 'http://127.0.0.1:11434').replace(/\/$/, '');
309
+ const t0 = Date.now();
310
+ try {
311
+ const inline = await ensureBase64(img, http, log);
312
+ if (!inline)
313
+ return null;
314
+ const res = await http.post(`${ollamaBase}/api/generate`, { model: 'llava', prompt, images: [inline.base64], stream: false }, { timeout: 60000 });
155
315
  const description = (res.data?.response ?? '').trim();
156
- return { description, provider: 'ollama', modelUsed: 'llava', durationMs: Date.now() - start };
316
+ if (!description)
317
+ return null;
318
+ const result = { description, provider: 'ollama', modelUsed: 'llava', durationMs: Date.now() - t0 };
319
+ log.info('image analyzed', { provider: 'ollama', modelUsed: 'llava', durationMs: result.durationMs, descChars: description.length });
320
+ return result;
157
321
  }
158
322
  catch (e) {
159
- throw new Error(`vision_analyze: all providers exhausted. ${e.message}`);
323
+ log.warn('ollama vision failed', { error: e?.message ?? String(e) });
324
+ return null;
325
+ }
326
+ }
327
+ // ── Main ─────────────────────────────────────────────────────────────────────
328
+ /**
329
+ * Analyze an image using the first available vision-capable provider.
330
+ *
331
+ * @param imageSource File path (absolute or relative) or HTTP(S) URL.
332
+ * @param prompt Instruction prompt (default: describe the image).
333
+ * @param logger Optional Logger from `core/v4/logger`; defaults
334
+ * to a noop sink for legacy callers.
335
+ * @param httpClient Phase v4.1-4.1 — optional HTTP client (test seam).
336
+ * Production leaves this unset; smokes inject a fake.
337
+ * @returns VisionResult with description, provider, model, timing.
338
+ */
339
+ async function analyzeImage(imageSource, prompt = 'Describe this image in detail.', logger = (0, logger_1.noopLogger)(), httpClient = defaultHttpClient) {
340
+ const img = resolveLocalImage(imageSource);
341
+ // Phase v4.1-4.1 — provider chain. Free providers first so the
342
+ // bot doesn't burn paid budget on every inbound photo. Each
343
+ // attempt returns null (key missing OR call failed) on which
344
+ // we fall through to the next; the first one that produces a
345
+ // non-empty description wins.
346
+ const providers = [
347
+ tryGemini,
348
+ (i, p, l, h) => tryOpenAICompat(GROQ_TARGET, i, p, l, h),
349
+ (i, p, l, h) => tryOpenAICompat(OPENROUTER_TARGET, i, p, l, h),
350
+ (i, p, l, h) => tryOpenAICompat(TOGETHER_TARGET, i, p, l, h),
351
+ tryAnthropic,
352
+ tryOpenAI,
353
+ tryOllama,
354
+ ];
355
+ for (const tryProvider of providers) {
356
+ const result = await tryProvider(img, prompt, logger, httpClient);
357
+ if (result)
358
+ return result;
160
359
  }
360
+ logger.warn('all vision providers exhausted');
361
+ throw new Error('vision_analyze: all providers exhausted (no API key found, or every provider call failed). Configure GEMINI_API_KEY / GROQ_API_KEY / OPENROUTER_API_KEY / TOGETHER_API_KEY / ANTHROPIC_API_KEY / OPENAI_API_KEY, or run a local Ollama with `llava` pulled.');
161
362
  }
@@ -95,12 +95,26 @@ Write-Output "${outputPath}"
95
95
  }
96
96
  async function _recordUnix(outputPath, durationMs) {
97
97
  const seconds = Math.ceil(durationMs / 1000);
98
+ // Phase v4.1-cross-platform: detect available backend up-front so
99
+ // a missing sox/arecord surfaces a friendly install hint instead of
100
+ // a raw spawn-failure stack trace.
101
+ // eslint-disable-next-line @typescript-eslint/no-var-requires
102
+ const { detectBackend, missingBackendMessage } = require('./audioBackend');
103
+ const backend = detectBackend('record');
104
+ if (!backend) {
105
+ throw new Error(`[Audio] ${missingBackendMessage('record')}`);
106
+ }
98
107
  // Try sox first, then arecord
99
108
  try {
100
109
  await execAsync(`sox -d -t wav "${outputPath}" trim 0 ${seconds}`, { timeout: durationMs + 5000 });
101
110
  }
102
111
  catch {
103
- await execAsync(`arecord -d ${seconds} -f S16_LE -r 16000 -c 1 "${outputPath}"`, { timeout: durationMs + 5000 });
112
+ try {
113
+ await execAsync(`arecord -d ${seconds} -f S16_LE -r 16000 -c 1 "${outputPath}"`, { timeout: durationMs + 5000 });
114
+ }
115
+ catch {
116
+ throw new Error(`[Audio] ${missingBackendMessage('record')}`);
117
+ }
104
118
  }
105
119
  return outputPath;
106
120
  }
@@ -144,23 +158,65 @@ async function playAudio(audioSource) {
144
158
  }
145
159
  }
146
160
  async function _playWindows(filePath) {
161
+ // Phase v4.1-voice-cli (Piece 0) — replaced the hard-coded
162
+ // `Start-Sleep -Seconds 10` with a NaturalDuration poll loop. The
163
+ // old code cut off any TTS reply longer than 10s mid-sentence;
164
+ // voice-mode replies of meaningful length need actual completion
165
+ // tracking. MediaPlayer.Open is async — we wait up to 5s for
166
+ // NaturalDuration to populate, then sleep the actual duration
167
+ // (capped at 5min as a runaway guard). The 10s fallback is
168
+ // preserved when NaturalDuration never resolves (codec issues,
169
+ // streaming sources).
147
170
  const escaped = filePath.replace(/\\/g, '\\\\');
148
- await execAsync(`powershell -Command "Add-Type -AssemblyName presentationCore; $mp = New-Object System.Windows.Media.MediaPlayer; $mp.Open([uri]'${escaped}'); $mp.Play(); Start-Sleep -Seconds 10; $mp.Stop(); $mp.Close()"`, { timeout: 30000 }).catch(async () => {
149
- // Fallback: system default media player
171
+ const psBody = [
172
+ 'Add-Type -AssemblyName presentationCore',
173
+ '$mp = New-Object System.Windows.Media.MediaPlayer',
174
+ `$mp.Open([uri]'${escaped}')`,
175
+ '$wait = 0',
176
+ 'while (-not $mp.NaturalDuration.HasTimeSpan -and $wait -lt 50) { Start-Sleep -Milliseconds 100; $wait++ }',
177
+ '$mp.Play()',
178
+ 'if ($mp.NaturalDuration.HasTimeSpan) {',
179
+ ' $secs = [Math]::Min(300, [Math]::Ceiling($mp.NaturalDuration.TimeSpan.TotalSeconds + 0.5))',
180
+ ' Start-Sleep -Seconds ([int]$secs)',
181
+ '} else { Start-Sleep -Seconds 10 }',
182
+ '$mp.Stop()',
183
+ '$mp.Close()',
184
+ ].join('; ');
185
+ await execAsync(`powershell -Command "${psBody}"`,
186
+ // 5 min playback cap + 5s NaturalDuration poll + a generous teardown margin.
187
+ { timeout: 320000 }).catch(async () => {
188
+ // Fallback: system default media player (fire-and-forget — caller
189
+ // doesn't wait for completion, but at least audio plays).
150
190
  await execAsync(`powershell -Command "Start-Process '${escaped}'"`, { timeout: 5000 })
151
191
  .catch(() => { });
152
192
  });
153
193
  }
154
194
  async function _playUnix(filePath) {
195
+ // eslint-disable-next-line @typescript-eslint/no-var-requires
196
+ const { detectBackend, missingBackendMessage } = require('./audioBackend');
155
197
  if (process.platform === 'darwin') {
156
- await execAsync(`afplay "${filePath}"`, { timeout: 30000 });
198
+ try {
199
+ await execAsync(`afplay "${filePath}"`, { timeout: 30000 });
200
+ }
201
+ catch {
202
+ throw new Error(`[Audio] ${missingBackendMessage('playback')}`);
203
+ }
157
204
  }
158
205
  else {
206
+ // Linux — try paplay then aplay, surface friendly error if both fail.
207
+ const backend = detectBackend('playback');
208
+ if (!backend)
209
+ throw new Error(`[Audio] ${missingBackendMessage('playback')}`);
159
210
  try {
160
211
  await execAsync(`paplay "${filePath}"`, { timeout: 30000 });
161
212
  }
162
213
  catch {
163
- await execAsync(`aplay "${filePath}"`, { timeout: 30000 });
214
+ try {
215
+ await execAsync(`aplay "${filePath}"`, { timeout: 30000 });
216
+ }
217
+ catch {
218
+ throw new Error(`[Audio] ${missingBackendMessage('playback')}`);
219
+ }
164
220
  }
165
221
  }
166
222
  }
@@ -0,0 +1,134 @@
1
+ "use strict";
2
+ /**
3
+ * Copyright (c) 2026 Shiva Deore (Taracod). Licensed under AGPL-3.0.
4
+ *
5
+ * Aiden — local-first agent.
6
+ */
7
+ /**
8
+ * core/voice/audioBackend.ts — Phase v4.1-cross-platform
9
+ *
10
+ * Detects which audio playback / recording backend is available on
11
+ * the current platform and surfaces friendly install hints when the
12
+ * stack is missing. Used by `audio.ts` and `tts.ts` instead of
13
+ * blowing up with a raw spawn-failure stack trace.
14
+ *
15
+ * Windows : winmm.dll MCI via PowerShell (always available)
16
+ * macOS : afplay (playback, system) + sox (record)
17
+ * Linux : aplay/paplay (playback) + arecord/sox (record)
18
+ *
19
+ * The detection probe runs `which <bin>` (`where <bin>` on Windows)
20
+ * with a 1.5s timeout per probed binary; built-in backends skip the probe.
21
+ * Results are cached for the process lifetime so repeated checks
22
+ * are free.
23
+ */
24
+ Object.defineProperty(exports, "__esModule", { value: true });
25
+ exports.detectBackend = detectBackend;
26
+ exports.missingBackendMessage = missingBackendMessage;
27
+ exports._resetBackendCacheForTests = _resetBackendCacheForTests;
28
+ exports.listKnownBackends = listKnownBackends;
29
+ const node_child_process_1 = require("node:child_process");
30
+ const BACKENDS = {
31
+ win32: {
32
+ playback: [
33
+ { bin: 'powershell', label: 'PowerShell + winmm.dll', installHint: 'PowerShell ships with Windows.', builtin: true },
34
+ ],
35
+ record: [
36
+ { bin: 'powershell', label: 'PowerShell + winmm.dll', installHint: 'PowerShell ships with Windows.', builtin: true },
37
+ ],
38
+ },
39
+ darwin: {
40
+ playback: [
41
+ { bin: 'afplay', label: 'afplay', installHint: 'afplay ships with macOS.', builtin: true },
42
+ { bin: 'sox', label: 'sox', installHint: 'brew install sox', builtin: false },
43
+ ],
44
+ record: [
45
+ { bin: 'sox', label: 'sox', installHint: 'brew install sox', builtin: false },
46
+ ],
47
+ },
48
+ linux: {
49
+ playback: [
50
+ { bin: 'paplay', label: 'paplay (PulseAudio)', installHint: 'sudo apt install pulseaudio-utils (or use ALSA: sudo apt install alsa-utils)', builtin: false },
51
+ { bin: 'aplay', label: 'aplay (ALSA)', installHint: 'sudo apt install alsa-utils', builtin: false },
52
+ { bin: 'sox', label: 'sox', installHint: 'sudo apt install sox', builtin: false },
53
+ ],
54
+ record: [
55
+ { bin: 'arecord', label: 'arecord (ALSA)', installHint: 'sudo apt install alsa-utils', builtin: false },
56
+ { bin: 'sox', label: 'sox', installHint: 'sudo apt install sox', builtin: false },
57
+ ],
58
+ },
59
+ // Catch-all for unknown platforms — no backends, friendly error.
60
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
61
+ fallback: { playback: [], record: [] },
62
+ // Other known Node.js platforms are listed explicitly with empty backend lists.
63
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
64
+ aix: { playback: [], record: [] },
65
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
66
+ freebsd: { playback: [], record: [] },
67
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
68
+ openbsd: { playback: [], record: [] },
69
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
70
+ sunos: { playback: [], record: [] },
71
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
72
+ android: { playback: [], record: [] },
73
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
74
+ cygwin: { playback: [], record: [] },
75
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
76
+ haiku: { playback: [], record: [] },
77
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
78
+ netbsd: { playback: [], record: [] },
79
+ };
80
+ const cache = new Map();
81
+ /** Probe whether `bin` is on PATH. Cross-platform via `which` / `where`. */
82
+ function probe(bin) {
83
+ if (cache.has(bin))
84
+ return cache.get(bin);
85
+ const cmd = process.platform === 'win32' ? `where ${bin}` : `which ${bin}`;
86
+ try {
87
+ (0, node_child_process_1.execSync)(cmd, { stdio: 'ignore', timeout: 1500, windowsHide: true });
88
+ cache.set(bin, true);
89
+ return true;
90
+ }
91
+ catch {
92
+ cache.set(bin, false);
93
+ return false;
94
+ }
95
+ }
96
+ /** Return the first available backend for `purpose` on the current platform, or null. */
97
+ function detectBackend(purpose) {
98
+ const platformKey = process.platform;
99
+ const slot = BACKENDS[platformKey] ?? BACKENDS.fallback;
100
+ const candidates = slot[purpose] ?? [];
101
+ for (const b of candidates) {
102
+ if (b.builtin || probe(b.bin))
103
+ return b;
104
+ }
105
+ return null;
106
+ }
107
+ /**
108
+ * Build a friendly multi-line message describing the missing backend
109
+ * and how to install it. Used by audio.ts / tts.ts when the chosen
110
+ * spawn fails OR detectBackend returns null up front.
111
+ */
112
+ function missingBackendMessage(purpose) {
113
+ const platformKey = process.platform;
114
+ const slot = BACKENDS[platformKey] ?? BACKENDS.fallback;
115
+ const candidates = slot[purpose] ?? [];
116
+ if (candidates.length === 0) {
117
+ return `Audio ${purpose} unavailable on ${process.platform}. Aiden does not yet ship a backend for this platform.`;
118
+ }
119
+ const labels = candidates.map((c) => c.label).join(' / ');
120
+ const installs = candidates.map((c) => ` - ${c.installHint}`).join('\n');
121
+ return `Audio ${purpose} backend not found. Aiden looked for: ${labels}\nInstall one of:\n${installs}`;
122
+ }
123
+ /**
124
+ * Reset the probe cache. Test-only; not exposed via the barrel.
125
+ */
126
+ function _resetBackendCacheForTests() {
127
+ cache.clear();
128
+ }
129
+ /** Public read-only view for diagnostics (used by `aiden doctor`). */
130
+ function listKnownBackends(purpose) {
131
+ const platformKey = process.platform;
132
+ const slot = BACKENDS[platformKey] ?? BACKENDS.fallback;
133
+ return slot[purpose] ?? [];
134
+ }