aiden-runtime 4.0.2 → 4.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +11 -7
- package/config/hardware.json +2 -2
- package/dist/api/server.js +50 -52
- package/dist/cli/v4/aidenCLI.js +421 -5
- package/dist/cli/v4/aidenPrompt.js +317 -0
- package/dist/cli/v4/box.js +105 -39
- package/dist/cli/v4/callbacks.js +39 -6
- package/dist/cli/v4/chatSession.js +256 -55
- package/dist/cli/v4/citationFooter.js +97 -0
- package/dist/cli/v4/commands/channel.js +656 -0
- package/dist/cli/v4/commands/clear.js +1 -1
- package/dist/cli/v4/commands/compress.js +1 -1
- package/dist/cli/v4/commands/cron.js +44 -16
- package/dist/cli/v4/commands/fanout.js +236 -0
- package/dist/cli/v4/commands/help.js +15 -4
- package/dist/cli/v4/commands/history.js +84 -0
- package/dist/cli/v4/commands/index.js +16 -1
- package/dist/cli/v4/commands/mcp.js +358 -0
- package/dist/cli/v4/commands/show.js +43 -0
- package/dist/cli/v4/commands/skills.js +169 -4
- package/dist/cli/v4/commands/status.js +84 -0
- package/dist/cli/v4/commands/subagent.js +78 -0
- package/dist/cli/v4/commands/verbose.js +1 -1
- package/dist/cli/v4/commands/voice.js +218 -0
- package/dist/cli/v4/cronCli.js +103 -0
- package/dist/cli/v4/display.js +297 -13
- package/dist/cli/v4/doctor.js +41 -0
- package/dist/cli/v4/envSources.js +105 -0
- package/dist/cli/v4/ghostMatch.js +74 -0
- package/dist/cli/v4/historyStore.js +163 -0
- package/dist/cli/v4/pasteCompression.js +124 -0
- package/dist/cli/v4/pasteIntercept.js +203 -0
- package/dist/cli/v4/replyRenderer.js +209 -0
- package/dist/cli/v4/resizeGuard.js +92 -0
- package/dist/cli/v4/shellInterpolation.js +139 -0
- package/dist/cli/v4/skinEngine.js +21 -1
- package/dist/cli/v4/streamingPrefix.js +121 -0
- package/dist/cli/v4/syntaxHighlight.js +345 -0
- package/dist/cli/v4/table.js +216 -0
- package/dist/cli/v4/themeDetect.js +81 -0
- package/dist/cli/v4/uiBuild.js +74 -0
- package/dist/cli/v4/voiceCli.js +113 -0
- package/dist/cli/v4/voicePromptApi.js +196 -0
- package/dist/core/channels/discord.js +16 -10
- package/dist/core/channels/email.js +13 -9
- package/dist/core/channels/imessage.js +13 -9
- package/dist/core/channels/manager.js +25 -7
- package/dist/core/channels/pdf-extract.js +180 -0
- package/dist/core/channels/photo-vision.js +157 -0
- package/dist/core/channels/signal.js +11 -7
- package/dist/core/channels/slack.js +13 -10
- package/dist/core/channels/telegram-commands.js +154 -0
- package/dist/core/channels/telegram-groups.js +198 -0
- package/dist/core/channels/telegram-rate-limit.js +124 -0
- package/dist/core/channels/telegram.js +1980 -0
- package/dist/core/channels/twilio.js +11 -7
- package/dist/core/channels/webhook.js +9 -5
- package/dist/core/channels/whatsapp.js +15 -11
- package/dist/core/channels/whisper-transcribe.js +163 -0
- package/dist/core/cronManager.js +33 -294
- package/dist/core/gateway.js +29 -8
- package/dist/core/playwrightBridge.js +90 -0
- package/dist/core/v4/aidenAgent.js +35 -0
- package/dist/core/v4/auxiliaryClient.js +2 -2
- package/dist/core/v4/cron/atomicWrite.js +18 -4
- package/dist/core/v4/cron/cronExecute.js +300 -0
- package/dist/core/v4/cron/cronManager.js +502 -0
- package/dist/core/v4/cron/cronState.js +314 -0
- package/dist/core/v4/cron/cronTick.js +90 -0
- package/dist/core/v4/cron/diagnostics.js +104 -0
- package/dist/core/v4/cron/graceWindow.js +79 -0
- package/dist/core/v4/logger/factory.js +110 -0
- package/dist/core/v4/logger/index.js +22 -0
- package/dist/core/v4/logger/logger.js +101 -0
- package/dist/core/v4/logger/sinks/fileSink.js +110 -0
- package/dist/core/v4/logger/sinks/multiSink.js +43 -0
- package/dist/core/v4/logger/sinks/nullSink.js +53 -0
- package/dist/core/v4/logger/sinks/stdSink.js +81 -0
- package/dist/core/v4/mcp/server/diagnostics.js +40 -0
- package/dist/core/v4/mcp/server/skillBridge.js +94 -0
- package/dist/core/v4/mcp/server/stdioServer.js +119 -0
- package/dist/core/v4/mcp/server/toolBridge.js +168 -0
- package/dist/core/v4/platformPaths.js +105 -0
- package/dist/core/v4/providerFallback.js +25 -0
- package/dist/core/v4/skillLoader.js +21 -5
- package/dist/core/v4/skillMining/candidateStore.js +164 -0
- package/dist/core/v4/skillMining/extractorPrompt.js +111 -0
- package/dist/core/v4/skillMining/proposalBuilder.js +139 -0
- package/dist/core/v4/skillMining/skillMiner.js +191 -0
- package/dist/core/v4/skillMining/traceFingerprint.js +51 -0
- package/dist/core/v4/subagent/budget.js +76 -0
- package/dist/core/v4/subagent/diagnostics.js +22 -0
- package/dist/core/v4/subagent/fanout.js +216 -0
- package/dist/core/v4/subagent/merger.js +148 -0
- package/dist/core/v4/subagent/providerRotation.js +54 -0
- package/dist/core/v4/voice/audioStream.js +373 -0
- package/dist/core/v4/voice/cliVoice.js +393 -0
- package/dist/core/v4/voice/diagnostics.js +66 -0
- package/dist/core/v4/voice/ttsStream.js +193 -0
- package/dist/core/version.js +1 -1
- package/dist/core/visionAnalyze.js +291 -90
- package/dist/core/voice/audio.js +61 -5
- package/dist/core/voice/audioBackend.js +134 -0
- package/dist/core/voice/stt.js +61 -6
- package/dist/core/voice/tts.js +19 -3
- package/dist/tools/v4/index.js +32 -1
- package/dist/tools/v4/subagent/subagentFanout.js +166 -0
- package/package.json +11 -2
|
@@ -6,12 +6,27 @@
|
|
|
6
6
|
//
|
|
7
7
|
// core/visionAnalyze.ts — Image analysis via vision-capable providers.
|
|
8
8
|
//
|
|
9
|
-
// Provider chain (first available wins)
|
|
10
|
-
//
|
|
11
|
-
//
|
|
12
|
-
//
|
|
9
|
+
// Provider chain (first available wins). Free providers first so
|
|
10
|
+
// the bot doesn't burn paid budget on every inbound photo:
|
|
11
|
+
//
|
|
12
|
+
// 1. Gemini gemini-2.5-flash (GEMINI_API_KEY)
|
|
13
|
+
// 2. Groq llama-4-maverick-17b vision (GROQ_API_KEY)
|
|
14
|
+
// 3. OpenRouter llama-3.2-11b-vision:free (OPENROUTER_API_KEY)
|
|
15
|
+
// 4. Together Llama-Vision-Free (TOGETHER_API_KEY)
|
|
16
|
+
// 5. Anthropic claude-3-5-sonnet (ANTHROPIC_API_KEY)
|
|
17
|
+
// 6. OpenAI gpt-4o (OPENAI_API_KEY)
|
|
18
|
+
// 7. Ollama llava (local, no key)
|
|
13
19
|
//
|
|
14
20
|
// Accepts local file paths (→ base64) or HTTP/HTTPS URLs.
|
|
21
|
+
//
|
|
22
|
+
// Phase v4.1-4 — added optional `Logger` parameter so the channel
|
|
23
|
+
// adapter (Telegram, etc.) can route diagnostics through the unified
|
|
24
|
+
// `core/v4/logger` contract instead of stdout.
|
|
25
|
+
//
|
|
26
|
+
// Phase v4.1-4.1 — extended chain to cover the providers Aiden
|
|
27
|
+
// already authenticates against, optional httpClient test seam, and
|
|
28
|
+
// shared OpenAI-compatible helper for Groq / OpenRouter / Together
|
|
29
|
+
// (which all serve the same wire format).
|
|
15
30
|
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
16
31
|
if (k2 === undefined) k2 = k;
|
|
17
32
|
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
@@ -53,6 +68,18 @@ exports.analyzeImage = analyzeImage;
|
|
|
53
68
|
const fs = __importStar(require("fs"));
|
|
54
69
|
const path = __importStar(require("path"));
|
|
55
70
|
const axios_1 = __importDefault(require("axios"));
|
|
71
|
+
const logger_1 = require("./v4/logger");
|
|
72
|
+
/**
 * Production HTTP client: a thin adapter over axios that exposes only the
 * `post` / `get` calls used by the vision providers. Just `headers`,
 * `timeout` and `responseType` are forwarded from the caller's options.
 */
const defaultHttpClient = {
    post(url, body, opts) {
        const config = { headers: opts?.headers, timeout: opts?.timeout };
        return axios_1.default.post(url, body, config);
    },
    get(url, opts) {
        const config = { responseType: opts?.responseType, timeout: opts?.timeout };
        return axios_1.default.get(url, config);
    },
};
|
|
56
83
|
// ── Media type resolver ───────────────────────────────────────────────────────
|
|
57
84
|
function extToMediaType(ext) {
|
|
58
85
|
const map = {
|
|
@@ -61,101 +88,275 @@ function extToMediaType(ext) {
|
|
|
61
88
|
};
|
|
62
89
|
return map[ext.toLowerCase().replace(/^\./, '')] ?? 'image/jpeg';
|
|
63
90
|
}
|
|
64
|
-
|
|
91
|
+
/**
 * Classify an image source and, for local files, load it into memory.
 *
 * @param imageSource HTTP(S) URL, or an absolute / cwd-relative file path.
 * @returns `{ isUrl, sourceUrl, base64, mediaType }`. For URLs nothing is
 *          fetched here: `base64` is empty and `mediaType` is the
 *          'image/jpeg' placeholder. For files, `base64` holds the file
 *          bytes and `mediaType` is derived from the file extension.
 * @throws  Whatever `fs.readFileSync` throws when the file cannot be read.
 */
function resolveLocalImage(imageSource) {
    // URL schemes are case-insensitive; `HTTPS://…` must not be mistaken
    // for a relative file path and handed to readFileSync.
    if (/^https?:\/\//i.test(imageSource)) {
        return { isUrl: true, sourceUrl: imageSource, base64: '', mediaType: 'image/jpeg' };
    }
    const absPath = path.isAbsolute(imageSource)
        ? imageSource
        : path.resolve(process.cwd(), imageSource);
    const buf = fs.readFileSync(absPath);
    return {
        isUrl: false,
        sourceUrl: '',
        base64: buf.toString('base64'),
        mediaType: extToMediaType(path.extname(absPath)),
    };
}
|
|
107
|
+
/**
 * URL to place in an OpenAI-style `image_url` part: the remote URL as-is
 * for URL sources, otherwise an inline `data:<media>;base64,<...>` URL.
 */
function asDataUrl(img) {
    return img.isUrl
        ? img.sourceUrl
        : `data:${img.mediaType};base64,${img.base64}`;
}
|
|
65
113
|
/**
 * Produce base64 bytes for providers that cannot take a URL (Gemini's
 * inline_data, Ollama's images[]).
 *
 * Local images are returned as already loaded. URL images are downloaded
 * on demand; the media type comes from the response `content-type` header
 * when it names an image type, otherwise from the extension of the URL's
 * path component (query string and fragment excluded).
 *
 * @param img  Resolved image (`isUrl`, `sourceUrl`, `base64`, `mediaType`).
 * @param http HTTP client exposing `get(url, { responseType, timeout })`.
 * @param log  Logger exposing `warn(message, fields)`.
 * @returns `{ base64, mediaType }`, or null on download failure so the
 *          caller can fall through to the next provider.
 */
async function ensureBase64(img, http, log) {
    if (!img.isUrl)
        return { base64: img.base64, mediaType: img.mediaType };
    try {
        const res = await http.get(img.sourceUrl, { responseType: 'arraybuffer', timeout: 15000 });
        const base64 = Buffer.from(res.data).toString('base64');
        const headerType = String(res.headers?.['content-type'] ?? '')
            .split(';')[0]
            .trim()
            .toLowerCase();
        if (headerType.startsWith('image/'))
            return { base64, mediaType: headerType };
        // path.extname on the raw URL would return e.g. ".png?sig=abc", which
        // never matches a known extension; look at the pathname only.
        let pathname = img.sourceUrl;
        try {
            pathname = new URL(img.sourceUrl).pathname;
        }
        catch {
            // Not parseable as a URL: fall back to the raw string.
        }
        return { base64, mediaType: extToMediaType(path.extname(pathname)) };
    }
    catch (e) {
        log.warn('failed to download image url for base64-only providers', { url: img.sourceUrl, error: e?.message ?? String(e) });
        return null;
    }
}
|
|
133
|
+
// ── Provider 1: Gemini ────────────────────────────────────────────────────────
const GEMINI_MODEL = 'gemini-2.5-flash';
const GEMINI_ENDPOINT = `https://generativelanguage.googleapis.com/v1beta/models/${GEMINI_MODEL}:generateContent`;
/**
 * Ask Gemini to describe the image.
 *
 * @param img    Resolved image; URL sources are downloaded via ensureBase64.
 * @param prompt Instruction text sent alongside the image.
 * @param log    Logger (`info` on success, `warn` on failure).
 * @param http   HTTP client exposing `post(url, body, { headers, timeout })`.
 * @returns VisionResult (`description`, `provider`, `modelUsed`,
 *          `durationMs`), or null when GEMINI_API_KEY is unset, the image
 *          could not be loaded, the reply was empty, or the call failed.
 */
async function tryGemini(img, prompt, log, http) {
    const key = process.env.GEMINI_API_KEY;
    if (!key)
        return null;
    const t0 = Date.now();
    try {
        const inline = await ensureBase64(img, http, log);
        if (!inline)
            return null;
        const body = {
            contents: [{
                    parts: [
                        { inline_data: { mime_type: inline.mediaType, data: inline.base64 } },
                        { text: prompt },
                    ],
                }],
            generationConfig: { maxOutputTokens: 1024 },
        };
        // Send the key as a header rather than `?key=`: request URLs end up
        // in proxy/access logs and error dumps, headers normally do not.
        const res = await http.post(GEMINI_ENDPOINT, body, {
            headers: { 'content-type': 'application/json', 'x-goog-api-key': key },
            timeout: 30000,
        });
        // A candidate may split its answer across several text parts; join
        // them instead of keeping only the first. Thought parts are skipped.
        const parts = res.data?.candidates?.[0]?.content?.parts;
        const description = (Array.isArray(parts) ? parts : [])
            .filter((part) => typeof part?.text === 'string' && !part.thought)
            .map((part) => part.text)
            .join('')
            .trim();
        if (!description)
            return null;
        const result = { description, provider: 'gemini', modelUsed: GEMINI_MODEL, durationMs: Date.now() - t0 };
        log.info('image analyzed', { provider: 'gemini', modelUsed: GEMINI_MODEL, durationMs: result.durationMs, descChars: description.length });
        return result;
    }
    catch (e) {
        log.warn('gemini vision failed', { error: e?.message ?? String(e) });
        return null;
    }
}
|
|
170
|
+
const GROQ_TARGET = {
    provider: 'groq',
    baseUrl: 'https://api.groq.com/openai/v1',
    model: 'meta-llama/llama-4-maverick-17b-128e-instruct',
    envKey: 'GROQ_API_KEY',
};
const OPENROUTER_TARGET = {
    provider: 'openrouter',
    baseUrl: 'https://openrouter.ai/api/v1',
    model: 'meta-llama/llama-3.2-11b-vision-instruct:free',
    envKey: 'OPENROUTER_API_KEY',
};
const TOGETHER_TARGET = {
    provider: 'together',
    baseUrl: 'https://api.together.xyz/v1',
    model: 'meta-llama/Llama-Vision-Free',
    envKey: 'TOGETHER_API_KEY',
};
/**
 * Shared vision call for providers speaking the OpenAI chat-completions
 * wire format.
 *
 * @param target `{ provider, baseUrl, model, envKey }` describing the service.
 * @param img    Resolved image; sent as a remote URL or an inline data URL.
 * @param prompt Instruction text sent alongside the image.
 * @param log    Logger (`info` on success, `warn` on failure).
 * @param http   HTTP client exposing `post(url, body, { headers, timeout })`.
 * @returns VisionResult, or null when the target's env key is unset, the
 *          reply was empty, or the call failed.
 */
async function tryOpenAICompat(target, img, prompt, log, http) {
    const apiKey = process.env[target.envKey];
    if (!apiKey)
        return null;
    const startedAt = Date.now();
    try {
        const userMessage = {
            role: 'user',
            content: [
                { type: 'image_url', image_url: { url: asDataUrl(img) } },
                { type: 'text', text: prompt },
            ],
        };
        const payload = { model: target.model, max_tokens: 1024, messages: [userMessage] };
        const requestOptions = {
            headers: {
                Authorization: `Bearer ${apiKey}`,
                'content-type': 'application/json',
            },
            timeout: 30000,
        };
        const response = await http.post(`${target.baseUrl}/chat/completions`, payload, requestOptions);
        const description = (response.data?.choices?.[0]?.message?.content ?? '').trim();
        if (!description)
            return null;
        const durationMs = Date.now() - startedAt;
        log.info('image analyzed', {
            provider: target.provider,
            modelUsed: target.model,
            durationMs,
            descChars: description.length,
        });
        return { description, provider: target.provider, modelUsed: target.model, durationMs };
    }
    catch (err) {
        log.warn(`${target.provider} vision failed`, { error: err?.message ?? String(err) });
        return null;
    }
}
|
|
235
|
+
// ── Provider 5: Anthropic ─────────────────────────────────────────────────────
const ANTHROPIC_MODEL = 'claude-3-5-sonnet-20241022';
/**
 * Ask Anthropic's Messages API to describe the image.
 *
 * URL sources are passed through as a `url` image source; local images
 * are sent as a `base64` source with their media type.
 *
 * @returns VisionResult, or null when ANTHROPIC_API_KEY is unset, the
 *          reply was empty, or the call failed.
 */
async function tryAnthropic(img, prompt, log, http) {
    const apiKey = process.env.ANTHROPIC_API_KEY;
    if (!apiKey)
        return null;
    const startedAt = Date.now();
    try {
        let source;
        if (img.isUrl) {
            source = { type: 'url', url: img.sourceUrl };
        }
        else {
            source = { type: 'base64', media_type: img.mediaType, data: img.base64 };
        }
        const payload = {
            model: ANTHROPIC_MODEL,
            max_tokens: 1024,
            messages: [{ role: 'user', content: [{ type: 'image', source }, { type: 'text', text: prompt }] }],
        };
        const requestOptions = {
            headers: {
                'x-api-key': apiKey,
                'anthropic-version': '2023-06-01',
                'content-type': 'application/json',
            },
            timeout: 30000,
        };
        const response = await http.post('https://api.anthropic.com/v1/messages', payload, requestOptions);
        const description = (response.data?.content?.[0]?.text ?? '').trim();
        if (!description)
            return null;
        const durationMs = Date.now() - startedAt;
        log.info('image analyzed', { provider: 'anthropic', modelUsed: ANTHROPIC_MODEL, durationMs, descChars: description.length });
        return { description, provider: 'anthropic', modelUsed: ANTHROPIC_MODEL, durationMs };
    }
    catch (err) {
        log.warn('anthropic vision failed', { error: err?.message ?? String(err) });
        return null;
    }
}
|
|
271
|
+
// ── Provider 6: OpenAI ────────────────────────────────────────────────────────
const OPENAI_MODEL = 'gpt-4o';
const OPENAI_TARGET = {
    provider: 'openai',
    baseUrl: 'https://api.openai.com/v1',
    model: OPENAI_MODEL,
    envKey: 'OPENAI_API_KEY',
};
/**
 * Ask OpenAI to describe the image.
 *
 * OpenAI is the reference implementation of the chat-completions wire
 * format, so this delegates to the shared OpenAI-compatible helper instead
 * of carrying a second copy of the same request/response handling. The
 * endpoint, headers, body, log messages and result shape are unchanged.
 *
 * @returns VisionResult, or null when OPENAI_API_KEY is unset, the reply
 *          was empty, or the call failed.
 */
async function tryOpenAI(img, prompt, log, http) {
    return tryOpenAICompat(OPENAI_TARGET, img, prompt, log, http);
}
|
|
306
|
+
// ── Provider 7: Ollama llava ──────────────────────────────────────────────────
/**
 * Ask a local Ollama `llava` model to describe the image.
 *
 * The server base URL comes from OLLAMA_BASE_URL (one trailing slash is
 * stripped) and defaults to http://127.0.0.1:11434. No API key is needed.
 *
 * @returns VisionResult, or null when the image could not be loaded, the
 *          reply was empty, or the call failed.
 */
async function tryOllama(img, prompt, log, http) {
    const configuredBase = process.env.OLLAMA_BASE_URL ?? 'http://127.0.0.1:11434';
    const endpoint = `${configuredBase.replace(/\/$/, '')}/api/generate`;
    const startedAt = Date.now();
    try {
        const inline = await ensureBase64(img, http, log);
        if (!inline)
            return null;
        const payload = { model: 'llava', prompt, images: [inline.base64], stream: false };
        const response = await http.post(endpoint, payload, { timeout: 60000 });
        const description = (response.data?.response ?? '').trim();
        if (!description)
            return null;
        const durationMs = Date.now() - startedAt;
        log.info('image analyzed', { provider: 'ollama', modelUsed: 'llava', durationMs, descChars: description.length });
        return { description, provider: 'ollama', modelUsed: 'llava', durationMs };
    }
    catch (err) {
        log.warn('ollama vision failed', { error: err?.message ?? String(err) });
        return null;
    }
}
|
|
327
|
+
// ── Main ─────────────────────────────────────────────────────────────────────
/**
 * Describe an image with the first vision provider that answers.
 *
 * Providers are tried one at a time, free tiers before paid ones, with
 * local Ollama last. A provider that has no key configured, fails, or
 * returns an empty description yields null and the next one is tried.
 *
 * @param imageSource File path (absolute or relative) or HTTP(S) URL.
 * @param prompt      Instruction prompt (default: describe the image).
 * @param logger      Optional Logger from `core/v4/logger`; defaults to a
 *                    noop logger for legacy callers.
 * @param httpClient  Optional HTTP client (test seam); defaults to the
 *                    axios-backed client.
 * @returns VisionResult with description, provider, model, timing.
 * @throws  Error when every provider returned null.
 */
async function analyzeImage(imageSource, prompt = 'Describe this image in detail.', logger = (0, logger_1.noopLogger)(), httpClient = defaultHttpClient) {
    const img = resolveLocalImage(imageSource);
    // Adapts an OpenAI-compatible target to the common provider signature.
    const openAICompat = (target) => (i, p, l, h) => tryOpenAICompat(target, i, p, l, h);
    const chain = [
        tryGemini,
        openAICompat(GROQ_TARGET),
        openAICompat(OPENROUTER_TARGET),
        openAICompat(TOGETHER_TARGET),
        tryAnthropic,
        tryOpenAI,
        tryOllama,
    ];
    for (const attempt of chain) {
        const outcome = await attempt(img, prompt, logger, httpClient);
        if (outcome) {
            return outcome;
        }
    }
    logger.warn('all vision providers exhausted');
    throw new Error('vision_analyze: all providers exhausted (no API key found, or every provider call failed). Configure GEMINI_API_KEY / GROQ_API_KEY / OPENROUTER_API_KEY / TOGETHER_API_KEY / ANTHROPIC_API_KEY / OPENAI_API_KEY, or run a local Ollama with `llava` pulled.');
}
|
package/dist/core/voice/audio.js
CHANGED
|
@@ -95,12 +95,26 @@ Write-Output "${outputPath}"
|
|
|
95
95
|
}
|
|
96
96
|
/**
 * Record from the default input device on macOS/Linux into a WAV file.
 *
 * Tries `sox` first and falls back to `arecord`.
 *
 * @param outputPath Destination file path. It is interpolated into a shell
 *                   command inside double quotes, so it must not contain
 *                   `"`, `$` or backticks.
 * @param durationMs Recording length in milliseconds (rounded up to whole
 *                   seconds).
 * @returns `outputPath` once a recorder has exited successfully.
 * @throws  Error carrying the install hint when no backend is detected or
 *          both recorders fail; in the latter case the last recorder's
 *          error is attached as `cause`.
 */
async function _recordUnix(outputPath, durationMs) {
    const seconds = Math.ceil(durationMs / 1000);
    // Phase v4.1-cross-platform: detect available backend up-front so
    // a missing sox/arecord surfaces a friendly install hint instead of
    // a raw spawn-failure stack trace.
    // eslint-disable-next-line @typescript-eslint/no-var-requires
    const { detectBackend, missingBackendMessage } = require('./audioBackend');
    const backend = detectBackend('record');
    if (!backend) {
        throw new Error(`[Audio] ${missingBackendMessage('record')}`);
    }
    // Try sox first, then arecord
    try {
        await execAsync(`sox -d -t wav "${outputPath}" trim 0 ${seconds}`, { timeout: durationMs + 5000 });
    }
    catch {
        try {
            await execAsync(`arecord -d ${seconds} -f S16_LE -r 16000 -c 1 "${outputPath}"`, { timeout: durationMs + 5000 });
        }
        catch (arecordError) {
            // A backend was detected, so this may be a device/permission
            // problem rather than a missing binary: keep the real error.
            throw new Error(`[Audio] ${missingBackendMessage('record')}`, { cause: arecordError });
        }
    }
    return outputPath;
}
|
|
@@ -144,23 +158,65 @@ async function playAudio(audioSource) {
|
|
|
144
158
|
}
|
|
145
159
|
}
|
|
146
160
|
/**
 * Play an audio file on Windows through a PowerShell-hosted MediaPlayer.
 *
 * Waits up to 5s for the media duration to become known, then sleeps for
 * the real duration (capped at 5 minutes), or 10s when the duration never
 * resolves. If the PowerShell call fails, falls back to opening the file
 * with the system default player (fire-and-forget). Never rejects.
 *
 * @param filePath Path of the audio file to play.
 */
async function _playWindows(filePath) {
    // Phase v4.1-voice-cli (Piece 0) — replaced the hard-coded
    // `Start-Sleep -Seconds 10` with a NaturalDuration poll loop. The
    // old code cut off any TTS reply longer than 10s mid-sentence;
    // voice-mode replies of meaningful length need actual completion
    // tracking. MediaPlayer.Open is async — we wait up to 5s for
    // NaturalDuration to populate, then sleep the actual duration
    // (capped at 5min as a runaway guard). The 10s fallback is
    // preserved when NaturalDuration never resolves (codec issues,
    // streaming sources).
    //
    // The path is embedded in a PowerShell single-quoted literal, where the
    // only special character is `'` (escaped by doubling). Without that, a
    // path such as C:\Users\O'Brien\a.wav terminates the literal early.
    const escaped = filePath.replace(/\\/g, '\\\\').replace(/'/g, "''");
    const psBody = [
        'Add-Type -AssemblyName presentationCore',
        '$mp = New-Object System.Windows.Media.MediaPlayer',
        `$mp.Open([uri]'${escaped}')`,
        '$wait = 0',
        'while (-not $mp.NaturalDuration.HasTimeSpan -and $wait -lt 50) { Start-Sleep -Milliseconds 100; $wait++ }',
        '$mp.Play()',
        'if ($mp.NaturalDuration.HasTimeSpan) {',
        ' $secs = [Math]::Min(300, [Math]::Ceiling($mp.NaturalDuration.TimeSpan.TotalSeconds + 0.5))',
        ' Start-Sleep -Seconds ([int]$secs)',
        '} else { Start-Sleep -Seconds 10 }',
        '$mp.Stop()',
        '$mp.Close()',
    ].join('; ');
    await execAsync(`powershell -Command "${psBody}"`, 
    // 5 min cap on the duration poll + a generous teardown margin.
    { timeout: 320000 }).catch(async () => {
        // Fallback: system default media player (fire-and-forget — caller
        // doesn't wait for completion, but at least audio plays).
        await execAsync(`powershell -Command "Start-Process '${escaped}'"`, { timeout: 5000 })
            .catch(() => { });
    });
}
|
|
154
194
|
/**
 * Play an audio file on macOS (`afplay`) or Linux (`paplay`, then `aplay`).
 *
 * Each player runs with a 30s timeout.
 *
 * @param filePath Path of the audio file. It is interpolated into a shell
 *                 command inside double quotes, so it must not contain
 *                 `"`, `$` or backticks.
 * @throws Error carrying the install hint when no Linux backend is
 *         detected or the player(s) fail; for player failures the
 *         underlying exec error is attached as `cause`.
 */
async function _playUnix(filePath) {
    // eslint-disable-next-line @typescript-eslint/no-var-requires
    const { detectBackend, missingBackendMessage } = require('./audioBackend');
    if (process.platform === 'darwin') {
        try {
            await execAsync(`afplay "${filePath}"`, { timeout: 30000 });
        }
        catch (afplayError) {
            // afplay ships with macOS, so a failure here is more likely a
            // bad file or the timeout than a missing binary: keep the cause.
            throw new Error(`[Audio] ${missingBackendMessage('playback')}`, { cause: afplayError });
        }
    }
    else {
        // Linux — try paplay then aplay, surface friendly error if both fail.
        const backend = detectBackend('playback');
        if (!backend)
            throw new Error(`[Audio] ${missingBackendMessage('playback')}`);
        try {
            await execAsync(`paplay "${filePath}"`, { timeout: 30000 });
        }
        catch {
            try {
                await execAsync(`aplay "${filePath}"`, { timeout: 30000 });
            }
            catch (aplayError) {
                throw new Error(`[Audio] ${missingBackendMessage('playback')}`, { cause: aplayError });
            }
        }
    }
}
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Copyright (c) 2026 Shiva Deore (Taracod). Licensed under AGPL-3.0.
|
|
4
|
+
*
|
|
5
|
+
* Aiden — local-first agent.
|
|
6
|
+
*/
|
|
7
|
+
/**
|
|
8
|
+
* core/voice/audioBackend.ts — Phase v4.1-cross-platform
|
|
9
|
+
*
|
|
10
|
+
* Detects which audio playback / recording backend is available on
|
|
11
|
+
* the current platform and surfaces friendly install hints when the
|
|
12
|
+
* stack is missing. Used by `audio.ts` and `tts.ts` instead of
|
|
13
|
+
* blowing up with a raw spawn-failure stack trace.
|
|
14
|
+
*
|
|
15
|
+
* Windows : winmm.dll MCI via PowerShell (always available)
|
|
16
|
+
* macOS : afplay (playback, system) + sox (record)
|
|
17
|
+
* Linux : aplay/paplay (playback) + arecord/sox (record)
|
|
18
|
+
*
|
|
19
|
+
* The detection probe runs `<bin> --version` (or `which`) with a
|
|
20
|
+
* 1.5s timeout; total cost on first call is bounded under 2s.
|
|
21
|
+
* Results are cached for the process lifetime so repeated checks
|
|
22
|
+
* are free.
|
|
23
|
+
*/
|
|
24
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
25
|
+
exports.detectBackend = detectBackend;
|
|
26
|
+
exports.missingBackendMessage = missingBackendMessage;
|
|
27
|
+
exports._resetBackendCacheForTests = _resetBackendCacheForTests;
|
|
28
|
+
exports.listKnownBackends = listKnownBackends;
|
|
29
|
+
const node_child_process_1 = require("node:child_process");
|
|
30
|
+
/**
 * Per-platform table of candidate audio backends, keyed by
 * `process.platform` and then by purpose ('playback' | 'record'), listed
 * in lookup order. Each entry is `{ bin, label, installHint, builtin }`.
 * `fallback` is the slot used for platforms that have no key here.
 */
const BACKENDS = (() => {
    const entry = (bin, label, installHint, builtin = false) => ({ bin, label, installHint, builtin });
    const powershell = () => entry('powershell', 'PowerShell + winmm.dll', 'PowerShell ships with Windows.', true);
    const sox = (installHint) => entry('sox', 'sox', installHint);
    // Platforms without a shipped backend each get their own empty slot.
    const none = () => ({ playback: [], record: [] });
    return {
        win32: {
            playback: [powershell()],
            record: [powershell()],
        },
        darwin: {
            playback: [
                entry('afplay', 'afplay', 'afplay ships with macOS.', true),
                sox('brew install sox'),
            ],
            record: [sox('brew install sox')],
        },
        linux: {
            playback: [
                entry('paplay', 'paplay (PulseAudio)', 'sudo apt install pulseaudio-utils (or use ALSA: sudo apt install alsa-utils)'),
                entry('aplay', 'aplay (ALSA)', 'sudo apt install alsa-utils'),
                sox('sudo apt install sox'),
            ],
            record: [
                entry('arecord', 'arecord (ALSA)', 'sudo apt install alsa-utils'),
                sox('sudo apt install sox'),
            ],
        },
        // Catch-all for unknown platforms — no backends, friendly error.
        fallback: none(),
        // Remaining Node.js platforms, explicitly empty.
        aix: none(),
        freebsd: none(),
        openbsd: none(),
        sunos: none(),
        android: none(),
        cygwin: none(),
        haiku: none(),
        netbsd: none(),
    };
})();
|
|
80
|
+
const cache = new Map();
/**
 * Report whether `bin` resolves on PATH, using `where` on Windows and
 * `which` elsewhere (1.5s timeout). A failed or timed-out lookup counts
 * as "not found". Results are memoised in `cache` per binary name.
 */
function probe(bin) {
    const known = cache.get(bin);
    if (known !== undefined || cache.has(bin))
        return known;
    const locator = process.platform === 'win32' ? 'where' : 'which';
    let found = true;
    try {
        node_child_process_1.execSync(`${locator} ${bin}`, { stdio: 'ignore', timeout: 1500, windowsHide: true });
    }
    catch {
        found = false;
    }
    cache.set(bin, found);
    return found;
}
|
|
96
|
+
/**
 * Pick the backend to use for `purpose` ('playback' | 'record') on the
 * current platform: the first candidate that is either builtin or found
 * by `probe`. Returns null when no candidate qualifies.
 */
function detectBackend(purpose) {
    const platformSlot = BACKENDS[process.platform] ?? BACKENDS.fallback;
    const candidates = platformSlot[purpose] ?? [];
    const usable = candidates.find((candidate) => candidate.builtin || probe(candidate.bin));
    return usable ?? null;
}
|
|
107
|
+
/**
 * Compose the user-facing text shown when no audio backend works for
 * `purpose`. On platforms with no known candidates it says the platform
 * is unsupported; otherwise it lists what was looked for and one install
 * hint per candidate, each on its own line.
 */
function missingBackendMessage(purpose) {
    const platformSlot = BACKENDS[process.platform] ?? BACKENDS.fallback;
    const known = platformSlot[purpose] ?? [];
    if (known.length === 0) {
        return `Audio ${purpose} unavailable on ${process.platform}. Aiden does not yet ship a backend for this platform.`;
    }
    const labelList = [];
    const hintLines = [];
    for (const { label, installHint } of known) {
        labelList.push(label);
        hintLines.push(` - ${installHint}`);
    }
    return `Audio ${purpose} backend not found. Aiden looked for: ${labelList.join(' / ')}\nInstall one of:\n${hintLines.join('\n')}`;
}
|
|
123
|
+
/**
 * Test-only hook: forget every memoised `probe` result so the next
 * lookup goes to the system again. Not exposed via the barrel.
 */
function _resetBackendCacheForTests() {
    cache.clear();
}
|
|
129
|
+
/**
 * Candidate backends for `purpose` on the current platform, without
 * probing for availability (used by `aiden doctor`). Returns an empty
 * array when the platform or purpose has no entries.
 */
function listKnownBackends(purpose) {
    const platformSlot = BACKENDS[process.platform] ?? BACKENDS.fallback;
    const candidates = platformSlot[purpose];
    return candidates ?? [];
}
|