arisa 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,437 @@
1
+ /**
2
+ * @module core/index
3
+ * @role HTTP server entry point for Core process.
4
+ * @responsibilities
5
+ * - Listen on :7777 for messages from Daemon
6
+ * - Route /message requests through media → processor → file-detector → format
7
+ * - Expose /health endpoint for Daemon health checks
8
+ * - Handle /reset, scheduler parsing, and command dispatch
9
+ * - Initialize scheduler on startup
10
+ * @dependencies All core/* modules, shared/*
11
+ * @effects Network (HTTP server), spawns Claude CLI, disk I/O
12
+ */
13
+
14
+ import { config } from "../shared/config";
15
+
16
// Initialize encrypted secrets before anything reads them.
// NOTE(review): ES module imports are hoisted, so the modules imported
// below this statement are still evaluated BEFORE this await runs —
// confirm none of them read secrets at module-load time.
await config.secrets.initialize();
18
+ import { createLogger } from "../shared/logger";
19
+ import { serveWithRetry, claimProcess } from "../shared/ports";
20
+ import type { IncomingMessage, CoreResponse, ScheduledTask } from "../shared/types";
21
+ import { processWithClaude, processWithCodex, isClaudeRateLimitResponse } from "./processor";
22
+ import { transcribeAudio, describeImage, generateSpeech, isMediaConfigured, isSpeechConfigured } from "./media";
23
+ import { detectFiles } from "./file-detector";
24
+
25
+ import { addExchange, getForeignContext, clearHistory, getLastBackend } from "./history";
26
+ import { getOnboarding, checkDeps } from "./onboarding";
27
+ import { initScheduler, addTask, cancelAllChatTasks } from "./scheduler";
28
+ import { detectScheduleIntent } from "./intent";
29
+ import { initAuth, isAuthorized, tryAuthorize } from "./auth";
30
+ import { initAttachments, saveAttachment } from "./attachments";
31
+ import { saveMessageRecord, getMessageRecord } from "../shared/db";
32
+
33
const log = createLogger("core");

// Kill previous Core if still running, write our PID so the next restart
// can do the same to us.
claimProcess("core");

// Per-chat backend state — default based on what's installed (claude > codex).
// Keys are chat IDs; values are the backend currently serving that chat.
const backendState = new Map<string, "claude" | "codex">();
40
+
41
+ function defaultBackend(): "claude" | "codex" {
42
+ const deps = checkDeps();
43
+ return deps.claude ? "claude" : "codex";
44
+ }
45
+
46
+ function getBackend(chatId: string): "claude" | "codex" {
47
+ const current = backendState.get(chatId);
48
+ if (current) return current;
49
+
50
+ const fromHistory = getLastBackend(chatId);
51
+ if (fromHistory) {
52
+ backendState.set(chatId, fromHistory);
53
+ return fromHistory;
54
+ }
55
+
56
+ return defaultBackend();
57
+ }
58
+
59
// Initialize auth + scheduler + attachments before the server below
// starts accepting traffic.
await initAuth();
await initScheduler();
await initAttachments();
63
+
64
// HTTP entry point: the Daemon posts inbound chat messages to /message and
// polls /health for liveness. All per-message pipeline steps live in the
// fetch handler below, in strict order: auth → onboarding → reply context →
// commands → media → ledger → scheduling → backend routing → voice/files.
const server = await serveWithRetry({
  port: config.corePort,
  async fetch(req) {
    const url = new URL(req.url);

    // Liveness probe used by the Daemon.
    if (url.pathname === "/health" && req.method === "GET") {
      return Response.json({ status: "ok", timestamp: Date.now() });
    }

    if (url.pathname === "/message" && req.method === "POST") {
      try {
        const body = await req.json();
        const msg: IncomingMessage = body.message;

        if (!msg) {
          return Response.json({ error: "Missing message" }, { status: 400 });
        }

        log.debug(`Inbound message | chatId=${msg.chatId} | sender=${msg.sender} | type=${msg.text ? "text" : "media"}`);

        // Auth gate: require token before anything else. An unauthorized
        // chat can only become authorized by sending the token as text.
        if (!isAuthorized(msg.chatId)) {
          if (msg.text && await tryAuthorize(msg.chatId, msg.text)) {
            return Response.json({ text: "Authorized. Welcome to Arisa!" } as CoreResponse);
          }
          return Response.json({ text: "Send the auth token to start. Check the server console." } as CoreResponse);
        }

        // Onboarding: first message from this chat. A blocking result
        // short-circuits here; a non-blocking one is prepended to the
        // final reply further below.
        const onboarding = await getOnboarding(msg.chatId);
        if (onboarding?.blocking) {
          return Response.json({ text: onboarding.message } as CoreResponse);
        }

        // Initialize message text
        let messageText = msg.text || "";

        // Prepend reply context if message quotes another message
        if (msg.replyTo) {
          let quotedText = msg.replyTo.text || "";
          let quotedSender = msg.replyTo.sender;
          let quotedDate = new Date(msg.replyTo.timestamp).toLocaleString("es-AR");
          let attachmentInfo = "";

          // Try ledger lookup for richer context (media description, path)
          if (msg.replyTo.messageId) {
            const ledger = await getMessageRecord(msg.chatId, msg.replyTo.messageId);
            if (ledger) {
              quotedText = ledger.text || quotedText;
              quotedSender = ledger.sender;
              quotedDate = new Date(ledger.timestamp).toLocaleString("es-AR");
              if (ledger.mediaDescription) {
                attachmentInfo += `\nMedia description: ${ledger.mediaDescription}`;
              }
              if (ledger.attachmentPath) {
                attachmentInfo += `\nAttachment: ${ledger.attachmentPath}`;
              }
            }
          }

          if (!quotedText && !attachmentInfo) {
            quotedText = "[media or unknown content]";
          }

          messageText = `━━━ QUOTED MESSAGE ━━━
From: ${quotedSender}
Date: ${quotedDate}
Content: "${quotedText}"${attachmentInfo}
━━━━━━━━━━━━━━━━━━━━

${messageText}`;
        }

        // Handle /reset command: flag the CLI session for a fresh start and
        // wipe per-chat history plus router state.
        if (msg.command === "/reset") {
          const { writeFileSync } = await import("fs");
          writeFileSync(config.resetFlagPath, "reset");
          clearHistory(msg.chatId);
          const { resetRouterState } = await import("./router");
          resetRouterState();
          const response: CoreResponse = { text: "Conversation reset! Next message will start a fresh conversation." };
          return Response.json(response);
        }

        // Handle /cancel command — stop all scheduled tasks
        if (msg.command === "/cancel") {
          const removed = await cancelAllChatTasks(msg.chatId);
          const text = removed > 0
            ? `Cancelled ${removed} task${removed > 1 ? "s" : ""}.`
            : "No active tasks to cancel.";
          return Response.json({ text } as CoreResponse);
        }

        // Handle /codex command — switch to codex backend
        if (msg.command === "/codex") {
          const deps = checkDeps();
          if (!deps.codex) {
            // NOTE(review): both branches of this ternary are identical —
            // either the macOS hint was meant to differ (cf. /claude below)
            // or the conditional is dead. Confirm intent.
            const hint = deps.os === "macOS"
              ? "<code>npm install -g @openai/codex</code>"
              : "<code>npm install -g @openai/codex</code>";
            return Response.json({ text: `Codex CLI is not installed.\n${hint}` } as CoreResponse);
          }
          backendState.set(msg.chatId, "codex");
          log.info(`Backend switched to codex for chat ${msg.chatId}`);
          const response: CoreResponse = { text: "Codex mode activated. Use /claude to switch back." };
          return Response.json(response);
        }

        // Handle /claude command — switch to claude backend
        if (msg.command === "/claude") {
          const deps = checkDeps();
          if (!deps.claude) {
            const hint = deps.os === "macOS"
              ? "<code>brew install claude-code</code> o <code>npm install -g @anthropic-ai/claude-code</code>"
              : "<code>npm install -g @anthropic-ai/claude-code</code>";
            return Response.json({ text: `Claude CLI is not installed.\n${hint}` } as CoreResponse);
          }
          backendState.set(msg.chatId, "claude");
          log.info(`Backend switched to claude for chat ${msg.chatId}`);
          const response: CoreResponse = { text: "Claude mode activated. Use /codex to switch back." };
          return Response.json(response);
        }

        // Handle /speak command — generate speech via ElevenLabs
        if (msg.command === "/speak") {
          if (!config.elevenlabsApiKey) {
            return Response.json({ text: "ELEVENLABS_API_KEY not configured. Add it to ~/.arisa/.env" } as CoreResponse);
          }
          // NOTE(review): when this message quotes another, messageText now
          // begins with the quoted-message block, so this anchored regex
          // will not strip the "/speak" prefix — confirm /speak combined
          // with a reply is intended.
          const textToSpeak = messageText.replace(/^\/speak\s*/, "").trim();
          if (!textToSpeak) {
            return Response.json({ text: "Usage: /speak <text to convert to speech>" } as CoreResponse);
          }
          try {
            const audioPath = await generateSpeech(textToSpeak);
            const response: CoreResponse = {
              text: "",
              audio: audioPath,
            };
            return Response.json(response);
          } catch (error) {
            log.error(`Speech generation failed: ${error}`);
            return Response.json({ text: "Failed to generate speech. Check logs for details." } as CoreResponse);
          }
        }

        // Process media first — track metadata for message ledger
        let ledgerMediaType: "image" | "audio" | "document" | undefined;
        let ledgerAttachmentPath: string | undefined;
        let ledgerMediaDescription: string | undefined;

        if (msg.audio) {
          const audioPath = await saveAttachment(msg.chatId, "audio", msg.audio.base64, msg.audio.filename);
          ledgerMediaType = "audio";
          ledgerAttachmentPath = audioPath;
          if (isMediaConfigured()) {
            try {
              const transcription = await transcribeAudio(msg.audio.base64, msg.audio.filename);
              if (transcription.trim()) {
                ledgerMediaDescription = transcription;
                messageText = `[Audio saved to ${audioPath}]\n[Voice message transcription]: ${transcription}`;
              } else {
                messageText = `[Audio saved to ${audioPath}]\n[Transcription returned empty. Ask the user to try again or send text.]`;
              }
            } catch (error) {
              // Transcription failure is non-fatal: the saved file path is
              // still surfaced to the agent.
              log.error(`Transcription failed: ${error}`);
              messageText = `[Audio saved to ${audioPath}]\n[Transcription failed. The audio file is still accessible at the path above.]`;
            }
          } else {
            messageText = `[Audio saved to ${audioPath}]\n[Cannot transcribe because OPENAI_API_KEY is not configured. The audio file is still accessible at the path above.]`;
          }
        }

        if (msg.image) {
          const caption = msg.image.caption || "";
          const imgPath = await saveAttachment(msg.chatId, "image", msg.image.base64);
          ledgerMediaType = "image";
          ledgerAttachmentPath = imgPath;

          if (caption && isMediaConfigured()) {
            // User sent text with the image → describe it via Vision
            try {
              const description = await describeImage(msg.image.base64, caption);
              if (description.trim()) {
                ledgerMediaDescription = description;
                messageText = `[Image saved to ${imgPath}]\n[Image description: ${description}]\n${caption}`;
              } else {
                messageText = `[Image saved to ${imgPath}]\n[Image content could not be interpreted]\n${caption}`;
              }
            } catch (error) {
              log.error(`Image analysis failed: ${error}`);
              messageText = `[Image saved to ${imgPath}]\n[Error analyzing the image]\n${caption}`;
            }
          } else if (caption) {
            // Has caption but no OpenAI key
            messageText = `[Image saved to ${imgPath}]\n[Cannot describe image — OPENAI_API_KEY not configured. The image file is accessible at the path above.]\n${caption}`;
          } else {
            // No caption → just save, no GPT call
            messageText = `[Image saved to ${imgPath}]`;
          }
        }

        if (msg.document) {
          const docPath = await saveAttachment(msg.chatId, "document", msg.document.base64, msg.document.filename, msg.document.mimeType);
          ledgerMediaType = "document";
          ledgerAttachmentPath = docPath;
          const caption = msg.document.caption || "";
          messageText = caption
            ? `[Document saved to ${docPath}] (${msg.document.mimeType})\n${caption}`
            : `[Document saved to ${docPath}] (${msg.document.mimeType})`;
        }

        if (!messageText) {
          const response: CoreResponse = { text: "Empty message received." };
          return Response.json(response);
        }

        // Save incoming message to ledger (after media processing so we
        // have descriptions). Fire-and-forget: a ledger failure must not
        // block the reply.
        if (msg.messageId) {
          saveMessageRecord({
            id: `${msg.chatId}_${msg.messageId}`,
            chatId: msg.chatId,
            messageId: msg.messageId,
            direction: "in",
            sender: msg.sender,
            timestamp: msg.timestamp,
            text: messageText,
            mediaType: ledgerMediaType,
            attachmentPath: ledgerAttachmentPath,
            mediaDescription: ledgerMediaDescription,
          }).catch((e) => log.error(`Failed to save incoming message record: ${e}`));
        }

        // Detect scheduling intent via haiku (language-agnostic). A hit
        // short-circuits: the message becomes a task, not an agent turn.
        const scheduleIntent = await detectScheduleIntent(messageText);
        if (scheduleIntent) {
          if (scheduleIntent.type === "cancel") {
            const removed = await cancelAllChatTasks(msg.chatId);
            const text = removed > 0
              ? scheduleIntent.confirmation
              : "No active tasks to cancel.";
            return Response.json({ text } as CoreResponse);
          }

          const taskId = `${Date.now()}_${Math.random().toString(36).substring(7)}`;
          const task: ScheduledTask = {
            id: taskId,
            chatId: msg.chatId,
            sender: msg.sender,
            senderId: msg.senderId,
            type: scheduleIntent.type,
            message: scheduleIntent.message,
            originalMessage: messageText,
            createdAt: Date.now(),
            ...(scheduleIntent.type === "once" && scheduleIntent.delaySeconds
              ? { runAt: Date.now() + scheduleIntent.delaySeconds * 1000 }
              : {}),
            ...(scheduleIntent.type === "cron" && scheduleIntent.cron
              ? { cron: scheduleIntent.cron }
              : {}),
          };
          await addTask(task);
          const response: CoreResponse = { text: scheduleIntent.confirmation };
          return Response.json(response);
        }

        // Route based on current backend state; fall back to the other
        // backend when the chosen one fails and the other is installed.
        const backend = getBackend(msg.chatId);
        const deps = checkDeps();
        const canFallback = backend === "codex" ? deps.claude : deps.codex;
        let agentResponse: string;
        let historyResponse: string | null = null;
        let usedBackend: "claude" | "codex" = backend;

        // Inject cross-backend context if switching
        const foreignCtx = getForeignContext(msg.chatId, backend);
        const enrichedMessage = foreignCtx ? foreignCtx + messageText : messageText;

        log.info(`Routing | backend: ${backend} | foreignCtx: ${!!foreignCtx} | enrichedChars: ${enrichedMessage.length}`);

        if (backend === "codex") {
          try {
            agentResponse = await processWithCodex(enrichedMessage);
            // Codex reports failure in-band via this error prefix.
            if (agentResponse.startsWith("Error processing with Codex") && canFallback) {
              log.warn("Codex failed, falling back to Claude");
              agentResponse = await processWithClaude(enrichedMessage, msg.chatId);
              usedBackend = "claude";
            }
          } catch (error) {
            if (canFallback) {
              log.warn(`Codex threw, falling back to Claude: ${error}`);
              agentResponse = await processWithClaude(enrichedMessage, msg.chatId);
              usedBackend = "claude";
            } else {
              agentResponse = "Error processing with Codex. Please try again.";
            }
          }
        } else {
          try {
            agentResponse = await processWithClaude(enrichedMessage, msg.chatId);
            if (isClaudeRateLimitResponse(agentResponse) && canFallback) {
              log.warn("Claude credits exhausted, falling back to Codex");
              const codexResponse = await processWithCodex(enrichedMessage);
              agentResponse = `Claude is out of credits right now, so I switched this reply to Codex.\n---CHUNK---\n${codexResponse}`;
              // History records only the Codex answer, not the switch notice.
              historyResponse = codexResponse;
              usedBackend = "codex";
              // Persist the switch so subsequent messages don't keep re-injecting
              // cross-backend context while Claude has no credits.
              backendState.set(msg.chatId, "codex");
            }
          } catch (error) {
            const errMsg = error instanceof Error ? error.message : String(error);
            if (canFallback) {
              log.warn(`Claude threw, falling back to Codex: ${errMsg}`);
              agentResponse = await processWithCodex(enrichedMessage);
              usedBackend = "codex";
            } else {
              agentResponse = `Claude error: ${errMsg.slice(0, 200)}`;
            }
          }
        }

        // Log exchange for shared history
        addExchange(msg.chatId, messageText, historyResponse ?? agentResponse, usedBackend);

        log.info(`Response | backend: ${usedBackend} | responseChars: ${agentResponse.length}`);
        log.debug(`Response raw >>>>\n${agentResponse}\n<<<<`);

        // Detect [VOICE]...[/VOICE] tags — generate speech via ElevenLabs.
        // NOTE(review): when speech is NOT configured the tags are left
        // verbatim in the outgoing text — confirm that is acceptable.
        let audioPath: string | undefined;
        let textResponse = agentResponse;

        const voiceMatch = agentResponse.match(/\[VOICE\]([\s\S]*?)\[\/VOICE\]/);
        if (voiceMatch && isSpeechConfigured()) {
          const speechText = voiceMatch[1].trim();
          textResponse = agentResponse.replace(/\[VOICE\][\s\S]*?\[\/VOICE\]/, "").trim();
          try {
            audioPath = await generateSpeech(speechText, config.elevenlabsVoiceId);
            log.info(`Speech generated for ${speechText.length} chars`);
          } catch (error) {
            log.error(`Speech generation failed: ${error}`);
            // Fallback: send the voice text as regular text so the message isn't empty
            if (!textResponse) {
              textResponse = speechText;
            }
          }
        }

        // Prepend onboarding info if first message (non-blocking)
        const fullResponse = onboarding
          ? onboarding.message + "\n\n" + textResponse
          : textResponse;

        // File paths mentioned in the reply become attachments.
        const files = detectFiles(textResponse);

        const response: CoreResponse = {
          text: fullResponse,
          files: files.length > 0 ? files : undefined,
          audio: audioPath,
        };

        return Response.json(response);
      } catch (error) {
        // Last-resort handler: report a truncated error instead of a 500
        // so the Daemon still delivers something to the chat.
        const errMsg = error instanceof Error ? error.message : String(error);
        log.error(`Request processing error: ${errMsg}`);
        const summary = errMsg.length > 200 ? errMsg.slice(0, 200) + "..." : errMsg;
        return Response.json({ text: `Internal error: ${summary}` } as CoreResponse);
      }
    }

    return Response.json({ error: "Not found" }, { status: 404 });
  },
});

log.info(`Core server listening on port ${config.corePort}`);
@@ -0,0 +1,112 @@
1
+ /**
2
+ * @module core/intent
3
+ * @role Use a fast model to detect scheduling intents from any language.
4
+ * @responsibilities
5
+ * - Classify messages as schedule requests or regular messages
6
+ * - Extract schedule type (once/cron), timing, and reminder text
7
+ * - Works with whatever CLI is available (claude or codex)
8
+ * @dependencies shared/config
9
+ * @effects Spawns claude or codex CLI
10
+ */
11
+
12
+ import { config } from "../shared/config";
13
+ import { createLogger } from "../shared/logger";
14
+
15
const log = createLogger("core");

/**
 * Parsed scheduling intent extracted from a user message.
 * - type "once": delaySeconds holds the offset from now, in seconds.
 * - type "cron": cron holds a 5- or 6-field cron expression.
 * - type "cancel": message is unused; only confirmation matters.
 * `confirmation` is the user-facing acknowledgement text, written by the
 * model in the user's language (see INTENT_PROMPT rules).
 */
export interface ScheduleIntent {
  type: "once" | "cron" | "cancel";
  delaySeconds?: number;
  cron?: string;
  message: string;
  confirmation: string;
}
24
+
25
// System prompt for the intent classifier. The model must answer with a
// single JSON object; detectScheduleIntent extracts and parses it, so the
// examples below define the exact schema for each intent type. Runtime
// string — do not edit casually.
const INTENT_PROMPT = `You are a scheduling intent detector. Analyze the user message and determine if they want to schedule a reminder, recurring notification, or cancel/stop existing tasks.

If it IS a scheduling request, respond with ONLY this JSON (no markdown, no explanation):
For one-time reminders:
{"type":"once","delaySeconds":300,"message":"the reminder text","confirmation":"I'll remind you in 5 minutes"}

For recurring reminders:
{"type":"cron","cron":"*/5 * * * *","message":"the reminder text","confirmation":"I'll remind you every 5 minutes"}

For cancelling/stopping tasks:
{"type":"cancel","message":"","confirmation":"All tasks cancelled."}

If it is NOT a scheduling or cancellation request, respond with ONLY:
{"type":"none"}

Rules:
- One-time: "in X seconds/minutes/hours" or equivalent in any language → once
- Recurring: "every X seconds/minutes/hours" or equivalent in any language → cron
- Cancel: "stop/cancel/remove all tasks/reminders" or equivalent in any language → cancel
- For seconds-based cron, use 6-field format: */N * * * * *
- For minutes-based cron: */N * * * *
- For hours-based cron: 0 */N * * *
- Extract the actual reminder content, not the scheduling instruction
- Write the confirmation in the same language as the user's message
- Support any language
- Only detect clear scheduling intent, not vague mentions of time`;
51
+
52
+ function buildCmd(cli: "claude" | "codex", prompt: string): string[] {
53
+ if (cli === "claude") {
54
+ return ["claude", "--dangerously-skip-permissions", "--model", "haiku", "-p", prompt];
55
+ }
56
+ return ["codex", "exec", "--dangerously-bypass-approvals-and-sandbox", "-C", config.projectDir, prompt];
57
+ }
58
+
59
+ // Track which CLI actually works (not just Bun.which, which can find broken shims)
60
+ let verifiedCli: "claude" | "codex" | null = null;
61
+
62
+ async function trySpawn(prompt: string, cli: "claude" | "codex"): Promise<string | null> {
63
+ const cmd = buildCmd(cli, prompt);
64
+ const proc = Bun.spawn(cmd, { cwd: config.projectDir, stdout: "pipe", stderr: "pipe" });
65
+
66
+ const timeout = setTimeout(() => proc.kill(), 15_000);
67
+ const exitCode = await proc.exited;
68
+ clearTimeout(timeout);
69
+
70
+ if (exitCode !== 0) return null;
71
+
72
+ return (await new Response(proc.stdout).text()).trim();
73
+ }
74
+
75
+ function getCliOrder(): Array<"claude" | "codex"> {
76
+ if (verifiedCli) return [verifiedCli];
77
+ const order: Array<"claude" | "codex"> = [];
78
+ if (Bun.which("claude") !== null) order.push("claude");
79
+ if (Bun.which("codex") !== null) order.push("codex");
80
+ return order;
81
+ }
82
+
83
+ export async function detectScheduleIntent(message: string): Promise<ScheduleIntent | null> {
84
+ const clis = getCliOrder();
85
+ if (clis.length === 0) return null;
86
+
87
+ const fullPrompt = `${INTENT_PROMPT}\n\nUser message: ${message}`;
88
+
89
+ for (const cli of clis) {
90
+ try {
91
+ const raw = await trySpawn(fullPrompt, cli);
92
+ if (raw === null) continue;
93
+
94
+ // This CLI works — remember it
95
+ verifiedCli = cli;
96
+
97
+ const jsonMatch = raw.match(/\{[\s\S]*\}/);
98
+ if (!jsonMatch) return null;
99
+
100
+ const parsed = JSON.parse(jsonMatch[0]);
101
+ if (parsed.type === "none") return null;
102
+ if (parsed.type !== "once" && parsed.type !== "cron" && parsed.type !== "cancel") return null;
103
+
104
+ return parsed as ScheduleIntent;
105
+ } catch (e) {
106
+ log.warn(`Intent detection with ${cli} failed: ${e}`);
107
+ // Try next CLI
108
+ }
109
+ }
110
+
111
+ return null;
112
+ }
@@ -0,0 +1,144 @@
1
+ /**
2
+ * @module core/media
3
+ * @role Handle voice transcription (Whisper), image analysis (Vision), and speech synthesis (ElevenLabs).
4
+ * @responsibilities
5
+ * - Transcribe audio buffers via OpenAI Whisper API
6
+ * - Describe images via OpenAI Vision API
7
+ * - Generate speech from text via ElevenLabs API
8
+ * - Manage temp files for audio processing
9
+ * @dependencies shared/config
10
+ * @effects Network calls to OpenAI API and ElevenLabs API, temp file I/O in runtime voice_temp/
11
+ * @contract transcribeAudio(base64, filename) => Promise<string>
12
+ * @contract describeImage(base64, caption?) => Promise<string>
13
+ * @contract generateSpeech(text, voice?) => Promise<string>
14
+ */
15
+
16
+ import { writeFileSync, unlinkSync, mkdirSync, existsSync } from "fs";
17
+ import { join } from "path";
18
+ import OpenAI from "openai";
19
+ import { ElevenLabsClient } from "elevenlabs";
20
+ import { config } from "../shared/config";
21
+ import { createLogger } from "../shared/logger";
22
+
23
const log = createLogger("core");

// Lazily-created API clients: null until first use, constructed on demand
// by getClient / getElevenLabsClient. generateSpeech resets `elevenlabs`
// to null on auth errors so a corrected key takes effect without restart.
let openai: OpenAI | null = null;
let elevenlabs: ElevenLabsClient | null = null;
27
+
28
+ function getClient(): OpenAI {
29
+ if (!openai) {
30
+ if (!config.openaiApiKey) {
31
+ throw new Error("OPENAI_API_KEY not configured");
32
+ }
33
+ openai = new OpenAI({ apiKey: config.openaiApiKey });
34
+ }
35
+ return openai;
36
+ }
37
+
38
+ function getElevenLabsClient(): ElevenLabsClient {
39
+ if (!elevenlabs) {
40
+ if (!config.elevenlabsApiKey) {
41
+ throw new Error("ELEVENLABS_API_KEY not configured");
42
+ }
43
+ elevenlabs = new ElevenLabsClient({ apiKey: config.elevenlabsApiKey });
44
+ }
45
+ return elevenlabs;
46
+ }
47
+
48
+ export async function transcribeAudio(base64: string, filename: string): Promise<string> {
49
+ const client = getClient();
50
+
51
+ if (!existsSync(config.voiceTempDir)) {
52
+ mkdirSync(config.voiceTempDir, { recursive: true });
53
+ }
54
+
55
+ const tempPath = join(config.voiceTempDir, filename);
56
+ const buffer = Buffer.from(base64, "base64");
57
+ writeFileSync(tempPath, buffer);
58
+
59
+ try {
60
+ const file = Bun.file(tempPath);
61
+ const transcription = await client.audio.transcriptions.create({
62
+ file: file,
63
+ model: "whisper-1",
64
+ });
65
+ log.info(`Transcribed audio: "${transcription.text.substring(0, 80)}..."`);
66
+ return transcription.text;
67
+ } finally {
68
+ try { unlinkSync(tempPath); } catch { /* ignore */ }
69
+ }
70
+ }
71
+
72
+ export async function describeImage(base64: string, caption?: string): Promise<string> {
73
+ const client = getClient();
74
+
75
+ const prompt = caption
76
+ ? `The user sent this image with the text: "${caption}". Describe in detail what you see and respond considering the attached text.`
77
+ : "Describe in detail what you see in this image.";
78
+
79
+ const response = await client.chat.completions.create({
80
+ model: "gpt-5.2",
81
+ messages: [
82
+ {
83
+ role: "user",
84
+ content: [
85
+ { type: "image_url", image_url: { url: `data:image/jpeg;base64,${base64}` } },
86
+ { type: "text", text: prompt },
87
+ ],
88
+ },
89
+ ],
90
+ response_format: { type: "text" },
91
+ verbosity: "low",
92
+ reasoning_effort: "none",
93
+ store: false,
94
+ });
95
+
96
+ const description = response.choices[0]?.message?.content || "";
97
+ log.info(`Image described (gpt-5.2): "${description.substring(0, 80)}..."`);
98
+ return description;
99
+ }
100
+
101
+ export async function generateSpeech(text: string, voiceId: string = config.elevenlabsVoiceId): Promise<string> {
102
+ const client = getElevenLabsClient();
103
+
104
+ if (!existsSync(config.voiceTempDir)) {
105
+ mkdirSync(config.voiceTempDir, { recursive: true });
106
+ }
107
+
108
+ const outputPath = join(config.voiceTempDir, `speech_${Date.now()}.mp3`);
109
+
110
+ try {
111
+ const audio = await client.textToSpeech.convert(voiceId, {
112
+ text,
113
+ model_id: "eleven_turbo_v2_5",
114
+ });
115
+
116
+ const chunks: Uint8Array[] = [];
117
+ for await (const chunk of audio) {
118
+ chunks.push(chunk);
119
+ }
120
+
121
+ const buffer = Buffer.concat(chunks);
122
+ writeFileSync(outputPath, buffer);
123
+
124
+ log.info(`Generated speech: ${text.substring(0, 80)}... (voice: ${voiceId})`);
125
+ return outputPath;
126
+ } catch (error) {
127
+ // Invalidate cached client on auth errors so a new key takes effect without restart
128
+ const errStr = String(error);
129
+ if (errStr.includes("401") || errStr.includes("403") || errStr.includes("Unauthorized")) {
130
+ elevenlabs = null;
131
+ log.warn("ElevenLabs client invalidated due to auth error — update ELEVENLABS_API_KEY in .env");
132
+ }
133
+ log.error(`Failed to generate speech: ${error}`);
134
+ throw error;
135
+ }
136
+ }
137
+
138
+ export function isMediaConfigured(): boolean {
139
+ return !!config.openaiApiKey;
140
+ }
141
+
142
+ export function isSpeechConfigured(): boolean {
143
+ return !!config.elevenlabsApiKey;
144
+ }