arisa 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,437 @@
1
+ /**
2
+ * @module core/index
3
+ * @role HTTP server entry point for Core process.
4
+ * @responsibilities
5
+ * - Listen on :7777 for messages from Daemon
6
+ * - Route /message requests through media → processor → file-detector → format
7
+ * - Expose /health endpoint for Daemon health checks
8
+ * - Handle /reset, scheduler parsing, and command dispatch
9
+ * - Initialize scheduler on startup
10
+ * @dependencies All core/* modules, shared/*
11
+ * @effects Network (HTTP server), spawns Claude CLI, disk I/O
12
+ */
13
+
14
+ import { config } from "../shared/config";
15
+
16
// Initialize encrypted secrets before anything reads them.
// NOTE(review): ES module imports are hoisted, so the modules imported
// below this statement are still evaluated BEFORE this await runs —
// confirm none of them read secrets at module-load time.
await config.secrets.initialize();
18
+ import { createLogger } from "../shared/logger";
19
+ import { serveWithRetry, claimProcess } from "../shared/ports";
20
+ import type { IncomingMessage, CoreResponse, ScheduledTask } from "../shared/types";
21
+ import { processWithClaude, processWithCodex, isClaudeRateLimitResponse } from "./processor";
22
+ import { transcribeAudio, describeImage, generateSpeech, isMediaConfigured, isSpeechConfigured } from "./media";
23
+ import { detectFiles } from "./file-detector";
24
+
25
+ import { addExchange, getForeignContext, clearHistory, getLastBackend } from "./history";
26
+ import { getOnboarding, checkDeps } from "./onboarding";
27
+ import { initScheduler, addTask, cancelAllChatTasks } from "./scheduler";
28
+ import { detectScheduleIntent } from "./intent";
29
+ import { initAuth, isAuthorized, tryAuthorize } from "./auth";
30
+ import { initAttachments, saveAttachment } from "./attachments";
31
+ import { saveMessageRecord, getMessageRecord } from "../shared/db";
32
+
33
const log = createLogger("core");

// Kill previous Core if still running, write our PID so the next restart
// can do the same to us.
claimProcess("core");

// Per-chat backend state — default based on what's installed (claude > codex).
// Keys are chat IDs; values are the backend currently serving that chat.
const backendState = new Map<string, "claude" | "codex">();
40
+
41
+ function defaultBackend(): "claude" | "codex" {
42
+ const deps = checkDeps();
43
+ return deps.claude ? "claude" : "codex";
44
+ }
45
+
46
+ function getBackend(chatId: string): "claude" | "codex" {
47
+ const current = backendState.get(chatId);
48
+ if (current) return current;
49
+
50
+ const fromHistory = getLastBackend(chatId);
51
+ if (fromHistory) {
52
+ backendState.set(chatId, fromHistory);
53
+ return fromHistory;
54
+ }
55
+
56
+ return defaultBackend();
57
+ }
58
+
59
// Initialize auth + scheduler + attachments before the server below
// starts accepting traffic.
await initAuth();
await initScheduler();
await initAttachments();
63
+
64
// HTTP entry point: the Daemon posts inbound chat messages to /message and
// polls /health for liveness. All per-message pipeline steps live in the
// fetch handler below, in strict order: auth → onboarding → reply context →
// commands → media → ledger → scheduling → backend routing → voice/files.
const server = await serveWithRetry({
  port: config.corePort,
  async fetch(req) {
    const url = new URL(req.url);

    // Liveness probe used by the Daemon.
    if (url.pathname === "/health" && req.method === "GET") {
      return Response.json({ status: "ok", timestamp: Date.now() });
    }

    if (url.pathname === "/message" && req.method === "POST") {
      try {
        const body = await req.json();
        const msg: IncomingMessage = body.message;

        if (!msg) {
          return Response.json({ error: "Missing message" }, { status: 400 });
        }

        log.debug(`Inbound message | chatId=${msg.chatId} | sender=${msg.sender} | type=${msg.text ? "text" : "media"}`);

        // Auth gate: require token before anything else. An unauthorized
        // chat can only become authorized by sending the token as text.
        if (!isAuthorized(msg.chatId)) {
          if (msg.text && await tryAuthorize(msg.chatId, msg.text)) {
            return Response.json({ text: "Authorized. Welcome to Arisa!" } as CoreResponse);
          }
          return Response.json({ text: "Send the auth token to start. Check the server console." } as CoreResponse);
        }

        // Onboarding: first message from this chat. A blocking result
        // short-circuits here; a non-blocking one is prepended to the
        // final reply further below.
        const onboarding = await getOnboarding(msg.chatId);
        if (onboarding?.blocking) {
          return Response.json({ text: onboarding.message } as CoreResponse);
        }

        // Initialize message text
        let messageText = msg.text || "";

        // Prepend reply context if message quotes another message
        if (msg.replyTo) {
          let quotedText = msg.replyTo.text || "";
          let quotedSender = msg.replyTo.sender;
          let quotedDate = new Date(msg.replyTo.timestamp).toLocaleString("es-AR");
          let attachmentInfo = "";

          // Try ledger lookup for richer context (media description, path)
          if (msg.replyTo.messageId) {
            const ledger = await getMessageRecord(msg.chatId, msg.replyTo.messageId);
            if (ledger) {
              quotedText = ledger.text || quotedText;
              quotedSender = ledger.sender;
              quotedDate = new Date(ledger.timestamp).toLocaleString("es-AR");
              if (ledger.mediaDescription) {
                attachmentInfo += `\nMedia description: ${ledger.mediaDescription}`;
              }
              if (ledger.attachmentPath) {
                attachmentInfo += `\nAttachment: ${ledger.attachmentPath}`;
              }
            }
          }

          if (!quotedText && !attachmentInfo) {
            quotedText = "[media or unknown content]";
          }

          messageText = `━━━ QUOTED MESSAGE ━━━
From: ${quotedSender}
Date: ${quotedDate}
Content: "${quotedText}"${attachmentInfo}
━━━━━━━━━━━━━━━━━━━━

${messageText}`;
        }

        // Handle /reset command: flag the CLI session for a fresh start and
        // wipe per-chat history plus router state.
        if (msg.command === "/reset") {
          const { writeFileSync } = await import("fs");
          writeFileSync(config.resetFlagPath, "reset");
          clearHistory(msg.chatId);
          const { resetRouterState } = await import("./router");
          resetRouterState();
          const response: CoreResponse = { text: "Conversation reset! Next message will start a fresh conversation." };
          return Response.json(response);
        }

        // Handle /cancel command — stop all scheduled tasks
        if (msg.command === "/cancel") {
          const removed = await cancelAllChatTasks(msg.chatId);
          const text = removed > 0
            ? `Cancelled ${removed} task${removed > 1 ? "s" : ""}.`
            : "No active tasks to cancel.";
          return Response.json({ text } as CoreResponse);
        }

        // Handle /codex command — switch to codex backend
        if (msg.command === "/codex") {
          const deps = checkDeps();
          if (!deps.codex) {
            // NOTE(review): both branches of this ternary are identical —
            // either the macOS hint was meant to differ (cf. /claude below)
            // or the conditional is dead. Confirm intent.
            const hint = deps.os === "macOS"
              ? "<code>npm install -g @openai/codex</code>"
              : "<code>npm install -g @openai/codex</code>";
            return Response.json({ text: `Codex CLI is not installed.\n${hint}` } as CoreResponse);
          }
          backendState.set(msg.chatId, "codex");
          log.info(`Backend switched to codex for chat ${msg.chatId}`);
          const response: CoreResponse = { text: "Codex mode activated. Use /claude to switch back." };
          return Response.json(response);
        }

        // Handle /claude command — switch to claude backend
        if (msg.command === "/claude") {
          const deps = checkDeps();
          if (!deps.claude) {
            const hint = deps.os === "macOS"
              ? "<code>brew install claude-code</code> o <code>npm install -g @anthropic-ai/claude-code</code>"
              : "<code>npm install -g @anthropic-ai/claude-code</code>";
            return Response.json({ text: `Claude CLI is not installed.\n${hint}` } as CoreResponse);
          }
          backendState.set(msg.chatId, "claude");
          log.info(`Backend switched to claude for chat ${msg.chatId}`);
          const response: CoreResponse = { text: "Claude mode activated. Use /codex to switch back." };
          return Response.json(response);
        }

        // Handle /speak command — generate speech via ElevenLabs
        if (msg.command === "/speak") {
          if (!config.elevenlabsApiKey) {
            return Response.json({ text: "ELEVENLABS_API_KEY not configured. Add it to ~/.arisa/.env" } as CoreResponse);
          }
          // NOTE(review): when this message quotes another, messageText now
          // begins with the quoted-message block, so this anchored regex
          // will not strip the "/speak" prefix — confirm /speak combined
          // with a reply is intended.
          const textToSpeak = messageText.replace(/^\/speak\s*/, "").trim();
          if (!textToSpeak) {
            return Response.json({ text: "Usage: /speak <text to convert to speech>" } as CoreResponse);
          }
          try {
            const audioPath = await generateSpeech(textToSpeak);
            const response: CoreResponse = {
              text: "",
              audio: audioPath,
            };
            return Response.json(response);
          } catch (error) {
            log.error(`Speech generation failed: ${error}`);
            return Response.json({ text: "Failed to generate speech. Check logs for details." } as CoreResponse);
          }
        }

        // Process media first — track metadata for message ledger
        let ledgerMediaType: "image" | "audio" | "document" | undefined;
        let ledgerAttachmentPath: string | undefined;
        let ledgerMediaDescription: string | undefined;

        if (msg.audio) {
          const audioPath = await saveAttachment(msg.chatId, "audio", msg.audio.base64, msg.audio.filename);
          ledgerMediaType = "audio";
          ledgerAttachmentPath = audioPath;
          if (isMediaConfigured()) {
            try {
              const transcription = await transcribeAudio(msg.audio.base64, msg.audio.filename);
              if (transcription.trim()) {
                ledgerMediaDescription = transcription;
                messageText = `[Audio saved to ${audioPath}]\n[Voice message transcription]: ${transcription}`;
              } else {
                messageText = `[Audio saved to ${audioPath}]\n[Transcription returned empty. Ask the user to try again or send text.]`;
              }
            } catch (error) {
              // Transcription failure is non-fatal: the saved file path is
              // still surfaced to the agent.
              log.error(`Transcription failed: ${error}`);
              messageText = `[Audio saved to ${audioPath}]\n[Transcription failed. The audio file is still accessible at the path above.]`;
            }
          } else {
            messageText = `[Audio saved to ${audioPath}]\n[Cannot transcribe because OPENAI_API_KEY is not configured. The audio file is still accessible at the path above.]`;
          }
        }

        if (msg.image) {
          const caption = msg.image.caption || "";
          const imgPath = await saveAttachment(msg.chatId, "image", msg.image.base64);
          ledgerMediaType = "image";
          ledgerAttachmentPath = imgPath;

          if (caption && isMediaConfigured()) {
            // User sent text with the image → describe it via Vision
            try {
              const description = await describeImage(msg.image.base64, caption);
              if (description.trim()) {
                ledgerMediaDescription = description;
                messageText = `[Image saved to ${imgPath}]\n[Image description: ${description}]\n${caption}`;
              } else {
                messageText = `[Image saved to ${imgPath}]\n[Image content could not be interpreted]\n${caption}`;
              }
            } catch (error) {
              log.error(`Image analysis failed: ${error}`);
              messageText = `[Image saved to ${imgPath}]\n[Error analyzing the image]\n${caption}`;
            }
          } else if (caption) {
            // Has caption but no OpenAI key
            messageText = `[Image saved to ${imgPath}]\n[Cannot describe image — OPENAI_API_KEY not configured. The image file is accessible at the path above.]\n${caption}`;
          } else {
            // No caption → just save, no GPT call
            messageText = `[Image saved to ${imgPath}]`;
          }
        }

        if (msg.document) {
          const docPath = await saveAttachment(msg.chatId, "document", msg.document.base64, msg.document.filename, msg.document.mimeType);
          ledgerMediaType = "document";
          ledgerAttachmentPath = docPath;
          const caption = msg.document.caption || "";
          messageText = caption
            ? `[Document saved to ${docPath}] (${msg.document.mimeType})\n${caption}`
            : `[Document saved to ${docPath}] (${msg.document.mimeType})`;
        }

        if (!messageText) {
          const response: CoreResponse = { text: "Empty message received." };
          return Response.json(response);
        }

        // Save incoming message to ledger (after media processing so we
        // have descriptions). Fire-and-forget: a ledger failure must not
        // block the reply.
        if (msg.messageId) {
          saveMessageRecord({
            id: `${msg.chatId}_${msg.messageId}`,
            chatId: msg.chatId,
            messageId: msg.messageId,
            direction: "in",
            sender: msg.sender,
            timestamp: msg.timestamp,
            text: messageText,
            mediaType: ledgerMediaType,
            attachmentPath: ledgerAttachmentPath,
            mediaDescription: ledgerMediaDescription,
          }).catch((e) => log.error(`Failed to save incoming message record: ${e}`));
        }

        // Detect scheduling intent via haiku (language-agnostic). A hit
        // short-circuits: the message becomes a task, not an agent turn.
        const scheduleIntent = await detectScheduleIntent(messageText);
        if (scheduleIntent) {
          if (scheduleIntent.type === "cancel") {
            const removed = await cancelAllChatTasks(msg.chatId);
            const text = removed > 0
              ? scheduleIntent.confirmation
              : "No active tasks to cancel.";
            return Response.json({ text } as CoreResponse);
          }

          const taskId = `${Date.now()}_${Math.random().toString(36).substring(7)}`;
          const task: ScheduledTask = {
            id: taskId,
            chatId: msg.chatId,
            sender: msg.sender,
            senderId: msg.senderId,
            type: scheduleIntent.type,
            message: scheduleIntent.message,
            originalMessage: messageText,
            createdAt: Date.now(),
            ...(scheduleIntent.type === "once" && scheduleIntent.delaySeconds
              ? { runAt: Date.now() + scheduleIntent.delaySeconds * 1000 }
              : {}),
            ...(scheduleIntent.type === "cron" && scheduleIntent.cron
              ? { cron: scheduleIntent.cron }
              : {}),
          };
          await addTask(task);
          const response: CoreResponse = { text: scheduleIntent.confirmation };
          return Response.json(response);
        }

        // Route based on current backend state; fall back to the other
        // backend when the chosen one fails and the other is installed.
        const backend = getBackend(msg.chatId);
        const deps = checkDeps();
        const canFallback = backend === "codex" ? deps.claude : deps.codex;
        let agentResponse: string;
        let historyResponse: string | null = null;
        let usedBackend: "claude" | "codex" = backend;

        // Inject cross-backend context if switching
        const foreignCtx = getForeignContext(msg.chatId, backend);
        const enrichedMessage = foreignCtx ? foreignCtx + messageText : messageText;

        log.info(`Routing | backend: ${backend} | foreignCtx: ${!!foreignCtx} | enrichedChars: ${enrichedMessage.length}`);

        if (backend === "codex") {
          try {
            agentResponse = await processWithCodex(enrichedMessage);
            // Codex reports failure in-band via this error prefix.
            if (agentResponse.startsWith("Error processing with Codex") && canFallback) {
              log.warn("Codex failed, falling back to Claude");
              agentResponse = await processWithClaude(enrichedMessage, msg.chatId);
              usedBackend = "claude";
            }
          } catch (error) {
            if (canFallback) {
              log.warn(`Codex threw, falling back to Claude: ${error}`);
              agentResponse = await processWithClaude(enrichedMessage, msg.chatId);
              usedBackend = "claude";
            } else {
              agentResponse = "Error processing with Codex. Please try again.";
            }
          }
        } else {
          try {
            agentResponse = await processWithClaude(enrichedMessage, msg.chatId);
            if (isClaudeRateLimitResponse(agentResponse) && canFallback) {
              log.warn("Claude credits exhausted, falling back to Codex");
              const codexResponse = await processWithCodex(enrichedMessage);
              agentResponse = `Claude is out of credits right now, so I switched this reply to Codex.\n---CHUNK---\n${codexResponse}`;
              // History records only the Codex answer, not the switch notice.
              historyResponse = codexResponse;
              usedBackend = "codex";
              // Persist the switch so subsequent messages don't keep re-injecting
              // cross-backend context while Claude has no credits.
              backendState.set(msg.chatId, "codex");
            }
          } catch (error) {
            const errMsg = error instanceof Error ? error.message : String(error);
            if (canFallback) {
              log.warn(`Claude threw, falling back to Codex: ${errMsg}`);
              agentResponse = await processWithCodex(enrichedMessage);
              usedBackend = "codex";
            } else {
              agentResponse = `Claude error: ${errMsg.slice(0, 200)}`;
            }
          }
        }

        // Log exchange for shared history
        addExchange(msg.chatId, messageText, historyResponse ?? agentResponse, usedBackend);

        log.info(`Response | backend: ${usedBackend} | responseChars: ${agentResponse.length}`);
        log.debug(`Response raw >>>>\n${agentResponse}\n<<<<`);

        // Detect [VOICE]...[/VOICE] tags — generate speech via ElevenLabs.
        // NOTE(review): when speech is NOT configured the tags are left
        // verbatim in the outgoing text — confirm that is acceptable.
        let audioPath: string | undefined;
        let textResponse = agentResponse;

        const voiceMatch = agentResponse.match(/\[VOICE\]([\s\S]*?)\[\/VOICE\]/);
        if (voiceMatch && isSpeechConfigured()) {
          const speechText = voiceMatch[1].trim();
          textResponse = agentResponse.replace(/\[VOICE\][\s\S]*?\[\/VOICE\]/, "").trim();
          try {
            audioPath = await generateSpeech(speechText, config.elevenlabsVoiceId);
            log.info(`Speech generated for ${speechText.length} chars`);
          } catch (error) {
            log.error(`Speech generation failed: ${error}`);
            // Fallback: send the voice text as regular text so the message isn't empty
            if (!textResponse) {
              textResponse = speechText;
            }
          }
        }

        // Prepend onboarding info if first message (non-blocking)
        const fullResponse = onboarding
          ? onboarding.message + "\n\n" + textResponse
          : textResponse;

        // File paths mentioned in the reply become attachments.
        const files = detectFiles(textResponse);

        const response: CoreResponse = {
          text: fullResponse,
          files: files.length > 0 ? files : undefined,
          audio: audioPath,
        };

        return Response.json(response);
      } catch (error) {
        // Last-resort handler: report a truncated error instead of a 500
        // so the Daemon still delivers something to the chat.
        const errMsg = error instanceof Error ? error.message : String(error);
        log.error(`Request processing error: ${errMsg}`);
        const summary = errMsg.length > 200 ? errMsg.slice(0, 200) + "..." : errMsg;
        return Response.json({ text: `Internal error: ${summary}` } as CoreResponse);
      }
    }

    return Response.json({ error: "Not found" }, { status: 404 });
  },
});

log.info(`Core server listening on port ${config.corePort}`);
@@ -0,0 +1,112 @@
1
+ /**
2
+ * @module core/intent
3
+ * @role Use a fast model to detect scheduling intents from any language.
4
+ * @responsibilities
5
+ * - Classify messages as schedule requests or regular messages
6
+ * - Extract schedule type (once/cron), timing, and reminder text
7
+ * - Works with whatever CLI is available (claude or codex)
8
+ * @dependencies shared/config
9
+ * @effects Spawns claude or codex CLI
10
+ */
11
+
12
+ import { config } from "../shared/config";
13
+ import { createLogger } from "../shared/logger";
14
+
15
const log = createLogger("core");

/**
 * Parsed scheduling intent extracted from a user message.
 * - type "once": delaySeconds holds the offset from now, in seconds.
 * - type "cron": cron holds a 5- or 6-field cron expression.
 * - type "cancel": message is unused; only confirmation matters.
 * `confirmation` is the user-facing acknowledgement text, written by the
 * model in the user's language (see INTENT_PROMPT rules).
 */
export interface ScheduleIntent {
  type: "once" | "cron" | "cancel";
  delaySeconds?: number;
  cron?: string;
  message: string;
  confirmation: string;
}
24
+
25
// System prompt for the intent classifier. The model must answer with a
// single JSON object; detectScheduleIntent extracts and parses it, so the
// examples below define the exact schema for each intent type. Runtime
// string — do not edit casually.
const INTENT_PROMPT = `You are a scheduling intent detector. Analyze the user message and determine if they want to schedule a reminder, recurring notification, or cancel/stop existing tasks.

If it IS a scheduling request, respond with ONLY this JSON (no markdown, no explanation):
For one-time reminders:
{"type":"once","delaySeconds":300,"message":"the reminder text","confirmation":"I'll remind you in 5 minutes"}

For recurring reminders:
{"type":"cron","cron":"*/5 * * * *","message":"the reminder text","confirmation":"I'll remind you every 5 minutes"}

For cancelling/stopping tasks:
{"type":"cancel","message":"","confirmation":"All tasks cancelled."}

If it is NOT a scheduling or cancellation request, respond with ONLY:
{"type":"none"}

Rules:
- One-time: "in X seconds/minutes/hours" or equivalent in any language → once
- Recurring: "every X seconds/minutes/hours" or equivalent in any language → cron
- Cancel: "stop/cancel/remove all tasks/reminders" or equivalent in any language → cancel
- For seconds-based cron, use 6-field format: */N * * * * *
- For minutes-based cron: */N * * * *
- For hours-based cron: 0 */N * * *
- Extract the actual reminder content, not the scheduling instruction
- Write the confirmation in the same language as the user's message
- Support any language
- Only detect clear scheduling intent, not vague mentions of time`;
51
+
52
+ function buildCmd(cli: "claude" | "codex", prompt: string): string[] {
53
+ if (cli === "claude") {
54
+ return ["claude", "--dangerously-skip-permissions", "--model", "haiku", "-p", prompt];
55
+ }
56
+ return ["codex", "exec", "--dangerously-bypass-approvals-and-sandbox", "-C", config.projectDir, prompt];
57
+ }
58
+
59
+ // Track which CLI actually works (not just Bun.which, which can find broken shims)
60
+ let verifiedCli: "claude" | "codex" | null = null;
61
+
62
+ async function trySpawn(prompt: string, cli: "claude" | "codex"): Promise<string | null> {
63
+ const cmd = buildCmd(cli, prompt);
64
+ const proc = Bun.spawn(cmd, { cwd: config.projectDir, stdout: "pipe", stderr: "pipe" });
65
+
66
+ const timeout = setTimeout(() => proc.kill(), 15_000);
67
+ const exitCode = await proc.exited;
68
+ clearTimeout(timeout);
69
+
70
+ if (exitCode !== 0) return null;
71
+
72
+ return (await new Response(proc.stdout).text()).trim();
73
+ }
74
+
75
+ function getCliOrder(): Array<"claude" | "codex"> {
76
+ if (verifiedCli) return [verifiedCli];
77
+ const order: Array<"claude" | "codex"> = [];
78
+ if (Bun.which("claude") !== null) order.push("claude");
79
+ if (Bun.which("codex") !== null) order.push("codex");
80
+ return order;
81
+ }
82
+
83
+ export async function detectScheduleIntent(message: string): Promise<ScheduleIntent | null> {
84
+ const clis = getCliOrder();
85
+ if (clis.length === 0) return null;
86
+
87
+ const fullPrompt = `${INTENT_PROMPT}\n\nUser message: ${message}`;
88
+
89
+ for (const cli of clis) {
90
+ try {
91
+ const raw = await trySpawn(fullPrompt, cli);
92
+ if (raw === null) continue;
93
+
94
+ // This CLI works — remember it
95
+ verifiedCli = cli;
96
+
97
+ const jsonMatch = raw.match(/\{[\s\S]*\}/);
98
+ if (!jsonMatch) return null;
99
+
100
+ const parsed = JSON.parse(jsonMatch[0]);
101
+ if (parsed.type === "none") return null;
102
+ if (parsed.type !== "once" && parsed.type !== "cron" && parsed.type !== "cancel") return null;
103
+
104
+ return parsed as ScheduleIntent;
105
+ } catch (e) {
106
+ log.warn(`Intent detection with ${cli} failed: ${e}`);
107
+ // Try next CLI
108
+ }
109
+ }
110
+
111
+ return null;
112
+ }
@@ -0,0 +1,144 @@
1
+ /**
2
+ * @module core/media
3
+ * @role Handle voice transcription (Whisper), image analysis (Vision), and speech synthesis (ElevenLabs).
4
+ * @responsibilities
5
+ * - Transcribe audio buffers via OpenAI Whisper API
6
+ * - Describe images via OpenAI Vision API
7
+ * - Generate speech from text via ElevenLabs API
8
+ * - Manage temp files for audio processing
9
+ * @dependencies shared/config
10
+ * @effects Network calls to OpenAI API and ElevenLabs API, temp file I/O in runtime voice_temp/
11
+ * @contract transcribeAudio(base64, filename) => Promise<string>
12
+ * @contract describeImage(base64, caption?) => Promise<string>
13
+ * @contract generateSpeech(text, voice?) => Promise<string>
14
+ */
15
+
16
+ import { writeFileSync, unlinkSync, mkdirSync, existsSync } from "fs";
17
+ import { join } from "path";
18
+ import OpenAI from "openai";
19
+ import { ElevenLabsClient } from "elevenlabs";
20
+ import { config } from "../shared/config";
21
+ import { createLogger } from "../shared/logger";
22
+
23
const log = createLogger("core");

// Lazily-created API clients: null until first use, constructed on demand
// by getClient / getElevenLabsClient. generateSpeech resets `elevenlabs`
// to null on auth errors so a corrected key takes effect without restart.
let openai: OpenAI | null = null;
let elevenlabs: ElevenLabsClient | null = null;
27
+
28
+ function getClient(): OpenAI {
29
+ if (!openai) {
30
+ if (!config.openaiApiKey) {
31
+ throw new Error("OPENAI_API_KEY not configured");
32
+ }
33
+ openai = new OpenAI({ apiKey: config.openaiApiKey });
34
+ }
35
+ return openai;
36
+ }
37
+
38
+ function getElevenLabsClient(): ElevenLabsClient {
39
+ if (!elevenlabs) {
40
+ if (!config.elevenlabsApiKey) {
41
+ throw new Error("ELEVENLABS_API_KEY not configured");
42
+ }
43
+ elevenlabs = new ElevenLabsClient({ apiKey: config.elevenlabsApiKey });
44
+ }
45
+ return elevenlabs;
46
+ }
47
+
48
+ export async function transcribeAudio(base64: string, filename: string): Promise<string> {
49
+ const client = getClient();
50
+
51
+ if (!existsSync(config.voiceTempDir)) {
52
+ mkdirSync(config.voiceTempDir, { recursive: true });
53
+ }
54
+
55
+ const tempPath = join(config.voiceTempDir, filename);
56
+ const buffer = Buffer.from(base64, "base64");
57
+ writeFileSync(tempPath, buffer);
58
+
59
+ try {
60
+ const file = Bun.file(tempPath);
61
+ const transcription = await client.audio.transcriptions.create({
62
+ file: file,
63
+ model: "whisper-1",
64
+ });
65
+ log.info(`Transcribed audio: "${transcription.text.substring(0, 80)}..."`);
66
+ return transcription.text;
67
+ } finally {
68
+ try { unlinkSync(tempPath); } catch { /* ignore */ }
69
+ }
70
+ }
71
+
72
+ export async function describeImage(base64: string, caption?: string): Promise<string> {
73
+ const client = getClient();
74
+
75
+ const prompt = caption
76
+ ? `The user sent this image with the text: "${caption}". Describe in detail what you see and respond considering the attached text.`
77
+ : "Describe in detail what you see in this image.";
78
+
79
+ const response = await client.chat.completions.create({
80
+ model: "gpt-5.2",
81
+ messages: [
82
+ {
83
+ role: "user",
84
+ content: [
85
+ { type: "image_url", image_url: { url: `data:image/jpeg;base64,${base64}` } },
86
+ { type: "text", text: prompt },
87
+ ],
88
+ },
89
+ ],
90
+ response_format: { type: "text" },
91
+ verbosity: "low",
92
+ reasoning_effort: "none",
93
+ store: false,
94
+ });
95
+
96
+ const description = response.choices[0]?.message?.content || "";
97
+ log.info(`Image described (gpt-5.2): "${description.substring(0, 80)}..."`);
98
+ return description;
99
+ }
100
+
101
+ export async function generateSpeech(text: string, voiceId: string = config.elevenlabsVoiceId): Promise<string> {
102
+ const client = getElevenLabsClient();
103
+
104
+ if (!existsSync(config.voiceTempDir)) {
105
+ mkdirSync(config.voiceTempDir, { recursive: true });
106
+ }
107
+
108
+ const outputPath = join(config.voiceTempDir, `speech_${Date.now()}.mp3`);
109
+
110
+ try {
111
+ const audio = await client.textToSpeech.convert(voiceId, {
112
+ text,
113
+ model_id: "eleven_turbo_v2_5",
114
+ });
115
+
116
+ const chunks: Uint8Array[] = [];
117
+ for await (const chunk of audio) {
118
+ chunks.push(chunk);
119
+ }
120
+
121
+ const buffer = Buffer.concat(chunks);
122
+ writeFileSync(outputPath, buffer);
123
+
124
+ log.info(`Generated speech: ${text.substring(0, 80)}... (voice: ${voiceId})`);
125
+ return outputPath;
126
+ } catch (error) {
127
+ // Invalidate cached client on auth errors so a new key takes effect without restart
128
+ const errStr = String(error);
129
+ if (errStr.includes("401") || errStr.includes("403") || errStr.includes("Unauthorized")) {
130
+ elevenlabs = null;
131
+ log.warn("ElevenLabs client invalidated due to auth error — update ELEVENLABS_API_KEY in .env");
132
+ }
133
+ log.error(`Failed to generate speech: ${error}`);
134
+ throw error;
135
+ }
136
+ }
137
+
138
+ export function isMediaConfigured(): boolean {
139
+ return !!config.openaiApiKey;
140
+ }
141
+
142
+ export function isSpeechConfigured(): boolean {
143
+ return !!config.elevenlabsApiKey;
144
+ }