npm - @geravant/sinain - Versions diffs - 1.8.0 → 1.10.0 - Mend

@geravant/sinain 1.8.0 → 1.10.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (32)

package/.env.example +14 -13
package/HEARTBEAT.md +1 -1
package/README.md +4 -7
package/cli.js +16 -2
package/config-shared.js +469 -0
package/config.js +152 -0
package/index.ts +1 -3
package/launcher.js +7 -1
package/onboard.js +345 -0
package/package.json +8 -2
package/sense_client/__main__.py +8 -4
package/sense_client/gate.py +1 -0
package/sense_client/ocr.py +58 -25
package/sense_client/sender.py +2 -0
package/sense_client/vision.py +31 -11
package/sinain-agent/CLAUDE.md +0 -1
package/sinain-agent/run.sh +2 -1
package/sinain-core/src/agent/analyzer.ts +56 -58
package/sinain-core/src/agent/loop.ts +37 -11
package/sinain-core/src/audio/transcription.ts +20 -5
package/sinain-core/src/config.ts +20 -16
package/sinain-core/src/cost/tracker.ts +64 -0
package/sinain-core/src/escalation/escalator.ts +31 -59
package/sinain-core/src/index.ts +41 -45
package/sinain-core/src/overlay/commands.ts +12 -0
package/sinain-core/src/overlay/ws-handler.ts +27 -0
package/sinain-core/src/server.ts +41 -0
package/sinain-core/src/types.ts +46 -11
package/sinain-knowledge/curation/engine.ts +0 -17
package/sinain-knowledge/protocol/heartbeat.md +1 -1
package/sinain-mcp-server/index.ts +4 -20
package/sinain-memory/git_backup.sh +0 -19

package/sense_client/ocr.py CHANGED Viewed

@@ -1,4 +1,5 @@
 """OCR backends for UI text extraction: macOS Vision, Windows.Media.Ocr, and Tesseract."""
 from __future__ import annotations
 import io
@@ -24,8 +25,13 @@ class OCRResult:
 class LocalOCR:
     """Tesseract OCR wrapper for UI text extraction."""
-    def __init__(self, lang: str = "eng", psm: int = 11,
-                 min_confidence: int = 30, enabled: bool = True):
+    def __init__(
+        self,
+        lang: str = "eng",
+        psm: int = 11,
+        min_confidence: int = 30,
+        enabled: bool = True,
+    ):
         self.lang = lang
         self.psm = psm
         self.min_confidence = min_confidence
@@ -87,8 +93,12 @@ class LocalOCR:
 class VisionOCR:
     """macOS Vision framework OCR using pyobjc."""
-    def __init__(self, languages: list[str] | None = None,
-                 min_confidence: float = 0.5, enabled: bool = True):
+    def __init__(
+        self,
+        languages: list[str] | None = None,
+        min_confidence: float = 0.5,
+        enabled: bool = True,
+    ):
         self.languages = languages or ["en", "ru"]
         self.min_confidence = min_confidence
         self.enabled = enabled
@@ -101,8 +111,12 @@ class VisionOCR:
             import objc  # noqa: F401
             import Quartz  # noqa: F401
             from Foundation import NSURL, NSData  # noqa: F401
-            objc.loadBundle('Vision', bundle_path='/System/Library/Frameworks/Vision.framework',
-                            module_globals=globals())
+            objc.loadBundle(
+                "Vision",
+                bundle_path="/System/Library/Frameworks/Vision.framework",
+                module_globals=globals(),
+            )
             self._available = True
         except Exception as e:
             print(f"[ocr] Vision framework unavailable: {e}", flush=True)
@@ -120,9 +134,8 @@ class VisionOCR:
     def _do_extract(self, image: Image.Image) -> OCRResult:
         import objc
-        import Vision
-        from Foundation import NSData
         import Quartz
+        from Foundation import NSData
         # Convert PIL Image to CGImage via PNG bytes
         buf = io.BytesIO()
@@ -138,15 +151,17 @@ class VisionOCR:
             return OCRResult(text="", confidence=0, word_count=0)
         # Create and configure request
-        request = Vision.VNRecognizeTextRequest.alloc().init()
-        request.setRecognitionLevel_(Vision.VNRequestTextRecognitionLevelAccurate)
+        request = VNRecognizeTextRequest.alloc().init()
+        request.setRecognitionLevel_(0)  # VNRequestTextRecognitionLevelAccurate
         request.setRecognitionLanguages_(self.languages)
         request.setUsesLanguageCorrection_(True)
         # Execute
-        handler = Vision.VNImageRequestHandler.alloc().initWithCGImage_options_(cg_image, None)
-        success = handler.performRequests_error_([request], objc.nil)
-        if not success[0]:
+        handler = VNImageRequestHandler.alloc().initWithCGImage_options_(cg_image, None)
+        result = handler.performRequests_error_([request], objc.nil)
+        # PyObjC may return (bool, error) tuple or just bool depending on version
+        success = result[0] if isinstance(result, tuple) else result
+        if not success:
             return OCRResult(text="", confidence=0, word_count=0)
         results = request.results()
@@ -159,7 +174,8 @@ class VisionOCR:
         for observation in results:
             candidate = observation.topCandidates_(1)
-            if not candidate:
+            # PyObjC may return bool instead of list depending on version
+            if not candidate or isinstance(candidate, bool):
                 continue
             text = candidate[0].string()
             conf = candidate[0].confidence()
@@ -197,8 +213,9 @@ class VisionOCR:
 class WinOCR:
     """Windows.Media.Ocr backend via winrt-python (Windows 10+)."""
-    def __init__(self, language: str = "en", min_confidence: float = 0.5,
-                 enabled: bool = True):
+    def __init__(
+        self, language: str = "en", min_confidence: float = 0.5, enabled: bool = True
+    ):
         self.language = language
         self.min_confidence = min_confidence
         self.enabled = enabled
@@ -209,8 +226,8 @@ class WinOCR:
             return
         try:
-            from winrt.windows.media.ocr import OcrEngine
             from winrt.windows.globalization import Language
+            from winrt.windows.media.ocr import OcrEngine
             lang = Language(language)
             if OcrEngine.is_language_supported(lang):
@@ -234,11 +251,15 @@ class WinOCR:
     def _do_extract(self, image: Image.Image) -> OCRResult:
         import asyncio
         from winrt.windows.graphics.imaging import (
-            SoftwareBitmap, BitmapPixelFormat, BitmapAlphaMode,
+            BitmapAlphaMode,
+            BitmapPixelFormat,
+            SoftwareBitmap,
         )
         from winrt.windows.storage.streams import (
-            InMemoryRandomAccessStream, DataWriter,
+            DataWriter,
+            InMemoryRandomAccessStream,
         )
         # Convert PIL to BMP bytes and load as SoftwareBitmap
@@ -254,13 +275,15 @@ class WinOCR:
             stream.seek(0)
             from winrt.windows.graphics.imaging import BitmapDecoder
             decoder = await BitmapDecoder.create_async(stream)
             bitmap = await decoder.get_software_bitmap_async()
             # Convert to supported pixel format if needed
             if bitmap.bitmap_pixel_format != BitmapPixelFormat.BGRA8:
-                bitmap = SoftwareBitmap.convert(bitmap, BitmapPixelFormat.BGRA8,
-                                                 BitmapAlphaMode.PREMULTIPLIED)
+                bitmap = SoftwareBitmap.convert(
+                    bitmap, BitmapPixelFormat.BGRA8, BitmapAlphaMode.PREMULTIPLIED
+                )
             result = await self._engine.recognize_async(bitmap)
             return result
@@ -318,10 +341,15 @@ def create_ocr(config: dict):
             enabled=enabled,
         )
         if vision._available:
-            print(f"[ocr] using Vision backend (languages={vision.languages})", flush=True)
+            print(
+                f"[ocr] using Vision backend (languages={vision.languages})", flush=True
+            )
             return vision
         if backend == "vision":
-            print("[ocr] Vision requested but unavailable, falling back to Tesseract", flush=True)
+            print(
+                "[ocr] Vision requested but unavailable, falling back to Tesseract",
+                flush=True,
+            )
     # Windows: try Windows.Media.Ocr
     if sys.platform == "win32" and backend in ("auto", "winocr"):
@@ -332,10 +360,15 @@ def create_ocr(config: dict):
             enabled=enabled,
         )
         if winocr._available:
-            print(f"[ocr] using WinOCR backend (language={winocr.language})", flush=True)
+            print(
+                f"[ocr] using WinOCR backend (language={winocr.language})", flush=True
+            )
             return winocr
         if backend == "winocr":
-            print("[ocr] WinOCR requested but unavailable, falling back to Tesseract", flush=True)
+            print(
+                "[ocr] WinOCR requested but unavailable, falling back to Tesseract",
+                flush=True,
+            )
     # Fallback to Tesseract (cross-platform)
     print("[ocr] using Tesseract backend", flush=True)

package/sense_client/sender.py CHANGED Viewed

@@ -51,6 +51,8 @@ class SenseSender:
                 "narrative": event.observation.narrative,
                 "concepts": event.observation.concepts,
             }
+        if event.vision_cost:
+            payload["vision_cost"] = event.vision_cost
         for attempt in range(_MAX_RETRIES):
             try:

package/sense_client/vision.py CHANGED Viewed

@@ -18,6 +18,7 @@ import json
 import logging
 import os
 import time
+import uuid
 from abc import ABC, abstractmethod
 from typing import TYPE_CHECKING, Optional
@@ -27,14 +28,23 @@ if TYPE_CHECKING:
 logger = logging.getLogger("sinain.vision")
+class VisionResult:
+    """Result of a vision call: text + optional cost info."""
+    __slots__ = ("text", "cost")
+    def __init__(self, text: Optional[str], cost: Optional[dict] = None):
+        self.text = text
+        self.cost = cost  # {cost, tokens_in, tokens_out, model, cost_id}
 class VisionProvider(ABC):
     """Abstract base for vision inference backends."""
     name: str = "unknown"
     @abstractmethod
-    def describe(self, image: "Image.Image", prompt: Optional[str] = None) -> Optional[str]:
-        """Describe image content. Returns None on failure."""
+    def describe(self, image: "Image.Image", prompt: Optional[str] = None) -> VisionResult:
+        """Describe image content. Returns VisionResult (text may be None on failure)."""
         ...
     @abstractmethod
@@ -53,8 +63,8 @@ class OllamaVisionProvider(VisionProvider):
                                      timeout=timeout, max_tokens=max_tokens)
         self.name = f"ollama ({model})"
-    def describe(self, image: "Image.Image", prompt: Optional[str] = None) -> Optional[str]:
-        return self._client.describe(image, prompt)
+    def describe(self, image: "Image.Image", prompt: Optional[str] = None) -> VisionResult:
+        return VisionResult(self._client.describe(image, prompt))
     def is_available(self) -> bool:
         return self._client.is_available()
@@ -73,9 +83,9 @@ class OpenRouterVisionProvider(VisionProvider):
         self._max_tokens = max_tokens
         self.name = f"openrouter ({model})"
-    def describe(self, image: "Image.Image", prompt: Optional[str] = None) -> Optional[str]:
+    def describe(self, image: "Image.Image", prompt: Optional[str] = None) -> VisionResult:
         if not self._api_key:
-            return None
+            return VisionResult(None)
         try:
             import requests
@@ -83,7 +93,7 @@ class OpenRouterVisionProvider(VisionProvider):
             # Encode image
             img_b64 = self._encode(image)
             if not img_b64:
-                return None
+                return VisionResult(None)
             prompt_text = prompt or "Describe what's on this screen concisely (2-3 sentences)."
@@ -112,13 +122,23 @@ class OpenRouterVisionProvider(VisionProvider):
             resp.raise_for_status()
             data = resp.json()
             content = data["choices"][0]["message"]["content"].strip()
-            logger.debug("openrouter vision: model=%s tokens=%s",
-                         self._model, data.get("usage", {}).get("total_tokens", "?"))
-            return content if content else None
+            usage = data.get("usage", {})
+            logger.debug("openrouter vision: model=%s tokens=%s cost=%s",
+                         self._model, usage.get("total_tokens", "?"), usage.get("cost", "?"))
+            cost_info = None
+            if usage.get("cost") is not None:
+                cost_info = {
+                    "cost": usage["cost"],
+                    "tokens_in": usage.get("prompt_tokens", 0),
+                    "tokens_out": usage.get("completion_tokens", 0),
+                    "model": self._model,
+                    "cost_id": uuid.uuid4().hex[:16],
+                }
+            return VisionResult(content if content else None, cost_info)
         except Exception as e:
             logger.debug("openrouter vision failed: %s", e)
-            return None
+            return VisionResult(None)
     def is_available(self) -> bool:
         return bool(self._api_key)

package/sinain-agent/CLAUDE.md CHANGED Viewed

@@ -47,7 +47,6 @@ When responding to escalations:
 1. Call `sinain_heartbeat_tick` with a brief session summary
 2. The tool runs the full pipeline automatically:
-   - Git backup of memory directory
    - Signal analysis (detects opportunities from session patterns)
    - **Session distillation** — fetches new feed items from sinain-core, distills patterns/learnings
    - **Knowledge integration** — updates playbook (working memory) and knowledge graph (long-term memory)

package/sinain-agent/run.sh CHANGED Viewed

@@ -78,6 +78,7 @@ invoke_agent() {
     codex)
       codex exec -s danger-full-access \
         --dangerously-bypass-approvals-and-sandbox \
+        --skip-git-repo-check \
         "$prompt"
       ;;
     junie)
@@ -271,7 +272,7 @@ while true; do
       # MCP path: agent runs task with sinain tools available
       SPAWN_PROMPT="You have a background task to complete. Task: $SPAWN_TASK
-Complete this task thoroughly. Use sinain_get_knowledge and sinain_knowledge_query if you need context from past sessions. Use web search, file operations, and code execution as needed. Create end-to-end artifacts. Summarize your findings concisely."
+Complete this task thoroughly. Use sinain_get_knowledge and sinain_knowledge_query if you need context from past sessions. Summarize your findings concisely."
       SPAWN_RESULT=$(invoke_agent "$SPAWN_PROMPT" "$SPAWN_MAX_TURNS" || echo "ERROR: agent invocation failed")
     else
       # Pipe path: agent gets task text directly

package/sinain-core/src/agent/analyzer.ts CHANGED Viewed

@@ -1,13 +1,10 @@
-import type { AgentConfig, AgentResult, ContextWindow, RecorderStatus, RecordCommand } from "../types.js";
+import type { AnalysisConfig, AgentResult, ContextWindow, RecorderStatus, RecordCommand } from "../types.js";
 import { normalizeAppName } from "./context-window.js";
 import { log, error } from "../log.js";
 import { levelFor, applyLevel } from "../privacy/index.js";
 const TAG = "agent";
-/** Guard: only one Ollama vision call at a time (latest-wins, skip if busy). */
-let ollamaInFlight = false;
 /**
  * Model-specific timeouts in milliseconds.
  * Only increases timeouts for slow models to avoid false timeouts.
@@ -56,12 +53,13 @@ You produce outputs as JSON.
 Respond ONLY with valid JSON. No markdown, no code fences, no explanation.
 Your entire response must be parseable by JSON.parse().
-{"hud":"...","digest":"...","record":{"command":"start"|"stop","label":"..."}}
+{"hud":"...","digest":"...","record":{"command":"start"|"stop","label":"..."},"task":"..."}
 Output fields:
 - "hud" (required): max 60 words describing what user is doing NOW
 - "digest" (required): 5-8 sentences with detailed activity description
 - "record" (optional): control recording — {"command":"start","label":"Meeting name"} or {"command":"stop"}
+- "task" (optional): natural language instruction to spawn a background task
 When to use "record":
 - START when user begins a meeting, call, lecture, YouTube video, or important audio content
@@ -69,7 +67,24 @@ When to use "record":
 - Provide descriptive labels like "Team standup", "Client call", "YouTube: [video title from OCR]"
 - For YouTube/video content: extract video title from screen OCR for the label
-Do NOT set a "task" field — background tasks are spawned by user commands only.
+When to use "task":
+- User explicitly asks for research, lookup, or action
+- Something needs external search or processing that isn't a real-time response
+- Example: "Search for React 19 migration guide", "Find docs for this API"
+When to spawn "task" for video content:
+- If user watches a YouTube video for 2+ minutes AND no task has been spawned for this video yet, spawn: "Summarize YouTube video: [title or URL from OCR]"
+- ONLY spawn ONCE per video - do not repeat spawn for the same video in subsequent ticks
+- Extract video title or URL from screen OCR to include in the task
+When to spawn "task" for coding problems:
+- If user is actively working on a coding problem/challenge for 1+ minutes:
+  - Spawn: "Solve coding problem: [problem description/title from OCR]"
+- This includes LeetCode, HackerRank, interviews, coding assessments, or any visible coding challenge
+- Look for problem signals: "Input:", "Output:", "Example", "Constraints:", problem titles, test cases
+- Include as much context as possible from the screen OCR (problem description, examples, constraints)
+- ONLY spawn ONCE per distinct problem - do not repeat for the same problem
+- The spawned task should provide a complete solution with code and explanation
 Audio sources: [\ud83d\udd0a]=system/speaker audio, [\ud83c\udf99]=microphone (user's voice).
 Treat [\ud83c\udf99] as direct user speech. Treat [\ud83d\udd0a] as external audio.
@@ -193,75 +208,54 @@ function parseTask(parsed: any): string | undefined {
  */
 export async function analyzeContext(
   contextWindow: ContextWindow,
-  config: AgentConfig,
+  config: AnalysisConfig,
   recorderStatus: RecorderStatus | null = null,
   traitSystemPrompt?: string,
 ): Promise<AgentResult> {
   const userPrompt = buildUserPrompt(contextWindow, recorderStatus);
-  // Apply privacy gating for images sent to OpenRouter
+  // Apply privacy gating for images based on provider
   let images = contextWindow.images || [];
+  const privacyDest = config.provider === "ollama" ? "local_llm" : "openrouter";
   try {
-    const imgLevel = levelFor("screen_images", "openrouter");
-    if (imgLevel === "none") {
-      images = [];
-    }
+    if (levelFor("screen_images", privacyDest) === "none") images = [];
   } catch { /* privacy not initialized, keep images */ }
   const systemPrompt = traitSystemPrompt ?? SYSTEM_PROMPT;
-  // Try local Ollama first when enabled (handles both vision and text-only ticks)
-  // Guard: skip if a previous Ollama call is still in-flight (avoids "no slots available")
-  if (config.localVisionEnabled && !ollamaInFlight) {
-    ollamaInFlight = true;
-    try {
-      const result = await callOllamaVision(systemPrompt, userPrompt, images, config);
-      const mode = images.length > 0 ? "vision" : "text";
-      log(TAG, `local ollama (${config.localVisionModel}, ${mode}): success`);
-      return result;
-    } catch (err: any) {
-      log(TAG, `local ollama failed: ${err.message || err}, falling back to OpenRouter`);
-    } finally {
-      ollamaInFlight = false;
-    }
+  if (config.provider === "ollama") {
+    return await callOllama(systemPrompt, userPrompt, images, config);
   }
-  // Skip OpenRouter entirely if no API key (local-only mode)
-  if (!config.openrouterApiKey) {
-    if (config.localVisionEnabled) {
-      throw new Error("local ollama failed and no OpenRouter API key — cannot analyze");
-    }
-    throw new Error("no OpenRouter API key configured");
+  // OpenRouter path: model chain with fallbacks
+  if (!config.apiKey) {
+    throw new Error("ANALYSIS_API_KEY / OPENROUTER_API_KEY not set");
   }
   const models = [config.model, ...config.fallbackModels];
-  // Auto-upgrade: use vision model when images are present
-  if (images.length > 0 && config.visionModel) {
-    // Insert vision model at the front if not already there
-    if (!models.includes(config.visionModel)) {
-      models.unshift(config.visionModel);
-    }
+  // Auto-upgrade to vision model when images are present
+  if (images.length > 0 && config.visionModel && !models.includes(config.visionModel)) {
+    models.unshift(config.visionModel);
   }
   let lastError: Error | null = null;
   for (const model of models) {
     try {
-      return await callModel(systemPrompt, userPrompt, images, model, config);
+      return await callOpenRouter(systemPrompt, userPrompt, images, model, config);
     } catch (err: any) {
       lastError = err;
       log(TAG, `model ${model} failed: ${err.message || err}, trying next...`);
     }
   }
   throw lastError || new Error("all models failed");
 }
-async function callModel(
+async function callOpenRouter(
   systemPrompt: string,
   userPrompt: string,
   images: ContextWindow["images"],
   model: string,
-  config: AgentConfig,
+  config: AnalysisConfig,
 ): Promise<AgentResult> {
   const start = Date.now();
   const controller = new AbortController();
@@ -289,10 +283,10 @@ async function callModel(
     const imageCount = images?.length || 0;
-    const response = await fetch("https://openrouter.ai/api/v1/chat/completions", {
+    const response = await fetch(config.endpoint, {
       method: "POST",
       headers: {
-        "Authorization": `Bearer ${config.openrouterApiKey}`,
+        "Authorization": `Bearer ${config.apiKey}`,
         "Content-Type": "application/json",
       },
       body: JSON.stringify({
@@ -324,6 +318,7 @@ async function callModel(
     try {
       const jsonStr = raw.replace(/^```\w*\s*\n?/, "").replace(/\n?\s*```\s*$/, "").trim();
       const parsed = JSON.parse(jsonStr);
+      const apiCost = typeof data.usage?.cost === "number" ? data.usage.cost : undefined;
       return {
         hud: parsed.hud || "\u2014",
         digest: parsed.digest || "\u2014",
@@ -334,10 +329,12 @@ async function callModel(
         tokensOut: data.usage?.completion_tokens || 0,
         model,
         parsedOk: true,
+        cost: apiCost,
       };
     } catch {
       // Second chance: extract embedded JSON object
       const match = raw.match(/\{[\s\S]*\}/);
+      const apiCost = typeof data.usage?.cost === "number" ? data.usage.cost : undefined;
       if (match) {
         try {
           const parsed = JSON.parse(match[0]);
@@ -352,6 +349,7 @@ async function callModel(
               tokensOut: data.usage?.completion_tokens || 0,
               model,
               parsedOk: true,
+              cost: apiCost,
             };
           }
         } catch { /* fall through */ }
@@ -367,6 +365,7 @@ async function callModel(
         tokensOut: data.usage?.completion_tokens || 0,
         model,
         parsedOk: false,
+        cost: apiCost,
       };
     }
   } finally {
@@ -375,28 +374,27 @@ async function callModel(
 }
 /**
- * Call Ollama local vision model for image analysis.
- * Uses the /api/chat endpoint with base64 images.
- * Falls back to OpenRouter on any failure.
+ * Call Ollama local model for context analysis.
+ * Uses the /api/chat endpoint with optional base64 images.
  */
-async function callOllamaVision(
+async function callOllama(
   systemPrompt: string,
   userPrompt: string,
   images: ContextWindow["images"],
-  config: AgentConfig,
+  config: AnalysisConfig,
 ): Promise<AgentResult> {
   const start = Date.now();
   const controller = new AbortController();
-  const timeout = setTimeout(() => controller.abort(), config.localVisionTimeout);
+  const timeout = setTimeout(() => controller.abort(), config.timeout);
   try {
     const imageB64List = (images || []).map((img) => img.data);
-    const response = await fetch(`${config.localVisionUrl}/api/chat`, {
+    const response = await fetch(`${config.endpoint}/api/chat`, {
       method: "POST",
       headers: { "Content-Type": "application/json" },
       body: JSON.stringify({
-        model: config.localVisionModel,
+        model: config.model,
         messages: [
           { role: "system", content: systemPrompt },
           { role: "user", content: userPrompt, images: imageB64List },
@@ -422,7 +420,7 @@ async function callOllamaVision(
     const tokensIn = data.prompt_eval_count || 0;
     const tokensOut = data.eval_count || 0;
-    log(TAG, `ollama vision: model=${config.localVisionModel} latency=${latencyMs}ms tokens=${tokensIn}+${tokensOut}`);
+    log(TAG, `ollama vision: model=${config.model} latency=${latencyMs}ms tokens=${tokensIn}+${tokensOut}`);
     // Parse the response (same format as OpenRouter)
     // Parse JSON response (same logic as callModel)
@@ -436,7 +434,7 @@ async function callOllamaVision(
         task: parseTask(parsed),
         latencyMs,
         tokensIn, tokensOut,
-        model: config.localVisionModel,
+        model: config.model,
         parsedOk: true,
       };
     } catch {
@@ -452,7 +450,7 @@ async function callOllamaVision(
               task: parseTask(parsed),
               latencyMs,
               tokensIn, tokensOut,
-              model: config.localVisionModel,
+              model: config.model,
               parsedOk: true,
             };
           }
@@ -463,7 +461,7 @@ async function callOllamaVision(
         digest: content || "\u2014",
         latencyMs,
         tokensIn, tokensOut,
-        model: config.localVisionModel,
+        model: config.model,
         parsedOk: false,
       };
     }