@holoscript/holoscript-agent 2.1.4 → 2.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/runner.js CHANGED
@@ -297,13 +297,13 @@ var MESH_TOOLS = [
297
297
  },
298
298
  {
299
299
  name: "vision_analyze",
300
- description: "Analyze an image using the local Fara-7B vision model (Ollama on loopback). Reads the image file at `image_path`, sends it to fara:7b via the local Ollama API, and returns the model's text analysis. Counts as a productive tool call \u2014 use for GUI-grounding, visual QA, image captioning, or any task that requires perceiving image content. Only available on surfaces with a local Ollama instance running fara:7b.",
300
+ description: "Analyze an image using the local Fara-7B vision model (Ollama on loopback). Reads the image file at `image_path` (max 512KB \u2014 downscale larger images first), sends it to the vision model via the local Ollama API (env: HOLOSCRIPT_AGENT_VISION_MODEL), and returns the model's text analysis. Counts as a productive tool call \u2014 use for GUI-grounding, visual QA, image captioning, or any task that requires perceiving image content. Only available on surfaces with a local Ollama instance and HOLOSCRIPT_AGENT_LOCAL_LLM_BASE_URL set.",
301
301
  input_schema: {
302
302
  type: "object",
303
303
  properties: {
304
304
  image_path: {
305
305
  type: "string",
306
- description: "Absolute path to the image file (png, jpg, webp, gif)"
306
+ description: "Absolute path to the image file (png, jpg, webp) \u2014 must be under 512KB"
307
307
  },
308
308
  prompt: {
309
309
  type: "string",
@@ -311,7 +311,7 @@ var MESH_TOOLS = [
311
311
  },
312
312
  model: {
313
313
  type: "string",
314
- description: 'Ollama model tag to use (default: "fara:7b")'
314
+ description: "Ollama model tag override (default: HOLOSCRIPT_AGENT_VISION_MODEL env var)"
315
315
  }
316
316
  },
317
317
  required: ["image_path"]
@@ -565,19 +565,29 @@ ${truncated}`);
565
565
  const denied = checkReadAllowed(imagePath);
566
566
  if (denied) return errResult(use.id, `vision_analyze: ${denied}`);
567
567
  const prompt = String(use.input.prompt ?? "Describe this image in detail.");
568
- const model = String(use.input.model ?? "fara:7b");
568
+ const model = String(
569
+ use.input.model ?? process.env.HOLOSCRIPT_AGENT_VISION_MODEL ?? "fara:7b"
570
+ );
569
571
  const ollamaBase = process.env.HOLOSCRIPT_AGENT_LOCAL_LLM_BASE_URL;
570
572
  if (!ollamaBase) {
571
573
  return errResult(
572
574
  use.id,
573
- "vision_analyze: HOLOSCRIPT_AGENT_LOCAL_LLM_BASE_URL is not set \u2014 configure it to point to your local Ollama instance (e.g. http://holojetson.local:11434)"
575
+ "vision_analyze: HOLOSCRIPT_AGENT_LOCAL_LLM_BASE_URL is not set \u2014 configure it to point to your local Ollama instance"
574
576
  );
575
577
  }
578
+ const MAX_IMAGE_BYTES = 512e3;
576
579
  const TIMEOUT_MS = 12e4;
577
580
  const controller = new AbortController();
578
581
  const timer = setTimeout(() => controller.abort(), TIMEOUT_MS);
579
582
  try {
580
583
  const imageBytes = await readFile2(imagePath);
584
+ if (imageBytes.length > MAX_IMAGE_BYTES) {
585
+ clearTimeout(timer);
586
+ return errResult(
587
+ use.id,
588
+ `vision_analyze: image is ${Math.round(imageBytes.length / 1024)}KB \u2014 exceeds ${MAX_IMAGE_BYTES / 1024}KB limit. Downscale the image first (e.g. to 256\xD7256 or smaller) then retry vision_analyze.`
589
+ );
590
+ }
581
591
  const imageB64 = imageBytes.toString("base64");
582
592
  const res = await fetch(`${ollamaBase}/api/generate`, {
583
593
  method: "POST",