@holoscript/holoscript-agent 2.1.1 → 2.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -394,6 +394,8 @@ function isProductiveToolUse(use) {
394
394
  return true;
395
395
  case "str_replace":
396
396
  return true;
397
+ case "vision_analyze":
398
+ return true;
397
399
  default:
398
400
  return false;
399
401
  }
@@ -530,6 +532,28 @@ var MESH_TOOLS = [
530
532
  },
531
533
  required: ["title"]
532
534
  }
535
+ },
536
+ {
537
+ name: "vision_analyze",
538
+ description: "Analyze an image using the local Fara-7B vision model (Ollama on loopback). Reads the image file at `image_path`, sends it to fara:7b via the local Ollama API, and returns the model's text analysis. Counts as a productive tool call \u2014 use for GUI-grounding, visual QA, image captioning, or any task that requires perceiving image content. Only available on surfaces with a local Ollama instance running fara:7b.",
539
+ input_schema: {
540
+ type: "object",
541
+ properties: {
542
+ image_path: {
543
+ type: "string",
544
+ description: "Absolute path to the image file (png, jpg, webp, gif)"
545
+ },
546
+ prompt: {
547
+ type: "string",
548
+ description: 'Instruction for the vision model (default: "Describe this image in detail.")'
549
+ },
550
+ model: {
551
+ type: "string",
552
+ description: 'Ollama model tag to use (default: "fara:7b")'
553
+ }
554
+ },
555
+ required: ["image_path"]
556
+ }
533
557
  }
534
558
  ];
535
559
  function resolveActiveTools(brain, opts = {}) {
@@ -773,6 +797,49 @@ ${truncated}`);
773
797
  const result = await opts.addTask([{ title, description, priority: priority2, source, tags }]);
774
798
  return okResult(use.id, `delegate_task: posted "${title}" to board \u2014 ${result.added} task(s) added`);
775
799
  }
800
+ if (use.name === "vision_analyze") {
801
+ const imagePath = String(use.input.image_path ?? "").trim();
802
+ if (!imagePath) return errResult(use.id, "vision_analyze: image_path is required");
803
+ const denied = checkReadAllowed(imagePath);
804
+ if (denied) return errResult(use.id, `vision_analyze: ${denied}`);
805
+ const prompt = String(use.input.prompt ?? "Describe this image in detail.");
806
+ const model = String(use.input.model ?? "fara:7b");
807
+ const ollamaBase = process.env.HOLOSCRIPT_AGENT_LOCAL_LLM_BASE_URL;
808
+ if (!ollamaBase) {
809
+ return errResult(
810
+ use.id,
811
+ "vision_analyze: HOLOSCRIPT_AGENT_LOCAL_LLM_BASE_URL is not set \u2014 configure it to point to your local Ollama instance (e.g. http://holojetson.local:11434)"
812
+ );
813
+ }
814
+ const TIMEOUT_MS = 12e4;
815
+ const controller = new AbortController();
816
+ const timer = setTimeout(() => controller.abort(), TIMEOUT_MS);
817
+ try {
818
+ const imageBytes = await readFile2(imagePath);
819
+ const imageB64 = imageBytes.toString("base64");
820
+ const res = await fetch(`${ollamaBase}/api/generate`, {
821
+ method: "POST",
822
+ headers: { "content-type": "application/json" },
823
+ body: JSON.stringify({ model, prompt, images: [imageB64], stream: false }),
824
+ signal: controller.signal
825
+ });
826
+ clearTimeout(timer);
827
+ if (!res.ok) {
828
+ const text = await res.text();
829
+ return errResult(use.id, `vision_analyze: Ollama HTTP ${res.status}: ${text.slice(0, 500)}`);
830
+ }
831
+ const json = await res.json();
832
+ if (json.error) return errResult(use.id, `vision_analyze: model error \u2014 ${json.error}`);
833
+ return okResult(use.id, json.response ?? "");
834
+ } catch (err) {
835
+ clearTimeout(timer);
836
+ const msg = err instanceof Error ? err.message : String(err);
837
+ return errResult(
838
+ use.id,
839
+ msg.includes("abort") ? `vision_analyze: timed out after ${TIMEOUT_MS}ms` : `vision_analyze: ${msg}`
840
+ );
841
+ }
842
+ }
776
843
  return errResult(use.id, `unknown tool: ${use.name}`);
777
844
  } catch (err) {
778
845
  return errResult(use.id, err instanceof Error ? err.message : String(err));
@@ -1188,12 +1255,18 @@ var AgentRunner = class {
1188
1255
  }
1189
1256
  if (productiveCallCount === 0 && toolsCalled.size > 0 && iters < MAX_TOOL_ITERS) {
1190
1257
  iters++;
1258
+ if (messages.length > 0 && messages[messages.length - 1].role === "assistant") {
1259
+ messages.pop();
1260
+ }
1191
1261
  messages.push({
1192
1262
  role: "user",
1193
- content: "You gathered data but did not write the task deliverable. Call write_file NOW with the exact output path from the task description. Embed all data you gathered into the write_file content field. Do NOT output text \u2014 your only valid response is a write_file tool call."
1263
+ content: `You read data but did NOT call write_file. This is a TASK FAILURE unless you act now.
1264
+ Task: ${target.title}
1265
+ Required output path (from task description): ${target.description.match(/path[:\s]+([^\s\n,]+\.json)/i)?.[1] ?? "see task description"}
1266
+ Call write_file NOW. Embed ALL data from the tool result above into the content. Do NOT output any text \u2014 your ONLY valid response is a write_file tool call.`
1194
1267
  });
1195
1268
  const reResp = await provider.complete(
1196
- { messages, maxTokens: 8192, temperature: 0.4, tools: activeTools },
1269
+ { messages, maxTokens: 8192, temperature: 0, tools: activeTools },
1197
1270
  identity.llmModel
1198
1271
  );
1199
1272
  aggUsage = {