oh-my-opencode 1.0.1 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,2 @@
1
+ import type { AgentConfig } from "@opencode-ai/sdk";
2
+ export declare const multimodalLookerAgent: AgentConfig;
@@ -1,4 +1,4 @@
1
1
  import type { AgentConfig } from "@opencode-ai/sdk";
2
- export type AgentName = "oracle" | "librarian" | "explore" | "frontend-ui-ux-engineer" | "document-writer";
2
+ export type AgentName = "oracle" | "librarian" | "explore" | "frontend-ui-ux-engineer" | "document-writer" | "multimodal-looker";
3
3
  export type AgentOverrideConfig = Partial<AgentConfig>;
4
4
  export type AgentOverrides = Partial<Record<AgentName, AgentOverrideConfig>>;
package/dist/index.js CHANGED
@@ -2358,6 +2358,47 @@ STOP HERE - DO NOT CONTINUE TO NEXT TASK
2358
2358
  You are a technical writer who creates documentation that developers actually want to read.
2359
2359
  </guide>`
2360
2360
  };
2361
+
2362
+ // src/agents/multimodal-looker.ts
2363
/**
 * Built-in "multimodal-looker" subagent definition.
 *
 * The agent is given only the Read tool and a low temperature; its prompt tells it
 * to interpret media files (PDFs, images, diagrams) and return just the extracted
 * information that was requested.
 */
var multimodalLookerAgent = {
  description: [
    "Analyze media files (PDFs, images, diagrams) that require interpretation beyond raw text.",
    "Extracts specific information or summaries from documents, describes visual content.",
    "Use when you need analyzed/extracted data rather than literal file contents."
  ].join(" "),
  mode: "subagent",
  model: "google/gemini-2.5-flash",
  temperature: 0.1,
  tools: { Read: true },
  // One array entry per prompt line; joined with "\n" this yields the exact prompt text.
  prompt: [
    "You interpret media files that cannot be read as plain text.",
    "",
    "Your job: examine the attached file and extract ONLY what was requested.",
    "",
    "When to use you:",
    "- Media files the Read tool cannot interpret",
    "- Extracting specific information or summaries from documents",
    "- Describing visual content in images or diagrams",
    "- When analyzed/extracted data is needed, not raw file contents",
    "",
    "When NOT to use you:",
    "- Source code or plain text files needing exact contents (use Read)",
    "- Files that need editing afterward (need literal content from Read)",
    "- Simple file reading where no interpretation is needed",
    "",
    "How you work:",
    "1. Receive a file path and a goal describing what to extract",
    "2. Read and analyze the file deeply",
    "3. Return ONLY the relevant extracted information",
    "4. The main agent never processes the raw file - you save context tokens",
    "",
    "For PDFs: extract text, structure, tables, data from specific sections",
    "For images: describe layouts, UI elements, text, diagrams, charts",
    "For diagrams: explain relationships, flows, architecture depicted",
    "",
    "Response rules:",
    "- Return extracted information directly, no preamble",
    "- If info not found, state clearly what's missing",
    "- Match the language of the request",
    "- Be thorough on the goal, concise on everything else",
    "",
    "Your output goes straight to the main agent for continued work."
  ].join("\n")
};
2361
2402
  // src/shared/frontmatter.ts
2362
2403
  function parseFrontmatter(content) {
2363
2404
  const frontmatterRegex = /^---\r?\n([\s\S]*?)\r?\n---\r?\n([\s\S]*)$/;
@@ -2586,13 +2627,42 @@ function log(message, data) {
2586
2627
  fs.appendFileSync(logFile, logEntry);
2587
2628
  } catch {}
2588
2629
  }
2630
+ // src/shared/deep-merge.ts
2631
// Keys that are never copied from an override, so a merge cannot touch prototypes.
var DANGEROUS_KEYS = new Set(["__proto__", "constructor", "prototype"]);
// Recursion limit; past this depth the override subtree is returned unmerged.
var MAX_DEPTH = 50;

/**
 * Reports whether `value` is a non-null, non-array object whose
 * `Object.prototype.toString` tag is "[object Object]".
 */
function isPlainObject(value) {
  if (value === null || typeof value !== "object") {
    return false;
  }
  if (Array.isArray(value)) {
    return false;
  }
  return Object.prototype.toString.call(value) === "[object Object]";
}

/**
 * Merges `override` into a shallow copy of `base`, recursing where both sides
 * hold plain objects.
 *
 * - If only one argument is truthy it is returned as-is; if neither is, the
 *   result is `undefined`.
 * - Override entries whose value is `undefined` leave the base value in place.
 * - Keys listed in DANGEROUS_KEYS are skipped.
 * - Anything that is not a plain-object pair (arrays, primitives, mixed types)
 *   is replaced by the override value.
 *
 * @param base Object supplying the default values.
 * @param override Object whose defined values take precedence.
 * @param depth Current recursion depth (callers normally omit it).
 * @returns The merged object; `base` itself is not mutated.
 */
function deepMerge(base, override, depth = 0) {
  if (!base) {
    return override ? override : undefined;
  }
  if (!override) {
    return base;
  }
  if (depth > MAX_DEPTH) {
    // `override` is known to be truthy here, so it is what gets returned.
    return override;
  }
  const merged = { ...base };
  const allowedKeys = Object.keys(override).filter((name) => !DANGEROUS_KEYS.has(name));
  for (const name of allowedKeys) {
    const existing = base[name];
    const incoming = override[name];
    if (incoming === undefined) {
      continue;
    }
    const bothPlain = isPlainObject(existing) && isPlainObject(incoming);
    merged[name] = bothPlain ? deepMerge(existing, incoming, depth + 1) : incoming;
  }
  return merged;
}
2661
+
2589
2662
  // src/shared/snake-case.ts
2590
2663
  function camelToSnake(str) {
2591
2664
  return str.replace(/[A-Z]/g, (letter) => `_${letter.toLowerCase()}`);
2592
2665
  }
2593
- function isPlainObject(value) {
2594
- return typeof value === "object" && value !== null && !Array.isArray(value);
2595
- }
2596
2666
  function objectToSnakeCase(obj, deep = true) {
2597
2667
  const result = {};
2598
2668
  for (const [key, value] of Object.entries(obj)) {
@@ -2665,36 +2735,22 @@ function isHookDisabled(config, hookType) {
2665
2735
  }
2666
2736
  return false;
2667
2737
  }
2668
- // src/shared/deep-merge.ts
2669
- var DANGEROUS_KEYS = new Set(["__proto__", "constructor", "prototype"]);
2670
- var MAX_DEPTH = 50;
2671
- function isPlainObject2(value) {
2672
- return typeof value === "object" && value !== null && !Array.isArray(value) && Object.prototype.toString.call(value) === "[object Object]";
2738
+ // src/shared/file-utils.ts
2739
+ import { lstatSync, readlinkSync } from "fs";
2740
+ import { resolve } from "path";
2741
/**
 * Tells whether a directory entry is a visible Markdown file: its name does not
 * start with ".", it ends with ".md", and `entry.isFile()` is truthy.
 *
 * @param entry Directory entry exposing `name` and `isFile()`.
 */
function isMarkdownFile(entry) {
  const fileName = entry.name;
  if (fileName.startsWith(".")) {
    return false;
  }
  if (!fileName.endsWith(".md")) {
    return false;
  }
  // Only consulted once the name checks pass, as in the short-circuit form.
  return entry.isFile();
}
2674
- function deepMerge(base, override, depth = 0) {
2675
- if (!base && !override)
2676
- return;
2677
- if (!base)
2678
- return override;
2679
- if (!override)
2680
- return base;
2681
- if (depth > MAX_DEPTH)
2682
- return override ?? base;
2683
- const result = { ...base };
2684
- for (const key of Object.keys(override)) {
2685
- if (DANGEROUS_KEYS.has(key))
2686
- continue;
2687
- const baseValue = base[key];
2688
- const overrideValue = override[key];
2689
- if (overrideValue === undefined)
2690
- continue;
2691
- if (isPlainObject2(baseValue) && isPlainObject2(overrideValue)) {
2692
- result[key] = deepMerge(baseValue, overrideValue, depth + 1);
2693
- } else {
2694
- result[key] = overrideValue;
2744
/**
 * Returns the location a symbolic link points at, or `filePath` unchanged when
 * the path is not a symlink, does not exist, or cannot be inspected.
 *
 * The link target is resolved relative to the link's parent directory; only a
 * single link level is followed.
 *
 * @param filePath Path that may be a symbolic link.
 * @returns The resolved target path, or the original `filePath`.
 */
function resolveSymlink(filePath) {
  try {
    // throwIfNoEntry: false yields undefined for a missing path instead of throwing.
    const linkInfo = lstatSync(filePath, { throwIfNoEntry: false });
    const pointsElsewhere = linkInfo?.isSymbolicLink();
    if (!pointsElsewhere) {
      return filePath;
    }
    const target = readlinkSync(filePath);
    return resolve(filePath, "..", target);
  } catch {
    // Best effort: any filesystem error falls back to the path as given.
    return filePath;
  }
}
2699
2755
  // src/agents/utils.ts
2700
2756
  var allBuiltinAgents = {
@@ -2702,7 +2758,8 @@ var allBuiltinAgents = {
2702
2758
  librarian: librarianAgent,
2703
2759
  explore: exploreAgent,
2704
2760
  "frontend-ui-ux-engineer": frontendUiUxEngineerAgent,
2705
- "document-writer": documentWriterAgent
2761
+ "document-writer": documentWriterAgent,
2762
+ "multimodal-looker": multimodalLookerAgent
2706
2763
  };
2707
2764
  function mergeAgentConfig(base, override) {
2708
2765
  return deepMerge(base, override);
@@ -4042,7 +4099,7 @@ function createGrepOutputTruncatorHook(ctx) {
4042
4099
  }
4043
4100
  // src/hooks/directory-agents-injector/index.ts
4044
4101
  import { existsSync as existsSync8, readFileSync as readFileSync4 } from "fs";
4045
- import { dirname as dirname2, join as join9, resolve } from "path";
4102
+ import { dirname as dirname2, join as join9, resolve as resolve2 } from "path";
4046
4103
 
4047
4104
  // src/hooks/directory-agents-injector/storage.ts
4048
4105
  import {
@@ -4108,7 +4165,7 @@ function createDirectoryAgentsInjectorHook(ctx) {
4108
4165
  return null;
4109
4166
  if (title.startsWith("/"))
4110
4167
  return title;
4111
- return resolve(ctx.directory, title);
4168
+ return resolve2(ctx.directory, title);
4112
4169
  }
4113
4170
  function findAgentsMdUp(startDir) {
4114
4171
  const found = [];
@@ -4183,7 +4240,7 @@ ${content}`;
4183
4240
  }
4184
4241
  // src/hooks/directory-readme-injector/index.ts
4185
4242
  import { existsSync as existsSync10, readFileSync as readFileSync6 } from "fs";
4186
- import { dirname as dirname3, join as join12, resolve as resolve2 } from "path";
4243
+ import { dirname as dirname3, join as join12, resolve as resolve3 } from "path";
4187
4244
 
4188
4245
  // src/hooks/directory-readme-injector/storage.ts
4189
4246
  import {
@@ -4249,7 +4306,7 @@ function createDirectoryReadmeInjectorHook(ctx) {
4249
4306
  return null;
4250
4307
  if (title.startsWith("/"))
4251
4308
  return title;
4252
- return resolve2(ctx.directory, title);
4309
+ return resolve3(ctx.directory, title);
4253
4310
  }
4254
4311
  function findReadmeMdUp(startDir) {
4255
4312
  const found = [];
@@ -5974,7 +6031,7 @@ ${result.message}`;
5974
6031
  // src/hooks/rules-injector/index.ts
5975
6032
  import { readFileSync as readFileSync9 } from "fs";
5976
6033
  import { homedir as homedir7 } from "os";
5977
- import { relative as relative3, resolve as resolve3 } from "path";
6034
+ import { relative as relative3, resolve as resolve4 } from "path";
5978
6035
 
5979
6036
  // src/hooks/rules-injector/finder.ts
5980
6037
  import {
@@ -6331,7 +6388,7 @@ function createRulesInjectorHook(ctx) {
6331
6388
  return null;
6332
6389
  if (title.startsWith("/"))
6333
6390
  return title;
6334
- return resolve3(ctx.directory, title);
6391
+ return resolve4(ctx.directory, title);
6335
6392
  }
6336
6393
  const toolExecuteAfter = async (input, output) => {
6337
6394
  if (!TRACKED_TOOLS.includes(input.tool.toLowerCase()))
@@ -6927,8 +6984,8 @@ function startCallbackServer(timeoutMs = 5 * 60 * 1000) {
6927
6984
  });
6928
6985
  const actualPort = server.port;
6929
6986
  const waitForCallback = () => {
6930
- return new Promise((resolve4, reject) => {
6931
- resolveCallback = resolve4;
6987
+ return new Promise((resolve5, reject) => {
6988
+ resolveCallback = resolve5;
6932
6989
  rejectCallback = reject;
6933
6990
  timeoutId = setTimeout(() => {
6934
6991
  cleanup();
@@ -7874,7 +7931,7 @@ async function attemptFetch(options) {
7874
7931
  if (attempt < maxPermissionRetries) {
7875
7932
  const delay = calculateRetryDelay2(attempt);
7876
7933
  debugLog6(`[RETRY] GCP permission error, retry ${attempt + 1}/${maxPermissionRetries} after ${delay}ms`);
7877
- await new Promise((resolve4) => setTimeout(resolve4, delay));
7934
+ await new Promise((resolve5) => setTimeout(resolve5, delay));
7878
7935
  continue;
7879
7936
  }
7880
7937
  debugLog6(`[RETRY] GCP permission error, max retries exceeded`);
@@ -8189,9 +8246,6 @@ async function createGoogleAntigravityAuthPlugin({
8189
8246
  import { existsSync as existsSync19, readdirSync as readdirSync4, readFileSync as readFileSync11 } from "fs";
8190
8247
  import { homedir as homedir9 } from "os";
8191
8248
  import { join as join24, basename } from "path";
8192
- function isMarkdownFile(entry) {
8193
- return !entry.name.startsWith(".") && entry.name.endsWith(".md") && entry.isFile();
8194
- }
8195
8249
  function loadCommandsFromDir(commandsDir, scope) {
8196
8250
  if (!existsSync19(commandsDir)) {
8197
8251
  return [];
@@ -8263,9 +8317,9 @@ function loadOpencodeProjectCommands() {
8263
8317
  return commandsToRecord(commands);
8264
8318
  }
8265
8319
  // src/features/claude-code-skill-loader/loader.ts
8266
- import { existsSync as existsSync20, readdirSync as readdirSync5, readFileSync as readFileSync12, lstatSync, readlinkSync } from "fs";
8320
+ import { existsSync as existsSync20, readdirSync as readdirSync5, readFileSync as readFileSync12 } from "fs";
8267
8321
  import { homedir as homedir10 } from "os";
8268
- import { join as join25, resolve as resolve4 } from "path";
8322
+ import { join as join25 } from "path";
8269
8323
  function loadSkillsFromDir(skillsDir, scope) {
8270
8324
  if (!existsSync20(skillsDir)) {
8271
8325
  return [];
@@ -8278,14 +8332,7 @@ function loadSkillsFromDir(skillsDir, scope) {
8278
8332
  const skillPath = join25(skillsDir, entry.name);
8279
8333
  if (!entry.isDirectory() && !entry.isSymbolicLink())
8280
8334
  continue;
8281
- let resolvedPath = skillPath;
8282
- try {
8283
- if (lstatSync(skillPath, { throwIfNoEntry: false })?.isSymbolicLink()) {
8284
- resolvedPath = resolve4(skillPath, "..", readlinkSync(skillPath));
8285
- }
8286
- } catch {
8287
- continue;
8288
- }
8335
+ const resolvedPath = resolveSymlink(skillPath);
8289
8336
  const skillMdPath = join25(resolvedPath, "SKILL.md");
8290
8337
  if (!existsSync20(skillMdPath))
8291
8338
  continue;
@@ -8352,9 +8399,6 @@ function parseToolsConfig(toolsStr) {
8352
8399
  }
8353
8400
  return result;
8354
8401
  }
8355
- function isMarkdownFile2(entry) {
8356
- return !entry.name.startsWith(".") && entry.name.endsWith(".md") && entry.isFile();
8357
- }
8358
8402
  function loadAgentsFromDir(agentsDir, scope) {
8359
8403
  if (!existsSync21(agentsDir)) {
8360
8404
  return [];
@@ -8362,7 +8406,7 @@ function loadAgentsFromDir(agentsDir, scope) {
8362
8406
  const entries = readdirSync6(agentsDir, { withFileTypes: true });
8363
8407
  const agents = [];
8364
8408
  for (const entry of entries) {
8365
- if (!isMarkdownFile2(entry))
8409
+ if (!isMarkdownFile(entry))
8366
8410
  continue;
8367
8411
  const agentPath = join26(agentsDir, entry.name);
8368
8412
  const agentName = basename2(entry.name, ".md");
@@ -10325,7 +10369,7 @@ __export(exports_util, {
10325
10369
  jsonStringifyReplacer: () => jsonStringifyReplacer,
10326
10370
  joinValues: () => joinValues,
10327
10371
  issue: () => issue,
10328
- isPlainObject: () => isPlainObject3,
10372
+ isPlainObject: () => isPlainObject2,
10329
10373
  isObject: () => isObject,
10330
10374
  hexToUint8Array: () => hexToUint8Array,
10331
10375
  getSizableOrigin: () => getSizableOrigin,
@@ -10507,7 +10551,7 @@ var allowsEval = cached(() => {
10507
10551
  return false;
10508
10552
  }
10509
10553
  });
10510
- function isPlainObject3(o) {
10554
+ function isPlainObject2(o) {
10511
10555
  if (isObject(o) === false)
10512
10556
  return false;
10513
10557
  const ctor = o.constructor;
@@ -10522,7 +10566,7 @@ function isPlainObject3(o) {
10522
10566
  return true;
10523
10567
  }
10524
10568
  function shallowClone(o) {
10525
- if (isPlainObject3(o))
10569
+ if (isPlainObject2(o))
10526
10570
  return { ...o };
10527
10571
  if (Array.isArray(o))
10528
10572
  return [...o];
@@ -10705,7 +10749,7 @@ function omit(schema, mask) {
10705
10749
  return clone(schema, def);
10706
10750
  }
10707
10751
  function extend(schema, shape) {
10708
- if (!isPlainObject3(shape)) {
10752
+ if (!isPlainObject2(shape)) {
10709
10753
  throw new Error("Invalid input to extend: expected a plain object");
10710
10754
  }
10711
10755
  const checks = schema._zod.def.checks;
@@ -10724,7 +10768,7 @@ function extend(schema, shape) {
10724
10768
  return clone(schema, def);
10725
10769
  }
10726
10770
  function safeExtend(schema, shape) {
10727
- if (!isPlainObject3(shape)) {
10771
+ if (!isPlainObject2(shape)) {
10728
10772
  throw new Error("Invalid input to safeExtend: expected a plain object");
10729
10773
  }
10730
10774
  const def = {
@@ -12874,7 +12918,7 @@ function mergeValues(a, b) {
12874
12918
  if (a instanceof Date && b instanceof Date && +a === +b) {
12875
12919
  return { valid: true, data: a };
12876
12920
  }
12877
- if (isPlainObject3(a) && isPlainObject3(b)) {
12921
+ if (isPlainObject2(a) && isPlainObject2(b)) {
12878
12922
  const bKeys = Object.keys(b);
12879
12923
  const sharedKeys = Object.keys(a).filter((key) => bKeys.indexOf(key) !== -1);
12880
12924
  const newObj = { ...a, ...b };
@@ -13004,7 +13048,7 @@ var $ZodRecord = /* @__PURE__ */ $constructor("$ZodRecord", (inst, def) => {
13004
13048
  $ZodType.init(inst, def);
13005
13049
  inst._zod.parse = (payload, ctx) => {
13006
13050
  const input = payload.value;
13007
- if (!isPlainObject3(input)) {
13051
+ if (!isPlainObject2(input)) {
13008
13052
  payload.issues.push({
13009
13053
  expected: "record",
13010
13054
  code: "invalid_type",
@@ -23351,11 +23395,7 @@ function discoverCommandsFromDir(commandsDir, scope) {
23351
23395
  const entries = readdirSync7(commandsDir, { withFileTypes: true });
23352
23396
  const commands = [];
23353
23397
  for (const entry of entries) {
23354
- if (entry.name.startsWith("."))
23355
- continue;
23356
- if (!entry.name.endsWith(".md"))
23357
- continue;
23358
- if (!entry.isFile())
23398
+ if (!isMarkdownFile(entry))
23359
23399
  continue;
23360
23400
  const commandPath = join32(commandsDir, entry.name);
23361
23401
  const commandName = basename3(entry.name, ".md");
@@ -23519,9 +23559,9 @@ var SkillFrontmatterSchema = exports_external.object({
23519
23559
  metadata: exports_external.record(exports_external.string(), exports_external.string()).optional()
23520
23560
  });
23521
23561
  // src/tools/skill/tools.ts
23522
- import { existsSync as existsSync30, readdirSync as readdirSync8, lstatSync as lstatSync2, readlinkSync as readlinkSync2, readFileSync as readFileSync18 } from "fs";
23562
+ import { existsSync as existsSync30, readdirSync as readdirSync8, readFileSync as readFileSync18 } from "fs";
23523
23563
  import { homedir as homedir16 } from "os";
23524
- import { join as join33, resolve as resolve7, basename as basename4 } from "path";
23564
+ import { join as join33, basename as basename4 } from "path";
23525
23565
  function parseSkillFrontmatter(data) {
23526
23566
  return {
23527
23567
  name: typeof data.name === "string" ? data.name : "",
@@ -23542,15 +23582,7 @@ function discoverSkillsFromDir(skillsDir, scope) {
23542
23582
  continue;
23543
23583
  const skillPath = join33(skillsDir, entry.name);
23544
23584
  if (entry.isDirectory() || entry.isSymbolicLink()) {
23545
- let resolvedPath = skillPath;
23546
- try {
23547
- const stats = lstatSync2(skillPath, { throwIfNoEntry: false });
23548
- if (stats?.isSymbolicLink()) {
23549
- resolvedPath = resolve7(skillPath, "..", readlinkSync2(skillPath));
23550
- }
23551
- } catch {
23552
- continue;
23553
- }
23585
+ const resolvedPath = resolveSymlink(skillPath);
23554
23586
  const skillMdPath = join33(resolvedPath, "SKILL.md");
23555
23587
  if (!existsSync30(skillMdPath))
23556
23588
  continue;
@@ -23579,17 +23611,6 @@ function discoverSkillsSync() {
23579
23611
  var availableSkills = discoverSkillsSync();
23580
23612
  var skillListForDescription = availableSkills.map((s) => `- ${s.name}: ${s.description} (${s.scope})`).join(`
23581
23613
  `);
23582
- function resolveSymlink(skillPath) {
23583
- try {
23584
- const stats = lstatSync2(skillPath, { throwIfNoEntry: false });
23585
- if (stats?.isSymbolicLink()) {
23586
- return resolve7(skillPath, "..", readlinkSync2(skillPath));
23587
- }
23588
- return skillPath;
23589
- } catch {
23590
- return skillPath;
23591
- }
23592
- }
23593
23614
  async function parseSkillMd(skillPath) {
23594
23615
  const resolvedPath = resolveSymlink(skillPath);
23595
23616
  const skillMdPath = join33(resolvedPath, "SKILL.md");
@@ -23861,7 +23882,7 @@ Use \`background_output\` tool with task_id="${task.id}" to check progress:
23861
23882
  });
23862
23883
  }
23863
23884
  function delay(ms) {
23864
- return new Promise((resolve8) => setTimeout(resolve8, ms));
23885
+ return new Promise((resolve7) => setTimeout(resolve7, ms));
23865
23886
  }
23866
23887
  function truncateText(text, maxLength) {
23867
23888
  if (text.length <= maxLength)
@@ -24189,6 +24210,97 @@ session_id: ${sessionID}
24189
24210
  `);
24190
24211
  return output;
24191
24212
  }
24213
+ // src/tools/look-at/constants.ts
24214
// Name of the agent that look_at prompts.
var MULTIMODAL_LOOKER_AGENT = "multimodal-looker";
// Description text for the look_at tool; one array entry per line of text.
var LOOK_AT_DESCRIPTION = [
  "Analyze media files (PDFs, images, diagrams) that require visual interpretation.",
  "",
  "Use this tool to extract specific information from files that cannot be processed as plain text:",
  "- PDF documents: extract text, tables, structure, specific sections",
  "- Images: describe layouts, UI elements, text content, diagrams",
  "- Charts/Graphs: explain data, trends, relationships",
  "- Screenshots: identify UI components, text, visual elements",
  "- Architecture diagrams: explain flows, connections, components",
  "",
  "Parameters:",
  "- file_path: Absolute path to the file to analyze",
  "- goal: What specific information to extract (be specific for better results)",
  "",
  "Examples:",
  '- "Extract all API endpoints from this OpenAPI spec PDF"',
  '- "Describe the UI layout and components in this screenshot"',
  '- "Explain the data flow in this architecture diagram"',
  '- "List all table data from page 3 of this PDF"',
  "",
  "This tool uses a separate context window with Gemini 2.5 Flash for multimodal analysis,",
  "saving tokens in the main conversation while providing accurate visual interpretation."
].join("\n");
24236
+ // src/tools/look-at/tools.ts
24237
/**
 * Creates the `look_at` tool. Each call opens a child session of the calling
 * session, prompts the multimodal-looker agent with the file path and goal, and
 * returns the text of that agent's most recent assistant message.
 *
 * @param ctx Plugin context; only `ctx.client.session` (create, prompt, messages) is used.
 * @returns The tool definition built by `tool(...)`. Its `execute` resolves to a
 *   string: the joined text parts of the reply, or a message starting with "Error:".
 */
function createLookAt(ctx) {
  // Error payloads may be plain objects; interpolating one directly into a
  // string would produce "[object Object]", so render them explicitly.
  const describeError = (error) => {
    if (error instanceof Error) {
      return error.message;
    }
    if (typeof error === "string") {
      return error;
    }
    try {
      return JSON.stringify(error) ?? String(error);
    } catch {
      // e.g. circular structures
      return String(error);
    }
  };
  return tool({
    description: LOOK_AT_DESCRIPTION,
    args: {
      file_path: tool.schema.string().describe("Absolute path to the file to analyze"),
      goal: tool.schema.string().describe("What specific information to extract from the file")
    },
    async execute(args, toolContext) {
      log(`[look_at] Analyzing file: ${args.file_path}, goal: ${args.goal}`);
      const prompt = [
        "Analyze this file and extract the requested information.",
        "",
        `File path: ${args.file_path}`,
        `Goal: ${args.goal}`,
        "",
        "Read the file using the Read tool, then provide ONLY the extracted information that matches the goal.",
        "Be thorough on what was requested, concise on everything else.",
        "If the requested information is not found, clearly state what is missing."
      ].join("\n");
      log(`[look_at] Creating session with parent: ${toolContext.sessionID}`);
      const createResult = await ctx.client.session.create({
        body: {
          parentID: toolContext.sessionID,
          title: `look_at: ${args.goal.substring(0, 50)}`
        }
      });
      if (createResult.error) {
        log(`[look_at] Session create error:`, createResult.error);
        return `Error: Failed to create session: ${describeError(createResult.error)}`;
      }
      const sessionID = createResult.data.id;
      log(`[look_at] Created session: ${sessionID}`);
      log(`[look_at] Sending prompt to session ${sessionID}`);
      const promptResult = await ctx.client.session.prompt({
        path: { id: sessionID },
        body: {
          agent: MULTIMODAL_LOOKER_AGENT,
          // The sub-session must not spawn further agents or call look_at again.
          tools: {
            task: false,
            call_omo_agent: false,
            look_at: false
          },
          parts: [{ type: "text", text: prompt }]
        }
      });
      // NOTE(review): assumes prompt() resolves to the same { data, error } shape as
      // create()/messages(); optional chaining keeps this a no-op if it does not.
      if (promptResult?.error) {
        log(`[look_at] Prompt error:`, promptResult.error);
        return `Error: Failed to send prompt: ${describeError(promptResult.error)}`;
      }
      log(`[look_at] Prompt sent, fetching messages...`);
      const messagesResult = await ctx.client.session.messages({
        path: { id: sessionID }
      });
      if (messagesResult.error) {
        log(`[look_at] Messages error:`, messagesResult.error);
        return `Error: Failed to get messages: ${describeError(messagesResult.error)}`;
      }
      const messages = messagesResult.data;
      log(`[look_at] Got ${messages.length} messages`);
      // filter() returns a fresh array, so sorting it leaves `messages` untouched.
      // Newest first; messages without a creation time sort as 0.
      const lastAssistantMessage = messages
        .filter((m) => m.info.role === "assistant")
        .sort((a, b) => (b.info.time?.created || 0) - (a.info.time?.created || 0))[0];
      if (!lastAssistantMessage) {
        log(`[look_at] No assistant message found`);
        return `Error: No response from multimodal-looker agent`;
      }
      log(`[look_at] Found assistant message with ${lastAssistantMessage.parts.length} parts`);
      const textParts = lastAssistantMessage.parts.filter((p) => p.type === "text");
      const responseText = textParts.map((p) => p.text).join("\n");
      log(`[look_at] Got response, length: ${responseText.length}`);
      return responseText;
    }
  });
}
24192
24304
  // src/tools/index.ts
24193
24305
  function createBackgroundTools(manager, client2) {
24194
24306
  return {
@@ -24706,13 +24818,15 @@ var OhMyOpenCodePlugin = async (ctx) => {
24706
24818
  const backgroundNotificationHook = isHookEnabled("background-notification") ? createBackgroundNotificationHook(backgroundManager) : null;
24707
24819
  const backgroundTools = createBackgroundTools(backgroundManager, ctx.client);
24708
24820
  const callOmoAgent = createCallOmoAgent(ctx, backgroundManager);
24821
+ const lookAt = createLookAt(ctx);
24709
24822
  const googleAuthHooks = pluginConfig.google_auth ? await createGoogleAntigravityAuthPlugin(ctx) : null;
24710
24823
  return {
24711
24824
  ...googleAuthHooks ? { auth: googleAuthHooks.auth } : {},
24712
24825
  tool: {
24713
24826
  ...builtinTools,
24714
24827
  ...backgroundTools,
24715
- call_omo_agent: callOmoAgent
24828
+ call_omo_agent: callOmoAgent,
24829
+ look_at: lookAt
24716
24830
  },
24717
24831
  "chat.message": async (input, output) => {
24718
24832
  await claudeCodeHooks["chat.message"]?.(input, output);
@@ -24743,6 +24857,14 @@ var OhMyOpenCodePlugin = async (ctx) => {
24743
24857
  call_omo_agent: false
24744
24858
  };
24745
24859
  }
24860
+ if (config3.agent["multimodal-looker"]) {
24861
+ config3.agent["multimodal-looker"].tools = {
24862
+ ...config3.agent["multimodal-looker"].tools,
24863
+ task: false,
24864
+ call_omo_agent: false,
24865
+ look_at: false
24866
+ };
24867
+ }
24746
24868
  const mcpResult = pluginConfig.claude_code?.mcp ?? true ? await loadMcpConfigs() : { servers: {} };
24747
24869
  config3.mcp = {
24748
24870
  ...config3.mcp,
@@ -1,3 +1,4 @@
1
+ export declare function isPlainObject(value: unknown): value is Record<string, unknown>;
1
2
  /**
2
3
  * Deep merges two objects, with override values taking precedence.
3
4
  * - Objects are recursively merged
@@ -0,0 +1,6 @@
1
+ export declare function isMarkdownFile(entry: {
2
+ name: string;
3
+ isFile: () => boolean;
4
+ }): boolean;
5
+ export declare function isSymbolicLink(filePath: string): boolean;
6
+ export declare function resolveSymlink(filePath: string): string;
@@ -8,3 +8,4 @@ export * from "./tool-name";
8
8
  export * from "./pattern-matcher";
9
9
  export * from "./hook-disabled";
10
10
  export * from "./deep-merge";
11
+ export * from "./file-utils";
@@ -2,6 +2,7 @@ import type { PluginInput } from "@opencode-ai/plugin";
2
2
  import type { BackgroundManager } from "../features/background-agent";
3
3
  type OpencodeClient = PluginInput["client"];
4
4
  export { createCallOmoAgent } from "./call-omo-agent";
5
+ export { createLookAt } from "./look-at";
5
6
  export declare function createBackgroundTools(manager: BackgroundManager, client: OpencodeClient): {
6
7
  background_task: {
7
8
  description: string;
@@ -0,0 +1,2 @@
1
+ export declare const MULTIMODAL_LOOKER_AGENT: "multimodal-looker";
2
+ export declare const LOOK_AT_DESCRIPTION = "Analyze media files (PDFs, images, diagrams) that require visual interpretation.\n\nUse this tool to extract specific information from files that cannot be processed as plain text:\n- PDF documents: extract text, tables, structure, specific sections\n- Images: describe layouts, UI elements, text content, diagrams\n- Charts/Graphs: explain data, trends, relationships\n- Screenshots: identify UI components, text, visual elements\n- Architecture diagrams: explain flows, connections, components\n\nParameters:\n- file_path: Absolute path to the file to analyze\n- goal: What specific information to extract (be specific for better results)\n\nExamples:\n- \"Extract all API endpoints from this OpenAPI spec PDF\"\n- \"Describe the UI layout and components in this screenshot\"\n- \"Explain the data flow in this architecture diagram\"\n- \"List all table data from page 3 of this PDF\"\n\nThis tool uses a separate context window with Gemini 2.5 Flash for multimodal analysis,\nsaving tokens in the main conversation while providing accurate visual interpretation.";
@@ -0,0 +1,3 @@
1
+ export * from "./types";
2
+ export * from "./constants";
3
+ export { createLookAt } from "./tools";
@@ -0,0 +1,12 @@
1
+ import { type PluginInput } from "@opencode-ai/plugin";
2
+ export declare function createLookAt(ctx: PluginInput): {
3
+ description: string;
4
+ args: {
5
+ file_path: import("zod").ZodString;
6
+ goal: import("zod").ZodString;
7
+ };
8
+ execute(args: {
9
+ file_path: string;
10
+ goal: string;
11
+ }, context: import("@opencode-ai/plugin").ToolContext): Promise<string>;
12
+ };
@@ -0,0 +1,4 @@
1
/** Arguments accepted by the `look_at` tool. */
export interface LookAtArgs {
  /** Path of the file to analyze. */
  file_path: string;
  /** What information to extract from the file. */
  goal: string;
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "oh-my-opencode",
3
- "version": "1.0.1",
3
+ "version": "1.1.0",
4
4
  "description": "OpenCode plugin - custom agents (oracle, librarian) and enhanced features",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",