@link-assistant/hive-mind 1.10.2 → 1.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,31 @@
1
1
  # @link-assistant/hive-mind
2
2
 
3
+ ## 1.11.0
4
+
5
+ ### Minor Changes
6
+
7
+ - ca28333: Add system prompt guidance for visual UI work when model supports vision
8
+
9
+ **Changes:**
10
+ - Add `checkModelVisionCapability` function in claude.lib.mjs to detect if a model supports image input using models.dev API
11
+ - Add vision-specific system prompt section in claude.prompts.lib.mjs and agent.prompts.lib.mjs
12
+ - When model supports vision, add guidance for including screenshots/renders of visual UI changes in pull request descriptions
13
+ - Use "When x, do y." style as requested
14
+
15
+ **Vision prompt guidance includes:**
16
+ - When working on visual UI changes, include a render or screenshot in the PR description
17
+ - When showing visual results, save screenshots to the repository (e.g., docs/screenshots/)
18
+ - When referencing images, use permanent raw file links in the PR description markdown
19
+ - When uploading images, commit them first, then use raw GitHub URL format
20
+ - When the visual result is important, mention it explicitly with embedded image
21
+
22
+ **Technical details:**
23
+ - Uses models.dev API to check if 'image' is in the model's input modalities
24
+ - All current Claude models (opus, sonnet, haiku) support vision
25
+ - Gracefully handles unknown models by returning false
26
+
27
+ Fixes #1175
28
+
3
29
  ## 1.10.2
4
30
 
5
31
  ### Patch Changes
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@link-assistant/hive-mind",
3
- "version": "1.10.2",
3
+ "version": "1.11.0",
4
4
  "description": "AI-powered issue solver and hive mind for collaborative problem solving",
5
5
  "main": "src/hive.mjs",
6
6
  "type": "module",
package/src/agent.lib.mjs CHANGED
@@ -19,9 +19,9 @@ import { timeouts } from './config.lib.mjs';
19
19
  import { detectUsageLimit, formatUsageLimitMessage } from './usage-limit.lib.mjs';
20
20
 
21
21
  // Import pricing functions from claude.lib.mjs
22
- // We reuse fetchModelInfo to get pricing data from models.dev API
22
+ // We reuse fetchModelInfo and checkModelVisionCapability to get data from models.dev API
23
23
  const claudeLib = await import('./claude.lib.mjs');
24
- const { fetchModelInfo } = claudeLib;
24
+ const { fetchModelInfo, checkModelVisionCapability } = claudeLib;
25
25
 
26
26
  /**
27
27
  * Parse agent JSON output to extract token usage from step_finish events
@@ -250,6 +250,13 @@ export const executeAgent = async params => {
250
250
  // Import prompt building functions from agent.prompts.lib.mjs
251
251
  const { buildUserPrompt, buildSystemPrompt } = await import('./agent.prompts.lib.mjs');
252
252
 
253
+ // Check if the model supports vision using models.dev API
254
+ const mappedModel = mapModelToId(argv.model);
255
+ const modelSupportsVision = await checkModelVisionCapability(mappedModel);
256
+ if (argv.verbose) {
257
+ await log(`👁️ Model vision capability: ${modelSupportsVision ? 'supported' : 'not supported'}`, { verbose: true });
258
+ }
259
+
253
260
  // Build the user prompt
254
261
  const prompt = buildUserPrompt({
255
262
  issueUrl,
@@ -281,6 +288,7 @@ export const executeAgent = async params => {
281
288
  isContinueMode,
282
289
  forkedRepo,
283
290
  argv,
291
+ modelSupportsVision,
284
292
  });
285
293
 
286
294
  // Log prompt details in verbose mode
package/src/agent.prompts.lib.mjs CHANGED
@@ -81,7 +81,7 @@ export const buildUserPrompt = params => {
81
81
  * @returns {string} The formatted system prompt
82
82
  */
83
83
  export const buildSystemPrompt = params => {
84
- const { owner, repo, issueNumber, prNumber, branchName, workspaceTmpDir, argv } = params;
84
+ const { owner, repo, issueNumber, prNumber, branchName, workspaceTmpDir, argv, modelSupportsVision } = params;
85
85
 
86
86
  // Build thinking instruction based on --think level
87
87
  let thinkLine = '';
@@ -223,7 +223,18 @@ GitHub CLI command patterns.
223
223
  - When adding PR comment, use gh pr comment NUMBER --body "text" --repo OWNER/REPO.
224
224
  - When adding issue comment, use gh issue comment NUMBER --body "text" --repo OWNER/REPO.
225
225
  - When viewing PR details, use gh pr view NUMBER --repo OWNER/REPO.
226
- - When filtering with jq, use gh api repos/${owner}/${repo}/pulls/${prNumber}/comments --paginate --jq 'reverse | .[0:5]'.${ciExamples}${getArchitectureCareSubPrompt(argv)}`;
226
+ - When filtering with jq, use gh api repos/${owner}/${repo}/pulls/${prNumber}/comments --paginate --jq 'reverse | .[0:5]'.${
227
+ modelSupportsVision
228
+ ? `
229
+
230
+ Visual UI work and screenshots.
231
+ - When you work on visual UI changes (frontend, CSS, HTML, design), include a render or screenshot of the final result in the pull request description.
232
+ - When you need to show visual results, take a screenshot and save it to the repository (e.g., in a docs/screenshots/ or assets/ folder).
233
+ - When you save screenshots to the repository, use permanent raw file links in the pull request description markdown (e.g., https://raw.githubusercontent.com/${owner}/${repo}/${branchName}/docs/screenshots/result.png).
234
+ - When uploading images, commit them to the branch first, then reference them using the raw GitHub URL format.
235
+ - When the visual result is important for review, mention it explicitly in the pull request description with the embedded image.`
236
+ : ''
237
+ }${ciExamples}${getArchitectureCareSubPrompt(argv)}`;
227
238
  };
228
239
 
229
240
  // Export all functions as default object too
package/src/claude.lib.mjs CHANGED
@@ -340,6 +340,14 @@ export const executeClaude = async params => {
340
340
 
341
341
  // Import prompt building functions from claude.prompts.lib.mjs
342
342
  const { buildUserPrompt, buildSystemPrompt } = await import('./claude.prompts.lib.mjs');
343
+
344
+ // Check if the model supports vision using models.dev API
345
+ const mappedModel = mapModelToId(argv.model);
346
+ const modelSupportsVision = await checkModelVisionCapability(mappedModel);
347
+ if (argv.verbose) {
348
+ await log(`👁️ Model vision capability: ${modelSupportsVision ? 'supported' : 'not supported'}`, { verbose: true });
349
+ }
350
+
343
351
  // Build the user prompt
344
352
  const prompt = buildUserPrompt({
345
353
  issueUrl,
@@ -372,6 +380,7 @@ export const executeClaude = async params => {
372
380
  isContinueMode,
373
381
  forkedRepo,
374
382
  argv,
383
+ modelSupportsVision,
375
384
  });
376
385
  // Log prompt details in verbose mode
377
386
  if (argv.verbose) {
@@ -478,6 +487,27 @@ export const fetchModelInfo = async modelId => {
478
487
  return null;
479
488
  }
480
489
  };
490
+
491
+ /**
492
+ * Check if a model supports vision (image input) using models.dev API
493
+ * @param {string} modelId - The model ID (e.g., "claude-sonnet-4-5-20250929")
494
+ * @returns {Promise<boolean>} True if the model supports vision, false otherwise
495
+ */
496
+ export const checkModelVisionCapability = async modelId => {
497
+ try {
498
+ const modelInfo = await fetchModelInfo(modelId);
499
+ if (!modelInfo) {
500
+ return false;
501
+ }
502
+ // Check if 'image' is in the input modalities
503
+ const inputModalities = modelInfo.modalities?.input || [];
504
+ return inputModalities.includes('image');
505
+ } catch {
506
+ // If we can't determine vision capability, default to false
507
+ return false;
508
+ }
509
+ };
510
+
481
511
  /**
482
512
  * Calculate USD cost for a model's usage with detailed breakdown
483
513
  * @param {Object} usage - Token usage object
@@ -1441,4 +1471,5 @@ export default {
1441
1471
  getClaudeVersion,
1442
1472
  setClaudeVersion,
1443
1473
  resolveThinkingSettings,
1474
+ checkModelVisionCapability,
1444
1475
  };
package/src/claude.prompts.lib.mjs CHANGED
@@ -90,7 +90,7 @@ export const buildUserPrompt = params => {
90
90
  * @returns {string} The formatted system prompt
91
91
  */
92
92
  export const buildSystemPrompt = params => {
93
- const { owner, repo, issueNumber, prNumber, branchName, workspaceTmpDir, argv } = params;
93
+ const { owner, repo, issueNumber, prNumber, branchName, workspaceTmpDir, argv, modelSupportsVision } = params;
94
94
 
95
95
  // Note: --think keywords are deprecated for Claude Code >= 2.1.12
96
96
  // Thinking is now enabled by default with 31,999 token budget
@@ -313,6 +313,17 @@ Agent Commander usage (unified subagent delegation).
313
313
  - Benefits: Saves main agent context, supports any agent type, provides unified API across different AI tools.
314
314
  - Note: The subagent will have access to the same working directory and can read/write files as needed.`
315
315
  : ''
316
+ }${
317
+ modelSupportsVision
318
+ ? `
319
+
320
+ Visual UI work and screenshots.
321
+ - When you work on visual UI changes (frontend, CSS, HTML, design), include a render or screenshot of the final result in the pull request description.
322
+ - When you need to show visual results, take a screenshot and save it to the repository (e.g., in a docs/screenshots/ or assets/ folder).
323
+ - When you save screenshots to the repository, use permanent raw file links in the pull request description markdown (e.g., https://raw.githubusercontent.com/${owner}/${repo}/${branchName}/docs/screenshots/result.png).
324
+ - When uploading images, commit them to the branch first, then reference them using the raw GitHub URL format.
325
+ - When the visual result is important for review, mention it explicitly in the pull request description with the embedded image.`
326
+ : ''
316
327
  }${ciExamples}${getArchitectureCareSubPrompt(argv)}`;
317
328
  };
318
329