npm - @link-assistant/hive-mind - Versions diffs - 1.10.2 → 1.11.0 - Mend

@link-assistant/hive-mind 1.10.2 → 1.11.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (6)

package/CHANGELOG.md +26 -0
package/package.json +1 -1
package/src/agent.lib.mjs +10 -2
package/src/agent.prompts.lib.mjs +13 -2
package/src/claude.lib.mjs +31 -0
package/src/claude.prompts.lib.mjs +12 -1

package/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,31 @@
 # @link-assistant/hive-mind
+## 1.11.0
+### Minor Changes
+- ca28333: Add system prompt guidance for visual UI work when model supports vision
+  **Changes:**
+  - Add `checkModelVisionCapability` function in claude.lib.mjs to detect if a model supports image input using models.dev API
+  - Add vision-specific system prompt section in claude.prompts.lib.mjs and agent.prompts.lib.mjs
+  - When model supports vision, add guidance for including screenshots/renders of visual UI changes in pull request descriptions
+  - Use "When x, do y." style as requested
+  **Vision prompt guidance includes:**
+  - When working on visual UI changes, include a render or screenshot in the PR description
+  - When showing visual results, save screenshots to the repository (e.g., docs/screenshots/)
+  - When referencing images, use permanent raw file links in the PR description markdown
+  - When uploading images, commit them first, then use raw GitHub URL format
+  - When the visual result is important, mention it explicitly with embedded image
+  **Technical details:**
+  - Uses models.dev API to check if 'image' is in the model's input modalities
+  - All current Claude models (opus, sonnet, haiku) support vision
+  - Gracefully handles unknown models by returning false
+  Fixes #1175
 ## 1.10.2
 ### Patch Changes

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@link-assistant/hive-mind",
-  "version": "1.10.2",
+  "version": "1.11.0",
   "description": "AI-powered issue solver and hive mind for collaborative problem solving",
   "main": "src/hive.mjs",
   "type": "module",

package/src/agent.lib.mjs CHANGED Viewed

@@ -19,9 +19,9 @@ import { timeouts } from './config.lib.mjs';
 import { detectUsageLimit, formatUsageLimitMessage } from './usage-limit.lib.mjs';
 // Import pricing functions from claude.lib.mjs
-// We reuse fetchModelInfo to get pricing data from models.dev API
+// We reuse fetchModelInfo and checkModelVisionCapability to get data from models.dev API
 const claudeLib = await import('./claude.lib.mjs');
-const { fetchModelInfo } = claudeLib;
+const { fetchModelInfo, checkModelVisionCapability } = claudeLib;
 /**
  * Parse agent JSON output to extract token usage from step_finish events
@@ -250,6 +250,13 @@ export const executeAgent = async params => {
   // Import prompt building functions from agent.prompts.lib.mjs
   const { buildUserPrompt, buildSystemPrompt } = await import('./agent.prompts.lib.mjs');
+  // Check if the model supports vision using models.dev API
+  const mappedModel = mapModelToId(argv.model);
+  const modelSupportsVision = await checkModelVisionCapability(mappedModel);
+  if (argv.verbose) {
+    await log(`👁️  Model vision capability: ${modelSupportsVision ? 'supported' : 'not supported'}`, { verbose: true });
+  }
   // Build the user prompt
   const prompt = buildUserPrompt({
     issueUrl,
@@ -281,6 +288,7 @@ export const executeAgent = async params => {
     isContinueMode,
     forkedRepo,
     argv,
+    modelSupportsVision,
   });
   // Log prompt details in verbose mode

package/src/agent.prompts.lib.mjs CHANGED Viewed

@@ -81,7 +81,7 @@ export const buildUserPrompt = params => {
  * @returns {string} The formatted system prompt
  */
 export const buildSystemPrompt = params => {
-  const { owner, repo, issueNumber, prNumber, branchName, workspaceTmpDir, argv } = params;
+  const { owner, repo, issueNumber, prNumber, branchName, workspaceTmpDir, argv, modelSupportsVision } = params;
   // Build thinking instruction based on --think level
   let thinkLine = '';
@@ -223,7 +223,18 @@ GitHub CLI command patterns.
    - When adding PR comment, use gh pr comment NUMBER --body "text" --repo OWNER/REPO.
    - When adding issue comment, use gh issue comment NUMBER --body "text" --repo OWNER/REPO.
    - When viewing PR details, use gh pr view NUMBER --repo OWNER/REPO.
-   - When filtering with jq, use gh api repos/${owner}/${repo}/pulls/${prNumber}/comments --paginate --jq 'reverse | .[0:5]'.${ciExamples}${getArchitectureCareSubPrompt(argv)}`;
+   - When filtering with jq, use gh api repos/${owner}/${repo}/pulls/${prNumber}/comments --paginate --jq 'reverse | .[0:5]'.${
+     modelSupportsVision
+       ? `
+Visual UI work and screenshots.
+   - When you work on visual UI changes (frontend, CSS, HTML, design), include a render or screenshot of the final result in the pull request description.
+   - When you need to show visual results, take a screenshot and save it to the repository (e.g., in a docs/screenshots/ or assets/ folder).
+   - When you save screenshots to the repository, use permanent raw file links in the pull request description markdown (e.g., https://raw.githubusercontent.com/${owner}/${repo}/${branchName}/docs/screenshots/result.png).
+   - When uploading images, commit them to the branch first, then reference them using the raw GitHub URL format.
+   - When the visual result is important for review, mention it explicitly in the pull request description with the embedded image.`
+       : ''
+   }${ciExamples}${getArchitectureCareSubPrompt(argv)}`;
 };
 // Export all functions as default object too

package/src/claude.lib.mjs CHANGED Viewed

@@ -340,6 +340,14 @@ export const executeClaude = async params => {
   // Import prompt building functions from claude.prompts.lib.mjs
   const { buildUserPrompt, buildSystemPrompt } = await import('./claude.prompts.lib.mjs');
+  // Check if the model supports vision using models.dev API
+  const mappedModel = mapModelToId(argv.model);
+  const modelSupportsVision = await checkModelVisionCapability(mappedModel);
+  if (argv.verbose) {
+    await log(`👁️  Model vision capability: ${modelSupportsVision ? 'supported' : 'not supported'}`, { verbose: true });
+  }
   // Build the user prompt
   const prompt = buildUserPrompt({
     issueUrl,
@@ -372,6 +380,7 @@ export const executeClaude = async params => {
     isContinueMode,
     forkedRepo,
     argv,
+    modelSupportsVision,
   });
   // Log prompt details in verbose mode
   if (argv.verbose) {
@@ -478,6 +487,27 @@ export const fetchModelInfo = async modelId => {
     return null;
   }
 };
+/**
+ * Check if a model supports vision (image input) using models.dev API; looks the model up via fetchModelInfo and resolves to false (instead of rejecting) when no model info is returned or the lookup throws
+ * @param {string} modelId - The model ID (e.g., "claude-sonnet-4-5-20250929") passed unchanged to fetchModelInfo
+ * @returns {Promise<boolean>} True if 'image' is listed in the model's input modalities, false otherwise (including every failure path)
+ */
+export const checkModelVisionCapability = async modelId => {
+  try {
+    const modelInfo = await fetchModelInfo(modelId);
+    if (!modelInfo) {
+      return false;
+    }
+    // Check if 'image' is in the input modalities; a missing modalities object or input field falls back to an empty list
+    const inputModalities = modelInfo.modalities?.input || [];
+    return inputModalities.includes('image');
+  } catch {
+    // Best-effort check: if we can't determine vision capability (any exception from the lookup or the check above), default to false
+    return false;
+  }
+};
 /**
  * Calculate USD cost for a model's usage with detailed breakdown
  * @param {Object} usage - Token usage object
@@ -1441,4 +1471,5 @@ export default {
   getClaudeVersion,
   setClaudeVersion,
   resolveThinkingSettings,
+  checkModelVisionCapability,
 };

package/src/claude.prompts.lib.mjs CHANGED Viewed

@@ -90,7 +90,7 @@ export const buildUserPrompt = params => {
  * @returns {string} The formatted system prompt
  */
 export const buildSystemPrompt = params => {
-  const { owner, repo, issueNumber, prNumber, branchName, workspaceTmpDir, argv } = params;
+  const { owner, repo, issueNumber, prNumber, branchName, workspaceTmpDir, argv, modelSupportsVision } = params;
   // Note: --think keywords are deprecated for Claude Code >= 2.1.12
   // Thinking is now enabled by default with 31,999 token budget
@@ -313,6 +313,17 @@ Agent Commander usage (unified subagent delegation).
    - Benefits: Saves main agent context, supports any agent type, provides unified API across different AI tools.
    - Note: The subagent will have access to the same working directory and can read/write files as needed.`
        : ''
+   }${
+     modelSupportsVision
+       ? `
+Visual UI work and screenshots.
+   - When you work on visual UI changes (frontend, CSS, HTML, design), include a render or screenshot of the final result in the pull request description.
+   - When you need to show visual results, take a screenshot and save it to the repository (e.g., in a docs/screenshots/ or assets/ folder).
+   - When you save screenshots to the repository, use permanent raw file links in the pull request description markdown (e.g., https://raw.githubusercontent.com/${owner}/${repo}/${branchName}/docs/screenshots/result.png).
+   - When uploading images, commit them to the branch first, then reference them using the raw GitHub URL format.
+   - When the visual result is important for review, mention it explicitly in the pull request description with the embedded image.`
+       : ''
    }${ciExamples}${getArchitectureCareSubPrompt(argv)}`;
 };