npm - vsegments - Versions diffs - 0.1.4 → 0.1.6 - Mend

vsegments 0.1.4 → 0.1.6

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (4) hide show

package/.claude/settings.local.json +12 -0
package/bin/cli.js +40 -2
package/package.json +3 -2
package/src/core.js +78 -40

package/.claude/settings.local.json ADDED Viewed

@@ -0,0 +1,12 @@
+{
+  "permissions": {
+    "allow": [
+      "Bash(npm rebuild:*)",
+      "Bash(vsegments:*)",
+      "Bash(node -e:*)",
+      "Bash(git add:*)",
+      "Bash(git commit:*)",
+      "Bash(npm publish:*)"
+    ]
+  }
+}

package/bin/cli.js CHANGED Viewed

@@ -11,11 +11,13 @@ const VSegments = require('../src/index');
 const program = new Command();
+const pkg = require('../package.json');
 program
   .name('vsegments')
   .description('Visual segmentation and bounding box detection using Google Gemini AI')
-  .version('0.1.0')
-  .requiredOption('-f, --file <image>', 'Path to input image file')
+  .version(pkg.version)
+  .option('-f, --file <image>', 'Path to input image file')
   .option('--segment', 'Perform segmentation instead of bounding box detection')
   .option('--api-key <key>', 'Google API key (default: GOOGLE_API_KEY env var)')
   .option('-m, --model <model>', 'Model name to use', 'gemini-flash-latest')
@@ -38,6 +40,42 @@ program.parse(process.argv);
 const options = program.opts();
+// Show welcome message if no file provided
+if (!options.file) {
+  console.log(`
+vsegments v${pkg.version}
+Visual segmentation and bounding box detection using Google Gemini AI
+QUICK START
+  vsegments -f image.jpg                   Detect objects with bounding boxes
+  vsegments -f image.jpg --segment         Perform segmentation with masks
+  vsegments -f image.jpg -o output.png     Save visualization to file
+CUSTOM PROMPTS
+  vsegments -f photo.jpg -p "find all faces"
+  vsegments -f room.jpg -p "furniture items"
+OUTPUT OPTIONS
+  --json results.json    Export detection data as JSON
+  --compact              Print minimal output: "1. label [x y xx yy]"
+  --raw                  Show raw API response
+CONFIGURATION
+  --api-key <key>        Google API key (or set GOOGLE_API_KEY env var)
+  --model <name>         Model to use (default: gemini-flash-latest)
+  --temperature <0-1>    Sampling temperature (default: 0.5)
+  --max-objects <n>      Max objects to detect (default: 25)
+VISUALIZATION
+  --line-width <n>       Bounding box line width (default: 4)
+  --font-size <n>        Label font size (default: 14)
+  --alpha <0-1>          Mask transparency (default: 0.7)
+Run 'vsegments --help' for full options.
+`);
+  process.exit(0);
+}
 async function main() {
   try {
     // Validate file exists

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "vsegments",
-  "version": "0.1.4",
+  "version": "0.1.6",
   "description": "Visual segmentation and bounding box detection using Google Gemini AI",
   "main": "src/index.js",
   "types": "src/index.d.ts",
@@ -43,7 +43,8 @@
     "@google/generative-ai": "^0.21.0",
     "canvas": "^2.11.2",
     "commander": "^12.0.0",
-    "sharp": "^0.33.0"
+    "sharp": "^0.33.0",
+    "vsegments": "^0.1.4"
   },
   "devDependencies": {
     "@types/jest": "^30.0.0",

package/src/core.js CHANGED Viewed

@@ -7,11 +7,11 @@ const { loadImage } = require('canvas');
 const fs = require('fs').promises;
 const { SegmentationResult } = require('./models');
 const { parseBoundingBoxes, parseSegmentationMasks } = require('./utils');
-const {
-  loadImageToCanvas,
-  plotBoundingBoxes,
+const {
+  loadImageToCanvas,
+  plotBoundingBoxes,
   plotSegmentationMasks,
-  saveCanvas
+  saveCanvas
 } = require('./visualize');
 class VSegments {
@@ -25,26 +25,38 @@ class VSegments {
    */
   constructor(options = {}) {
     this.apiKey = options.apiKey || process.env.GOOGLE_API_KEY;
     if (!this.apiKey) {
       throw new Error(
         'API key must be provided or set in GOOGLE_API_KEY environment variable'
       );
     }
     this.model = options.model || 'gemini-3-pro-preview';
     this.temperature = options.temperature !== undefined ? options.temperature : 0.5;
     this.maxObjects = options.maxObjects || 25;
     // Initialize Google AI client
     this.genAI = new GoogleGenerativeAI(this.apiKey);
     // Default system instructions
     this.defaultSystemInstructions = `
-Return bounding boxes as a JSON array with labels. Never return masks or code fencing. Limit to ${this.maxObjects} objects.
-If an object is present multiple times, name them according to their unique characteristic (colors, size, position, unique characteristics, etc..).
+Return bounding boxes as a JSON array with labels.
+Never return masks or code fencing. Limit to ${this.maxObjects} objects.
+Follow this intuition:
+If the object is a face number them according to how someone would draw the features of the face. First the left eye, then the right eye
+then the nose, then the left ear, then the right ear, then the mouth, then the chin.
+Same with Animal faces.
+General guideline:
+Follow a drawing order intuitively. People usually do not draw first an eye, and then the background and then the shirt but they
+follow a more natural order with symmetry in mind.
+No more than 10 features!
     `.trim();
     // Safety settings
     this.safetySettings = [
       {
@@ -53,7 +65,7 @@ If an object is present multiple times, name them according to their unique char
       },
     ];
   }
   /**
    * Load image from file and convert to format for API
    * @param {string} imagePath - Path to image file
@@ -62,18 +74,44 @@ If an object is present multiple times, name them according to their unique char
   async _loadImage(imagePath) {
     let imageBuffer = await fs.readFile(imagePath);
     let mimeType = this._getMimeType(imagePath);
     // Convert SVG to PNG for API compatibility
     if (mimeType === 'image/svg+xml') {
       const sharp = require('sharp');
-      imageBuffer = await sharp(imageBuffer)
+      // Remove common registration/cut line colors from SVG before conversion
+      // These colors are often used for print registration marks, cut lines, etc.
+      let svgString = imageBuffer.toString('utf-8');
+      const registrationColors = [
+        '#ec008c', '#ED008C', // Magenta/pink registration
+        '#00ff00', '#00FF00', // Green registration
+        '#ff0000', '#FF0000', // Red registration (when used for cut lines)
+      ];
+      // Remove elements with registration colors
+      for (const color of registrationColors) {
+        // Remove stroke colors
+        const strokeRegex = new RegExp(`stroke="${color}"`, 'gi');
+        svgString = svgString.replace(strokeRegex, 'stroke="none"');
+        // Remove fill colors
+        const fillRegex = new RegExp(`fill="${color}"`, 'gi');
+        svgString = svgString.replace(fillRegex, 'fill="none"');
+      }
+      // Increase thin stroke widths for better visibility
+      svgString = svgString.replace(/stroke-width:\s*0\.5pt/gi, 'stroke-width: 2pt');
+      svgString = svgString.replace(/stroke-width="0\.5pt"/gi, 'stroke-width="2pt"');
+      imageBuffer = await sharp(Buffer.from(svgString), { density: 300 })
+        .resize(1024, 1024, { fit: 'inside', withoutEnlargement: false })
+        .flatten({ background: { r: 245, g: 245, b: 245 } })  // Light gray background for better contrast
         .png()
         .toBuffer();
       mimeType = 'image/png';
     }
     const base64Data = imageBuffer.toString('base64');
     return {
       inlineData: {
         data: base64Data,
@@ -81,7 +119,7 @@ If an object is present multiple times, name them according to their unique char
       }
     };
   }
   /**
    * Get MIME type from file extension
    * @param {string} filePath - File path
@@ -99,7 +137,7 @@ If an object is present multiple times, name them according to their unique char
     };
     return mimeTypes[ext] || 'image/jpeg';
   }
   /**
    * Get system instructions with custom additions
    * @param {string} customInstructions - Additional instructions
@@ -107,14 +145,14 @@ If an object is present multiple times, name them according to their unique char
    */
   _getSystemInstructions(customInstructions) {
     let instructions = this.defaultSystemInstructions;
     if (customInstructions) {
       instructions += '\n' + customInstructions;
     }
     return instructions;
   }
   /**
    * Detect bounding boxes in an image
    * @param {string} imagePath - Path to image file
@@ -130,17 +168,17 @@ If an object is present multiple times, name them according to their unique char
       customInstructions = null,
       maxSize = 1024
     } = options;
     // Load image
     const image = await this._loadImage(imagePath);
     // Get model
     const model = this.genAI.getGenerativeModel({
       model: this.model,
       safetySettings: this.safetySettings,
       systemInstruction: this._getSystemInstructions(customInstructions)
     });
     // Generate content
     let result, response, text;
     try {
@@ -150,7 +188,7 @@ If an object is present multiple times, name them according to their unique char
           temperature: this.temperature,
         }
       });
       response = result.response;
       text = response.text();
     } catch (error) {
@@ -161,13 +199,13 @@ If an object is present multiple times, name them according to their unique char
       }
       throw error;
     }
     // Parse response
     const boxes = parseBoundingBoxes(text);
     return new SegmentationResult(boxes, null, text);
   }
   /**
    * Perform segmentation on an image
    * @param {string} imagePath - Path to image file
@@ -181,21 +219,21 @@ If an object is present multiple times, name them according to their unique char
       prompt = 'Give the segmentation masks for the objects. Output a JSON list of segmentation masks where each entry contains the 2D bounding box in the key "box_2d", the segmentation mask in key "mask", and the text label in the key "label". Use descriptive labels.',
       maxSize = 1024
     } = options;
     // Load image
     const image = await this._loadImage(imagePath);
     // Get image dimensions
     const img = await loadImage(imagePath);
     const imgWidth = img.width;
     const imgHeight = img.height;
     // Get model (no system instructions for segmentation)
     const model = this.genAI.getGenerativeModel({
       model: this.model,
       safetySettings: this.safetySettings
     });
     // Generate content
     let result, response, text;
     try {
@@ -205,7 +243,7 @@ If an object is present multiple times, name them according to their unique char
           temperature: this.temperature,
         }
       });
       response = result.response;
       text = response.text();
     } catch (error) {
@@ -216,14 +254,14 @@ If an object is present multiple times, name them according to their unique char
       }
       throw error;
     }
     // Parse response
     const boxes = parseBoundingBoxes(text);
     const masks = await parseSegmentationMasks(text, imgHeight, imgWidth);
     return new SegmentationResult(boxes, masks, text);
   }
   /**
    * Visualize detection/segmentation results
    * @param {string} imagePath - Path to original image
@@ -242,10 +280,10 @@ If an object is present multiple times, name them according to their unique char
       fontSize = 14,
       alpha = 0.7
     } = options;
     // Load image to canvas
     const canvas = await loadImageToCanvas(imagePath, 2048);
     // Draw visualizations
     if (result.masks) {
       plotSegmentationMasks(canvas, result.masks, {
@@ -259,12 +297,12 @@ If an object is present multiple times, name them according to their unique char
         fontSize
       });
     }
     // Save if requested
     if (outputPath) {
       await saveCanvas(canvas, outputPath);
     }
     return canvas;
   }
 }