vsegments 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/core.js ADDED
@@ -0,0 +1,241 @@
1
+ /**
2
+ * Core VSegments class for image segmentation and bounding box detection
3
+ */
4
+
5
+ const { GoogleGenerativeAI } = require('@google/generative-ai');
6
+ const { loadImage } = require('canvas');
7
+ const fs = require('fs').promises;
8
+ const { SegmentationResult } = require('./models');
9
+ const { parseBoundingBoxes, parseSegmentationMasks } = require('./utils');
10
+ const {
11
+ loadImageToCanvas,
12
+ plotBoundingBoxes,
13
+ plotSegmentationMasks,
14
+ saveCanvas
15
+ } = require('./visualize');
16
+
17
class VSegments {
  /**
   * Main class for visual segmentation using Google Gemini AI.
   * @param {Object} options - Configuration options
   * @param {string} [options.apiKey] - Google API key (defaults to GOOGLE_API_KEY env var)
   * @param {string} [options.model] - Model name (default: gemini-flash-latest)
   * @param {number} [options.temperature] - Sampling temperature (default: 0.5)
   * @param {number} [options.maxObjects] - Maximum objects to detect (default: 25)
   * @throws {Error} If no API key is provided and GOOGLE_API_KEY is unset
   */
  constructor(options = {}) {
    this.apiKey = options.apiKey || process.env.GOOGLE_API_KEY;

    if (!this.apiKey) {
      throw new Error(
        'API key must be provided or set in GOOGLE_API_KEY environment variable'
      );
    }

    this.model = options.model || 'gemini-flash-latest';
    // Explicit undefined checks (not ||) so falsy-but-valid values such as
    // temperature 0 are not silently replaced by the defaults.
    this.temperature = options.temperature !== undefined ? options.temperature : 0.5;
    this.maxObjects = options.maxObjects !== undefined ? options.maxObjects : 25;

    // Initialize Google AI client
    this.genAI = new GoogleGenerativeAI(this.apiKey);

    // Default system instructions used for bounding-box detection.
    this.defaultSystemInstructions = `
Return bounding boxes as a JSON array with labels. Never return masks or code fencing. Limit to ${this.maxObjects} objects.
If an object is present multiple times, name them according to their unique characteristic (colors, size, position, unique characteristics, etc..).
`.trim();

    // Safety settings forwarded verbatim to the Gemini API.
    this.safetySettings = [
      {
        category: 'HARM_CATEGORY_DANGEROUS_CONTENT',
        threshold: 'BLOCK_ONLY_HIGH',
      },
    ];
  }

  /**
   * Read an image file and wrap it in the inline-data shape expected by the
   * Gemini generateContent API.
   * @param {string} imagePath - Path to image file
   * @returns {Promise<Object>} - { inlineData: { data, mimeType } } with base64 payload
   */
  async _loadImage(imagePath) {
    const imageBuffer = await fs.readFile(imagePath);
    const base64Data = imageBuffer.toString('base64');
    const mimeType = this._getMimeType(imagePath);

    return {
      inlineData: {
        data: base64Data,
        mimeType: mimeType
      }
    };
  }

  /**
   * Guess a MIME type from the file extension.
   * @param {string} filePath - File path
   * @returns {string} - MIME type; falls back to image/jpeg for unknown extensions
   */
  _getMimeType(filePath) {
    const ext = filePath.toLowerCase().split('.').pop();
    const mimeTypes = {
      'jpg': 'image/jpeg',
      'jpeg': 'image/jpeg',
      'png': 'image/png',
      'gif': 'image/gif',
      'webp': 'image/webp'
    };
    return mimeTypes[ext] || 'image/jpeg';
  }

  /**
   * Build the full system instructions, appending any caller-supplied text.
   * @param {string|null} customInstructions - Additional instructions (optional)
   * @returns {string} - Complete system instructions
   */
  _getSystemInstructions(customInstructions) {
    let instructions = this.defaultSystemInstructions;

    if (customInstructions) {
      instructions += '\n' + customInstructions;
    }

    return instructions;
  }

  /**
   * Detect bounding boxes in an image.
   * @param {string} imagePath - Path to image file
   * @param {Object} [options] - Detection options
   * @param {string} [options.prompt] - Custom prompt
   * @param {string} [options.customInstructions] - Additional system instructions
   * @param {number} [options.maxSize] - Maximum image dimension
   * @returns {Promise<SegmentationResult>} - Detection results (boxes only, masks null)
   */
  async detectBoxes(imagePath, options = {}) {
    const {
      prompt = 'Detect the 2d bounding boxes',
      customInstructions = null,
      // NOTE(review): maxSize is accepted but never applied — the image is
      // sent at full resolution. TODO: wire up downscaling or drop the option.
      maxSize = 1024
    } = options;

    // Load image
    const image = await this._loadImage(imagePath);

    // Get model
    const model = this.genAI.getGenerativeModel({
      model: this.model,
      safetySettings: this.safetySettings,
      systemInstruction: this._getSystemInstructions(customInstructions)
    });

    // Generate content
    const result = await model.generateContent({
      contents: [{ role: 'user', parts: [{ text: prompt }, image] }],
      generationConfig: {
        temperature: this.temperature,
      }
    });

    const response = result.response;
    const text = response.text();

    // Parse response
    const boxes = parseBoundingBoxes(text);

    return new SegmentationResult(boxes, null, text);
  }

  /**
   * Perform segmentation on an image.
   * @param {string} imagePath - Path to image file
   * @param {Object} [options] - Segmentation options
   * @param {string} [options.prompt] - Custom prompt
   * @param {number} [options.maxSize] - Maximum image dimension
   * @returns {Promise<SegmentationResult>} - Results with both boxes and masks
   */
  async segment(imagePath, options = {}) {
    const {
      prompt = 'Give the segmentation masks for the objects. Output a JSON list of segmentation masks where each entry contains the 2D bounding box in the key "box_2d", the segmentation mask in key "mask", and the text label in the key "label". Use descriptive labels.',
      // NOTE(review): maxSize is accepted but never applied — see detectBoxes.
      maxSize = 1024
    } = options;

    // Load image
    const image = await this._loadImage(imagePath);

    // Get image dimensions (needed to scale masks back to pixel space)
    const img = await loadImage(imagePath);
    const imgWidth = img.width;
    const imgHeight = img.height;

    // Get model (no system instructions for segmentation — the prompt
    // itself specifies the expected JSON output)
    const model = this.genAI.getGenerativeModel({
      model: this.model,
      safetySettings: this.safetySettings
    });

    // Generate content
    const result = await model.generateContent({
      contents: [{ role: 'user', parts: [{ text: prompt }, image] }],
      generationConfig: {
        temperature: this.temperature,
      }
    });

    const response = result.response;
    const text = response.text();

    // Parse response
    const boxes = parseBoundingBoxes(text);
    const masks = await parseSegmentationMasks(text, imgHeight, imgWidth);

    return new SegmentationResult(boxes, masks, text);
  }

  /**
   * Visualize detection/segmentation results on a canvas.
   * @param {string} imagePath - Path to original image
   * @param {SegmentationResult} result - Detection/segmentation results
   * @param {Object} [options] - Visualization options
   * @param {string} [options.outputPath] - Path to save output (skipped when null)
   * @param {number} [options.lineWidth] - Bounding box line width (default 4)
   * @param {number} [options.fontSize] - Label font size (default 14)
   * @param {number} [options.alpha] - Mask transparency 0-1 (default 0.7)
   * @param {number} [options.maxSize] - Maximum canvas dimension (default 2048,
   *   previously hard-coded)
   * @returns {Promise<Canvas>} - Canvas with visualizations drawn on it
   */
  async visualize(imagePath, result, options = {}) {
    const {
      outputPath = null,
      lineWidth = 4,
      fontSize = 14,
      alpha = 0.7,
      maxSize = 2048
    } = options;

    // Load image to canvas
    const canvas = await loadImageToCanvas(imagePath, maxSize);

    // Prefer mask rendering when masks are present; otherwise draw boxes.
    if (result.masks) {
      plotSegmentationMasks(canvas, result.masks, {
        lineWidth,
        fontSize,
        alpha
      });
    } else {
      plotBoundingBoxes(canvas, result.boxes, {
        lineWidth,
        fontSize
      });
    }

    // Save if requested
    if (outputPath) {
      await saveCanvas(canvas, outputPath);
    }

    return canvas;
  }
}
240
+
241
// Sole export of this module: the VSegments class itself.
module.exports = VSegments;
package/src/index.d.ts ADDED
@@ -0,0 +1,83 @@
1
/**
 * TypeScript definitions for vsegments
 */

/// <reference types="node" />

import { Canvas } from 'canvas';

/** Plain-object shape of a single detection as returned by the API. */
export interface BoundingBoxData {
  label: string;
  y1: number;
  x1: number;
  y2: number;
  x2: number;
}

/** A labelled 2D bounding box; coordinates are normalized to 0-1000. */
export class BoundingBox {
  label: string;
  y1: number;
  x1: number;
  y2: number;
  x2: number;

  constructor(label: string, y1: number, x1: number, y2: number, x2: number);
  /** Convert normalized coordinates to absolute pixels: [x1, y1, x2, y2]. */
  toAbsolute(imgWidth: number, imgHeight: number): [number, number, number, number];
  /** Build a BoundingBox from a raw API item containing box_2d and label. */
  static fromDict(data: any): BoundingBox;
}

/** A per-object segmentation mask with absolute-pixel box coordinates. */
export class SegmentationMask {
  y0: number;
  x0: number;
  y1: number;
  x1: number;
  /** Full-image-size mask buffer (height * width bytes), values 0-255. */
  mask: Buffer;
  label: string;

  constructor(y0: number, x0: number, y1: number, x1: number, mask: Buffer, label: string);
}

/** Container for detection/segmentation results. */
export class SegmentationResult {
  boxes: BoundingBox[];
  /** Null for box-only detection results. */
  masks: SegmentationMask[] | null;
  rawResponse: string | null;

  constructor(boxes: BoundingBox[], masks?: SegmentationMask[] | null, rawResponse?: string | null);
  /** Number of detected objects (boxes.length). */
  get length(): number;
}

/** Constructor options for VSegments. */
export interface VSegmentsOptions {
  /** Google API key; falls back to the GOOGLE_API_KEY environment variable. */
  apiKey?: string;
  /** Model name (default: gemini-flash-latest). */
  model?: string;
  /** Sampling temperature (default: 0.5). */
  temperature?: number;
  /** Maximum objects to detect (default: 25). */
  maxObjects?: number;
}

export interface DetectBoxesOptions {
  prompt?: string;
  customInstructions?: string;
  /** NOTE(review): accepted but not currently applied by the implementation. */
  maxSize?: number;
}

export interface SegmentOptions {
  prompt?: string;
  /** NOTE(review): accepted but not currently applied by the implementation. */
  maxSize?: number;
}

export interface VisualizeOptions {
  outputPath?: string;
  lineWidth?: number;
  fontSize?: number;
  /** Mask transparency in [0, 1]. */
  alpha?: number;
}

/** Main entry point: visual segmentation using Google Gemini AI. */
export default class VSegments {
  constructor(options?: VSegmentsOptions);

  detectBoxes(imagePath: string, options?: DetectBoxesOptions): Promise<SegmentationResult>;
  segment(imagePath: string, options?: SegmentOptions): Promise<SegmentationResult>;
  visualize(imagePath: string, result: SegmentationResult, options?: VisualizeOptions): Promise<Canvas>;
}

export { VSegments };
export const version: string;
package/src/index.js ADDED
@@ -0,0 +1,20 @@
1
+ /**
2
+ * vsegments - Visual Segmentation Library
3
+ *
4
+ * A Node.js library for image segmentation and bounding box detection
5
+ * using Google Gemini AI.
6
+ *
7
+ * @module vsegments
8
+ * @author Marco Kotrotsos
9
+ * @license MIT
10
+ */
11
+
12
const VSegments = require('./core');
const { BoundingBox, SegmentationMask, SegmentationResult } = require('./models');

// The package's main export is the VSegments class itself; the data-model
// classes and the package version are attached as properties so callers can
// either use `require('vsegments')` directly or destructure named exports.
module.exports = VSegments;
module.exports.VSegments = VSegments;
Object.assign(module.exports, { BoundingBox, SegmentationMask, SegmentationResult });
module.exports.version = '0.1.0';
package/src/models.js ADDED
@@ -0,0 +1,99 @@
1
+ /**
2
+ * Data models for vsegments
3
+ */
4
+
5
class BoundingBox {
  /**
   * A labelled 2D bounding box whose coordinates are normalized to 0-1000.
   * @param {string} label - Object label
   * @param {number} y1 - Top coordinate (normalized 0-1000)
   * @param {number} x1 - Left coordinate (normalized 0-1000)
   * @param {number} y2 - Bottom coordinate (normalized 0-1000)
   * @param {number} x2 - Right coordinate (normalized 0-1000)
   */
  constructor(label, y1, x1, y2, x2) {
    Object.assign(this, { label, y1, x1, y2, x2 });
  }

  /**
   * Scale the normalized coordinates up to absolute pixel coordinates.
   * @param {number} imgWidth - Image width in pixels
   * @param {number} imgHeight - Image height in pixels
   * @returns {number[]} - [absX1, absY1, absX2, absY2]
   */
  toAbsolute(imgWidth, imgHeight) {
    const scaleX = (v) => Math.round((v / 1000) * imgWidth);
    const scaleY = (v) => Math.round((v / 1000) * imgHeight);
    return [scaleX(this.x1), scaleY(this.y1), scaleX(this.x2), scaleY(this.y2)];
  }

  /**
   * Construct a BoundingBox from a raw API response item.
   * @param {Object} data - Raw item with `box_2d` ([y1, x1, y2, x2]) and `label`
   * @returns {BoundingBox}
   */
  static fromDict(data) {
    const [y1, x1, y2, x2] = data.box_2d;
    return new BoundingBox(data.label, y1, x1, y2, x2);
  }
}
52
+
53
class SegmentationMask {
  /**
   * A segmentation mask for one detected object.
   * @param {number} y0 - Top coordinate (absolute pixels)
   * @param {number} x0 - Left coordinate (absolute pixels)
   * @param {number} y1 - Bottom coordinate (absolute pixels)
   * @param {number} x1 - Right coordinate (absolute pixels)
   * @param {Buffer} mask - Mask data [height, width] with values 0-255
   * @param {string} label - Object label
   */
  constructor(y0, x0, y1, x1, mask, label) {
    Object.assign(this, { y0, x0, y1, x1, mask, label });
  }
}
72
+
73
class SegmentationResult {
  /**
   * Container for detection/segmentation results.
   * @param {BoundingBox[]} boxes - Array of bounding boxes
   * @param {SegmentationMask[]|null} masks - Array of masks, or null for
   *   box-only detection results
   * @param {string|null} rawResponse - Raw API response text (optional)
   */
  constructor(boxes, masks = null, rawResponse = null) {
    Object.assign(this, { boxes, masks, rawResponse });
  }

  /**
   * Number of detected objects.
   * @returns {number}
   */
  get length() {
    return this.boxes.length;
  }
}
94
+
95
// Public data-model exports, consumed by core.js and utils.js.
module.exports = {
  BoundingBox,
  SegmentationMask,
  SegmentationResult
};
package/src/utils.js ADDED
@@ -0,0 +1,118 @@
1
+ /**
2
+ * Utility functions for parsing and processing API responses
3
+ */
4
+
5
+ const { BoundingBox, SegmentationMask } = require('./models');
6
+ const { createCanvas, loadImage } = require('canvas');
7
+
8
/**
 * Strip markdown code fencing from a model response so it can be passed to
 * JSON.parse. Handles both "```json" and bare "```" opening fences; text
 * without any fence is returned unchanged (trimmed).
 * @param {string} jsonOutput - Raw response text, possibly fenced
 * @returns {string} - Cleaned JSON string
 */
function parseJson(jsonOutput) {
  const lines = jsonOutput.split('\n');
  for (let i = 0; i < lines.length; i++) {
    const marker = lines[i].trim();
    // Models sometimes fence output despite instructions; accept a bare
    // "```" opening fence in addition to the "```json" variant.
    if (marker === '```json' || marker === '```') {
      jsonOutput = lines.slice(i + 1).join('\n');
      jsonOutput = jsonOutput.split('```')[0];
      break;
    }
  }
  return jsonOutput.trim();
}

/**
 * Parse bounding boxes from an API response.
 * @param {string} responseText - Raw response text from API
 * @returns {BoundingBox[]} - One BoundingBox per entry that carries a box_2d key
 * @throws {Error} If the cleaned response is not valid JSON or not a JSON array
 */
function parseBoundingBoxes(responseText) {
  const cleanedJson = parseJson(responseText);
  const data = JSON.parse(cleanedJson);

  // Fail fast with a clear message instead of the opaque TypeError that
  // iterating a non-array value would produce.
  if (!Array.isArray(data)) {
    throw new Error(`Expected a JSON array of detections, got ${typeof data}`);
  }

  return data
    .filter((item) => item && item.box_2d)
    .map((item) => BoundingBox.fromDict(item));
}
43
+
44
/**
 * Parse segmentation masks from an API response.
 *
 * Each response item is expected to carry `box_2d` ([y0, x0, y1, x1],
 * normalized 0-1000), a base64 PNG in `mask`, and a `label`. The PNG is
 * decoded, resized to the box's pixel size, and its alpha channel is copied
 * into a full-image-size byte buffer at the box's position.
 *
 * NOTE(review): a zero- or negative-area box would make createCanvas throw;
 * assumes well-formed boxes from the API — confirm upstream validation.
 *
 * @param {string} responseText - Raw response text
 * @param {number} imgHeight - Image height
 * @param {number} imgWidth - Image width
 * @returns {Promise<SegmentationMask[]>} - Array of SegmentationMask objects
 */
async function parseSegmentationMasks(responseText, imgHeight, imgWidth) {
  const cleanedJson = parseJson(responseText);
  const data = JSON.parse(cleanedJson);

  const masks = [];

  for (const item of data) {
    // Skip entries that lack either a box or a mask payload.
    if (!item.box_2d || !item.mask) continue;

    const box = item.box_2d;
    const label = item.label;

    // Convert normalized bbox (0-1000) to absolute pixel coordinates.
    const absY0 = Math.round((box[0] / 1000) * imgHeight);
    const absX0 = Math.round((box[1] / 1000) * imgWidth);
    const absY1 = Math.round((box[2] / 1000) * imgHeight);
    const absX1 = Math.round((box[3] / 1000) * imgWidth);

    const bboxWidth = absX1 - absX0;
    const bboxHeight = absY1 - absY0;

    // Decode base64 mask; tolerate an optional data-URL prefix.
    const maskBase64 = item.mask.replace(/^data:image\/png;base64,/, '');
    const maskBuffer = Buffer.from(maskBase64, 'base64');

    // Load the mask PNG and draw it scaled to the bbox size.
    const maskImg = await loadImage(maskBuffer);
    const canvas = createCanvas(bboxWidth, bboxHeight);
    const ctx = canvas.getContext('2d');
    ctx.drawImage(maskImg, 0, 0, bboxWidth, bboxHeight);

    // Extract the alpha channel (every 4th byte of RGBA) as the mask values.
    const imageData = ctx.getImageData(0, 0, bboxWidth, bboxHeight);
    const maskData = new Uint8Array(bboxWidth * bboxHeight);

    for (let i = 0; i < imageData.data.length; i += 4) {
      maskData[i / 4] = imageData.data[i + 3]; // Alpha channel
    }

    // Paste the bbox-sized mask into a full-image-size buffer (row-major,
    // one byte per pixel, zero outside the box).
    const fullMask = Buffer.alloc(imgHeight * imgWidth);

    for (let y = 0; y < bboxHeight; y++) {
      for (let x = 0; x < bboxWidth; x++) {
        const srcIdx = y * bboxWidth + x;
        const dstY = absY0 + y;
        const dstX = absX0 + x;

        // Clip writes that would fall outside the image bounds.
        if (dstY < imgHeight && dstX < imgWidth) {
          const dstIdx = dstY * imgWidth + dstX;
          fullMask[dstIdx] = maskData[srcIdx];
        }
      }
    }

    masks.push(new SegmentationMask(
      absY0, absX0, absY1, absX1, fullMask, label
    ));
  }

  return masks;
}
113
+
114
// Parsing helpers consumed by core.js (and exposed for direct use/testing).
module.exports = {
  parseJson,
  parseBoundingBoxes,
  parseSegmentationMasks
};