vsegments 0.1.5 → 0.1.6

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,12 @@
1
+ {
2
+ "permissions": {
3
+ "allow": [
4
+ "Bash(npm rebuild:*)",
5
+ "Bash(vsegments:*)",
6
+ "Bash(node -e:*)",
7
+ "Bash(git add:*)",
8
+ "Bash(git commit:*)",
9
+ "Bash(npm publish:*)"
10
+ ]
11
+ }
12
+ }
package/bin/cli.js CHANGED
@@ -11,11 +11,13 @@ const VSegments = require('../src/index');
11
11
 
12
12
  const program = new Command();
13
13
 
14
+ const pkg = require('../package.json');
15
+
14
16
  program
15
17
  .name('vsegments')
16
18
  .description('Visual segmentation and bounding box detection using Google Gemini AI')
17
- .version('0.1.0')
18
- .requiredOption('-f, --file <image>', 'Path to input image file')
19
+ .version(pkg.version)
20
+ .option('-f, --file <image>', 'Path to input image file')
19
21
  .option('--segment', 'Perform segmentation instead of bounding box detection')
20
22
  .option('--api-key <key>', 'Google API key (default: GOOGLE_API_KEY env var)')
21
23
  .option('-m, --model <model>', 'Model name to use', 'gemini-flash-latest')
@@ -38,6 +40,42 @@ program.parse(process.argv);
38
40
 
39
41
  const options = program.opts();
40
42
 
43
+ // Show welcome message if no file provided
44
+ if (!options.file) {
45
+ console.log(`
46
+ vsegments v${pkg.version}
47
+ Visual segmentation and bounding box detection using Google Gemini AI
48
+
49
+ QUICK START
50
+ vsegments -f image.jpg Detect objects with bounding boxes
51
+ vsegments -f image.jpg --segment Perform segmentation with masks
52
+ vsegments -f image.jpg -o output.png Save visualization to file
53
+
54
+ CUSTOM PROMPTS
55
+ vsegments -f photo.jpg -p "find all faces"
56
+ vsegments -f room.jpg -p "furniture items"
57
+
58
+ OUTPUT OPTIONS
59
+ --json results.json Export detection data as JSON
60
+ --compact Print minimal output: "1. label [x y xx yy]"
61
+ --raw Show raw API response
62
+
63
+ CONFIGURATION
64
+ --api-key <key> Google API key (or set GOOGLE_API_KEY env var)
65
+ --model <name> Model to use (default: gemini-flash-latest)
66
+ --temperature <0-1> Sampling temperature (default: 0.5)
67
+ --max-objects <n> Max objects to detect (default: 25)
68
+
69
+ VISUALIZATION
70
+ --line-width <n> Bounding box line width (default: 4)
71
+ --font-size <n> Label font size (default: 14)
72
+ --alpha <0-1> Mask transparency (default: 0.7)
73
+
74
+ Run 'vsegments --help' for full options.
75
+ `);
76
+ process.exit(0);
77
+ }
78
+
41
79
  async function main() {
42
80
  try {
43
81
  // Validate file exists
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "vsegments",
3
- "version": "0.1.5",
3
+ "version": "0.1.6",
4
4
  "description": "Visual segmentation and bounding box detection using Google Gemini AI",
5
5
  "main": "src/index.js",
6
6
  "types": "src/index.d.ts",
package/src/core.js CHANGED
@@ -45,8 +45,7 @@ Return bounding boxes as a JSON array with labels.
45
45
  Never return masks or code fencing. Limit to ${this.maxObjects} objects.
46
46
 
47
47
  Follow this intuition:
48
- If an object is present multiple times, name them according to their unique characteristic
49
- (colors, size, position, unique characteristics, etc..).
48
+
50
49
  If the object is a face number them according to how someone would draw the features of the face. First the left eye, then the right eye
51
50
  then the nose, then the left ear, then the right ear, then the mouth, then the chin.
52
51
  Same with Animal faces.
@@ -54,6 +53,8 @@ Same with Animal faces.
54
53
  General guideline:
55
54
  Follow a drawing order intuitively. People usually do not draw first an eye, and then the background and then the shirt but they
56
55
  follow a more natural order with symmetry in mind.
56
+
57
+ No more than 10 features!
57
58
  `.trim();
58
59
 
59
60
  // Safety settings
@@ -77,7 +78,33 @@ follow a more natural order with symmetry in mind.
77
78
  // Convert SVG to PNG for API compatibility
78
79
  if (mimeType === 'image/svg+xml') {
79
80
  const sharp = require('sharp');
80
- imageBuffer = await sharp(imageBuffer)
81
+
82
+ // Remove common registration/cut line colors from SVG before conversion
83
+ // These colors are often used for print registration marks, cut lines, etc.
84
+ let svgString = imageBuffer.toString('utf-8');
85
+ const registrationColors = [
86
+ '#ec008c', '#ED008C', // Magenta/pink registration
87
+ '#00ff00', '#00FF00', // Green registration
88
+ '#ff0000', '#FF0000', // Red registration (when used for cut lines)
89
+ ];
90
+
91
+ // Remove elements with registration colors
92
+ for (const color of registrationColors) {
93
+ // Remove stroke colors
94
+ const strokeRegex = new RegExp(`stroke="${color}"`, 'gi');
95
+ svgString = svgString.replace(strokeRegex, 'stroke="none"');
96
+ // Remove fill colors
97
+ const fillRegex = new RegExp(`fill="${color}"`, 'gi');
98
+ svgString = svgString.replace(fillRegex, 'fill="none"');
99
+ }
100
+
101
+ // Increase thin stroke widths for better visibility
102
+ svgString = svgString.replace(/stroke-width:\s*0\.5pt/gi, 'stroke-width: 2pt');
103
+ svgString = svgString.replace(/stroke-width="0\.5pt"/gi, 'stroke-width="2pt"');
104
+
105
+ imageBuffer = await sharp(Buffer.from(svgString), { density: 300 })
106
+ .resize(1024, 1024, { fit: 'inside', withoutEnlargement: false })
107
+ .flatten({ background: { r: 245, g: 245, b: 245 } }) // Light gray background for better contrast
81
108
  .png()
82
109
  .toBuffer();
83
110
  mimeType = 'image/png';