vsegments 0.1.5 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +13 -0
- package/bin/cli.js +40 -2
- package/package.json +1 -1
- package/src/core.js +38 -7
- package/src/index.d.ts +2 -0
package/bin/cli.js
CHANGED
|
@@ -11,11 +11,13 @@ const VSegments = require('../src/index');
|
|
|
11
11
|
|
|
12
12
|
const program = new Command();
|
|
13
13
|
|
|
14
|
+
const pkg = require('../package.json');
|
|
15
|
+
|
|
14
16
|
program
|
|
15
17
|
.name('vsegments')
|
|
16
18
|
.description('Visual segmentation and bounding box detection using Google Gemini AI')
|
|
17
|
-
.version(
|
|
18
|
-
.
|
|
19
|
+
.version(pkg.version)
|
|
20
|
+
.option('-f, --file <image>', 'Path to input image file')
|
|
19
21
|
.option('--segment', 'Perform segmentation instead of bounding box detection')
|
|
20
22
|
.option('--api-key <key>', 'Google API key (default: GOOGLE_API_KEY env var)')
|
|
21
23
|
.option('-m, --model <model>', 'Model name to use', 'gemini-flash-latest')
|
|
@@ -38,6 +40,42 @@ program.parse(process.argv);
|
|
|
38
40
|
|
|
39
41
|
const options = program.opts();
|
|
40
42
|
|
|
43
|
+
// Show welcome message if no file provided
|
|
44
|
+
if (!options.file) {
|
|
45
|
+
console.log(`
|
|
46
|
+
vsegments v${pkg.version}
|
|
47
|
+
Visual segmentation and bounding box detection using Google Gemini AI
|
|
48
|
+
|
|
49
|
+
QUICK START
|
|
50
|
+
vsegments -f image.jpg Detect objects with bounding boxes
|
|
51
|
+
vsegments -f image.jpg --segment Perform segmentation with masks
|
|
52
|
+
vsegments -f image.jpg -o output.png Save visualization to file
|
|
53
|
+
|
|
54
|
+
CUSTOM PROMPTS
|
|
55
|
+
vsegments -f photo.jpg -p "find all faces"
|
|
56
|
+
vsegments -f room.jpg -p "furniture items"
|
|
57
|
+
|
|
58
|
+
OUTPUT OPTIONS
|
|
59
|
+
--json results.json Export detection data as JSON
|
|
60
|
+
--compact Print minimal output: "1. label [x y xx yy]"
|
|
61
|
+
--raw Show raw API response
|
|
62
|
+
|
|
63
|
+
CONFIGURATION
|
|
64
|
+
--api-key <key> Google API key (or set GOOGLE_API_KEY env var)
|
|
65
|
+
--model <name> Model to use (default: gemini-flash-latest)
|
|
66
|
+
--temperature <0-1> Sampling temperature (default: 0.5)
|
|
67
|
+
--max-objects <n> Max objects to detect (default: 25)
|
|
68
|
+
|
|
69
|
+
VISUALIZATION
|
|
70
|
+
--line-width <n> Bounding box line width (default: 4)
|
|
71
|
+
--font-size <n> Label font size (default: 14)
|
|
72
|
+
--alpha <0-1> Mask transparency (default: 0.7)
|
|
73
|
+
|
|
74
|
+
Run 'vsegments --help' for full options.
|
|
75
|
+
`);
|
|
76
|
+
process.exit(0);
|
|
77
|
+
}
|
|
78
|
+
|
|
41
79
|
async function main() {
|
|
42
80
|
try {
|
|
43
81
|
// Validate file exists
|
package/package.json
CHANGED
package/src/core.js
CHANGED
|
@@ -41,12 +41,11 @@ class VSegments {
|
|
|
41
41
|
|
|
42
42
|
// Default system instructions
|
|
43
43
|
this.defaultSystemInstructions = `
|
|
44
|
-
Return bounding boxes as a JSON array with labels.
|
|
45
|
-
Never return masks or code fencing. Limit to ${this.maxObjects} objects.
|
|
44
|
+
Return bounding boxes as a JSON array with labels.
|
|
45
|
+
Never return masks or code fencing. Limit to ${this.maxObjects} objects maximum.
|
|
46
46
|
|
|
47
47
|
Follow this intuition:
|
|
48
|
-
|
|
49
|
-
(colors, size, position, unique characteristics, etc..).
|
|
48
|
+
|
|
50
49
|
If the object is a face number them according to how someone would draw the features of the face. First the left eye, then the right eye
|
|
51
50
|
then the nose, then the left ear, then the right ear, then the mouth, then the chin.
|
|
52
51
|
Same with Animal faces.
|
|
@@ -54,6 +53,8 @@ Same with Animal faces.
|
|
|
54
53
|
General guideline:
|
|
55
54
|
Follow a drawing order intuitively. People usually do not draw first an eye, and then the background and then the shirt but they
|
|
56
55
|
follow a more natural order with symmetry in mind.
|
|
56
|
+
|
|
57
|
+
Return no more than ${this.maxObjects} features!
|
|
57
58
|
`.trim();
|
|
58
59
|
|
|
59
60
|
// Safety settings
|
|
@@ -77,7 +78,33 @@ follow a more natural order with symmetry in mind.
|
|
|
77
78
|
// Convert SVG to PNG for API compatibility
|
|
78
79
|
if (mimeType === 'image/svg+xml') {
|
|
79
80
|
const sharp = require('sharp');
|
|
80
|
-
|
|
81
|
+
|
|
82
|
+
// Remove common registration/cut line colors from SVG before conversion
|
|
83
|
+
// These colors are often used for print registration marks, cut lines, etc.
|
|
84
|
+
let svgString = imageBuffer.toString('utf-8');
|
|
85
|
+
const registrationColors = [
|
|
86
|
+
'#ec008c', '#ED008C', // Magenta/pink registration
|
|
87
|
+
'#00ff00', '#00FF00', // Green registration
|
|
88
|
+
'#ff0000', '#FF0000', // Red registration (when used for cut lines)
|
|
89
|
+
];
|
|
90
|
+
|
|
91
|
+
// Remove elements with registration colors
|
|
92
|
+
for (const color of registrationColors) {
|
|
93
|
+
// Remove stroke colors
|
|
94
|
+
const strokeRegex = new RegExp(`stroke="${color}"`, 'gi');
|
|
95
|
+
svgString = svgString.replace(strokeRegex, 'stroke="none"');
|
|
96
|
+
// Remove fill colors
|
|
97
|
+
const fillRegex = new RegExp(`fill="${color}"`, 'gi');
|
|
98
|
+
svgString = svgString.replace(fillRegex, 'fill="none"');
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
// Increase thin stroke widths for better visibility
|
|
102
|
+
svgString = svgString.replace(/stroke-width:\s*0\.5pt/gi, 'stroke-width: 2pt');
|
|
103
|
+
svgString = svgString.replace(/stroke-width="0\.5pt"/gi, 'stroke-width="2pt"');
|
|
104
|
+
|
|
105
|
+
imageBuffer = await sharp(Buffer.from(svgString), { density: 300 })
|
|
106
|
+
.resize(1024, 1024, { fit: 'inside', withoutEnlargement: false })
|
|
107
|
+
.flatten({ background: { r: 245, g: 245, b: 245 } }) // Light gray background for better contrast
|
|
81
108
|
.png()
|
|
82
109
|
.toBuffer();
|
|
83
110
|
mimeType = 'image/png';
|
|
@@ -132,6 +159,7 @@ follow a more natural order with symmetry in mind.
|
|
|
132
159
|
* @param {Object} options - Detection options
|
|
133
160
|
* @param {string} options.prompt - Custom prompt
|
|
134
161
|
* @param {string} options.customInstructions - Additional system instructions
|
|
162
|
+
* @param {number} options.temperature - Sampling temperature (overrides constructor value)
|
|
135
163
|
* @param {number} options.maxSize - Maximum image dimension
|
|
136
164
|
* @returns {Promise<SegmentationResult>} - Detection results
|
|
137
165
|
*/
|
|
@@ -139,6 +167,7 @@ follow a more natural order with symmetry in mind.
|
|
|
139
167
|
const {
|
|
140
168
|
prompt = 'Detect the 2d bounding boxes',
|
|
141
169
|
customInstructions = null,
|
|
170
|
+
temperature = this.temperature,
|
|
142
171
|
maxSize = 1024
|
|
143
172
|
} = options;
|
|
144
173
|
|
|
@@ -158,7 +187,7 @@ follow a more natural order with symmetry in mind.
|
|
|
158
187
|
result = await model.generateContent({
|
|
159
188
|
contents: [{ role: 'user', parts: [{ text: prompt }, image] }],
|
|
160
189
|
generationConfig: {
|
|
161
|
-
temperature:
|
|
190
|
+
temperature: temperature,
|
|
162
191
|
}
|
|
163
192
|
});
|
|
164
193
|
|
|
@@ -184,12 +213,14 @@ follow a more natural order with symmetry in mind.
|
|
|
184
213
|
* @param {string} imagePath - Path to image file
|
|
185
214
|
* @param {Object} options - Segmentation options
|
|
186
215
|
* @param {string} options.prompt - Custom prompt
|
|
216
|
+
* @param {number} options.temperature - Sampling temperature (overrides constructor value)
|
|
187
217
|
* @param {number} options.maxSize - Maximum image dimension
|
|
188
218
|
* @returns {Promise<SegmentationResult>} - Segmentation results
|
|
189
219
|
*/
|
|
190
220
|
async segment(imagePath, options = {}) {
|
|
191
221
|
const {
|
|
192
222
|
prompt = 'Give the segmentation masks for the objects. Output a JSON list of segmentation masks where each entry contains the 2D bounding box in the key "box_2d", the segmentation mask in key "mask", and the text label in the key "label". Use descriptive labels.',
|
|
223
|
+
temperature = this.temperature,
|
|
193
224
|
maxSize = 1024
|
|
194
225
|
} = options;
|
|
195
226
|
|
|
@@ -213,7 +244,7 @@ follow a more natural order with symmetry in mind.
|
|
|
213
244
|
result = await model.generateContent({
|
|
214
245
|
contents: [{ role: 'user', parts: [{ text: prompt }, image] }],
|
|
215
246
|
generationConfig: {
|
|
216
|
-
temperature:
|
|
247
|
+
temperature: temperature,
|
|
217
248
|
}
|
|
218
249
|
});
|
|
219
250
|
|
package/src/index.d.ts
CHANGED
|
@@ -56,11 +56,13 @@ export interface VSegmentsOptions {
|
|
|
56
56
|
export interface DetectBoxesOptions {
|
|
57
57
|
prompt?: string;
|
|
58
58
|
customInstructions?: string;
|
|
59
|
+
temperature?: number;
|
|
59
60
|
maxSize?: number;
|
|
60
61
|
}
|
|
61
62
|
|
|
62
63
|
export interface SegmentOptions {
|
|
63
64
|
prompt?: string;
|
|
65
|
+
temperature?: number;
|
|
64
66
|
maxSize?: number;
|
|
65
67
|
}
|
|
66
68
|
|