vsegments 0.1.4 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +3 -2
- package/src/core.js +50 -39
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "vsegments",
|
|
3
|
-
"version": "0.1.4",
|
|
3
|
+
"version": "0.1.5",
|
|
4
4
|
"description": "Visual segmentation and bounding box detection using Google Gemini AI",
|
|
5
5
|
"main": "src/index.js",
|
|
6
6
|
"types": "src/index.d.ts",
|
|
@@ -43,7 +43,8 @@
|
|
|
43
43
|
"@google/generative-ai": "^0.21.0",
|
|
44
44
|
"canvas": "^2.11.2",
|
|
45
45
|
"commander": "^12.0.0",
|
|
46
|
-
"sharp": "^0.33.0"
|
|
46
|
+
"sharp": "^0.33.0",
|
|
47
|
+
"vsegments": "^0.1.4"
|
|
47
48
|
},
|
|
48
49
|
"devDependencies": {
|
|
49
50
|
"@types/jest": "^30.0.0",
|
package/src/core.js
CHANGED
|
@@ -7,11 +7,11 @@ const { loadImage } = require('canvas');
|
|
|
7
7
|
const fs = require('fs').promises;
|
|
8
8
|
const { SegmentationResult } = require('./models');
|
|
9
9
|
const { parseBoundingBoxes, parseSegmentationMasks } = require('./utils');
|
|
10
|
-
const {
|
|
11
|
-
loadImageToCanvas,
|
|
12
|
-
plotBoundingBoxes,
|
|
10
|
+
const {
|
|
11
|
+
loadImageToCanvas,
|
|
12
|
+
plotBoundingBoxes,
|
|
13
13
|
plotSegmentationMasks,
|
|
14
|
-
saveCanvas
|
|
14
|
+
saveCanvas
|
|
15
15
|
} = require('./visualize');
|
|
16
16
|
|
|
17
17
|
class VSegments {
|
|
@@ -25,26 +25,37 @@ class VSegments {
|
|
|
25
25
|
*/
|
|
26
26
|
constructor(options = {}) {
|
|
27
27
|
this.apiKey = options.apiKey || process.env.GOOGLE_API_KEY;
|
|
28
|
-
|
|
28
|
+
|
|
29
29
|
if (!this.apiKey) {
|
|
30
30
|
throw new Error(
|
|
31
31
|
'API key must be provided or set in GOOGLE_API_KEY environment variable'
|
|
32
32
|
);
|
|
33
33
|
}
|
|
34
|
-
|
|
34
|
+
|
|
35
35
|
this.model = options.model || 'gemini-3-pro-preview';
|
|
36
36
|
this.temperature = options.temperature !== undefined ? options.temperature : 0.5;
|
|
37
37
|
this.maxObjects = options.maxObjects || 25;
|
|
38
|
-
|
|
38
|
+
|
|
39
39
|
// Initialize Google AI client
|
|
40
40
|
this.genAI = new GoogleGenerativeAI(this.apiKey);
|
|
41
|
-
|
|
41
|
+
|
|
42
42
|
// Default system instructions
|
|
43
43
|
this.defaultSystemInstructions = `
|
|
44
|
-
Return bounding boxes as a JSON array with labels.
|
|
45
|
-
|
|
44
|
+
Return bounding boxes as a JSON array with labels.
|
|
45
|
+
Never return masks or code fencing. Limit to ${this.maxObjects} objects.
|
|
46
|
+
|
|
47
|
+
Follow this intuition:
|
|
48
|
+
If an object is present multiple times, name them according to their unique characteristic
|
|
49
|
+
(colors, size, position, unique characteristics, etc..).
|
|
50
|
+
If the object is a face number them according to how someone would draw the features of the face. First the left eye, then the right eye
|
|
51
|
+
then the nose, then the left ear, then the right ear, then the mouth, then the chin.
|
|
52
|
+
Same with Animal faces.
|
|
53
|
+
|
|
54
|
+
General guideline:
|
|
55
|
+
Follow a drawing order intuitively. People usually do not draw first an eye, and then the background and then the shirt but they
|
|
56
|
+
follow a more natural order with symmetry in mind.
|
|
46
57
|
`.trim();
|
|
47
|
-
|
|
58
|
+
|
|
48
59
|
// Safety settings
|
|
49
60
|
this.safetySettings = [
|
|
50
61
|
{
|
|
@@ -53,7 +64,7 @@ If an object is present multiple times, name them according to their unique char
|
|
|
53
64
|
},
|
|
54
65
|
];
|
|
55
66
|
}
|
|
56
|
-
|
|
67
|
+
|
|
57
68
|
/**
|
|
58
69
|
* Load image from file and convert to format for API
|
|
59
70
|
* @param {string} imagePath - Path to image file
|
|
@@ -62,7 +73,7 @@ If an object is present multiple times, name them according to their unique char
|
|
|
62
73
|
async _loadImage(imagePath) {
|
|
63
74
|
let imageBuffer = await fs.readFile(imagePath);
|
|
64
75
|
let mimeType = this._getMimeType(imagePath);
|
|
65
|
-
|
|
76
|
+
|
|
66
77
|
// Convert SVG to PNG for API compatibility
|
|
67
78
|
if (mimeType === 'image/svg+xml') {
|
|
68
79
|
const sharp = require('sharp');
|
|
@@ -71,9 +82,9 @@ If an object is present multiple times, name them according to their unique char
|
|
|
71
82
|
.toBuffer();
|
|
72
83
|
mimeType = 'image/png';
|
|
73
84
|
}
|
|
74
|
-
|
|
85
|
+
|
|
75
86
|
const base64Data = imageBuffer.toString('base64');
|
|
76
|
-
|
|
87
|
+
|
|
77
88
|
return {
|
|
78
89
|
inlineData: {
|
|
79
90
|
data: base64Data,
|
|
@@ -81,7 +92,7 @@ If an object is present multiple times, name them according to their unique char
|
|
|
81
92
|
}
|
|
82
93
|
};
|
|
83
94
|
}
|
|
84
|
-
|
|
95
|
+
|
|
85
96
|
/**
|
|
86
97
|
* Get MIME type from file extension
|
|
87
98
|
* @param {string} filePath - File path
|
|
@@ -99,7 +110,7 @@ If an object is present multiple times, name them according to their unique char
|
|
|
99
110
|
};
|
|
100
111
|
return mimeTypes[ext] || 'image/jpeg';
|
|
101
112
|
}
|
|
102
|
-
|
|
113
|
+
|
|
103
114
|
/**
|
|
104
115
|
* Get system instructions with custom additions
|
|
105
116
|
* @param {string} customInstructions - Additional instructions
|
|
@@ -107,14 +118,14 @@ If an object is present multiple times, name them according to their unique char
|
|
|
107
118
|
*/
|
|
108
119
|
_getSystemInstructions(customInstructions) {
|
|
109
120
|
let instructions = this.defaultSystemInstructions;
|
|
110
|
-
|
|
121
|
+
|
|
111
122
|
if (customInstructions) {
|
|
112
123
|
instructions += '\n' + customInstructions;
|
|
113
124
|
}
|
|
114
|
-
|
|
125
|
+
|
|
115
126
|
return instructions;
|
|
116
127
|
}
|
|
117
|
-
|
|
128
|
+
|
|
118
129
|
/**
|
|
119
130
|
* Detect bounding boxes in an image
|
|
120
131
|
* @param {string} imagePath - Path to image file
|
|
@@ -130,17 +141,17 @@ If an object is present multiple times, name them according to their unique char
|
|
|
130
141
|
customInstructions = null,
|
|
131
142
|
maxSize = 1024
|
|
132
143
|
} = options;
|
|
133
|
-
|
|
144
|
+
|
|
134
145
|
// Load image
|
|
135
146
|
const image = await this._loadImage(imagePath);
|
|
136
|
-
|
|
147
|
+
|
|
137
148
|
// Get model
|
|
138
149
|
const model = this.genAI.getGenerativeModel({
|
|
139
150
|
model: this.model,
|
|
140
151
|
safetySettings: this.safetySettings,
|
|
141
152
|
systemInstruction: this._getSystemInstructions(customInstructions)
|
|
142
153
|
});
|
|
143
|
-
|
|
154
|
+
|
|
144
155
|
// Generate content
|
|
145
156
|
let result, response, text;
|
|
146
157
|
try {
|
|
@@ -150,7 +161,7 @@ If an object is present multiple times, name them according to their unique char
|
|
|
150
161
|
temperature: this.temperature,
|
|
151
162
|
}
|
|
152
163
|
});
|
|
153
|
-
|
|
164
|
+
|
|
154
165
|
response = result.response;
|
|
155
166
|
text = response.text();
|
|
156
167
|
} catch (error) {
|
|
@@ -161,13 +172,13 @@ If an object is present multiple times, name them according to their unique char
|
|
|
161
172
|
}
|
|
162
173
|
throw error;
|
|
163
174
|
}
|
|
164
|
-
|
|
175
|
+
|
|
165
176
|
// Parse response
|
|
166
177
|
const boxes = parseBoundingBoxes(text);
|
|
167
|
-
|
|
178
|
+
|
|
168
179
|
return new SegmentationResult(boxes, null, text);
|
|
169
180
|
}
|
|
170
|
-
|
|
181
|
+
|
|
171
182
|
/**
|
|
172
183
|
* Perform segmentation on an image
|
|
173
184
|
* @param {string} imagePath - Path to image file
|
|
@@ -181,21 +192,21 @@ If an object is present multiple times, name them according to their unique char
|
|
|
181
192
|
prompt = 'Give the segmentation masks for the objects. Output a JSON list of segmentation masks where each entry contains the 2D bounding box in the key "box_2d", the segmentation mask in key "mask", and the text label in the key "label". Use descriptive labels.',
|
|
182
193
|
maxSize = 1024
|
|
183
194
|
} = options;
|
|
184
|
-
|
|
195
|
+
|
|
185
196
|
// Load image
|
|
186
197
|
const image = await this._loadImage(imagePath);
|
|
187
|
-
|
|
198
|
+
|
|
188
199
|
// Get image dimensions
|
|
189
200
|
const img = await loadImage(imagePath);
|
|
190
201
|
const imgWidth = img.width;
|
|
191
202
|
const imgHeight = img.height;
|
|
192
|
-
|
|
203
|
+
|
|
193
204
|
// Get model (no system instructions for segmentation)
|
|
194
205
|
const model = this.genAI.getGenerativeModel({
|
|
195
206
|
model: this.model,
|
|
196
207
|
safetySettings: this.safetySettings
|
|
197
208
|
});
|
|
198
|
-
|
|
209
|
+
|
|
199
210
|
// Generate content
|
|
200
211
|
let result, response, text;
|
|
201
212
|
try {
|
|
@@ -205,7 +216,7 @@ If an object is present multiple times, name them according to their unique char
|
|
|
205
216
|
temperature: this.temperature,
|
|
206
217
|
}
|
|
207
218
|
});
|
|
208
|
-
|
|
219
|
+
|
|
209
220
|
response = result.response;
|
|
210
221
|
text = response.text();
|
|
211
222
|
} catch (error) {
|
|
@@ -216,14 +227,14 @@ If an object is present multiple times, name them according to their unique char
|
|
|
216
227
|
}
|
|
217
228
|
throw error;
|
|
218
229
|
}
|
|
219
|
-
|
|
230
|
+
|
|
220
231
|
// Parse response
|
|
221
232
|
const boxes = parseBoundingBoxes(text);
|
|
222
233
|
const masks = await parseSegmentationMasks(text, imgHeight, imgWidth);
|
|
223
|
-
|
|
234
|
+
|
|
224
235
|
return new SegmentationResult(boxes, masks, text);
|
|
225
236
|
}
|
|
226
|
-
|
|
237
|
+
|
|
227
238
|
/**
|
|
228
239
|
* Visualize detection/segmentation results
|
|
229
240
|
* @param {string} imagePath - Path to original image
|
|
@@ -242,10 +253,10 @@ If an object is present multiple times, name them according to their unique char
|
|
|
242
253
|
fontSize = 14,
|
|
243
254
|
alpha = 0.7
|
|
244
255
|
} = options;
|
|
245
|
-
|
|
256
|
+
|
|
246
257
|
// Load image to canvas
|
|
247
258
|
const canvas = await loadImageToCanvas(imagePath, 2048);
|
|
248
|
-
|
|
259
|
+
|
|
249
260
|
// Draw visualizations
|
|
250
261
|
if (result.masks) {
|
|
251
262
|
plotSegmentationMasks(canvas, result.masks, {
|
|
@@ -259,12 +270,12 @@ If an object is present multiple times, name them according to their unique char
|
|
|
259
270
|
fontSize
|
|
260
271
|
});
|
|
261
272
|
}
|
|
262
|
-
|
|
273
|
+
|
|
263
274
|
// Save if requested
|
|
264
275
|
if (outputPath) {
|
|
265
276
|
await saveCanvas(canvas, outputPath);
|
|
266
277
|
}
|
|
267
|
-
|
|
278
|
+
|
|
268
279
|
return canvas;
|
|
269
280
|
}
|
|
270
281
|
}
|