vsegments 0.1.2 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/QUICKSTART.md +8 -3
- package/README.md +33 -1
- package/TROUBLESHOOTING.md +256 -0
- package/coverage/clover.xml +55 -43
- package/coverage/coverage-final.json +1 -1
- package/coverage/lcov-report/core.js.html +132 -39
- package/coverage/lcov-report/index.html +15 -15
- package/coverage/lcov-report/index.js.html +1 -1
- package/coverage/lcov-report/models.js.html +1 -1
- package/coverage/lcov-report/utils.js.html +1 -1
- package/coverage/lcov-report/visualize.js.html +1 -1
- package/coverage/lcov.info +91 -73
- package/package.json +5 -4
- package/src/core.js +51 -20
package/package.json
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "vsegments",
|
|
3
|
-
"version": "0.1.2",
|
|
3
|
+
"version": "0.1.4",
|
|
4
4
|
"description": "Visual segmentation and bounding box detection using Google Gemini AI",
|
|
5
5
|
"main": "src/index.js",
|
|
6
6
|
"types": "src/index.d.ts",
|
|
7
7
|
"bin": {
|
|
8
|
-
"vsegments": "
|
|
8
|
+
"vsegments": "bin/cli.js"
|
|
9
9
|
},
|
|
10
10
|
"scripts": {
|
|
11
11
|
"test": "jest",
|
|
@@ -30,7 +30,7 @@
|
|
|
30
30
|
"license": "MIT",
|
|
31
31
|
"repository": {
|
|
32
32
|
"type": "git",
|
|
33
|
-
"url": "git@github.com
|
|
33
|
+
"url": "git+ssh://git@github.com/nxtphaseai/node_vsegments.git"
|
|
34
34
|
},
|
|
35
35
|
"homepage": "https://github.com/nxtphaseai/node_vsegments#readme",
|
|
36
36
|
"bugs": {
|
|
@@ -42,7 +42,8 @@
|
|
|
42
42
|
"dependencies": {
|
|
43
43
|
"@google/generative-ai": "^0.21.0",
|
|
44
44
|
"canvas": "^2.11.2",
|
|
45
|
-
"commander": "^12.0.0"
|
|
45
|
+
"commander": "^12.0.0",
|
|
46
|
+
"sharp": "^0.33.0"
|
|
46
47
|
},
|
|
47
48
|
"devDependencies": {
|
|
48
49
|
"@types/jest": "^30.0.0",
|
package/src/core.js
CHANGED
|
@@ -32,7 +32,7 @@ class VSegments {
|
|
|
32
32
|
);
|
|
33
33
|
}
|
|
34
34
|
|
|
35
|
-
this.model = options.model || 'gemini-
|
|
35
|
+
this.model = options.model || 'gemini-3-pro-preview';
|
|
36
36
|
this.temperature = options.temperature !== undefined ? options.temperature : 0.5;
|
|
37
37
|
this.maxObjects = options.maxObjects || 25;
|
|
38
38
|
|
|
@@ -60,9 +60,19 @@ If an object is present multiple times, name them according to their unique char
|
|
|
60
60
|
* @returns {Promise<Object>} - Image data for API
|
|
61
61
|
*/
|
|
62
62
|
async _loadImage(imagePath) {
|
|
63
|
-
|
|
63
|
+
let imageBuffer = await fs.readFile(imagePath);
|
|
64
|
+
let mimeType = this._getMimeType(imagePath);
|
|
65
|
+
|
|
66
|
+
// Convert SVG to PNG for API compatibility
|
|
67
|
+
if (mimeType === 'image/svg+xml') {
|
|
68
|
+
const sharp = require('sharp');
|
|
69
|
+
imageBuffer = await sharp(imageBuffer)
|
|
70
|
+
.png()
|
|
71
|
+
.toBuffer();
|
|
72
|
+
mimeType = 'image/png';
|
|
73
|
+
}
|
|
74
|
+
|
|
64
75
|
const base64Data = imageBuffer.toString('base64');
|
|
65
|
-
const mimeType = this._getMimeType(imagePath);
|
|
66
76
|
|
|
67
77
|
return {
|
|
68
78
|
inlineData: {
|
|
@@ -84,7 +94,8 @@ If an object is present multiple times, name them according to their unique char
|
|
|
84
94
|
'jpeg': 'image/jpeg',
|
|
85
95
|
'png': 'image/png',
|
|
86
96
|
'gif': 'image/gif',
|
|
87
|
-
'webp': 'image/webp'
|
|
97
|
+
'webp': 'image/webp',
|
|
98
|
+
'svg': 'image/svg+xml'
|
|
88
99
|
};
|
|
89
100
|
return mimeTypes[ext] || 'image/jpeg';
|
|
90
101
|
}
|
|
@@ -131,15 +142,25 @@ If an object is present multiple times, name them according to their unique char
|
|
|
131
142
|
});
|
|
132
143
|
|
|
133
144
|
// Generate content
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
145
|
+
let result, response, text;
|
|
146
|
+
try {
|
|
147
|
+
result = await model.generateContent({
|
|
148
|
+
contents: [{ role: 'user', parts: [{ text: prompt }, image] }],
|
|
149
|
+
generationConfig: {
|
|
150
|
+
temperature: this.temperature,
|
|
151
|
+
}
|
|
152
|
+
});
|
|
153
|
+
|
|
154
|
+
response = result.response;
|
|
155
|
+
text = response.text();
|
|
156
|
+
} catch (error) {
|
|
157
|
+
if (error.status === 500) {
|
|
158
|
+
throw new Error(
|
|
159
|
+
`Google Gemini API error (500): This may be a temporary issue. Try again later or verify your API key and image. Original error: ${error.message}`
|
|
160
|
+
);
|
|
138
161
|
}
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
const response = result.response;
|
|
142
|
-
const text = response.text();
|
|
162
|
+
throw error;
|
|
163
|
+
}
|
|
143
164
|
|
|
144
165
|
// Parse response
|
|
145
166
|
const boxes = parseBoundingBoxes(text);
|
|
@@ -176,15 +197,25 @@ If an object is present multiple times, name them according to their unique char
|
|
|
176
197
|
});
|
|
177
198
|
|
|
178
199
|
// Generate content
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
200
|
+
let result, response, text;
|
|
201
|
+
try {
|
|
202
|
+
result = await model.generateContent({
|
|
203
|
+
contents: [{ role: 'user', parts: [{ text: prompt }, image] }],
|
|
204
|
+
generationConfig: {
|
|
205
|
+
temperature: this.temperature,
|
|
206
|
+
}
|
|
207
|
+
});
|
|
208
|
+
|
|
209
|
+
response = result.response;
|
|
210
|
+
text = response.text();
|
|
211
|
+
} catch (error) {
|
|
212
|
+
if (error.status === 500) {
|
|
213
|
+
throw new Error(
|
|
214
|
+
`Google Gemini API error (500): This may be a temporary issue. Try again later or verify your API key and image. Original error: ${error.message}`
|
|
215
|
+
);
|
|
183
216
|
}
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
const response = result.response;
|
|
187
|
-
const text = response.text();
|
|
217
|
+
throw error;
|
|
218
|
+
}
|
|
188
219
|
|
|
189
220
|
// Parse response
|
|
190
221
|
const boxes = parseBoundingBoxes(text);
|