vsegments 0.1.2 → 0.1.4

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,11 +1,11 @@
1
1
  {
2
2
  "name": "vsegments",
3
- "version": "0.1.2",
3
+ "version": "0.1.4",
4
4
  "description": "Visual segmentation and bounding box detection using Google Gemini AI",
5
5
  "main": "src/index.js",
6
6
  "types": "src/index.d.ts",
7
7
  "bin": {
8
- "vsegments": "./bin/cli.js"
8
+ "vsegments": "bin/cli.js"
9
9
  },
10
10
  "scripts": {
11
11
  "test": "jest",
@@ -30,7 +30,7 @@
30
30
  "license": "MIT",
31
31
  "repository": {
32
32
  "type": "git",
33
- "url": "git@github.com:nxtphaseai/node_vsegments.git"
33
+ "url": "git+ssh://git@github.com/nxtphaseai/node_vsegments.git"
34
34
  },
35
35
  "homepage": "https://github.com/nxtphaseai/node_vsegments#readme",
36
36
  "bugs": {
@@ -42,7 +42,8 @@
42
42
  "dependencies": {
43
43
  "@google/generative-ai": "^0.21.0",
44
44
  "canvas": "^2.11.2",
45
- "commander": "^12.0.0"
45
+ "commander": "^12.0.0",
46
+ "sharp": "^0.33.0"
46
47
  },
47
48
  "devDependencies": {
48
49
  "@types/jest": "^30.0.0",
package/src/core.js CHANGED
@@ -32,7 +32,7 @@ class VSegments {
32
32
  );
33
33
  }
34
34
 
35
- this.model = options.model || 'gemini-flash-latest';
35
+ this.model = options.model || 'gemini-3-pro-preview';
36
36
  this.temperature = options.temperature !== undefined ? options.temperature : 0.5;
37
37
  this.maxObjects = options.maxObjects || 25;
38
38
 
@@ -60,9 +60,19 @@ If an object is present multiple times, name them according to their unique char
60
60
  * @returns {Promise<Object>} - Image data for API
61
61
  */
62
62
  async _loadImage(imagePath) {
63
- const imageBuffer = await fs.readFile(imagePath);
63
+ let imageBuffer = await fs.readFile(imagePath);
64
+ let mimeType = this._getMimeType(imagePath);
65
+
66
+ // Convert SVG to PNG for API compatibility
67
+ if (mimeType === 'image/svg+xml') {
68
+ const sharp = require('sharp');
69
+ imageBuffer = await sharp(imageBuffer)
70
+ .png()
71
+ .toBuffer();
72
+ mimeType = 'image/png';
73
+ }
74
+
64
75
  const base64Data = imageBuffer.toString('base64');
65
- const mimeType = this._getMimeType(imagePath);
66
76
 
67
77
  return {
68
78
  inlineData: {
@@ -84,7 +94,8 @@ If an object is present multiple times, name them according to their unique char
84
94
  'jpeg': 'image/jpeg',
85
95
  'png': 'image/png',
86
96
  'gif': 'image/gif',
87
- 'webp': 'image/webp'
97
+ 'webp': 'image/webp',
98
+ 'svg': 'image/svg+xml'
88
99
  };
89
100
  return mimeTypes[ext] || 'image/jpeg';
90
101
  }
@@ -131,15 +142,25 @@ If an object is present multiple times, name them according to their unique char
131
142
  });
132
143
 
133
144
  // Generate content
134
- const result = await model.generateContent({
135
- contents: [{ role: 'user', parts: [{ text: prompt }, image] }],
136
- generationConfig: {
137
- temperature: this.temperature,
145
+ let result, response, text;
146
+ try {
147
+ result = await model.generateContent({
148
+ contents: [{ role: 'user', parts: [{ text: prompt }, image] }],
149
+ generationConfig: {
150
+ temperature: this.temperature,
151
+ }
152
+ });
153
+
154
+ response = result.response;
155
+ text = response.text();
156
+ } catch (error) {
157
+ if (error.status === 500) {
158
+ throw new Error(
159
+ `Google Gemini API error (500): This may be a temporary issue. Try again later or verify your API key and image. Original error: ${error.message}`
160
+ );
138
161
  }
139
- });
140
-
141
- const response = result.response;
142
- const text = response.text();
162
+ throw error;
163
+ }
143
164
 
144
165
  // Parse response
145
166
  const boxes = parseBoundingBoxes(text);
@@ -176,15 +197,25 @@ If an object is present multiple times, name them according to their unique char
176
197
  });
177
198
 
178
199
  // Generate content
179
- const result = await model.generateContent({
180
- contents: [{ role: 'user', parts: [{ text: prompt }, image] }],
181
- generationConfig: {
182
- temperature: this.temperature,
200
+ let result, response, text;
201
+ try {
202
+ result = await model.generateContent({
203
+ contents: [{ role: 'user', parts: [{ text: prompt }, image] }],
204
+ generationConfig: {
205
+ temperature: this.temperature,
206
+ }
207
+ });
208
+
209
+ response = result.response;
210
+ text = response.text();
211
+ } catch (error) {
212
+ if (error.status === 500) {
213
+ throw new Error(
214
+ `Google Gemini API error (500): This may be a temporary issue. Try again later or verify your API key and image. Original error: ${error.message}`
215
+ );
183
216
  }
184
- });
185
-
186
- const response = result.response;
187
- const text = response.text();
217
+ throw error;
218
+ }
188
219
 
189
220
  // Parse response
190
221
  const boxes = parseBoundingBoxes(text);