@coreviz/sdk 1.0.9 → 1.0.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/coreviz.d.ts CHANGED
@@ -14,6 +14,7 @@ export interface TagOptions {
14
14
  prompt: string;
15
15
  options?: string[];
16
16
  multiple?: boolean;
17
+ mode?: 'api' | 'local';
17
18
  }
18
19
  export interface TagResponse {
19
20
  tags: string[];
@@ -35,7 +36,9 @@ export declare class CoreViz {
35
36
  describe(image: string, options?: DescribeOptions): Promise<string>;
36
37
  edit(image: string, options: EditOptions): Promise<string>;
37
38
  tag(image: string, options: TagOptions): Promise<TagResponse>;
39
+ private tagLocal;
38
40
  embed(input: string, options?: EmbedOptions): Promise<EmbedResponse>;
39
41
  private embedLocal;
40
42
  resize(input: string | File, maxWidth?: number, maxHeight?: number): Promise<string>;
43
+ similarity(vecA: number[], vecB: number[]): number;
41
44
  }
package/dist/coreviz.js CHANGED
@@ -67,7 +67,7 @@ class CoreViz {
67
67
  }
68
68
  async describe(image, options) {
69
69
  try {
70
- const resizedImage = await (0, resize_1.resize)(image);
70
+ const resizedImage = await (0, resize_1.resize)(image, 512, 512);
71
71
  const headers = this.getHeaders();
72
72
  const response = await fetch(`https://lab.coreviz.io/api/ai/describe`, {
73
73
  method: 'POST',
@@ -83,7 +83,7 @@ class CoreViz {
83
83
  }
84
84
  async edit(image, options) {
85
85
  try {
86
- const resizedImage = await (0, resize_1.resize)(image);
86
+ const resizedImage = await (0, resize_1.resize)(image, 1024, 1024);
87
87
  const headers = this.getHeaders();
88
88
  const response = await fetch(`https://lab.coreviz.io/api/ai/edit`, {
89
89
  method: 'POST',
@@ -104,8 +104,12 @@ class CoreViz {
104
104
  }
105
105
  }
106
106
  async tag(image, options) {
107
+ const mode = options?.mode || 'api';
108
+ if (mode === 'local') {
109
+ return this.tagLocal(image, options);
110
+ }
107
111
  try {
108
- const resizedImage = await (0, resize_1.resize)(image);
112
+ const resizedImage = await (0, resize_1.resize)(image, 512, 512);
109
113
  const headers = this.getHeaders();
110
114
  const response = await fetch("https://lab.coreviz.io/api/ai/tag", {
111
115
  method: 'POST',
@@ -134,6 +138,108 @@ class CoreViz {
134
138
  throw err instanceof Error ? err : new Error("An unexpected error occurred.");
135
139
  }
136
140
  }
141
/**
 * Tags an image by running the onnx-community/FastVLM-0.5B-ONNX model through
 * the @huggingface/transformers package instead of posting to the hosted tag endpoint.
 *
 * @param imageInput String identifying the image. Three forms are distinguished:
 *   a string starting with http (loaded with RawImage.fromURL), a string starting
 *   with data:image (base64 payload decoded into a Blob), and anything else
 *   (handed to RawImage.read unchanged).
 * @param options Tagging options. options.prompt is read unconditionally, so the
 *   object must be present; options.options (candidate tags) and options.multiple
 *   are optional.
 * @returns An object with tags (array of strings) and raw.result (the cleaned model text).
 * @throws Rethrows any failure after logging it with console.error; a thrown value
 *   that is not an Error is wrapped in a new Error prefixed with Local tagging failed.
 */
+ async tagLocal(imageInput, options) {
142
+ try {
143
+ // Dynamic import to avoid loading transformers if not used
144
+ const { AutoProcessor, AutoModelForImageTextToText, RawImage, env } = await Promise.resolve().then(() => __importStar(require('@huggingface/transformers')));
145
+ // Configure transformers.js for browser usage
146
// NOTE(review): this assigns a property on the shared env object exported by the library, so it is set on every call.
+ env.allowRemoteModels = true;
147
// NOTE(review): processor and model are both requested with from_pretrained on every call and
// this method keeps no reference to them afterwards. Whether the library caches the
// downloaded files internally is not visible here - confirm before calling this in a loop.
+ const processor = await AutoProcessor.from_pretrained('onnx-community/FastVLM-0.5B-ONNX');
148
+ const model = await AutoModelForImageTextToText.from_pretrained('onnx-community/FastVLM-0.5B-ONNX', {
149
// Per-component dtype selection; presumably precision/quantization levels - confirm against the library docs.
+ dtype: {
150
+ embed_tokens: "fp16",
151
+ vision_encoder: "q4",
152
+ decoder_model_merged: "q4",
153
+ },
154
+ });
155
+ let rawImg;
156
// Prefix test only: any string beginning with http (which also covers https) is treated as a URL.
+ if (imageInput.startsWith('http')) {
157
+ rawImg = await RawImage.fromURL(imageInput);
158
+ }
159
// Data URI: take the segment after the first comma, base64-decode it with atob,
// and copy the resulting char codes into a byte array wrapped in a Blob.
+ else if (imageInput.startsWith('data:image')) {
160
+ const base64Data = imageInput.split(',')[1];
161
+ const binary = atob(base64Data);
162
+ const array = new Uint8Array(binary.length);
163
+ for (let i = 0; i < binary.length; i++) {
164
+ array[i] = binary.charCodeAt(i);
165
+ }
166
+ rawImg = await RawImage.fromBlob(new Blob([array]));
167
+ }
168
// Fallback: the string is passed to RawImage.read as-is (presumably a file path - confirm).
+ else {
169
+ rawImg = await RawImage.read(imageInput);
170
+ }
171
// System prompt: fixed instructions plus three worked examples asking for a bare comma-separated tag list.
+ let systemPrompt = `You are a precise image tagging AI.
172
+ Rules:
173
+ 1. Return ONLY a comma-separated list of tags.
174
+ 2. DO NOT provide any conversational text, introductions, or explanations.
175
+ 3. DO NOT use full sentences.
176
+ 4. If options are provided, select strictly from them.
177
+
178
+ Example 1:
179
+ What animals are in the image?
180
+ Example Output:
181
+ cat
182
+
183
+ Example 2:
184
+ What color cars are visible in the image?
185
+ Output:
186
+ red, blue, green
187
+
188
+ Example 3:
189
+ What is the jersey number of the player?
190
+ Output:
191
+ 10
192
+ `;
193
// User prompt: the caller prompt, then the candidate list (when non-empty),
// then a single-tag instruction unless options.multiple is truthy.
+ let userPrompt = `${options.prompt}`;
194
+ if (options.options && options.options.length > 0) {
195
+ userPrompt += `\nSelect from these options: ${options.options.join(', ')}.`;
196
+ }
197
+ if (!options.multiple) {
198
+ userPrompt += `\nReturn a single tag.`;
199
+ }
200
+ const messages = [
201
+ {
202
+ role: 'system',
203
+ content: systemPrompt,
204
+ },
205
+ { role: 'user', content: `<image>\n${userPrompt}` },
206
+ ];
207
+ let promptText = processor.apply_chat_template(messages, {
208
+ add_generation_prompt: true,
209
+ });
210
// Only when the template result is a string: append a Tag:/Tags: prefix to prime the answer.
// The same prefix is stripped from the decoded output further down.
+ if (typeof promptText === 'string') {
211
+ promptText += options.multiple ? "Tags: " : "Tag: ";
212
+ }
213
+ const inputs = await processor(rawImg, promptText, {
214
+ add_special_tokens: false,
215
+ });
216
// Generation settings: sampling disabled, at most 120 new tokens, repetition penalty 1.2.
+ const outputs = await model.generate({
217
+ ...inputs,
218
+ max_new_tokens: 120,
219
+ do_sample: false,
220
+ repetition_penalty: 1.2,
221
+ });
222
// Slice the output from the last dimension of input_ids onward before decoding, so the
// prompt tokens are excluded - assumes outputs is laid out as (batch, sequence); TODO confirm.
+ const decoded = processor.batch_decode(outputs.slice(null, [inputs.input_ids.dims.at(-1), null]), { skip_special_tokens: true });
223
+ let resultText = decoded[0].trim();
224
+ // Cleanup potential repetition of priming token
225
+ resultText = resultText.replace(/^(Tags?:\s*)/i, '');
226
+ let tags = [];
227
// Multiple mode: split on commas, trim each piece, drop empty pieces.
+ if (options.multiple) {
228
+ tags = resultText.split(',').map(s => s.trim()).filter(s => s.length > 0);
229
+ }
230
// Single mode: the whole text becomes the only tag. NOTE(review): if the model text is empty
// this yields a one-element array holding an empty string, not an empty array.
+ else {
231
+ tags = [resultText];
232
+ }
233
+ return {
234
+ tags,
235
+ raw: { result: resultText }
236
+ };
237
+ }
238
// Log, then rethrow. Error instances pass through unchanged; anything else is wrapped in a new Error.
+ catch (err) {
239
+ console.error(err);
240
+ throw err instanceof Error ? err : new Error("Local tagging failed: " + String(err));
241
+ }
242
+ }
137
243
  async embed(input, options) {
138
244
  const mode = options?.mode || 'api';
139
245
  if (mode === 'local') {
@@ -153,7 +259,7 @@ class CoreViz {
153
259
  isImage = input.startsWith('data:image') || input.startsWith('http://') || input.startsWith('https://');
154
260
  }
155
261
  if (isImage) {
156
- const resizedImage = await (0, resize_1.resize)(input);
262
+ const resizedImage = await (0, resize_1.resize)(input, 512, 512);
157
263
  body.image = resizedImage;
158
264
  }
159
265
  else {
@@ -239,5 +345,20 @@ class CoreViz {
239
345
  /**
   * Public pass-through to the resize helper exported by the resize_1 module.
   * The three arguments are forwarded unchanged and the helper result is returned;
   * no defaults or validation are applied in this method.
   *
   * @param input Image to resize, forwarded as-is.
   * @param maxWidth Forwarded as-is (may be undefined).
   * @param maxHeight Forwarded as-is (may be undefined).
   * @returns A promise for whatever the resize helper returns.
   */
  async resize(input, maxWidth, maxHeight) {
240
346
  return (0, resize_1.resize)(input, maxWidth, maxHeight);
241
347
  }
348
/**
 * Cosine similarity of two numeric vectors: the dot product divided by the
 * product of the two Euclidean norms.
 *
 * @param vecA First vector.
 * @param vecB Second vector.
 * @returns The cosine similarity, or 0 when the vectors differ in length or when
 *   either vector has a zero norm (which includes two empty vectors).
 */
+ similarity(vecA, vecB) {
349
// Mismatched lengths are reported as 0 rather than raised as an error.
+ if (vecA.length !== vecB.length)
350
+ return 0;
351
+ let dotProduct = 0;
352
+ let normA = 0;
353
+ let normB = 0;
354
// Single pass accumulating the dot product and both squared norms.
+ for (let i = 0; i < vecA.length; i++) {
355
+ dotProduct += vecA[i] * vecB[i];
356
+ normA += vecA[i] * vecA[i];
357
+ normB += vecB[i] * vecB[i];
358
+ }
359
// A zero squared norm would make the denominator 0; return 0 instead of dividing.
+ if (normA === 0 || normB === 0)
360
+ return 0;
361
+ return dotProduct / (Math.sqrt(normA) * Math.sqrt(normB));
362
+ }
242
363
  }
243
364
  exports.CoreViz = CoreViz;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@coreviz/sdk",
3
- "version": "1.0.9",
3
+ "version": "1.0.11",
4
4
  "description": "CoreViz SDK",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",