@coreviz/sdk 1.0.4 → 1.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/dist/coreviz.js +64 -9
  2. package/package.json +3 -2
package/dist/coreviz.js CHANGED
@@ -174,19 +174,74 @@ class CoreViz {
174
174
  async embedLocal(input, options) {
175
175
  try {
176
176
  // Dynamic import to avoid loading transformers if not used
177
- const { pipeline } = await Promise.resolve().then(() => __importStar(require('@huggingface/transformers')));
178
- // Initialize the pipeline with the specified model
179
- // This will automatically download and cache the model if not present
180
- const extractor = await pipeline('feature-extraction', 'Xenova/clip-vit-large-patch14');
181
- // Generate embeddings
182
- // transformers.js handles text strings, image URLs, and image paths automatically
183
- const output = await extractor(input, { pooling: 'mean', normalize: true });
177
+ const { AutoTokenizer, AutoProcessor, CLIPTextModelWithProjection, CLIPVisionModelWithProjection, RawImage, env } = await Promise.resolve().then(() => __importStar(require('@huggingface/transformers')));
178
+ // Force browser backend to use webgpu if available
179
+ // @ts-ignore
180
+ if (typeof navigator !== 'undefined' && navigator.gpu && env.backends?.onnx?.wasm) {
181
+ // @ts-ignore
182
+ env.backends.onnx.wasm.proxy = false;
183
+ }
184
+ const MODEL_ID = 'Xenova/clip-vit-large-patch14';
185
+ const device = 'webgpu';
186
+ console.log(`Loading local model ${MODEL_ID}...`);
187
+ const start = Date.now();
188
+ // Load tokenizer and processor
189
+ const tokenizer = await AutoTokenizer.from_pretrained(MODEL_ID);
190
+ const processor = await AutoProcessor.from_pretrained(MODEL_ID);
191
+ // Load models with device preference
192
+ const text_model = await CLIPTextModelWithProjection.from_pretrained(MODEL_ID, {
193
+ device: device,
194
+ });
195
+ const vision_model = await CLIPVisionModelWithProjection.from_pretrained(MODEL_ID, {
196
+ device: device,
197
+ });
198
+ console.log(`Model loaded in ${Date.now() - start}ms`);
199
+ // Check if input is likely an image
200
+ const isImage = options?.type === 'image' ||
201
+ input.startsWith('data:image') ||
202
+ input.startsWith('http://') ||
203
+ input.startsWith('https://') ||
204
+ /\.(jpg|jpeg|png|webp|gif|bmp|tiff|tif)$/i.test(input);
205
+ let normalized_embeds;
206
+ if (isImage) {
207
+ let image;
208
+ if (input.startsWith('http')) {
209
+ image = await RawImage.fromURL(input);
210
+ }
211
+ else if (input.startsWith('data:image')) {
212
+ // Extract base64 data
213
+ const base64Data = input.split(',')[1];
214
+ const binary = atob(base64Data);
215
+ const array = new Uint8Array(binary.length);
216
+ for (let i = 0; i < binary.length; i++) {
217
+ array[i] = binary.charCodeAt(i);
218
+ }
219
+ image = await RawImage.fromBlob(new Blob([array]));
220
+ }
221
+ else {
222
+ // Assume local path
223
+ image = await RawImage.read(input);
224
+ }
225
+ const image_inputs = await processor(image);
226
+ const { image_embeds } = await vision_model(image_inputs);
227
+ normalized_embeds = image_embeds.normalize(2, -1);
228
+ }
229
+ else {
230
+ const text_inputs = tokenizer(input, {
231
+ padding: true,
232
+ truncation: true,
233
+ return_tensors: 'pt',
234
+ });
235
+ const { text_embeds } = await text_model(text_inputs);
236
+ normalized_embeds = text_embeds.normalize(2, -1);
237
+ }
184
238
  // Convert Float32Array to regular array
185
- // @ts-ignore - Output type inference might fail with dynamic import
186
- const embedding = Array.from(output.data);
239
+ // @ts-ignore
240
+ const embedding = Array.from(normalized_embeds.data);
187
241
  return { embedding };
188
242
  }
189
243
  catch (err) {
244
+ console.error(err);
190
245
  throw err instanceof Error ? err : new Error("Local embedding failed: " + String(err));
191
246
  }
192
247
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@coreviz/sdk",
3
- "version": "1.0.4",
3
+ "version": "1.0.6",
4
4
  "description": "CoreViz SDK",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",
@@ -32,6 +32,7 @@
32
32
  "typescript": "^5.9.3"
33
33
  },
34
34
  "dependencies": {
35
- "sharp": "^0.34.5"
35
+ "sharp": "^0.34.5",
36
+ "@huggingface/transformers": "^3.8.0"
36
37
  }
37
38
  }