npm - @coreviz/sdk - Versions diffs - 1.0.4 → 1.0.6 - Mend

@coreviz/sdk 1.0.4 → 1.0.6

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (2)

package/dist/coreviz.js +64 -9
package/package.json +3 -2

package/dist/coreviz.js CHANGED Viewed

@@ -174,19 +174,74 @@ class CoreViz {
     async embedLocal(input, options) {
         try {
             // Dynamic import to avoid loading transformers if not used
-            const { pipeline } = await Promise.resolve().then(() => __importStar(require('@huggingface/transformers')));
-            // Initialize the pipeline with the specified model
-            // This will automatically download and cache the model if not present
-            const extractor = await pipeline('feature-extraction', 'Xenova/clip-vit-large-patch14');
-            // Generate embeddings
-            // transformers.js handles text strings, image URLs, and image paths automatically
-            const output = await extractor(input, { pooling: 'mean', normalize: true });
+            const { AutoTokenizer, AutoProcessor, CLIPTextModelWithProjection, CLIPVisionModelWithProjection, RawImage, env } = await Promise.resolve().then(() => __importStar(require('@huggingface/transformers')));
+            // Force browser backend to use webgpu if available
+            // @ts-ignore
+            if (typeof navigator !== 'undefined' && navigator.gpu && env.backends?.onnx?.wasm) {
+                // @ts-ignore
+                env.backends.onnx.wasm.proxy = false;
+            }
+            const MODEL_ID = 'Xenova/clip-vit-large-patch14';
+            const device = 'webgpu';
+            console.log(`Loading local model ${MODEL_ID}...`);
+            const start = Date.now();
+            // Load tokenizer and processor
+            const tokenizer = await AutoTokenizer.from_pretrained(MODEL_ID);
+            const processor = await AutoProcessor.from_pretrained(MODEL_ID);
+            // Load models with device preference
+            const text_model = await CLIPTextModelWithProjection.from_pretrained(MODEL_ID, {
+                device: device,
+            });
+            const vision_model = await CLIPVisionModelWithProjection.from_pretrained(MODEL_ID, {
+                device: device,
+            });
+            console.log(`Model loaded in ${Date.now() - start}ms`);
+            // Check if input is likely an image
+            const isImage = options?.type === 'image' ||
+                input.startsWith('data:image') ||
+                input.startsWith('http://') ||
+                input.startsWith('https://') ||
+                /\.(jpg|jpeg|png|webp|gif|bmp|tiff|tif)$/i.test(input);
+            let normalized_embeds;
+            if (isImage) {
+                let image;
+                if (input.startsWith('http')) {
+                    image = await RawImage.fromURL(input);
+                }
+                else if (input.startsWith('data:image')) {
+                    // Extract base64 data
+                    const base64Data = input.split(',')[1];
+                    const binary = atob(base64Data);
+                    const array = new Uint8Array(binary.length);
+                    for (let i = 0; i < binary.length; i++) {
+                        array[i] = binary.charCodeAt(i);
+                    }
+                    image = await RawImage.fromBlob(new Blob([array]));
+                }
+                else {
+                    // Assume local path
+                    image = await RawImage.read(input);
+                }
+                const image_inputs = await processor(image);
+                const { image_embeds } = await vision_model(image_inputs);
+                normalized_embeds = image_embeds.normalize(2, -1);
+            }
+            else {
+                const text_inputs = tokenizer(input, {
+                    padding: true,
+                    truncation: true,
+                    return_tensors: 'pt',
+                });
+                const { text_embeds } = await text_model(text_inputs);
+                normalized_embeds = text_embeds.normalize(2, -1);
+            }
             // Convert Float32Array to regular array
-            // @ts-ignore - Output type inference might fail with dynamic import
-            const embedding = Array.from(output.data);
+            // @ts-ignore
+            const embedding = Array.from(normalized_embeds.data);
             return { embedding };
         }
         catch (err) {
+            console.error(err);
             throw err instanceof Error ? err : new Error("Local embedding failed: " + String(err));
         }
     }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@coreviz/sdk",
-  "version": "1.0.4",
+  "version": "1.0.6",
   "description": "CoreViz SDK",
   "main": "dist/index.js",
   "types": "dist/index.d.ts",
@@ -32,6 +32,7 @@
     "typescript": "^5.9.3"
   },
   "dependencies": {
-    "sharp": "^0.34.5"
+    "sharp": "^0.34.5",
+    "@huggingface/transformers": "^3.8.0"
   }
 }