@coreviz/sdk 1.0.5 → 1.0.7

This diff shows the changes between publicly available versions of this package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (2)
  1. package/dist/coreviz.js +53 -9
  2. package/package.json +1 -1
package/dist/coreviz.js CHANGED
@@ -174,19 +174,63 @@ class CoreViz {
174
174
  async embedLocal(input, options) {
175
175
  try {
176
176
  // Dynamic import to avoid loading transformers if not used
177
- const { pipeline } = await Promise.resolve().then(() => __importStar(require('@huggingface/transformers')));
178
- // Initialize the pipeline with the specified model
179
- // This will automatically download and cache the model if not present
180
- const extractor = await pipeline('feature-extraction', 'Xenova/clip-vit-large-patch14');
181
- // Generate embeddings
182
- // transformers.js handles text strings, image URLs, and image paths automatically
183
- const output = await extractor(input, { pooling: 'mean', normalize: true });
177
+ const { AutoTokenizer, AutoProcessor, CLIPTextModelWithProjection, CLIPVisionModelWithProjection, RawImage } = await Promise.resolve().then(() => __importStar(require('@huggingface/transformers')));
178
+ const MODEL_ID = 'Xenova/clip-vit-large-patch14';
179
+ console.log(`Loading local model ${MODEL_ID}...`);
180
+ const start = Date.now();
181
+ // Load tokenizer and processor
182
+ const tokenizer = await AutoTokenizer.from_pretrained(MODEL_ID);
183
+ const processor = await AutoProcessor.from_pretrained(MODEL_ID);
184
+ // Load models with device preference
185
+ const text_model = await CLIPTextModelWithProjection.from_pretrained(MODEL_ID);
186
+ const vision_model = await CLIPVisionModelWithProjection.from_pretrained(MODEL_ID);
187
+ console.log(`Model loaded in ${Date.now() - start}ms`);
188
+ // Check if input is likely an image
189
+ const isImage = options?.type === 'image' ||
190
+ input.startsWith('data:image') ||
191
+ input.startsWith('http://') ||
192
+ input.startsWith('https://') ||
193
+ /\.(jpg|jpeg|png|webp|gif|bmp|tiff|tif)$/i.test(input);
194
+ let normalized_embeds;
195
+ if (isImage) {
196
+ let image;
197
+ if (input.startsWith('http')) {
198
+ image = await RawImage.fromURL(input);
199
+ }
200
+ else if (input.startsWith('data:image')) {
201
+ // Extract base64 data
202
+ const base64Data = input.split(',')[1];
203
+ const binary = atob(base64Data);
204
+ const array = new Uint8Array(binary.length);
205
+ for (let i = 0; i < binary.length; i++) {
206
+ array[i] = binary.charCodeAt(i);
207
+ }
208
+ image = await RawImage.fromBlob(new Blob([array]));
209
+ }
210
+ else {
211
+ // Assume local path
212
+ image = await RawImage.read(input);
213
+ }
214
+ const image_inputs = await processor(image);
215
+ const { image_embeds } = await vision_model(image_inputs);
216
+ normalized_embeds = image_embeds.normalize(2, -1);
217
+ }
218
+ else {
219
+ const text_inputs = tokenizer(input, {
220
+ padding: true,
221
+ truncation: true,
222
+ return_tensors: 'pt',
223
+ });
224
+ const { text_embeds } = await text_model(text_inputs);
225
+ normalized_embeds = text_embeds.normalize(2, -1);
226
+ }
184
227
  // Convert Float32Array to regular array
185
- // @ts-ignore - Output type inference might fail with dynamic import
186
- const embedding = Array.from(output.data);
228
+ // @ts-ignore
229
+ const embedding = Array.from(normalized_embeds.data);
187
230
  return { embedding };
188
231
  }
189
232
  catch (err) {
233
+ console.error(err);
190
234
  throw err instanceof Error ? err : new Error("Local embedding failed: " + String(err));
191
235
  }
192
236
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@coreviz/sdk",
3
- "version": "1.0.5",
3
+ "version": "1.0.7",
4
4
  "description": "CoreViz SDK",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",