@coreviz/sdk 1.0.5 → 1.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/coreviz.js +53 -9
- package/package.json +1 -1
package/dist/coreviz.js
CHANGED
|
@@ -174,19 +174,63 @@ class CoreViz {
|
|
|
174
174
|
async embedLocal(input, options) {
|
|
175
175
|
try {
|
|
176
176
|
// Dynamic import to avoid loading transformers if not used
|
|
177
|
-
const {
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
const
|
|
181
|
-
//
|
|
182
|
-
|
|
183
|
-
const
|
|
177
|
+
const { AutoTokenizer, AutoProcessor, CLIPTextModelWithProjection, CLIPVisionModelWithProjection, RawImage } = await Promise.resolve().then(() => __importStar(require('@huggingface/transformers')));
|
|
178
|
+
const MODEL_ID = 'Xenova/clip-vit-large-patch14';
|
|
179
|
+
console.log(`Loading local model ${MODEL_ID}...`);
|
|
180
|
+
const start = Date.now();
|
|
181
|
+
// Load tokenizer and processor
|
|
182
|
+
const tokenizer = await AutoTokenizer.from_pretrained(MODEL_ID);
|
|
183
|
+
const processor = await AutoProcessor.from_pretrained(MODEL_ID);
|
|
184
|
+
// Load models with device preference
|
|
185
|
+
const text_model = await CLIPTextModelWithProjection.from_pretrained(MODEL_ID);
|
|
186
|
+
const vision_model = await CLIPVisionModelWithProjection.from_pretrained(MODEL_ID);
|
|
187
|
+
console.log(`Model loaded in ${Date.now() - start}ms`);
|
|
188
|
+
// Check if input is likely an image
|
|
189
|
+
const isImage = options?.type === 'image' ||
|
|
190
|
+
input.startsWith('data:image') ||
|
|
191
|
+
input.startsWith('http://') ||
|
|
192
|
+
input.startsWith('https://') ||
|
|
193
|
+
/\.(jpg|jpeg|png|webp|gif|bmp|tiff|tif)$/i.test(input);
|
|
194
|
+
let normalized_embeds;
|
|
195
|
+
if (isImage) {
|
|
196
|
+
let image;
|
|
197
|
+
if (input.startsWith('http')) {
|
|
198
|
+
image = await RawImage.fromURL(input);
|
|
199
|
+
}
|
|
200
|
+
else if (input.startsWith('data:image')) {
|
|
201
|
+
// Extract base64 data
|
|
202
|
+
const base64Data = input.split(',')[1];
|
|
203
|
+
const binary = atob(base64Data);
|
|
204
|
+
const array = new Uint8Array(binary.length);
|
|
205
|
+
for (let i = 0; i < binary.length; i++) {
|
|
206
|
+
array[i] = binary.charCodeAt(i);
|
|
207
|
+
}
|
|
208
|
+
image = await RawImage.fromBlob(new Blob([array]));
|
|
209
|
+
}
|
|
210
|
+
else {
|
|
211
|
+
// Assume local path
|
|
212
|
+
image = await RawImage.read(input);
|
|
213
|
+
}
|
|
214
|
+
const image_inputs = await processor(image);
|
|
215
|
+
const { image_embeds } = await vision_model(image_inputs);
|
|
216
|
+
normalized_embeds = image_embeds.normalize(2, -1);
|
|
217
|
+
}
|
|
218
|
+
else {
|
|
219
|
+
const text_inputs = tokenizer(input, {
|
|
220
|
+
padding: true,
|
|
221
|
+
truncation: true,
|
|
222
|
+
return_tensors: 'pt',
|
|
223
|
+
});
|
|
224
|
+
const { text_embeds } = await text_model(text_inputs);
|
|
225
|
+
normalized_embeds = text_embeds.normalize(2, -1);
|
|
226
|
+
}
|
|
184
227
|
// Convert Float32Array to regular array
|
|
185
|
-
// @ts-ignore
|
|
186
|
-
const embedding = Array.from(
|
|
228
|
+
// @ts-ignore
|
|
229
|
+
const embedding = Array.from(normalized_embeds.data);
|
|
187
230
|
return { embedding };
|
|
188
231
|
}
|
|
189
232
|
catch (err) {
|
|
233
|
+
console.error(err);
|
|
190
234
|
throw err instanceof Error ? err : new Error("Local embedding failed: " + String(err));
|
|
191
235
|
}
|
|
192
236
|
}
|