ruvector 0.1.54 → 0.1.56

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,348 @@
1
/**
 * Model Loader for RuVector ONNX Embeddings WASM
 *
 * Provides easy loading of pre-trained models from HuggingFace Hub
 */

// Every catalog entry follows the same HuggingFace repository layout:
// ONNX weights under onnx/, tokenizer config at the repo root.
const HF_BASE = 'https://huggingface.co';

// Build one catalog entry from its repo owner and metadata.
function hfModel(org, name, dimension, maxLength, size, description) {
  return {
    name,
    dimension,
    maxLength,
    size,
    description,
    model: `${HF_BASE}/${org}/${name}/resolve/main/onnx/model.onnx`,
    tokenizer: `${HF_BASE}/${org}/${name}/resolve/main/tokenizer.json`,
  };
}

/**
 * Pre-configured models with their HuggingFace URLs
 */
export const MODELS = {
  // Sentence Transformers - Small & Fast
  'all-MiniLM-L6-v2': hfModel('sentence-transformers', 'all-MiniLM-L6-v2', 384, 256, '23MB', 'Fast, general-purpose embeddings'),
  'all-MiniLM-L12-v2': hfModel('sentence-transformers', 'all-MiniLM-L12-v2', 384, 256, '33MB', 'Better quality, balanced speed'),

  // BGE Models - State of the art
  'bge-small-en-v1.5': hfModel('BAAI', 'bge-small-en-v1.5', 384, 512, '33MB', 'State-of-the-art small model'),
  'bge-base-en-v1.5': hfModel('BAAI', 'bge-base-en-v1.5', 768, 512, '110MB', 'Best overall quality'),

  // E5 Models - Microsoft
  'e5-small-v2': hfModel('intfloat', 'e5-small-v2', 384, 512, '33MB', 'Excellent for search & retrieval'),

  // GTE Models - Alibaba
  'gte-small': hfModel('thenlper', 'gte-small', 384, 512, '33MB', 'Good multilingual support'),
};

/**
 * Default model for quick start
 */
export const DEFAULT_MODEL = 'all-MiniLM-L6-v2';
78
+
79
/**
 * Model loader with caching support
 *
 * Downloads ONNX model + tokenizer artifacts over HTTP. In browsers the
 * Cache API is used (when available) so repeat loads avoid re-downloading.
 */
export class ModelLoader {
  /**
   * @param {object} [options]
   * @param {boolean} [options.cache=true] - Enable Cache API caching (browser only)
   * @param {string} [options.cacheStorage='ruvector-models'] - Name of the cache bucket
   * @param {?function} [options.onProgress] - Download progress callback; receives {loaded, total, percent}
   */
  constructor(options = {}) {
    this.cache = options.cache ?? true;
    this.cacheStorage = options.cacheStorage ?? 'ruvector-models';
    this.onProgress = options.onProgress ?? null;
  }

  /**
   * Load a pre-configured model by name
   * @param {string} modelName - Model name from MODELS
   * @returns {Promise<{modelBytes: Uint8Array, tokenizerJson: string, config: object}>}
   * @throws {Error} if modelName is not a key of MODELS
   */
  async loadModel(modelName = DEFAULT_MODEL) {
    const modelConfig = MODELS[modelName];
    if (!modelConfig) {
      throw new Error(`Unknown model: ${modelName}. Available: ${Object.keys(MODELS).join(', ')}`);
    }

    console.log(`Loading model: ${modelConfig.name} (${modelConfig.size})`);

    // Model and tokenizer are independent downloads; fetch them in parallel.
    const [modelBytes, tokenizerJson] = await Promise.all([
      this.fetchWithCache(modelConfig.model, `${modelName}-model.onnx`, 'arraybuffer'),
      this.fetchWithCache(modelConfig.tokenizer, `${modelName}-tokenizer.json`, 'text'),
    ]);

    return {
      modelBytes: new Uint8Array(modelBytes),
      tokenizerJson,
      config: modelConfig,
    };
  }

  /**
   * Load model from custom URLs
   * @param {string} modelUrl - URL to ONNX model
   * @param {string} tokenizerUrl - URL to tokenizer.json
   * @returns {Promise<{modelBytes: Uint8Array, tokenizerJson: string}>}
   */
  async loadFromUrls(modelUrl, tokenizerUrl) {
    // cacheKey is null here, so fetchWithCache skips the Cache API entirely.
    const [modelBytes, tokenizerJson] = await Promise.all([
      this.fetchWithCache(modelUrl, null, 'arraybuffer'),
      this.fetchWithCache(tokenizerUrl, null, 'text'),
    ]);

    return {
      modelBytes: new Uint8Array(modelBytes),
      tokenizerJson,
    };
  }

  /**
   * Load model from local files (Node.js)
   * @param {string} modelPath - Path to ONNX model
   * @param {string} tokenizerPath - Path to tokenizer.json
   * @returns {Promise<{modelBytes: Uint8Array, tokenizerJson: string}>}
   * @throws {Error} when called outside Node.js (no filesystem access)
   */
  async loadFromFiles(modelPath, tokenizerPath) {
    // Node.js environment — detected via process.versions.node so this module
    // stays importable in browsers (fs is only imported on demand).
    if (typeof process !== 'undefined' && process.versions?.node) {
      const fs = await import('fs/promises');
      const [modelBytes, tokenizerJson] = await Promise.all([
        fs.readFile(modelPath),
        fs.readFile(tokenizerPath, 'utf8'),
      ]);
      return {
        modelBytes: new Uint8Array(modelBytes),
        tokenizerJson,
      };
    }
    throw new Error('loadFromFiles is only available in Node.js');
  }

  /**
   * Fetch with optional caching (uses Cache API in browsers)
   *
   * @param {string} url - Resource to download
   * @param {?string} cacheKey - Cache entry name; null disables caching for this call
   * @param {string} responseType - 'arraybuffer' or 'text'; how to decode the body
   *
   * NOTE(review): cacheKey is a bare name rather than an absolute URL — the
   * Cache API resolves string keys relative to the current document, so
   * entries could collide with other same-origin pages using the same cache
   * name. Confirm this is intended.
   */
  async fetchWithCache(url, cacheKey, responseType) {
    // Try cache first (browser only)
    if (this.cache && typeof caches !== 'undefined' && cacheKey) {
      try {
        const cache = await caches.open(this.cacheStorage);
        const cached = await cache.match(cacheKey);
        if (cached) {
          console.log(` Cache hit: ${cacheKey}`);
          return responseType === 'arraybuffer'
            ? await cached.arrayBuffer()
            : await cached.text();
        }
      } catch (e) {
        // Cache API not available, continue with fetch
      }
    }

    // Fetch from network
    console.log(` Downloading: ${url}`);
    const response = await this.fetchWithProgress(url);

    if (!response.ok) {
      throw new Error(`Failed to fetch ${url}: ${response.status} ${response.statusText}`);
    }

    // Clone for caching — must happen BEFORE the body is consumed below,
    // since a Response body can only be read once.
    const responseClone = response.clone();

    // Cache the response (browser only)
    if (this.cache && typeof caches !== 'undefined' && cacheKey) {
      try {
        const cache = await caches.open(this.cacheStorage);
        await cache.put(cacheKey, responseClone);
      } catch (e) {
        // Cache write failed, continue
      }
    }

    return responseType === 'arraybuffer'
      ? await response.arrayBuffer()
      : await response.text();
  }

  /**
   * Fetch with progress reporting
   *
   * Wraps fetch(); when an onProgress callback is set and the response is
   * streamable with a known content-length, reads the body chunk-by-chunk,
   * reporting progress, then rebuilds an equivalent Response so callers can
   * consume it normally.
   *
   * NOTE(review): total comes from the content-length header, which for
   * content-encoded (e.g. gzip) responses may be the compressed size while
   * the reader yields decompressed bytes — percent could exceed 100 in that
   * case. Confirm against the hosting CDN's behavior.
   */
  async fetchWithProgress(url) {
    const response = await fetch(url);

    // No callback or non-streamable body: nothing to report, pass through.
    if (!this.onProgress || !response.body) {
      return response;
    }

    // Without a content-length we cannot compute percentages; pass through.
    const contentLength = response.headers.get('content-length');
    if (!contentLength) {
      return response;
    }

    const total = parseInt(contentLength, 10);
    let loaded = 0;

    const reader = response.body.getReader();
    const chunks = [];

    while (true) {
      const { done, value } = await reader.read();
      if (done) break;

      chunks.push(value);
      loaded += value.length;

      this.onProgress({
        loaded,
        total,
        percent: Math.round((loaded / total) * 100),
      });
    }

    // Reassemble the streamed chunks into one contiguous buffer.
    const body = new Uint8Array(loaded);
    let position = 0;
    for (const chunk of chunks) {
      body.set(chunk, position);
      position += chunk.length;
    }

    // Return a fresh Response so the caller can still .arrayBuffer()/.text()
    // and .clone() as if the body had never been consumed here.
    return new Response(body, {
      headers: response.headers,
      status: response.status,
      statusText: response.statusText,
    });
  }

  /**
   * Clear cached models
   *
   * Deletes the whole cache bucket (browser only); a silent no-op elsewhere.
   */
  async clearCache() {
    if (typeof caches !== 'undefined') {
      await caches.delete(this.cacheStorage);
      console.log('Model cache cleared');
    }
  }

  /**
   * List available models
   *
   * @returns {Array<object>} one entry per MODELS key, with the key exposed as `id`
   */
  static listModels() {
    return Object.entries(MODELS).map(([key, config]) => ({
      id: key,
      ...config,
    }));
  }
}
269
+
270
/**
 * Quick helper to create an embedder with a pre-configured model
 *
 * @example
 * ```javascript
 * import { createEmbedder } from './loader.js';
 *
 * const embedder = await createEmbedder('all-MiniLM-L6-v2');
 * const embedding = embedder.embedOne("Hello world");
 * ```
 */
export async function createEmbedder(modelName = DEFAULT_MODEL, wasmModule = null) {
  // Lazily import and initialize the WASM glue when the caller did not inject it.
  let wasm = wasmModule;
  if (!wasm) {
    wasm = await import('./ruvector_onnx_embeddings_wasm.js');
    await wasm.default();
  }

  // Download (or read from cache) the model weights and tokenizer config.
  const { modelBytes, tokenizerJson, config } = await new ModelLoader().loadModel(modelName);

  // Configure the embedder to the model's native sequence limit, with
  // normalized outputs.
  const cfg = new wasm.WasmEmbedderConfig()
    .setMaxLength(config.maxLength)
    .setNormalize(true)
    .setPooling(0); // Mean pooling

  return wasm.WasmEmbedder.withConfig(modelBytes, tokenizerJson, cfg);
}
304
+
305
/**
 * Quick helper for one-off embedding (loads model, embeds, returns)
 *
 * The embedder is created fresh per call and its WASM-side memory is
 * released before returning, so repeated calls do not accumulate leaked
 * linear memory.
 *
 * @example
 * ```javascript
 * import { embed } from './loader.js';
 *
 * const embedding = await embed("Hello world");
 * const embeddings = await embed(["Hello", "World"]);
 * ```
 *
 * @param {string|string[]} text - A single text or a batch of texts
 * @param {string} [modelName=DEFAULT_MODEL] - Model name from MODELS
 * @returns {Promise<Float32Array>} the embedding(s)
 */
export async function embed(text, modelName = DEFAULT_MODEL) {
  const embedder = await createEmbedder(modelName);
  try {
    if (Array.isArray(text)) {
      return embedder.embedBatch(text);
    }
    return embedder.embedOne(text);
  } finally {
    // Fix: previously the embedder was never freed, leaking the entire
    // loaded model in WASM linear memory on every one-off call.
    embedder.free();
  }
}
324
+
325
/**
 * Quick helper for similarity comparison
 *
 * Loads the model, computes the similarity score, then releases the
 * embedder's WASM-side memory before returning.
 *
 * @example
 * ```javascript
 * import { similarity } from './loader.js';
 *
 * const score = await similarity("I love dogs", "I adore puppies");
 * console.log(score); // ~0.85
 * ```
 *
 * @param {string} text1 - First text
 * @param {string} text2 - Second text
 * @param {string} [modelName=DEFAULT_MODEL] - Model name from MODELS
 * @returns {Promise<number>} similarity score
 */
export async function similarity(text1, text2, modelName = DEFAULT_MODEL) {
  const embedder = await createEmbedder(modelName);
  try {
    return embedder.similarity(text1, text2);
  } finally {
    // Fix: previously the embedder was never freed, leaking the entire
    // loaded model in WASM linear memory on every call.
    embedder.free();
  }
}
340
+
341
// Aggregate default export mirroring the named exports, for consumers that
// prefer `import loader from './loader.js'`.
const loaderApi = {
  MODELS,
  DEFAULT_MODEL,
  ModelLoader,
  createEmbedder,
  embed,
  similarity,
};

export default loaderApi;
Binary file
@@ -0,0 +1,165 @@
1
/* tslint:disable */
/* eslint-disable */

/**
 * Strategy for pooling token embeddings into a single sentence embedding
 *
 * NOTE(review): this file appears to be wasm-bindgen-generated; the numeric
 * values here are what WasmEmbedderConfig.setPooling() accepts and must stay
 * in sync with the Rust-side enum.
 */
export enum PoolingStrategy {
  /**
   * Average all token embeddings (most common)
   */
  Mean = 0,
  /**
   * Use only the [CLS] token embedding
   */
  Cls = 1,
  /**
   * Take the maximum value across all tokens for each dimension
   */
  Max = 2,
  /**
   * Mean pooling normalized by sqrt of sequence length
   */
  MeanSqrtLen = 3,
  /**
   * Use the last token embedding (for decoder models)
   */
  LastToken = 4,
}
29
+
30
/**
 * Sentence-embedding engine backed by an ONNX model running in WASM.
 */
export class WasmEmbedder {
  /**
   * Release the WASM-side memory backing this embedder.
   * The instance must not be used afterwards.
   */
  free(): void;
  /**
   * Explicit-resource-management hook (`using` declarations);
   * presumably delegates to free() — wasm-bindgen convention, confirm.
   */
  [Symbol.dispose](): void;
  /**
   * Get maximum sequence length
   */
  maxLength(): number;
  /**
   * Compute similarity between two texts
   */
  similarity(text1: string, text2: string): number;
  /**
   * Generate embeddings for multiple texts
   *
   * NOTE(review): returns a single flat Float32Array — presumably
   * texts.length × dimension() values concatenated; confirm against the
   * Rust implementation.
   */
  embedBatch(texts: string[]): Float32Array;
  /**
   * Create embedder with custom configuration
   */
  static withConfig(model_bytes: Uint8Array, tokenizer_json: string, config: WasmEmbedderConfig): WasmEmbedder;
  /**
   * Create a new embedder from model and tokenizer bytes
   *
   * # Arguments
   * * `model_bytes` - ONNX model file bytes
   * * `tokenizer_json` - Tokenizer JSON configuration
   */
  constructor(model_bytes: Uint8Array, tokenizer_json: string);
  /**
   * Get the embedding dimension
   */
  dimension(): number;
  /**
   * Generate embedding for a single text
   */
  embedOne(text: string): Float32Array;
}
66
+
67
/**
 * Builder-style configuration for WasmEmbedder; each setter returns a
 * WasmEmbedderConfig so calls can be chained.
 */
export class WasmEmbedderConfig {
  /**
   * Release the WASM-side memory backing this configuration.
   */
  free(): void;
  /**
   * Explicit-resource-management hook (`using` declarations).
   */
  [Symbol.dispose](): void;
  /**
   * Set pooling strategy (0=Mean, 1=Cls, 2=Max, 3=MeanSqrtLen, 4=LastToken)
   */
  setPooling(pooling: number): WasmEmbedderConfig;
  /**
   * Set whether to normalize embeddings
   */
  setNormalize(normalize: boolean): WasmEmbedderConfig;
  /**
   * Set maximum sequence length
   */
  setMaxLength(max_length: number): WasmEmbedderConfig;
  /**
   * Create a new configuration
   */
  constructor();
}
87
+
88
/**
 * Compute cosine similarity between two embedding vectors (JS-friendly)
 *
 * NOTE(review): presumably both vectors must have the same length — confirm.
 */
export function cosineSimilarity(a: Float32Array, b: Float32Array): number;

/**
 * Initialize panic hook for better error messages in WASM
 */
export function init(): void;

/**
 * L2 normalize an embedding vector (JS-friendly)
 *
 * Returns a new Float32Array; the input is not modified in place —
 * NOTE(review): inferred from the return type, confirm.
 */
export function normalizeL2(embedding: Float32Array): Float32Array;

/**
 * Check if SIMD is available (for performance info)
 */
export function simd_available(): boolean;

/**
 * Get the library version
 */
export function version(): string;
112
+
113
/**
 * Inputs accepted by the async initializer (default export): a URL/request
 * to fetch the .wasm from, an existing Response, raw bytes, or a precompiled
 * module.
 */
export type InitInput = RequestInfo | URL | Response | BufferSource | WebAssembly.Module;

/**
 * Raw exports of the instantiated WASM module.
 *
 * NOTE(review): the `__wbg_*`/`__wbindgen_*` entries appear to be
 * wasm-bindgen glue internals; application code should use the typed
 * classes/functions above rather than calling these directly.
 */
export interface InitOutput {
  readonly memory: WebAssembly.Memory;
  readonly __wbg_wasmembedder_free: (a: number, b: number) => void;
  readonly __wbg_wasmembedderconfig_free: (a: number, b: number) => void;
  readonly cosineSimilarity: (a: number, b: number, c: number, d: number) => number;
  readonly normalizeL2: (a: number, b: number) => [number, number];
  readonly wasmembedder_dimension: (a: number) => number;
  readonly wasmembedder_embedBatch: (a: number, b: number, c: number) => [number, number, number, number];
  readonly wasmembedder_embedOne: (a: number, b: number, c: number) => [number, number, number, number];
  readonly wasmembedder_maxLength: (a: number) => number;
  readonly wasmembedder_new: (a: number, b: number, c: number, d: number) => [number, number, number];
  readonly wasmembedder_similarity: (a: number, b: number, c: number, d: number, e: number) => [number, number, number];
  readonly wasmembedder_withConfig: (a: number, b: number, c: number, d: number, e: number) => [number, number, number];
  readonly wasmembedderconfig_new: () => number;
  readonly wasmembedderconfig_setMaxLength: (a: number, b: number) => number;
  readonly wasmembedderconfig_setNormalize: (a: number, b: number) => number;
  readonly wasmembedderconfig_setPooling: (a: number, b: number) => number;
  readonly init: () => void;
  readonly simd_available: () => number;
  readonly version: () => [number, number];
  readonly __wbindgen_malloc: (a: number, b: number) => number;
  readonly __wbindgen_realloc: (a: number, b: number, c: number, d: number) => number;
  readonly __wbindgen_exn_store: (a: number) => void;
  readonly __externref_table_alloc: () => number;
  readonly __wbindgen_externrefs: WebAssembly.Table;
  readonly __wbindgen_free: (a: number, b: number, c: number) => void;
  readonly __externref_table_dealloc: (a: number) => void;
  readonly __wbindgen_start: () => void;
}
144
+
145
/**
 * Inputs accepted by the synchronous initializer: raw bytes or a precompiled
 * module (no fetching — use the async initializer for URLs).
 */
export type SyncInitInput = BufferSource | WebAssembly.Module;

/**
 * Instantiates the given `module`, which can either be bytes or
 * a precompiled `WebAssembly.Module`.
 *
 * @param {{ module: SyncInitInput }} module - Passing `SyncInitInput` directly is deprecated.
 *
 * @returns {InitOutput}
 */
export function initSync(module: { module: SyncInitInput } | SyncInitInput): InitOutput;

/**
 * If `module_or_path` is {RequestInfo} or {URL}, makes a request and
 * for everything else, calls `WebAssembly.instantiate` directly.
 *
 * This async initializer is the module's default export and must be awaited
 * once before using any other export from the WASM glue.
 *
 * @param {{ module_or_path: InitInput | Promise<InitInput> }} module_or_path - Passing `InitInput` directly is deprecated.
 *
 * @returns {Promise<InitOutput>}
 */
export default function __wbg_init (module_or_path?: { module_or_path: InitInput | Promise<InitInput> } | InitInput | Promise<InitInput>): Promise<InitOutput>;