ruvector 0.1.94 → 0.1.95

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,127 @@
1
+ /**
2
+ * CommonJS-compatible WASM loader for Node.js
3
+ *
4
+ * This file provides a way to load the WASM module without requiring
5
+ * the --experimental-wasm-modules flag by manually loading the WASM bytes.
6
+ *
7
+ * Usage:
8
+ * const wasm = require('./ruvector_onnx_embeddings_wasm_cjs.js');
9
+ * await wasm.init(); // or wasm.initSync(wasmBytes)
10
+ */
11
+
12
+ const fs = require('fs');
13
+ const path = require('path');
14
+
15
+ // Re-export everything from the JS bindings
16
+ const bindings = require('./ruvector_onnx_embeddings_wasm_bg.js');
17
+
18
+ // Track initialization state
19
+ let initialized = false;
20
+ let initPromise = null;
21
+
/**
 * Initialize the WASM module asynchronously.
 *
 * Accepted values for `wasmInput`:
 *   - a `WebAssembly.Module`: instantiated directly, without recompiling;
 *   - an `ArrayBuffer` or any view over one (typed array, `DataView`, Node
 *     `Buffer`): compiled and then instantiated;
 *   - a string: treated as a filesystem path to the `.wasm` file;
 *   - anything else (including `undefined`): the `.wasm` file located next
 *     to this script is loaded.
 *
 * Concurrent callers share a single in-flight initialization; the input of
 * the first caller is the one that gets used. If that initialization fails,
 * the cached promise is cleared so a later call can retry (for example with
 * a corrected path) instead of receiving the same stale rejection forever.
 *
 * @param wasmInput - Optional module, bytes, or path as described above.
 * @returns The JS bindings object once the WASM instance is wired up.
 */
async function init(wasmInput) {
  if (initialized) return bindings;

  if (!initPromise) {
    initPromise = (async () => {
      let wasmModule;

      if (wasmInput instanceof WebAssembly.Module) {
        // Already compiled module
        wasmModule = wasmInput;
      } else {
        let wasmBytes;
        if (wasmInput instanceof ArrayBuffer || ArrayBuffer.isView(wasmInput)) {
          // Raw bytes provided
          wasmBytes = wasmInput;
        } else {
          // Explicit path, or auto-detected file next to this script
          const wasmPath = typeof wasmInput === 'string'
            ? wasmInput
            : path.join(__dirname, 'ruvector_onnx_embeddings_wasm_bg.wasm');
          wasmBytes = fs.readFileSync(wasmPath);
        }
        wasmModule = await WebAssembly.compile(wasmBytes);
      }

      const instance = await WebAssembly.instantiate(wasmModule, getImports());
      bindings.__wbg_set_wasm(instance.exports);
      finishInit();
    })().catch((err) => {
      // Forget the failed attempt so the next init() call starts fresh.
      initPromise = null;
      throw err;
    });
  }

  await initPromise;
  return bindings;
}
65
+
/**
 * Initialize the WASM module synchronously.
 *
 * Accepts the same kinds of input as `init()`:
 *   - a `WebAssembly.Module`: instantiated directly;
 *   - a non-empty string: treated as a filesystem path to the `.wasm` file;
 *   - WASM bytes (`ArrayBuffer`, typed array, Node `Buffer`): compiled
 *     synchronously;
 *   - a falsy value (e.g. omitted): the `.wasm` file located next to this
 *     script is read from disk.
 *
 * Does nothing beyond returning the bindings when the module is already
 * initialized.
 *
 * @param wasmBytes - Optional module, bytes, or path as described above.
 * @returns The JS bindings object.
 */
function initSync(wasmBytes) {
  if (initialized) return bindings;

  let wasmModule;
  if (wasmBytes instanceof WebAssembly.Module) {
    // Already compiled module
    wasmModule = wasmBytes;
  } else {
    let bytes = wasmBytes;
    if (!bytes) {
      // Nothing provided: fall back to the bundled file next to this script
      bytes = fs.readFileSync(path.join(__dirname, 'ruvector_onnx_embeddings_wasm_bg.wasm'));
    } else if (typeof bytes === 'string') {
      // Path to WASM file
      bytes = fs.readFileSync(bytes);
    }
    wasmModule = new WebAssembly.Module(bytes);
  }

  const instance = new WebAssembly.Instance(wasmModule, getImports());

  bindings.__wbg_set_wasm(instance.exports);
  finishInit();

  return bindings;
}
86
+
/**
 * Build the import object handed to WebAssembly instantiation.
 *
 * The JS bindings are registered under the module specifier
 * './ruvector_onnx_embeddings_wasm_bg.js'.
 */
function getImports() {
  const imports = {};
  imports['./ruvector_onnx_embeddings_wasm_bg.js'] = bindings;
  return imports;
}
95
+
/**
 * Complete initialization after the WASM exports have been attached.
 *
 * Calls the bindings' `__wbindgen_init_externref_table` hook when the
 * bindings expose one, then marks the module as initialized.
 */
function finishInit() {
  const hasExternrefInit = typeof bindings.__wbindgen_init_externref_table === 'function';
  if (hasExternrefInit) {
    bindings.__wbindgen_init_externref_table();
  }
  initialized = true;
}
105
+
/**
 * Report whether `init()` or `initSync()` has completed successfully.
 *
 * @returns `true` once `finishInit()` has run, `false` before that.
 */
function isInitialized() {
  return initialized;
}
112
+
113
+ // Export init functions and all bindings
114
+ module.exports = {
115
+ init,
116
+ initSync,
117
+ isInitialized,
118
+ default: init,
119
+ // Re-export all bindings
120
+ WasmEmbedder: bindings.WasmEmbedder,
121
+ WasmEmbedderConfig: bindings.WasmEmbedderConfig,
122
+ PoolingStrategy: bindings.PoolingStrategy,
123
+ cosineSimilarity: bindings.cosineSimilarity,
124
+ normalizeL2: bindings.normalizeL2,
125
+ simd_available: bindings.simd_available,
126
+ version: bindings.version,
127
+ };
@@ -12,6 +12,26 @@
12
12
  * - Cached model loading (downloads from HuggingFace on first use)
13
13
  * - Batch embedding support
14
14
  * - Optional parallel workers for 3.8x batch speedup
15
+ * - CommonJS-compatible: No --experimental-wasm-modules flag required
16
+ *
17
+ * Quick Start (Simple API - returns arrays directly):
18
+ * ```javascript
19
+ * const { embedText, embedTexts } = require('ruvector');
20
+ *
21
+ * // Single embedding - returns number[]
22
+ * const vector = await embedText("hello world");
23
+ *
24
+ * // Batch embeddings - returns number[][]
25
+ * const vectors = await embedTexts(["hello", "world"]);
26
+ * ```
27
+ *
28
+ * Full API (returns metadata):
29
+ * ```javascript
30
+ * const { embed, embedBatch } = require('ruvector');
31
+ *
32
+ * // Returns { embedding: number[], dimension: number, timeMs: number }
33
+ * const result = await embed("hello world");
34
+ * ```
15
35
  */
16
36
  declare global {
17
37
  var __ruvector_require: NodeRequire | undefined;
@@ -59,6 +79,70 @@ export declare function embed(text: string): Promise<EmbeddingResult>;
59
79
  * Uses parallel workers automatically for batches >= parallelThreshold
60
80
  */
61
81
  export declare function embedBatch(texts: string[]): Promise<EmbeddingResult[]>;
82
+ /**
83
+ * ============================================================================
84
+ * SIMPLE API - Returns arrays directly (for easy integration)
85
+ * ============================================================================
86
+ */
87
+ /**
88
+ * Generate embedding for a single text - returns array directly
89
+ *
90
+ * This is the simplified API that returns just the embedding array,
91
+ * making it easy to use for vector operations, PostgreSQL insertion,
92
+ * and similarity calculations.
93
+ *
94
+ * @param text - The text to embed
95
+ * @returns A 384-dimensional embedding array
96
+ *
97
+ * @example
98
+ * ```javascript
99
+ * const { embedText } = require('ruvector');
100
+ *
101
+ * const vector = await embedText("hello world");
102
+ * console.log(vector.length); // 384
103
+ * console.log(Array.isArray(vector)); // true
104
+ *
105
+ * // Use directly with PostgreSQL
106
+ * await pool.query(
107
+ * 'INSERT INTO docs (content, embedding) VALUES ($1, $2)',
108
+ * [text, JSON.stringify(vector)]
109
+ * );
110
+ * ```
111
+ */
112
+ export declare function embedText(text: string): Promise<number[]>;
113
+ /**
114
+ * Generate embeddings for multiple texts - returns array of arrays
115
+ *
116
+ * This is the simplified batch API that returns just the embedding arrays.
117
+ * Uses optimized batch processing for much faster throughput than
118
+ * calling embedText() in a loop.
119
+ *
120
+ * @param texts - Array of texts to embed
121
+ * @param options - Optional batch processing options
122
+ * @returns Array of 384-dimensional embedding arrays
123
+ *
124
+ * @example
125
+ * ```javascript
126
+ * const { embedTexts } = require('ruvector');
127
+ *
128
+ * // Batch embed 8000 documents in ~30 seconds (vs 53 min sequentially)
129
+ * const vectors = await embedTexts(documents);
130
+ *
131
+ * // With options for very large batches
132
+ * const vectors = await embedTexts(documents, { batchSize: 256 });
133
+ *
134
+ * // Bulk insert into PostgreSQL
135
+ * for (let i = 0; i < documents.length; i++) {
136
+ * await pool.query(
137
+ * 'INSERT INTO docs (content, embedding) VALUES ($1, $2)',
138
+ * [documents[i], JSON.stringify(vectors[i])]
139
+ * );
140
+ * }
141
+ * ```
142
+ */
143
+ export declare function embedTexts(texts: string[], options?: {
144
+ batchSize?: number;
145
+ }): Promise<number[][]>;
62
146
  /**
63
147
  * Calculate cosine similarity between two texts
64
148
  */
@@ -1 +1 @@
1
- {"version":3,"file":"onnx-embedder.d.ts","sourceRoot":"","sources":["../../src/core/onnx-embedder.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;GAcG;AAQH,OAAO,CAAC,MAAM,CAAC;IAEb,IAAI,kBAAkB,EAAE,WAAW,GAAG,SAAS,CAAC;CACjD;AAuBD,MAAM,WAAW,kBAAkB;IACjC,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB;;;;;OAKG;IACH,cAAc,CAAC,EAAE,OAAO,GAAG,MAAM,CAAC;IAClC,wDAAwD;IACxD,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,iEAAiE;IACjE,iBAAiB,CAAC,EAAE,MAAM,CAAC;CAC5B;AAMD,MAAM,WAAW,eAAe;IAC9B,SAAS,EAAE,MAAM,EAAE,CAAC;IACpB,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,gBAAgB;IAC/B,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,MAAM,CAAC;CAChB;AAeD;;GAEG;AACH,wBAAgB,eAAe,IAAI,OAAO,CAOzC;AA6DD;;GAEG;AACH,wBAAsB,gBAAgB,CAAC,MAAM,GAAE,kBAAuB,GAAG,OAAO,CAAC,OAAO,CAAC,CA+FxF;AAED;;GAEG;AACH,wBAAsB,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,eAAe,CAAC,CAiBlE;AAED;;;GAGG;AACH,wBAAsB,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,eAAe,EAAE,CAAC,CAwC5E;AAED;;GAEG;AACH,wBAAsB,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,gBAAgB,CAAC,CAaxF;AAED;;GAEG;AACH,wBAAgB,gBAAgB,CAAC,CAAC,EAAE,MAAM,EAAE,EAAE,CAAC,EAAE,MAAM,EAAE,GAAG,MAAM,CAiBjE;AAED;;GAEG;AACH,wBAAgB,YAAY,IAAI,MAAM,CAErC;AAED;;GAEG;AACH,wBAAgB,OAAO,IAAI,OAAO,CAEjC;AAED;;GAEG;AACH,wBAAgB,QAAQ,IAAI;IAC1B,KAAK,EAAE,OAAO,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,OAAO,CAAC;IACd,QAAQ,EAAE,OAAO,CAAC;IAClB,eAAe,EAAE,MAAM,CAAC;IACxB,iBAAiB,EAAE,MAAM,CAAC;CAC3B,CAUA;AAED;;GAEG;AACH,wBAAsB,QAAQ,IAAI,OAAO,CAAC,IAAI,CAAC,CAM9C;AAGD,qBAAa,YAAY;IACvB,OAAO,CAAC,MAAM,CAAqB;gBAEvB,MAAM,GAAE,kBAAuB;IAIrC,IAAI,IAAI,OAAO,CAAC,OAAO,CAAC;IAIxB,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IAKtC,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC;IAKhD,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;IAK/D,IAAI,SAAS,IAAI,MAAM,CAEtB;IAED,IAAI,KAAK,IAAI,OAAO,CAEnB;CACF;AAED,eAAe,YAAY,CAAC"}
1
+ {"version":3,"file":"onnx-embedder.d.ts","sourceRoot":"","sources":["../../src/core/onnx-embedder.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAkCG;AAQH,OAAO,CAAC,MAAM,CAAC;IAEb,IAAI,kBAAkB,EAAE,WAAW,GAAG,SAAS,CAAC;CACjD;AAsCD,MAAM,WAAW,kBAAkB;IACjC,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB;;;;;OAKG;IACH,cAAc,CAAC,EAAE,OAAO,GAAG,MAAM,CAAC;IAClC,wDAAwD;IACxD,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,iEAAiE;IACjE,iBAAiB,CAAC,EAAE,MAAM,CAAC;CAC5B;AAMD,MAAM,WAAW,eAAe;IAC9B,SAAS,EAAE,MAAM,EAAE,CAAC;IACpB,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,gBAAgB;IAC/B,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,MAAM,CAAC;CAChB;AAeD;;GAEG;AACH,wBAAgB,eAAe,IAAI,OAAO,CAOzC;AA6DD;;GAEG;AACH,wBAAsB,gBAAgB,CAAC,MAAM,GAAE,kBAAuB,GAAG,OAAO,CAAC,OAAO,CAAC,CAyGxF;AAED;;GAEG;AACH,wBAAsB,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,eAAe,CAAC,CAiBlE;AAED;;;GAGG;AACH,wBAAsB,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,eAAe,EAAE,CAAC,CAwC5E;AAED;;;;GAIG;AAEH;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AACH,wBAAsB,SAAS,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,CAU/D;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA6BG;AACH,wBAAsB,UAAU,CAC9B,KAAK,EAAE,MAAM,EAAE,EACf,OAAO,CAAC,EAAE;IAAE,SAAS,CAAC,EAAE,MAAM,CAAA;CAAE,GAC/B,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CA4CrB;AAED;;GAEG;AACH,wBAAsB,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,gBAAgB,CAAC,CAaxF;AAED;;GAEG;AACH,wBAAgB,gBAAgB,CAAC,CAAC,EAAE,MAAM,EAAE,EAAE,CAAC,EAAE,MAAM,EAAE,GAAG,MAAM,CAiBjE;AAED;;GAEG;AACH,wBAAgB,YAAY,IAAI,MAAM,CAErC;AAED;;GAEG;AACH,wBAAgB,OAAO,IAAI,OAAO,CAEjC;AAED;;GAEG;AACH,wBAAgB,QAAQ,IAAI;IAC1B,KAAK,EAAE,OAAO,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,OAAO,CAAC;IACd,QAAQ,EAAE,OAAO,CAAC;IAClB,eAAe,EAAE,MAAM,CAAC;IACxB,iBAAiB,EAAE,MAAM,CAAC;CAC3B,CAUA;AAED;;GAEG;AACH,wBAAsB,QAAQ,IAAI,OAAO,CAAC,IAAI,CAAC,CAM9C;AAGD,qBAAa,YAAY;IACvB,OAAO,CAAC,MAAM,CAAqB;gBAEvB,MAAM,GAAE,kBAAuB;IAIrC,IAAI,IAAI,OAAO,CAAC,OAAO,
CAAC;IAIxB,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IAKtC,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC;IAKhD,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;IAK/D,IAAI,SAAS,IAAI,MAAM,CAEtB;IAED,IAAI,KAAK,IAAI,OAAO,CAEnB;CACF;AAED,eAAe,YAAY,CAAC"}
@@ -13,6 +13,26 @@
13
13
  * - Cached model loading (downloads from HuggingFace on first use)
14
14
  * - Batch embedding support
15
15
  * - Optional parallel workers for 3.8x batch speedup
16
+ * - CommonJS-compatible: No --experimental-wasm-modules flag required
17
+ *
18
+ * Quick Start (Simple API - returns arrays directly):
19
+ * ```javascript
20
+ * const { embedText, embedTexts } = require('ruvector');
21
+ *
22
+ * // Single embedding - returns number[]
23
+ * const vector = await embedText("hello world");
24
+ *
25
+ * // Batch embeddings - returns number[][]
26
+ * const vectors = await embedTexts(["hello", "world"]);
27
+ * ```
28
+ *
29
+ * Full API (returns metadata):
30
+ * ```javascript
31
+ * const { embed, embedBatch } = require('ruvector');
32
+ *
33
+ * // Returns { embedding: number[], dimension: number, timeMs: number }
34
+ * const result = await embed("hello world");
35
+ * ```
16
36
  */
17
37
  var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
18
38
  if (k2 === undefined) k2 = k;
@@ -53,6 +73,8 @@ exports.isOnnxAvailable = isOnnxAvailable;
53
73
  exports.initOnnxEmbedder = initOnnxEmbedder;
54
74
  exports.embed = embed;
55
75
  exports.embedBatch = embedBatch;
76
+ exports.embedText = embedText;
77
+ exports.embedTexts = embedTexts;
56
78
  exports.similarity = similarity;
57
79
  exports.cosineSimilarity = cosineSimilarity;
58
80
  exports.getDimension = getDimension;
@@ -83,6 +105,21 @@ if (typeof globalThis !== 'undefined' && !globalThis.__ruvector_require) {
83
105
  // Force native dynamic import (avoids TypeScript transpiling to require)
84
106
  // eslint-disable-next-line @typescript-eslint/no-implied-eval
85
107
  const dynamicImport = new Function('specifier', 'return import(specifier)');
/**
 * Best-effort load of the CommonJS-compatible WASM loader, which does not
 * need experimental Node flags.
 *
 * @returns The loaded module, or `null` when the loader file does not exist
 *          or loading it throws (the error is intentionally discarded so the
 *          caller can fall back to another loader).
 */
function tryLoadCjsModule() {
    let loadedModule = null;
    try {
        // Plain require() works here because the target is a CJS module
        const cjsPath = path.join(__dirname, 'onnx', 'pkg', 'ruvector_onnx_embeddings_wasm_cjs.js');
        if (fs.existsSync(cjsPath)) {
            // eslint-disable-next-line @typescript-eslint/no-var-requires
            loadedModule = require(cjsPath);
        }
    }
    catch {
        // CJS loader not available
        loadedModule = null;
    }
    return loadedModule;
}
86
123
  // Capability detection
87
124
  let simdAvailable = false;
88
125
  let parallelAvailable = false;
@@ -183,21 +220,32 @@ async function initOnnxEmbedder(config = {}) {
183
220
  // Paths to bundled ONNX files
184
221
  const pkgPath = path.join(__dirname, 'onnx', 'pkg', 'ruvector_onnx_embeddings_wasm.js');
185
222
  const loaderPath = path.join(__dirname, 'onnx', 'loader.js');
186
- if (!fs.existsSync(pkgPath)) {
223
+ const wasmPath = path.join(__dirname, 'onnx', 'pkg', 'ruvector_onnx_embeddings_wasm_bg.wasm');
224
+ if (!fs.existsSync(wasmPath)) {
187
225
  throw new Error('ONNX WASM files not bundled. The onnx/ directory is missing.');
188
226
  }
189
- // Convert paths to file:// URLs for cross-platform ESM compatibility (Windows fix)
190
- const pkgUrl = (0, url_1.pathToFileURL)(pkgPath).href;
191
- const loaderUrl = (0, url_1.pathToFileURL)(loaderPath).href;
192
- // Dynamic import of bundled modules using file:// URLs
193
- wasmModule = await dynamicImport(pkgUrl);
194
- // Initialize WASM module (loads the .wasm file)
195
- const wasmPath = path.join(__dirname, 'onnx', 'pkg', 'ruvector_onnx_embeddings_wasm_bg.wasm');
196
- if (wasmModule.default && typeof wasmModule.default === 'function') {
197
- // For bundler-style initialization, pass the wasm buffer
198
- const wasmBytes = fs.readFileSync(wasmPath);
199
- await wasmModule.default(wasmBytes);
227
+ // Try CJS loader first (no experimental flags needed)
228
+ const cjsModule = tryLoadCjsModule();
229
+ if (cjsModule) {
230
+ // Use CommonJS loader - no experimental flags required!
231
+ await cjsModule.init(wasmPath);
232
+ wasmModule = cjsModule;
200
233
  }
234
+ else {
235
+ // Fall back to ESM loader (may require --experimental-wasm-modules)
236
+ // Convert paths to file:// URLs for cross-platform ESM compatibility (Windows fix)
237
+ const pkgUrl = (0, url_1.pathToFileURL)(pkgPath).href;
238
+ // Dynamic import of bundled modules using file:// URLs
239
+ wasmModule = await dynamicImport(pkgUrl);
240
+ // Initialize WASM module (loads the .wasm file)
241
+ if (wasmModule.default && typeof wasmModule.default === 'function') {
242
+ // For bundler-style initialization, pass the wasm buffer
243
+ const wasmBytes = fs.readFileSync(wasmPath);
244
+ await wasmModule.default(wasmBytes);
245
+ }
246
+ }
247
+ // Load the model loader
248
+ const loaderUrl = (0, url_1.pathToFileURL)(loaderPath).href;
201
249
  const loaderModule = await dynamicImport(loaderUrl);
202
250
  const { ModelLoader } = loaderModule;
203
251
  // Create model loader with caching
@@ -310,6 +358,113 @@ async function embedBatch(texts) {
310
358
  }
311
359
  return results;
312
360
  }
361
+ /**
362
+ * ============================================================================
363
+ * SIMPLE API - Returns arrays directly (for easy integration)
364
+ * ============================================================================
365
+ */
/**
 * Embed a single text and return the vector as a plain array.
 *
 * Simplified counterpart of the metadata-returning API: the embedder is
 * initialized on first use with the default configuration, and the result
 * of `embedder.embedOne()` is copied into a regular array so it can be
 * serialized or passed to other libraries directly.
 *
 * @param text - The text to embed
 * @returns The embedding as a plain array
 * @throws Error when the embedder is still unavailable after initialization
 *
 * @example
 * ```javascript
 * const { embedText } = require('ruvector');
 *
 * const vector = await embedText("hello world");
 * console.log(Array.isArray(vector)); // true
 *
 * // Use directly with PostgreSQL
 * await pool.query(
 *   'INSERT INTO docs (content, embedding) VALUES ($1, $2)',
 *   [text, JSON.stringify(vector)]
 * );
 * ```
 */
async function embedText(text) {
    // Lazily bring up the embedder on first use
    if (!isInitialized)
        await initOnnxEmbedder();
    if (!embedder)
        throw new Error('ONNX embedder not initialized');
    return Array.from(embedder.embedOne(text));
}
/**
 * Embed multiple texts and return one plain array per input text.
 *
 * Simplified batch API. The embedder is initialized on first use with the
 * default configuration. Inputs longer than `batchSize` are split into
 * consecutive chunks of at most `batchSize` texts, which are embedded one
 * after another; results keep the order of `texts`.
 *
 * Within each chunk, the parallel embedder is used when it is enabled,
 * available, and the chunk holds at least `parallelThreshold` texts;
 * otherwise the chunk goes through `embedder.embedBatch()` and the flat
 * result is cut into per-text vectors of `embedder.dimension()` values.
 *
 * @param texts - Array of texts to embed
 * @param options - Optional batch processing options
 * @param options.batchSize - Maximum texts per chunk. Values that are not a
 *        number >= 1 (missing, 0, negative, NaN, non-numeric) fall back to
 *        256; fractional values are rounded down.
 * @returns Array of embedding arrays, empty when `texts` is empty
 * @throws Error when the embedder is still unavailable after initialization
 *
 * @example
 * ```javascript
 * const { embedTexts } = require('ruvector');
 *
 * const vectors = await embedTexts(documents);
 *
 * // With options for very large batches
 * const chunked = await embedTexts(documents, { batchSize: 256 });
 *
 * // Bulk insert into PostgreSQL
 * for (let i = 0; i < documents.length; i++) {
 *   await pool.query(
 *     'INSERT INTO docs (content, embedding) VALUES ($1, $2)',
 *     [documents[i], JSON.stringify(vectors[i])]
 *   );
 * }
 * ```
 */
async function embedTexts(texts, options) {
    if (!isInitialized) {
        await initOnnxEmbedder();
    }
    if (!embedder) {
        throw new Error('ONNX embedder not initialized');
    }
    if (texts.length === 0) {
        return [];
    }
    // Only accept a usable chunk size: a negative step would never advance
    // the chunk loop below, so anything that is not a number >= 1 uses the default.
    const DEFAULT_BATCH_SIZE = 256;
    const requestedSize = options?.batchSize;
    const batchSize = typeof requestedSize === 'number' && requestedSize >= 1
        ? Math.floor(requestedSize)
        : DEFAULT_BATCH_SIZE;
    // Embeds one chunk that already fits within batchSize
    const embedChunk = async (chunk) => {
        // Use parallel workers for large batches
        if (parallelEnabled && parallelEmbedder && chunk.length >= parallelThreshold) {
            const batchResults = await parallelEmbedder.embedBatch(chunk);
            return batchResults.map((emb) => Array.from(emb));
        }
        // Sequential processing: split the flat result into per-text vectors
        const batchEmbeddings = embedder.embedBatch(chunk);
        const dimension = embedder.dimension();
        const vectors = [];
        for (let i = 0; i < chunk.length; i++) {
            const embedding = batchEmbeddings.slice(i * dimension, (i + 1) * dimension);
            vectors.push(Array.from(embedding));
        }
        return vectors;
    };
    // For small batches, process all at once
    if (texts.length <= batchSize) {
        return embedChunk(texts);
    }
    // Process in chunks for very large batches, honoring the caller's batchSize
    const results = [];
    for (let start = 0; start < texts.length; start += batchSize) {
        const chunkResults = await embedChunk(texts.slice(start, start + batchSize));
        // Push one by one: spreading a huge chunk into push() can exceed the
        // engine's argument-count limit.
        for (const vector of chunkResults) {
            results.push(vector);
        }
    }
    return results;
}
313
468
  /**
314
469
  * Calculate cosine similarity between two texts
315
470
  */
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ruvector",
3
- "version": "0.1.94",
3
+ "version": "0.1.95",
4
4
  "description": "High-performance vector database for Node.js with automatic native/WASM fallback",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",
@@ -8,7 +8,7 @@
8
8
  "ruvector": "./bin/cli.js"
9
9
  },
10
10
  "scripts": {
11
- "build": "tsc && cp src/core/onnx/pkg/package.json dist/core/onnx/pkg/",
11
+ "build": "tsc && cp src/core/onnx/pkg/package.json dist/core/onnx/pkg/ && cp src/core/onnx/pkg/ruvector_onnx_embeddings_wasm_cjs.js dist/core/onnx/pkg/",
12
12
  "prepublishOnly": "npm run build",
13
13
  "test": "node test/integration.js"
14
14
  },