ruvector 0.1.94 → 0.1.95
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CommonJS-compatible WASM loader for Node.js
|
|
3
|
+
*
|
|
4
|
+
* This file provides a way to load the WASM module without requiring
|
|
5
|
+
* the --experimental-wasm-modules flag by manually loading the WASM bytes.
|
|
6
|
+
*
|
|
7
|
+
* Usage:
|
|
8
|
+
* const wasm = require('./ruvector_onnx_embeddings_wasm_cjs.js');
|
|
9
|
+
* await wasm.init(); // or wasm.initSync(wasmBytes)
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
const fs = require('fs');
|
|
13
|
+
const path = require('path');
|
|
14
|
+
|
|
15
|
+
// Re-export everything from the JS bindings
|
|
16
|
+
const bindings = require('./ruvector_onnx_embeddings_wasm_bg.js');
|
|
17
|
+
|
|
18
|
+
// Track initialization state
|
|
19
|
+
let initialized = false;
|
|
20
|
+
let initPromise = null;
|
|
21
|
+
|
|
22
|
+
/**
 * Initialize the WASM module asynchronously.
 *
 * Concurrent callers share a single in-flight initialization. If that attempt
 * fails, the cached promise is cleared so a later call can retry instead of
 * re-throwing the stale failure forever.
 *
 * @param {WebAssembly.Module|ArrayBuffer|Uint8Array|string} [wasmInput]
 *   An already compiled module, the raw WASM bytes, or a path to the .wasm
 *   file. Any other value (including undefined) loads
 *   `ruvector_onnx_embeddings_wasm_bg.wasm` from this file's directory.
 * @returns {Promise<object>} The JS bindings object, once the WASM instance
 *   has been attached to it.
 * @throws Rejects if the file cannot be read or the module fails to compile
 *   or instantiate.
 */
async function init(wasmInput) {
  if (initialized) return bindings;

  if (!initPromise) {
    const load = async () => {
      let wasmModule;

      if (wasmInput instanceof WebAssembly.Module) {
        // Already compiled module
        wasmModule = wasmInput;
      } else {
        let wasmBytes;
        if (wasmInput instanceof ArrayBuffer || wasmInput instanceof Uint8Array) {
          // Raw bytes provided
          wasmBytes = wasmInput;
        } else if (typeof wasmInput === 'string') {
          // Path to WASM file; read without blocking the event loop
          wasmBytes = await fs.promises.readFile(wasmInput);
        } else {
          // Auto-detect WASM file location
          const wasmPath = path.join(__dirname, 'ruvector_onnx_embeddings_wasm_bg.wasm');
          wasmBytes = await fs.promises.readFile(wasmPath);
        }
        wasmModule = await WebAssembly.compile(wasmBytes);
      }

      const instance = await WebAssembly.instantiate(wasmModule, getImports());
      bindings.__wbg_set_wasm(instance.exports);
      finishInit();
    };

    // Drop the cached promise on failure so the next call starts a fresh attempt.
    initPromise = load().catch((err) => {
      initPromise = null;
      throw err;
    });
  }

  await initPromise;
  return bindings;
}
|
|
65
|
+
|
|
66
|
+
/**
 * Initialize the WASM module synchronously.
 *
 * Accepts the same kinds of input as `init`, so callers can switch between
 * the two without converting their argument.
 *
 * @param {WebAssembly.Module|ArrayBuffer|Uint8Array|string} [wasmBytes]
 *   Raw WASM bytes, an already compiled module, or a path to the .wasm file.
 *   When falsy, `ruvector_onnx_embeddings_wasm_bg.wasm` is read from this
 *   file's directory.
 * @returns {object} The JS bindings object with the WASM instance attached.
 * @throws If the file cannot be read or the module fails to compile or
 *   instantiate.
 */
function initSync(wasmBytes) {
  if (initialized) return bindings;

  let wasmModule;
  if (wasmBytes instanceof WebAssembly.Module) {
    // Already compiled: compiling it again would throw a TypeError.
    wasmModule = wasmBytes;
  } else {
    let source = wasmBytes;
    if (!source) {
      const wasmPath = path.join(__dirname, 'ruvector_onnx_embeddings_wasm_bg.wasm');
      source = fs.readFileSync(wasmPath);
    } else if (typeof source === 'string') {
      // Treat a string as a path to the .wasm file, matching init().
      source = fs.readFileSync(source);
    }
    wasmModule = new WebAssembly.Module(source);
  }

  const instance = new WebAssembly.Instance(wasmModule, getImports());

  bindings.__wbg_set_wasm(instance.exports);
  finishInit();

  return bindings;
}
|
|
86
|
+
|
|
87
|
+
/**
 * Build the import object handed to WebAssembly at instantiation time.
 *
 * @returns {object} An object with a single entry that maps the bindings
 *   module specifier to the JS bindings.
 */
function getImports() {
  const importObject = {};
  importObject['./ruvector_onnx_embeddings_wasm_bg.js'] = bindings;
  return importObject;
}
|
|
95
|
+
|
|
96
|
+
/**
 * Complete initialization after the WASM exports have been attached.
 *
 * Calls the bindings' `__wbindgen_init_externref_table` hook when the
 * bindings expose it as a function, then flags the loader as initialized.
 */
function finishInit() {
  const hasExternrefInit = typeof bindings.__wbindgen_init_externref_table === 'function';
  // Invoke as a method so `this` stays bound to the bindings object.
  if (hasExternrefInit) bindings.__wbindgen_init_externref_table();
  initialized = true;
}
|
|
105
|
+
|
|
106
|
+
/**
 * Report whether the loader has finished initializing.
 *
 * @returns {boolean} The current value of the module-level `initialized` flag.
 */
function isInitialized() {
  const ready = initialized;
  return ready;
}
|
|
112
|
+
|
|
113
|
+
// Export init functions and all bindings
|
|
114
|
+
module.exports = {
|
|
115
|
+
init,
|
|
116
|
+
initSync,
|
|
117
|
+
isInitialized,
|
|
118
|
+
default: init,
|
|
119
|
+
// Re-export all bindings
|
|
120
|
+
WasmEmbedder: bindings.WasmEmbedder,
|
|
121
|
+
WasmEmbedderConfig: bindings.WasmEmbedderConfig,
|
|
122
|
+
PoolingStrategy: bindings.PoolingStrategy,
|
|
123
|
+
cosineSimilarity: bindings.cosineSimilarity,
|
|
124
|
+
normalizeL2: bindings.normalizeL2,
|
|
125
|
+
simd_available: bindings.simd_available,
|
|
126
|
+
version: bindings.version,
|
|
127
|
+
};
|
|
@@ -12,6 +12,26 @@
|
|
|
12
12
|
* - Cached model loading (downloads from HuggingFace on first use)
|
|
13
13
|
* - Batch embedding support
|
|
14
14
|
* - Optional parallel workers for 3.8x batch speedup
|
|
15
|
+
* - CommonJS-compatible: No --experimental-wasm-modules flag required
|
|
16
|
+
*
|
|
17
|
+
* Quick Start (Simple API - returns arrays directly):
|
|
18
|
+
* ```javascript
|
|
19
|
+
* const { embedText, embedTexts } = require('ruvector');
|
|
20
|
+
*
|
|
21
|
+
* // Single embedding - returns number[]
|
|
22
|
+
* const vector = await embedText("hello world");
|
|
23
|
+
*
|
|
24
|
+
* // Batch embeddings - returns number[][]
|
|
25
|
+
* const vectors = await embedTexts(["hello", "world"]);
|
|
26
|
+
* ```
|
|
27
|
+
*
|
|
28
|
+
* Full API (returns metadata):
|
|
29
|
+
* ```javascript
|
|
30
|
+
* const { embed, embedBatch } = require('ruvector');
|
|
31
|
+
*
|
|
32
|
+
* // Returns { embedding: number[], dimension: number, timeMs: number }
|
|
33
|
+
* const result = await embed("hello world");
|
|
34
|
+
* ```
|
|
15
35
|
*/
|
|
16
36
|
declare global {
|
|
17
37
|
var __ruvector_require: NodeRequire | undefined;
|
|
@@ -59,6 +79,70 @@ export declare function embed(text: string): Promise<EmbeddingResult>;
|
|
|
59
79
|
* Uses parallel workers automatically for batches >= parallelThreshold
|
|
60
80
|
*/
|
|
61
81
|
export declare function embedBatch(texts: string[]): Promise<EmbeddingResult[]>;
|
|
82
|
+
/**
|
|
83
|
+
* ============================================================================
|
|
84
|
+
* SIMPLE API - Returns arrays directly (for easy integration)
|
|
85
|
+
* ============================================================================
|
|
86
|
+
*/
|
|
87
|
+
/**
|
|
88
|
+
* Generate embedding for a single text - returns array directly
|
|
89
|
+
*
|
|
90
|
+
* This is the simplified API that returns just the embedding array,
|
|
91
|
+
* making it easy to use for vector operations, PostgreSQL insertion,
|
|
92
|
+
* and similarity calculations.
|
|
93
|
+
*
|
|
94
|
+
* @param text - The text to embed
|
|
95
|
+
* @returns A 384-dimensional embedding array
|
|
96
|
+
*
|
|
97
|
+
* @example
|
|
98
|
+
* ```javascript
|
|
99
|
+
* const { embedText } = require('ruvector');
|
|
100
|
+
*
|
|
101
|
+
* const vector = await embedText("hello world");
|
|
102
|
+
* console.log(vector.length); // 384
|
|
103
|
+
* console.log(Array.isArray(vector)); // true
|
|
104
|
+
*
|
|
105
|
+
* // Use directly with PostgreSQL
|
|
106
|
+
* await pool.query(
|
|
107
|
+
* 'INSERT INTO docs (content, embedding) VALUES ($1, $2)',
|
|
108
|
+
* [text, JSON.stringify(vector)]
|
|
109
|
+
* );
|
|
110
|
+
* ```
|
|
111
|
+
*/
|
|
112
|
+
export declare function embedText(text: string): Promise<number[]>;
|
|
113
|
+
/**
|
|
114
|
+
* Generate embeddings for multiple texts - returns array of arrays
|
|
115
|
+
*
|
|
116
|
+
* This is the simplified batch API that returns just the embedding arrays.
|
|
117
|
+
* Uses optimized batch processing for much faster throughput than
|
|
118
|
+
* calling embedText() in a loop.
|
|
119
|
+
*
|
|
120
|
+
* @param texts - Array of texts to embed
|
|
121
|
+
* @param options - Optional batch processing options
|
|
122
|
+
* @returns Array of 384-dimensional embedding arrays
|
|
123
|
+
*
|
|
124
|
+
* @example
|
|
125
|
+
* ```javascript
|
|
126
|
+
* const { embedTexts } = require('ruvector');
|
|
127
|
+
*
|
|
128
|
+
* // Batch embed 8000 documents in ~30 seconds (vs 53 min sequentially)
|
|
129
|
+
* const vectors = await embedTexts(documents);
|
|
130
|
+
*
|
|
131
|
+
* // With options for very large batches
|
|
132
|
+
 * const chunkedVectors = await embedTexts(documents, { batchSize: 256 });
|
|
133
|
+
*
|
|
134
|
+
* // Bulk insert into PostgreSQL
|
|
135
|
+
* for (let i = 0; i < documents.length; i++) {
|
|
136
|
+
* await pool.query(
|
|
137
|
+
* 'INSERT INTO docs (content, embedding) VALUES ($1, $2)',
|
|
138
|
+
* [documents[i], JSON.stringify(vectors[i])]
|
|
139
|
+
* );
|
|
140
|
+
* }
|
|
141
|
+
* ```
|
|
142
|
+
*/
|
|
143
|
+
export declare function embedTexts(texts: string[], options?: {
|
|
144
|
+
batchSize?: number;
|
|
145
|
+
}): Promise<number[][]>;
|
|
62
146
|
/**
|
|
63
147
|
* Calculate cosine similarity between two texts
|
|
64
148
|
*/
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"onnx-embedder.d.ts","sourceRoot":"","sources":["../../src/core/onnx-embedder.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"onnx-embedder.d.ts","sourceRoot":"","sources":["../../src/core/onnx-embedder.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAkCG;AAQH,OAAO,CAAC,MAAM,CAAC;IAEb,IAAI,kBAAkB,EAAE,WAAW,GAAG,SAAS,CAAC;CACjD;AAsCD,MAAM,WAAW,kBAAkB;IACjC,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB;;;;;OAKG;IACH,cAAc,CAAC,EAAE,OAAO,GAAG,MAAM,CAAC;IAClC,wDAAwD;IACxD,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,iEAAiE;IACjE,iBAAiB,CAAC,EAAE,MAAM,CAAC;CAC5B;AAMD,MAAM,WAAW,eAAe;IAC9B,SAAS,EAAE,MAAM,EAAE,CAAC;IACpB,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,gBAAgB;IAC/B,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,MAAM,CAAC;CAChB;AAeD;;GAEG;AACH,wBAAgB,eAAe,IAAI,OAAO,CAOzC;AA6DD;;GAEG;AACH,wBAAsB,gBAAgB,CAAC,MAAM,GAAE,kBAAuB,GAAG,OAAO,CAAC,OAAO,CAAC,CAyGxF;AAED;;GAEG;AACH,wBAAsB,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,eAAe,CAAC,CAiBlE;AAED;;;GAGG;AACH,wBAAsB,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,eAAe,EAAE,CAAC,CAwC5E;AAED;;;;GAIG;AAEH;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AACH,wBAAsB,SAAS,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,CAU/D;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA6BG;AACH,wBAAsB,UAAU,CAC9B,KAAK,EAAE,MAAM,EAAE,EACf,OAAO,CAAC,EAAE;IAAE,SAAS,CAAC,EAAE,MAAM,CAAA;CAAE,GAC/B,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CA4CrB;AAED;;GAEG;AACH,wBAAsB,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,gBAAgB,CAAC,CAaxF;AAED;;GAEG;AACH,wBAAgB,gBAAgB,CAAC,CAAC,EAAE,MAAM,EAAE,EAAE,CAAC,EAAE,MAAM,EAAE,GAAG,MAAM,CAiBjE;AAED;;GAEG;AACH,wBAAgB,YAAY,IAAI,MAAM,CAErC;AAED;;GAEG;AACH,wBAAgB,OAAO,IAAI,OAAO,CAEjC;AAED;;GAEG;AACH,wBAAgB,QAAQ,IAAI;IAC1B,KAAK,EAAE,OAAO,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,OAAO,CAAC;IACd,QAAQ,EAAE,OAAO,CAAC;IAClB,eAAe,EAAE,MAAM,CAAC;IACxB,iBAAiB,EAAE,MAAM,CAAC;CAC3B,CAUA;AAED;;GAEG;AACH,wBAAsB,QAAQ,IAAI,OAAO,CAAC,IAAI,CAAC,CAM9C;AAGD,qBAAa,YAAY;IACvB,OAAO,CAAC,MAAM,CAAqB;gBAEvB,MAAM,GAAE,kBAAuB;IAIrC,IAAI,IAAI,OAAO,CAAC,OAAO,CA
AC;IAIxB,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IAKtC,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC;IAKhD,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;IAK/D,IAAI,SAAS,IAAI,MAAM,CAEtB;IAED,IAAI,KAAK,IAAI,OAAO,CAEnB;CACF;AAED,eAAe,YAAY,CAAC"}
|
|
@@ -13,6 +13,26 @@
|
|
|
13
13
|
* - Cached model loading (downloads from HuggingFace on first use)
|
|
14
14
|
* - Batch embedding support
|
|
15
15
|
* - Optional parallel workers for 3.8x batch speedup
|
|
16
|
+
* - CommonJS-compatible: No --experimental-wasm-modules flag required
|
|
17
|
+
*
|
|
18
|
+
* Quick Start (Simple API - returns arrays directly):
|
|
19
|
+
* ```javascript
|
|
20
|
+
* const { embedText, embedTexts } = require('ruvector');
|
|
21
|
+
*
|
|
22
|
+
* // Single embedding - returns number[]
|
|
23
|
+
* const vector = await embedText("hello world");
|
|
24
|
+
*
|
|
25
|
+
* // Batch embeddings - returns number[][]
|
|
26
|
+
* const vectors = await embedTexts(["hello", "world"]);
|
|
27
|
+
* ```
|
|
28
|
+
*
|
|
29
|
+
* Full API (returns metadata):
|
|
30
|
+
* ```javascript
|
|
31
|
+
* const { embed, embedBatch } = require('ruvector');
|
|
32
|
+
*
|
|
33
|
+
* // Returns { embedding: number[], dimension: number, timeMs: number }
|
|
34
|
+
* const result = await embed("hello world");
|
|
35
|
+
* ```
|
|
16
36
|
*/
|
|
17
37
|
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
18
38
|
if (k2 === undefined) k2 = k;
|
|
@@ -53,6 +73,8 @@ exports.isOnnxAvailable = isOnnxAvailable;
|
|
|
53
73
|
exports.initOnnxEmbedder = initOnnxEmbedder;
|
|
54
74
|
exports.embed = embed;
|
|
55
75
|
exports.embedBatch = embedBatch;
|
|
76
|
+
exports.embedText = embedText;
|
|
77
|
+
exports.embedTexts = embedTexts;
|
|
56
78
|
exports.similarity = similarity;
|
|
57
79
|
exports.cosineSimilarity = cosineSimilarity;
|
|
58
80
|
exports.getDimension = getDimension;
|
|
@@ -83,6 +105,21 @@ if (typeof globalThis !== 'undefined' && !globalThis.__ruvector_require) {
|
|
|
83
105
|
// Force native dynamic import (avoids TypeScript transpiling to require)
|
|
84
106
|
// eslint-disable-next-line @typescript-eslint/no-implied-eval
|
|
85
107
|
const dynamicImport = new Function('specifier', 'return import(specifier)');
|
|
108
|
+
/**
 * Best-effort load of the CommonJS-compatible WASM loader, which works
 * without experimental Node.js flags.
 *
 * @returns The loaded module, or null when the loader file does not exist or
 *   loading it throws.
 */
function tryLoadCjsModule() {
    try {
        const loaderFile = path.join(__dirname, 'onnx', 'pkg', 'ruvector_onnx_embeddings_wasm_cjs.js');
        // Plain require: the loader is a CJS module.
        // eslint-disable-next-line @typescript-eslint/no-var-requires
        return fs.existsSync(loaderFile) ? require(loaderFile) : null;
    }
    catch {
        // Any failure means the CJS loader is unavailable.
        return null;
    }
}
|
|
86
123
|
// Capability detection
|
|
87
124
|
let simdAvailable = false;
|
|
88
125
|
let parallelAvailable = false;
|
|
@@ -183,21 +220,32 @@ async function initOnnxEmbedder(config = {}) {
|
|
|
183
220
|
// Paths to bundled ONNX files
|
|
184
221
|
const pkgPath = path.join(__dirname, 'onnx', 'pkg', 'ruvector_onnx_embeddings_wasm.js');
|
|
185
222
|
const loaderPath = path.join(__dirname, 'onnx', 'loader.js');
|
|
186
|
-
|
|
223
|
+
const wasmPath = path.join(__dirname, 'onnx', 'pkg', 'ruvector_onnx_embeddings_wasm_bg.wasm');
|
|
224
|
+
if (!fs.existsSync(wasmPath)) {
|
|
187
225
|
throw new Error('ONNX WASM files not bundled. The onnx/ directory is missing.');
|
|
188
226
|
}
|
|
189
|
-
//
|
|
190
|
-
const
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
const wasmPath = path.join(__dirname, 'onnx', 'pkg', 'ruvector_onnx_embeddings_wasm_bg.wasm');
|
|
196
|
-
if (wasmModule.default && typeof wasmModule.default === 'function') {
|
|
197
|
-
// For bundler-style initialization, pass the wasm buffer
|
|
198
|
-
const wasmBytes = fs.readFileSync(wasmPath);
|
|
199
|
-
await wasmModule.default(wasmBytes);
|
|
227
|
+
// Try CJS loader first (no experimental flags needed)
|
|
228
|
+
const cjsModule = tryLoadCjsModule();
|
|
229
|
+
if (cjsModule) {
|
|
230
|
+
// Use CommonJS loader - no experimental flags required!
|
|
231
|
+
await cjsModule.init(wasmPath);
|
|
232
|
+
wasmModule = cjsModule;
|
|
200
233
|
}
|
|
234
|
+
else {
|
|
235
|
+
// Fall back to ESM loader (may require --experimental-wasm-modules)
|
|
236
|
+
// Convert paths to file:// URLs for cross-platform ESM compatibility (Windows fix)
|
|
237
|
+
const pkgUrl = (0, url_1.pathToFileURL)(pkgPath).href;
|
|
238
|
+
// Dynamic import of bundled modules using file:// URLs
|
|
239
|
+
wasmModule = await dynamicImport(pkgUrl);
|
|
240
|
+
// Initialize WASM module (loads the .wasm file)
|
|
241
|
+
if (wasmModule.default && typeof wasmModule.default === 'function') {
|
|
242
|
+
// For bundler-style initialization, pass the wasm buffer
|
|
243
|
+
const wasmBytes = fs.readFileSync(wasmPath);
|
|
244
|
+
await wasmModule.default(wasmBytes);
|
|
245
|
+
}
|
|
246
|
+
}
|
|
247
|
+
// Load the model loader
|
|
248
|
+
const loaderUrl = (0, url_1.pathToFileURL)(loaderPath).href;
|
|
201
249
|
const loaderModule = await dynamicImport(loaderUrl);
|
|
202
250
|
const { ModelLoader } = loaderModule;
|
|
203
251
|
// Create model loader with caching
|
|
@@ -310,6 +358,113 @@ async function embedBatch(texts) {
|
|
|
310
358
|
}
|
|
311
359
|
return results;
|
|
312
360
|
}
|
|
361
|
+
/**
|
|
362
|
+
* ============================================================================
|
|
363
|
+
* SIMPLE API - Returns arrays directly (for easy integration)
|
|
364
|
+
* ============================================================================
|
|
365
|
+
*/
|
|
366
|
+
/**
 * Embed a single text and return the embedding as a plain array.
 *
 * Initializes the ONNX embedder on first use, then copies the result of
 * `embedder.embedOne(text)` into a regular JavaScript array.
 *
 * @param text - The text to embed
 * @returns The embedding as a number[] (its length depends on the loaded
 *   model; the package docs state 384 — confirm for your model)
 * @throws Error if the embedder is still unavailable after initialization
 *
 * @example
 * ```javascript
 * const { embedText } = require('ruvector');
 *
 * const vector = await embedText("hello world");
 * console.log(Array.isArray(vector)); // true
 * ```
 */
async function embedText(text) {
    if (!isInitialized)
        await initOnnxEmbedder();
    if (!embedder)
        throw new Error('ONNX embedder not initialized');
    return Array.from(embedder.embedOne(text));
}
|
|
401
|
+
/**
 * Embed multiple texts and return one plain array per text, in input order.
 *
 * Initializes the ONNX embedder on first use. Inputs longer than the batch
 * size are processed chunk by chunk. Each chunk goes to the parallel embedder
 * when it is enabled and the chunk has at least `parallelThreshold` texts;
 * otherwise the main embedder's flat batch output is sliced into per-text
 * vectors using `embedder.dimension()`.
 *
 * @param texts - Array of texts to embed
 * @param options - Optional batch processing options
 * @param options.batchSize - Maximum texts per chunk. Defaults to 256; values
 *   that are missing, non-finite or below 1 fall back to the default, and
 *   fractional values are rounded down.
 * @returns Array of embedding arrays (empty when `texts` is empty)
 * @throws Error if the embedder is still unavailable after initialization
 *
 * @example
 * ```javascript
 * const { embedTexts } = require('ruvector');
 *
 * const vectors = await embedTexts(documents);
 *
 * // Custom chunk size for very large inputs
 * const chunkedVectors = await embedTexts(documents, { batchSize: 512 });
 * ```
 */
async function embedTexts(texts, options) {
    if (!isInitialized) {
        await initOnnxEmbedder();
    }
    if (!embedder) {
        throw new Error('ONNX embedder not initialized');
    }
    if (texts.length === 0) {
        return [];
    }
    const DEFAULT_BATCH_SIZE = 256;
    const requestedSize = options?.batchSize;
    // Reject sizes that would never advance the chunk loop (negative, NaN, 0).
    const batchSize = Number.isFinite(requestedSize) && requestedSize >= 1
        ? Math.floor(requestedSize)
        : DEFAULT_BATCH_SIZE;
    // Embeds one chunk that is already within the batch size.
    const embedChunk = async (chunk) => {
        // Use parallel workers for large batches
        if (parallelEnabled && parallelEmbedder && chunk.length >= parallelThreshold) {
            const batchResults = await parallelEmbedder.embedBatch(chunk);
            return batchResults.map((emb) => Array.from(emb));
        }
        // Sequential processing: the batch call returns one flat buffer
        const flatEmbeddings = embedder.embedBatch(chunk);
        const dimension = embedder.dimension();
        const vectors = [];
        for (let i = 0; i < chunk.length; i++) {
            vectors.push(Array.from(flatEmbeddings.slice(i * dimension, (i + 1) * dimension)));
        }
        return vectors;
    };
    // For small batches, process all at once
    if (texts.length <= batchSize) {
        return embedChunk(texts);
    }
    // Process in chunks, honoring the caller's batch size for every chunk
    const results = [];
    for (let start = 0; start < texts.length; start += batchSize) {
        const chunkVectors = await embedChunk(texts.slice(start, start + batchSize));
        // Push one by one: spreading a huge chunk could exceed the argument limit
        for (const vector of chunkVectors) {
            results.push(vector);
        }
    }
    return results;
}
|
|
313
468
|
/**
|
|
314
469
|
* Calculate cosine similarity between two texts
|
|
315
470
|
*/
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "ruvector",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.95",
|
|
4
4
|
"description": "High-performance vector database for Node.js with automatic native/WASM fallback",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"types": "dist/index.d.ts",
|
|
@@ -8,7 +8,7 @@
|
|
|
8
8
|
"ruvector": "./bin/cli.js"
|
|
9
9
|
},
|
|
10
10
|
"scripts": {
|
|
11
|
-
"build": "tsc && cp src/core/onnx/pkg/package.json dist/core/onnx/pkg/",
|
|
11
|
+
"build": "tsc && cp src/core/onnx/pkg/package.json dist/core/onnx/pkg/ && cp src/core/onnx/pkg/ruvector_onnx_embeddings_wasm_cjs.js dist/core/onnx/pkg/",
|
|
12
12
|
"prepublishOnly": "npm run build",
|
|
13
13
|
"test": "node test/integration.js"
|
|
14
14
|
},
|