ruvector 0.1.53 → 0.1.55

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,81 @@
1
+ /**
2
+ * ONNX WASM Embedder - Semantic embeddings for hooks
3
+ *
4
+ * Provides real transformer-based embeddings using all-MiniLM-L6-v2
5
+ * running in pure WASM (no native dependencies).
6
+ *
7
+ * Uses bundled ONNX WASM files from src/core/onnx/
8
+ *
9
+ * Features:
10
+ * - 384-dimensional semantic embeddings
11
+ * - Real semantic understanding (not hash-based)
12
+ * - Cached model loading (downloads from HuggingFace on first use)
13
+ * - Batch embedding support
14
+ */
/**
 * Options accepted when initializing the ONNX embedder.
 * Every field is optional; omitted fields fall back to built-in defaults.
 */
export interface OnnxEmbedderConfig {
    /** Identifier of the model to load. Defaults to `all-MiniLM-L6-v2`. */
    modelId?: string;
    /**
     * Maximum input length handed to the embedder. Falls back to the value
     * from the loaded model's own config, then to 256.
     */
    maxLength?: number;
    /** Whether embeddings are normalized. Enabled unless explicitly `false`. */
    normalize?: boolean;
    /**
     * Directory used by the model loader's cache. Defaults to
     * `.ruvector/models` under the user's home directory (a temp-directory
     * location is used when no home directory is known).
     */
    cacheDir?: string;
}
/** Outcome of embedding a single text. */
export interface EmbeddingResult {
    /** The embedding vector as a plain number array. */
    embedding: number[];
    /** Number of components in the embedding vector. */
    dimension: number;
    /**
     * Elapsed time in milliseconds. For batch calls this is the batch total
     * divided evenly across the inputs, not a per-item measurement.
     */
    timeMs: number;
}
/** Outcome of comparing two texts. */
export interface SimilarityResult {
    /** Similarity score reported by the embedder for the two texts. */
    similarity: number;
    /** Elapsed time of the comparison in milliseconds. */
    timeMs: number;
}
/**
 * Report whether the bundled ONNX WASM package file exists next to this
 * module. Returns `false` (never throws) when the check itself fails.
 */
export declare function isOnnxAvailable(): boolean;

/**
 * Initialize the shared ONNX embedder, loading the model on first use.
 *
 * Resolves to `true` once the embedder is ready. Calls made while a load is
 * in progress wait for that same load. A failed load is remembered and
 * rethrown by later calls.
 *
 * @param config Optional settings; only the call that actually starts the
 *               load has its settings applied.
 * @throws Error when the bundled files are missing or loading fails.
 */
export declare function initOnnxEmbedder(config?: OnnxEmbedderConfig): Promise<boolean>;

/**
 * Embed a single text, initializing the embedder with default settings
 * first if it has not been initialized yet.
 *
 * @param text Text to embed.
 * @returns The vector, its length, and the elapsed time in milliseconds.
 */
export declare function embed(text: string): Promise<EmbeddingResult>;

/**
 * Embed several texts in one call. Each result's `timeMs` is the batch
 * total divided by the number of texts.
 *
 * @param texts Texts to embed, in order.
 * @returns One result per input text, in the same order.
 */
export declare function embedBatch(texts: string[]): Promise<EmbeddingResult[]>;

/**
 * Compare two texts using the embedder's own similarity routine.
 *
 * @returns The similarity score and the elapsed time in milliseconds.
 */
export declare function similarity(text1: string, text2: string): Promise<SimilarityResult>;

/**
 * Cosine similarity of two already-computed embedding vectors.
 *
 * @returns The cosine of the angle between `a` and `b`, or `0` when either
 *          vector has zero magnitude.
 * @throws Error when the vectors differ in length.
 */
export declare function cosineSimilarity(a: number[], b: number[]): number;

/**
 * Embedding dimension of the loaded embedder, or `384` before one has been
 * loaded.
 */
export declare function getDimension(): number;

/** Whether the embedder has finished initializing successfully. */
export declare function isReady(): boolean;

/**
 * Snapshot of embedder status: readiness flag, embedding dimension
 * (`384` before loading) and a model name.
 */
export declare function getStats(): {
    ready: boolean;
    dimension: number;
    model: string;
};
/**
 * Class-style facade over the module-level embedder functions.
 *
 * The underlying embedder is module-level state, so every instance shares
 * the same loaded model.
 */
export declare class OnnxEmbedder {
    private config;

    /** @param config Settings handed to the initializer by {@link OnnxEmbedder.init}. */
    constructor(config?: OnnxEmbedderConfig);

    /** Initialize the shared embedder with this instance's settings. */
    init(): Promise<boolean>;

    /** Embed one text and return only the vector. */
    embed(text: string): Promise<number[]>;

    /** Embed several texts and return only the vectors, in input order. */
    embedBatch(texts: string[]): Promise<number[][]>;

    /** Compare two texts and return only the similarity score. */
    similarity(text1: string, text2: string): Promise<number>;

    /** Embedding dimension (`384` before the embedder is loaded). */
    get dimension(): number;

    /** Whether the shared embedder has been initialized. */
    get ready(): boolean;
}
80
+ export default OnnxEmbedder;
81
+ //# sourceMappingURL=onnx-embedder.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"onnx-embedder.d.ts","sourceRoot":"","sources":["../../src/core/onnx-embedder.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAUH,MAAM,WAAW,kBAAkB;IACjC,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,eAAe;IAC9B,SAAS,EAAE,MAAM,EAAE,CAAC;IACpB,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,gBAAgB;IAC/B,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,MAAM,CAAC;CAChB;AAYD;;GAEG;AACH,wBAAgB,eAAe,IAAI,OAAO,CAOzC;AAED;;GAEG;AACH,wBAAsB,gBAAgB,CAAC,MAAM,GAAE,kBAAuB,GAAG,OAAO,CAAC,OAAO,CAAC,CA8DxF;AAED;;GAEG;AACH,wBAAsB,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,eAAe,CAAC,CAiBlE;AAED;;GAEG;AACH,wBAAsB,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,eAAe,EAAE,CAAC,CAyB5E;AAED;;GAEG;AACH,wBAAsB,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,gBAAgB,CAAC,CAaxF;AAED;;GAEG;AACH,wBAAgB,gBAAgB,CAAC,CAAC,EAAE,MAAM,EAAE,EAAE,CAAC,EAAE,MAAM,EAAE,GAAG,MAAM,CAiBjE;AAED;;GAEG;AACH,wBAAgB,YAAY,IAAI,MAAM,CAErC;AAED;;GAEG;AACH,wBAAgB,OAAO,IAAI,OAAO,CAEjC;AAED;;GAEG;AACH,wBAAgB,QAAQ,IAAI;IAAE,KAAK,EAAE,OAAO,CAAC;IAAC,SAAS,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAAE,CAM/E;AAGD,qBAAa,YAAY;IACvB,OAAO,CAAC,MAAM,CAAqB;gBAEvB,MAAM,GAAE,kBAAuB;IAIrC,IAAI,IAAI,OAAO,CAAC,OAAO,CAAC;IAIxB,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IAKtC,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC;IAKhD,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;IAK/D,IAAI,SAAS,IAAI,MAAM,CAEtB;IAED,IAAI,KAAK,IAAI,OAAO,CAEnB;CACF;AAED,eAAe,YAAY,CAAC"}
@@ -0,0 +1,269 @@
1
+ "use strict";
2
+ /**
3
+ * ONNX WASM Embedder - Semantic embeddings for hooks
4
+ *
5
+ * Provides real transformer-based embeddings using all-MiniLM-L6-v2
6
+ * running in pure WASM (no native dependencies).
7
+ *
8
+ * Uses bundled ONNX WASM files from src/core/onnx/
9
+ *
10
+ * Features:
11
+ * - 384-dimensional semantic embeddings
12
+ * - Real semantic understanding (not hash-based)
13
+ * - Cached model loading (downloads from HuggingFace on first use)
14
+ * - Batch embedding support
15
+ */
// Interop helper (this looks like the standard TypeScript-emitted
// `__createBinding`; reuses an existing `this.__createBinding` when present).
// Exposes property `k` of module object `m` on target `o` under the name
// `k2` (defaults to `k`).
//  - When `Object.create` exists: reuses m's own property descriptor, or
//    installs an enumerable getter that reads `m[k]` on every access when the
//    descriptor is missing, is an accessor on a non-ES module, or is a
//    writable/configurable data property.
//  - Otherwise: falls back to a plain one-time value copy.
16
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
17
+ if (k2 === undefined) k2 = k;
18
+ var desc = Object.getOwnPropertyDescriptor(m, k);
19
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
20
+ desc = { enumerable: true, get: function() { return m[k]; } };
21
+ }
22
+ Object.defineProperty(o, k2, desc);
23
+ }) : (function(o, m, k, k2) {
24
+ if (k2 === undefined) k2 = k;
25
+ o[k2] = m[k];
26
+ }));
// Interop helper: stores `v` as the `default` property of `o`.
// Uses an enumerable, non-writable defineProperty when `Object.create`
// exists, and plain assignment otherwise. An already-defined
// `this.__setModuleDefault` takes precedence.
27
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
28
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
29
+ }) : function(o, v) {
30
+ o["default"] = v;
31
+ });
// Interop helper used for the `require(...)` calls below: turns a required
// module into a namespace-style object.
//  - A module already flagged `__esModule` is returned untouched.
//  - Otherwise every own property except "default" is bound onto a fresh
//    object via __createBinding, and the module itself becomes that object's
//    `default`.
// `ownKeys` replaces itself on first call with Object.getOwnPropertyNames
// (or a for-in/hasOwnProperty fallback), so the feature check runs only once.
32
+ var __importStar = (this && this.__importStar) || (function () {
33
+ var ownKeys = function(o) {
34
+ ownKeys = Object.getOwnPropertyNames || function (o) {
35
+ var ar = [];
36
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
37
+ return ar;
38
+ };
39
+ return ownKeys(o);
40
+ };
41
+ return function (mod) {
42
+ if (mod && mod.__esModule) return mod;
43
+ var result = {};
44
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
45
+ __setModuleDefault(result, mod);
46
+ return result;
47
+ };
48
+ })();
49
+ Object.defineProperty(exports, "__esModule", { value: true });
50
+ exports.OnnxEmbedder = void 0;
51
+ exports.isOnnxAvailable = isOnnxAvailable;
52
+ exports.initOnnxEmbedder = initOnnxEmbedder;
53
+ exports.embed = embed;
54
+ exports.embedBatch = embedBatch;
55
+ exports.similarity = similarity;
56
+ exports.cosineSimilarity = cosineSimilarity;
57
+ exports.getDimension = getDimension;
58
+ exports.isReady = isReady;
59
+ exports.getStats = getStats;
60
+ const path = __importStar(require("path"));
61
+ const fs = __importStar(require("fs"));
// `dynamicImport(specifier)` evaluates `import(specifier)` from inside a
// Function-constructor body, so the import expression only exists as a string
// at build time and is executed as a real dynamic import at run time.
62
+ // Force native dynamic import (avoids TypeScript transpiling to require)
63
+ // eslint-disable-next-line @typescript-eslint/no-implied-eval
64
+ const dynamicImport = new Function('specifier', 'return import(specifier)');
// Module-wide singleton state shared by every exported function and by all
// OnnxEmbedder instances:
//   wasmModule    - the dynamically imported WASM package module
//   embedder      - the embedder instance created from the loaded model
//   loadError     - error from a failed initialization; rethrown on later calls
//   loadPromise   - the in-flight (or finished) initialization, awaited by
//                   callers that arrive while loading is in progress
//   isInitialized - set to true only after initialization fully succeeds
65
+ // Lazy-loaded module state
66
+ let wasmModule = null;
67
+ let embedder = null;
68
+ let loadError = null;
69
+ let loadPromise = null;
70
+ let isInitialized = false;
// Model id used when the caller's config does not supply `modelId`; also the
// model name reported by getStats().
71
+ // Default model
72
+ const DEFAULT_MODEL = 'all-MiniLM-L6-v2';
/**
 * Report whether the bundled ONNX WASM package file is present.
 *
 * Looks for `onnx/pkg/ruvector_onnx_embeddings_wasm.js` relative to this
 * module's directory.
 *
 * @returns {boolean} `true` when the file exists; `false` when it does not
 *                    or when the check itself throws.
 */
function isOnnxAvailable() {
    let bundled = false;
    try {
        bundled = fs.existsSync(path.join(__dirname, 'onnx', 'pkg', 'ruvector_onnx_embeddings_wasm.js'));
    }
    catch {
        // Any failure while probing is treated as "not available".
        bundled = false;
    }
    return bundled;
}
/**
 * Initialize the shared ONNX embedder (downloads the model if needed).
 *
 * The embedder is a module-level singleton:
 *  - once initialized, further calls return `true` immediately and their
 *    `config` is ignored;
 *  - calls made while a load is in flight wait for that same load;
 *  - a failed load is cached and rethrown by every later call (no retry).
 *
 * @param {object} [config] Optional settings.
 * @param {string} [config.modelId] Model to load; defaults to DEFAULT_MODEL.
 * @param {number} [config.maxLength] Max input length; falls back to the
 *        model's own config, then 256.
 * @param {boolean} [config.normalize] Normalization; on unless exactly `false`.
 * @param {string} [config.cacheDir] Model cache directory; defaults to
 *        `.ruvector/models` under the home directory.
 * @returns {Promise<boolean>} `true` once the embedder is ready.
 * @throws {Error} "Failed to initialize ONNX embedder: ..." when the bundled
 *         files are missing or any load step fails; the original failure is
 *         attached as `cause`.
 */
async function initOnnxEmbedder(config = {}) {
    if (isInitialized)
        return true;
    // A previous attempt failed: surface the same error rather than retrying.
    if (loadError)
        throw loadError;
    // Another caller already started loading: share that attempt.
    if (loadPromise) {
        await loadPromise;
        return isInitialized;
    }
    loadPromise = (async () => {
        try {
            // Required locally so this function does not depend on additional
            // top-of-file imports.
            const { pathToFileURL } = require("url");
            const os = require("os");
            // Paths to bundled ONNX files
            const pkgPath = path.join(__dirname, 'onnx', 'pkg', 'ruvector_onnx_embeddings_wasm.js');
            const loaderPath = path.join(__dirname, 'onnx', 'loader.js');
            if (!fs.existsSync(pkgPath)) {
                throw new Error('ONNX WASM files not bundled. The onnx/ directory is missing.');
            }
            // Native import() takes URL-style specifiers. A raw absolute path
            // breaks on Windows drive letters and on paths containing
            // characters such as '#', '?' or '%', so convert to a file:// URL.
            wasmModule = await dynamicImport(pathToFileURL(pkgPath).href);
            // Initialize WASM module (loads the .wasm file)
            const wasmPath = path.join(__dirname, 'onnx', 'pkg', 'ruvector_onnx_embeddings_wasm_bg.wasm');
            if (wasmModule.default && typeof wasmModule.default === 'function') {
                // For bundler-style initialization, pass the wasm buffer
                const wasmBytes = fs.readFileSync(wasmPath);
                await wasmModule.default(wasmBytes);
            }
            const loaderModule = await dynamicImport(pathToFileURL(loaderPath).href);
            const { ModelLoader } = loaderModule;
            // Resolve the base directory for the default cache location.
            // HOME keeps precedence for backward compatibility; os.homedir()
            // covers platforms where HOME is not set (e.g. Windows), and the
            // OS temp directory is the last resort.
            let baseDir = process.env.HOME;
            if (!baseDir) {
                try {
                    baseDir = os.homedir();
                }
                catch {
                    baseDir = '';
                }
            }
            if (!baseDir) {
                baseDir = os.tmpdir();
            }
            // Create model loader with caching
            const modelLoader = new ModelLoader({
                cache: true,
                cacheDir: config.cacheDir || path.join(baseDir, '.ruvector', 'models'),
            });
            // Load model (downloads from HuggingFace on first use)
            const modelId = config.modelId || DEFAULT_MODEL;
            // Progress goes to stderr so stdout stays clean for callers.
            console.error(`Loading ONNX model: ${modelId}...`);
            const { modelBytes, tokenizerJson, config: modelConfig } = await modelLoader.loadModel(modelId);
            // The loader's config may be absent; do not dereference it blindly.
            const modelMaxLength = modelConfig ? modelConfig.maxLength : undefined;
            // Create embedder with config
            const embedderConfig = new wasmModule.WasmEmbedderConfig()
                .setMaxLength(config.maxLength || modelMaxLength || 256)
                .setNormalize(config.normalize !== false)
                .setPooling(0); // Mean pooling
            embedder = wasmModule.WasmEmbedder.withConfig(modelBytes, tokenizerJson, embedderConfig);
            console.error(`ONNX embedder ready: ${embedder.dimension()}d`);
            isInitialized = true;
        }
        catch (e) {
            // Thrown values are not guaranteed to be Error instances.
            const reason = e instanceof Error ? e.message : String(e);
            loadError = new Error(`Failed to initialize ONNX embedder: ${reason}`);
            // Keep the original failure (and its stack) reachable for debugging.
            loadError.cause = e;
            throw loadError;
        }
    })();
    await loadPromise;
    return isInitialized;
}
/**
 * Generate the embedding for a single text.
 *
 * Triggers initialization with default settings when the embedder has not
 * been initialized yet.
 *
 * @param {string} text Text to embed.
 * @returns {Promise<{embedding: number[], dimension: number, timeMs: number}>}
 *          The vector as a plain array, its length, and the time spent in
 *          the embedder call in milliseconds.
 * @throws {Error} When initialization fails or no embedder is available.
 */
async function embed(text) {
    if (!isInitialized)
        await initOnnxEmbedder();
    if (!embedder)
        throw new Error('ONNX embedder not initialized');
    // Only the embedder call itself is timed, not the array conversion.
    const startedAt = performance.now();
    const vector = embedder.embedOne(text);
    const elapsedMs = performance.now() - startedAt;
    const result = {
        embedding: Array.from(vector),
        dimension: vector.length,
        timeMs: elapsedMs,
    };
    return result;
}
/**
 * Generate embeddings for several texts in one embedder call.
 *
 * The embedder returns one flat buffer; it is cut into consecutive
 * `dimension`-sized pieces, one per input text.
 *
 * @param {string[]} texts Texts to embed, in order.
 * @returns {Promise<Array<{embedding: number[], dimension: number, timeMs: number}>>}
 *          One entry per text, in input order. `timeMs` is the batch total
 *          divided by the number of texts.
 * @throws {Error} When initialization fails or no embedder is available.
 */
async function embedBatch(texts) {
    if (!isInitialized)
        await initOnnxEmbedder();
    if (!embedder)
        throw new Error('ONNX embedder not initialized');
    const startedAt = performance.now();
    const flat = embedder.embedBatch(texts);
    const elapsedMs = performance.now() - startedAt;
    const dimension = embedder.dimension();
    const count = texts.length;
    // Same value for every entry: the total is shared evenly across inputs.
    const perTextMs = elapsedMs / count;
    return Array.from({ length: count }, (_unused, idx) => {
        const offset = idx * dimension;
        const piece = flat.slice(offset, offset + dimension);
        return {
            embedding: Array.from(piece),
            dimension,
            timeMs: perTextMs,
        };
    });
}
/**
 * Compare two texts using the embedder's own similarity routine.
 *
 * Triggers initialization with default settings when the embedder has not
 * been initialized yet.
 *
 * @param {string} text1 First text.
 * @param {string} text2 Second text.
 * @returns {Promise<{similarity: number, timeMs: number}>} The score and the
 *          time spent in the embedder call in milliseconds.
 * @throws {Error} When initialization fails or no embedder is available.
 */
async function similarity(text1, text2) {
    if (!isInitialized)
        await initOnnxEmbedder();
    if (!embedder)
        throw new Error('ONNX embedder not initialized');
    const startedAt = performance.now();
    const score = embedder.similarity(text1, text2);
    const elapsedMs = performance.now() - startedAt;
    return {
        similarity: score,
        timeMs: elapsedMs,
    };
}
/**
 * Cosine similarity of two embedding vectors.
 *
 * @param {number[]} a First vector.
 * @param {number[]} b Second vector; must have the same length as `a`.
 * @returns {number} dot(a, b) / (|a| * |b|), or `0` when either vector has
 *                   zero magnitude (including two empty vectors).
 * @throws {Error} When the vectors differ in length.
 */
function cosineSimilarity(a, b) {
    if (a.length !== b.length)
        throw new Error('Embeddings must have same dimension');
    let dot = 0;
    let sumSqA = 0;
    let sumSqB = 0;
    // Single pass accumulating the dot product and both squared norms.
    for (const [idx, x] of a.entries()) {
        const y = b[idx];
        dot += x * y;
        sumSqA += x * x;
        sumSqB += y * y;
    }
    const denom = Math.sqrt(sumSqA) * Math.sqrt(sumSqB);
    // A zero vector has no direction; report 0 instead of dividing by zero.
    if (denom === 0) {
        return 0;
    }
    return dot / denom;
}
/**
 * Get the embedding dimension.
 *
 * @returns {number} The loaded embedder's dimension, or 384 when no
 *                   embedder has been created yet.
 */
function getDimension() {
    if (embedder) {
        return embedder.dimension();
    }
    // Nothing loaded yet: fall back to the fixed default of 384.
    return 384;
}
/**
 * Tell whether the embedder finished initializing.
 *
 * @returns {boolean} The module-level initialization flag, which is only
 *                    set after a fully successful initialization.
 */
function isReady() {
    const ready = isInitialized;
    return ready;
}
/**
 * Snapshot of the embedder's current status.
 *
 * @returns {{ready: boolean, dimension: number, model: string}}
 *          `ready` is the initialization flag, `dimension` is the loaded
 *          embedder's dimension (384 before loading), and `model` is the
 *          default model name.
 */
function getStats() {
    const ready = isInitialized;
    const dimension = embedder ? embedder.dimension() : 384;
    // NOTE(review): this always reports DEFAULT_MODEL, even when a different
    // `modelId` was passed to initOnnxEmbedder - confirm whether intended.
    const model = DEFAULT_MODEL;
    return { ready, dimension, model };
}
/**
 * Class wrapper around the module-level embedder functions, kept for
 * compatibility with class-based callers.
 *
 * The underlying embedder is a module-level singleton shared by all
 * instances: the first initialization wins, and once it has happened the
 * config of other instances has no effect.
 */
class OnnxEmbedder {
    /**
     * @param {object} [config] Settings passed to initOnnxEmbedder by this
     *        instance (modelId, maxLength, normalize, cacheDir).
     */
    constructor(config = {}) {
        this.config = config;
    }
    /**
     * Initialize the shared embedder with this instance's config.
     * @returns {Promise<boolean>} `true` once the embedder is ready.
     */
    async init() {
        return initOnnxEmbedder(this.config);
    }
    /**
     * Embed one text.
     * @param {string} text Text to embed.
     * @returns {Promise<number[]>} The embedding vector.
     */
    async embed(text) {
        // Initialize with THIS instance's config. Without this, the
        // module-level embed() would lazily initialize with default settings
        // and silently ignore the config given to the constructor.
        await this.init();
        const result = await embed(text);
        return result.embedding;
    }
    /**
     * Embed several texts.
     * @param {string[]} texts Texts to embed, in order.
     * @returns {Promise<number[][]>} One vector per text, in input order.
     */
    async embedBatch(texts) {
        // See embed(): make sure the instance config is the one applied.
        await this.init();
        const results = await embedBatch(texts);
        return results.map(r => r.embedding);
    }
    /**
     * Compare two texts.
     * @param {string} text1 First text.
     * @param {string} text2 Second text.
     * @returns {Promise<number>} The similarity score.
     */
    async similarity(text1, text2) {
        // See embed(): make sure the instance config is the one applied.
        await this.init();
        const result = await similarity(text1, text2);
        return result.similarity;
    }
    /** Embedding dimension (384 before the embedder is loaded). */
    get dimension() {
        return getDimension();
    }
    /** Whether the shared embedder has been initialized. */
    get ready() {
        return isReady();
    }
}
268
+ exports.OnnxEmbedder = OnnxEmbedder;
269
+ exports.default = OnnxEmbedder;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ruvector",
3
- "version": "0.1.53",
3
+ "version": "0.1.55",
4
4
  "description": "High-performance vector database for Node.js with automatic native/WASM fallback",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",
@@ -38,7 +38,10 @@
38
38
  "lora",
39
39
  "ewc",
40
40
  "adaptive-learning",
41
- "continual-learning"
41
+ "continual-learning",
42
+ "onnx",
43
+ "semantic-embeddings",
44
+ "minilm"
42
45
  ],
43
46
  "author": "ruv.io Team <info@ruv.io> (https://ruv.io)",
44
47
  "homepage": "https://ruv.io",
package/ruvector.db CHANGED
Binary file