ruvector 0.1.93 → 0.1.95

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -25,6 +25,7 @@ export * from './learning-engine';
25
25
  export * from './adaptive-embedder';
26
26
  export * from './neural-embeddings';
27
27
  export * from './neural-perf';
28
+ export * from './onnx-llm';
28
29
  export * from '../analysis';
29
30
  export { default as gnnWrapper } from './gnn-wrapper';
30
31
  export { default as attentionFallbacks } from './attention-fallbacks';
@@ -44,4 +45,5 @@ export { default as TensorCompress } from './tensor-compress';
44
45
  export { default as LearningEngine } from './learning-engine';
45
46
  export { default as AdaptiveEmbedder } from './adaptive-embedder';
46
47
  export { default as NeuralSubstrate } from './neural-embeddings';
48
+ export { default as OnnxLLM } from './onnx-llm';
47
49
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/core/index.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,cAAc,eAAe,CAAC;AAC9B,cAAc,uBAAuB,CAAC;AACtC,cAAc,gBAAgB,CAAC;AAC/B,cAAc,gBAAgB,CAAC;AAC/B,cAAc,uBAAuB,CAAC;AACtC,cAAc,iBAAiB,CAAC;AAChC,cAAc,kBAAkB,CAAC;AACjC,cAAc,yBAAyB,CAAC;AACxC,cAAc,oBAAoB,CAAC;AACnC,cAAc,kBAAkB,CAAC;AACjC,cAAc,iBAAiB,CAAC;AAChC,cAAc,mBAAmB,CAAC;AAClC,cAAc,cAAc,CAAC;AAC7B,cAAc,mBAAmB,CAAC;AAClC,cAAc,mBAAmB,CAAC;AAClC,cAAc,oBAAoB,CAAC;AACnC,cAAc,mBAAmB,CAAC;AAClC,cAAc,mBAAmB,CAAC;AAClC,cAAc,qBAAqB,CAAC;AACpC,cAAc,qBAAqB,CAAC;AACpC,cAAc,eAAe,CAAC;AAG9B,cAAc,aAAa,CAAC;AAG5B,OAAO,EAAE,OAAO,IAAI,UAAU,EAAE,MAAM,eAAe,CAAC;AACtD,OAAO,EAAE,OAAO,IAAI,kBAAkB,EAAE,MAAM,uBAAuB,CAAC;AACtE,OAAO,EAAE,OAAO,IAAI,WAAW,EAAE,MAAM,gBAAgB,CAAC;AACxD,OAAO,EAAE,OAAO,IAAI,IAAI,EAAE,MAAM,gBAAgB,CAAC;AACjD,OAAO,EAAE,OAAO,IAAI,kBAAkB,EAAE,MAAM,uBAAuB,CAAC;AACtE,OAAO,EAAE,OAAO,IAAI,YAAY,EAAE,MAAM,iBAAiB,CAAC;AAC1D,OAAO,EAAE,OAAO,IAAI,qBAAqB,EAAE,MAAM,kBAAkB,CAAC;AACpE,OAAO,EAAE,OAAO,IAAI,oBAAoB,EAAE,MAAM,yBAAyB,CAAC;AAC1E,OAAO,EAAE,OAAO,IAAI,kBAAkB,EAAE,MAAM,oBAAoB,CAAC;AACnE,OAAO,EAAE,OAAO,IAAI,cAAc,EAAE,MAAM,kBAAkB,CAAC;AAC7D,OAAO,EAAE,OAAO,IAAI,SAAS,EAAE,MAAM,iBAAiB,CAAC;AACvD,OAAO,EAAE,OAAO,IAAI,eAAe,EAAE,MAAM,mBAAmB,CAAC;AAC/D,OAAO,EAAE,OAAO,IAAI,UAAU,EAAE,MAAM,cAAc,CAAC;AAGrD,OAAO,EAAE,UAAU,IAAI,SAAS,EAAE,MAAM,cAAc,CAAC;AAGvD,OAAO,EAAE,OAAO,IAAI,cAAc,EAAE,MAAM,mBAAmB,CAAC;AAC9D,OAAO,EAAE,OAAO,IAAI,cAAc,EAAE,MAAM,mBAAmB,CAAC;AAC9D,OAAO,EAAE,OAAO,IAAI,gBAAgB,EAAE,MAAM,qBAAqB,CAAC;AAClE,OAAO,EAAE,OAAO,IAAI,eAAe,EAAE,MAAM,qBAAqB,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/core/index.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,cAAc,eAAe,CAAC;AAC9B,cAAc,uBAAuB,CAAC;AACtC,cAAc,gBAAgB,CAAC;AAC/B,cAAc,gBAAgB,CAAC;AAC/B,cAAc,uBAAuB,CAAC;AACtC,cAAc,iBAAiB,CAAC;AAChC,cAAc,kBAAkB,CAAC;AACjC,cAAc,yBAAyB,CAAC;AACxC,cAAc,oBAAoB,CAAC;AACnC,cAAc,kBAAkB,CAAC;AACjC,cAAc,iBAAiB,CAAC;AAChC,cAAc,mBAAmB,CAAC;AAClC,cAAc,cAAc,CAAC;AAC7B,cAAc,mBAAmB,CAAC;AAClC,cAAc,mBAAmB,CAAC;AAClC,cAAc,oBAAoB,CAAC;AACnC,cAAc,mBAAmB,CAAC;AAClC,cAAc,mBAAmB,CAAC;AAClC,cAAc,qBAAqB,CAAC;AACpC,cAAc,qBAAqB,CAAC;AACpC,cAAc,eAAe,CAAC;AAC9B,cAAc,YAAY,CAAC;AAG3B,cAAc,aAAa,CAAC;AAG5B,OAAO,EAAE,OAAO,IAAI,UAAU,EAAE,MAAM,eAAe,CAAC;AACtD,OAAO,EAAE,OAAO,IAAI,kBAAkB,EAAE,MAAM,uBAAuB,CAAC;AACtE,OAAO,EAAE,OAAO,IAAI,WAAW,EAAE,MAAM,gBAAgB,CAAC;AACxD,OAAO,EAAE,OAAO,IAAI,IAAI,EAAE,MAAM,gBAAgB,CAAC;AACjD,OAAO,EAAE,OAAO,IAAI,kBAAkB,EAAE,MAAM,uBAAuB,CAAC;AACtE,OAAO,EAAE,OAAO,IAAI,YAAY,EAAE,MAAM,iBAAiB,CAAC;AAC1D,OAAO,EAAE,OAAO,IAAI,qBAAqB,EAAE,MAAM,kBAAkB,CAAC;AACpE,OAAO,EAAE,OAAO,IAAI,oBAAoB,EAAE,MAAM,yBAAyB,CAAC;AAC1E,OAAO,EAAE,OAAO,IAAI,kBAAkB,EAAE,MAAM,oBAAoB,CAAC;AACnE,OAAO,EAAE,OAAO,IAAI,cAAc,EAAE,MAAM,kBAAkB,CAAC;AAC7D,OAAO,EAAE,OAAO,IAAI,SAAS,EAAE,MAAM,iBAAiB,CAAC;AACvD,OAAO,EAAE,OAAO,IAAI,eAAe,EAAE,MAAM,mBAAmB,CAAC;AAC/D,OAAO,EAAE,OAAO,IAAI,UAAU,EAAE,MAAM,cAAc,CAAC;AAGrD,OAAO,EAAE,UAAU,IAAI,SAAS,EAAE,MAAM,cAAc,CAAC;AAGvD,OAAO,EAAE,OAAO,IAAI,cAAc,EAAE,MAAM,mBAAmB,CAAC;AAC9D,OAAO,EAAE,OAAO,IAAI,cAAc,EAAE,MAAM,mBAAmB,CAAC;AAC9D,OAAO,EAAE,OAAO,IAAI,gBAAgB,EAAE,MAAM,qBAAqB,CAAC;AAClE,OAAO,EAAE,OAAO,IAAI,eAAe,EAAE,MAAM,qBAAqB,CAAC;AACjE,OAAO,EAAE,OAAO,IAAI,OAAO,EAAE,MAAM,YAAY,CAAC"}
@@ -23,7 +23,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
23
23
  return (mod && mod.__esModule) ? mod : { "default": mod };
24
24
  };
25
25
  Object.defineProperty(exports, "__esModule", { value: true });
26
- exports.NeuralSubstrate = exports.AdaptiveEmbedder = exports.LearningEngine = exports.TensorCompress = exports.ASTParser = exports.CodeParser = exports.RuvectorCluster = exports.CodeGraph = exports.SemanticRouter = exports.ExtendedWorkerPool = exports.ParallelIntelligence = exports.OptimizedOnnxEmbedder = exports.OnnxEmbedder = exports.IntelligenceEngine = exports.Sona = exports.agentdbFast = exports.attentionFallbacks = exports.gnnWrapper = void 0;
26
+ exports.OnnxLLM = exports.NeuralSubstrate = exports.AdaptiveEmbedder = exports.LearningEngine = exports.TensorCompress = exports.ASTParser = exports.CodeParser = exports.RuvectorCluster = exports.CodeGraph = exports.SemanticRouter = exports.ExtendedWorkerPool = exports.ParallelIntelligence = exports.OptimizedOnnxEmbedder = exports.OnnxEmbedder = exports.IntelligenceEngine = exports.Sona = exports.agentdbFast = exports.attentionFallbacks = exports.gnnWrapper = void 0;
27
27
  __exportStar(require("./gnn-wrapper"), exports);
28
28
  __exportStar(require("./attention-fallbacks"), exports);
29
29
  __exportStar(require("./agentdb-fast"), exports);
@@ -45,6 +45,7 @@ __exportStar(require("./learning-engine"), exports);
45
45
  __exportStar(require("./adaptive-embedder"), exports);
46
46
  __exportStar(require("./neural-embeddings"), exports);
47
47
  __exportStar(require("./neural-perf"), exports);
48
+ __exportStar(require("./onnx-llm"), exports);
48
49
  // Analysis module (consolidated security, complexity, patterns)
49
50
  __exportStar(require("../analysis"), exports);
50
51
  // Re-export default objects for convenience
@@ -86,3 +87,5 @@ var adaptive_embedder_1 = require("./adaptive-embedder");
86
87
  Object.defineProperty(exports, "AdaptiveEmbedder", { enumerable: true, get: function () { return __importDefault(adaptive_embedder_1).default; } });
87
88
  var neural_embeddings_1 = require("./neural-embeddings");
88
89
  Object.defineProperty(exports, "NeuralSubstrate", { enumerable: true, get: function () { return __importDefault(neural_embeddings_1).default; } });
90
+ var onnx_llm_1 = require("./onnx-llm");
91
+ Object.defineProperty(exports, "OnnxLLM", { enumerable: true, get: function () { return __importDefault(onnx_llm_1).default; } });
@@ -0,0 +1,127 @@
1
+ /**
2
+ * CommonJS-compatible WASM loader for Node.js
3
+ *
4
+ * This file provides a way to load the WASM module without requiring
5
+ * the --experimental-wasm-modules flag by manually loading the WASM bytes.
6
+ *
7
+ * Usage:
8
+ * const wasm = require('./ruvector_onnx_embeddings_wasm_cjs.js');
9
+ * await wasm.init(); // or wasm.initSync(wasmBytes)
10
+ */
11
+
12
+ const fs = require('fs');
13
+ const path = require('path');
14
+
15
+ // Re-export everything from the JS bindings
16
+ const bindings = require('./ruvector_onnx_embeddings_wasm_bg.js');
17
+
18
+ // Track initialization state
19
+ let initialized = false;
20
+ let initPromise = null;
21
+
22
+ /**
23
+ * Initialize the WASM module asynchronously
24
+ * Automatically loads the .wasm file from the same directory
25
+ */
26
+ async function init(wasmInput) {
27
+ if (initialized) return bindings;
28
+
29
+ if (initPromise) {
30
+ await initPromise;
31
+ return bindings;
32
+ }
33
+
34
+ initPromise = (async () => {
35
+ let wasmBytes;
36
+
37
+ if (wasmInput instanceof WebAssembly.Module) {
38
+ // Already compiled module
39
+ const instance = await WebAssembly.instantiate(wasmInput, getImports());
40
+ bindings.__wbg_set_wasm(instance.exports);
41
+ finishInit();
42
+ return;
43
+ } else if (wasmInput instanceof ArrayBuffer || wasmInput instanceof Uint8Array) {
44
+ // Raw bytes provided
45
+ wasmBytes = wasmInput;
46
+ } else if (typeof wasmInput === 'string') {
47
+ // Path to WASM file
48
+ wasmBytes = fs.readFileSync(wasmInput);
49
+ } else {
50
+ // Auto-detect WASM file location
51
+ const wasmPath = path.join(__dirname, 'ruvector_onnx_embeddings_wasm_bg.wasm');
52
+ wasmBytes = fs.readFileSync(wasmPath);
53
+ }
54
+
55
+ const wasmModule = await WebAssembly.compile(wasmBytes);
56
+ const instance = await WebAssembly.instantiate(wasmModule, getImports());
57
+
58
+ bindings.__wbg_set_wasm(instance.exports);
59
+ finishInit();
60
+ })();
61
+
62
+ await initPromise;
63
+ return bindings;
64
+ }
65
+
66
+ /**
67
+ * Initialize the WASM module synchronously
68
+ * Requires the WASM bytes to be provided
69
+ */
70
+ function initSync(wasmBytes) {
71
+ if (initialized) return bindings;
72
+
73
+ if (!wasmBytes) {
74
+ const wasmPath = path.join(__dirname, 'ruvector_onnx_embeddings_wasm_bg.wasm');
75
+ wasmBytes = fs.readFileSync(wasmPath);
76
+ }
77
+
78
+ const wasmModule = new WebAssembly.Module(wasmBytes);
79
+ const instance = new WebAssembly.Instance(wasmModule, getImports());
80
+
81
+ bindings.__wbg_set_wasm(instance.exports);
82
+ finishInit();
83
+
84
+ return bindings;
85
+ }
86
+
87
+ /**
88
+ * Get the WASM import object
89
+ */
90
+ function getImports() {
91
+ return {
92
+ './ruvector_onnx_embeddings_wasm_bg.js': bindings,
93
+ };
94
+ }
95
+
96
+ /**
97
+ * Finalize initialization
98
+ */
99
+ function finishInit() {
100
+ if (typeof bindings.__wbindgen_init_externref_table === 'function') {
101
+ bindings.__wbindgen_init_externref_table();
102
+ }
103
+ initialized = true;
104
+ }
105
+
106
+ /**
107
+ * Check if initialized
108
+ */
109
+ function isInitialized() {
110
+ return initialized;
111
+ }
112
+
113
+ // Export init functions and all bindings
114
+ module.exports = {
115
+ init,
116
+ initSync,
117
+ isInitialized,
118
+ default: init,
119
+ // Re-export all bindings
120
+ WasmEmbedder: bindings.WasmEmbedder,
121
+ WasmEmbedderConfig: bindings.WasmEmbedderConfig,
122
+ PoolingStrategy: bindings.PoolingStrategy,
123
+ cosineSimilarity: bindings.cosineSimilarity,
124
+ normalizeL2: bindings.normalizeL2,
125
+ simd_available: bindings.simd_available,
126
+ version: bindings.version,
127
+ };
@@ -12,6 +12,26 @@
12
12
  * - Cached model loading (downloads from HuggingFace on first use)
13
13
  * - Batch embedding support
14
14
  * - Optional parallel workers for 3.8x batch speedup
15
+ * - CommonJS-compatible: No --experimental-wasm-modules flag required
16
+ *
17
+ * Quick Start (Simple API - returns arrays directly):
18
+ * ```javascript
19
+ * const { embedText, embedTexts } = require('ruvector');
20
+ *
21
+ * // Single embedding - returns number[]
22
+ * const vector = await embedText("hello world");
23
+ *
24
+ * // Batch embeddings - returns number[][]
25
+ * const vectors = await embedTexts(["hello", "world"]);
26
+ * ```
27
+ *
28
+ * Full API (returns metadata):
29
+ * ```javascript
30
+ * const { embed, embedBatch } = require('ruvector');
31
+ *
32
+ * // Returns { embedding: number[], dimension: number, timeMs: number }
33
+ * const result = await embed("hello world");
34
+ * ```
15
35
  */
16
36
  declare global {
17
37
  var __ruvector_require: NodeRequire | undefined;
@@ -59,6 +79,70 @@ export declare function embed(text: string): Promise<EmbeddingResult>;
59
79
  * Uses parallel workers automatically for batches >= parallelThreshold
60
80
  */
61
81
  export declare function embedBatch(texts: string[]): Promise<EmbeddingResult[]>;
82
+ /**
83
+ * ============================================================================
84
+ * SIMPLE API - Returns arrays directly (for easy integration)
85
+ * ============================================================================
86
+ */
87
+ /**
88
+ * Generate embedding for a single text - returns array directly
89
+ *
90
+ * This is the simplified API that returns just the embedding array,
91
+ * making it easy to use for vector operations, PostgreSQL insertion,
92
+ * and similarity calculations.
93
+ *
94
+ * @param text - The text to embed
95
+ * @returns A 384-dimensional embedding array
96
+ *
97
+ * @example
98
+ * ```javascript
99
+ * const { embedText } = require('ruvector');
100
+ *
101
+ * const vector = await embedText("hello world");
102
+ * console.log(vector.length); // 384
103
+ * console.log(Array.isArray(vector)); // true
104
+ *
105
+ * // Use directly with PostgreSQL
106
+ * await pool.query(
107
+ * 'INSERT INTO docs (content, embedding) VALUES ($1, $2)',
108
+ * [text, JSON.stringify(vector)]
109
+ * );
110
+ * ```
111
+ */
112
+ export declare function embedText(text: string): Promise<number[]>;
113
+ /**
114
+ * Generate embeddings for multiple texts - returns array of arrays
115
+ *
116
+ * This is the simplified batch API that returns just the embedding arrays.
117
+ * Uses optimized batch processing for much faster throughput than
118
+ * calling embedText() in a loop.
119
+ *
120
+ * @param texts - Array of texts to embed
121
+ * @param options - Optional batch processing options
122
+ * @returns Array of 384-dimensional embedding arrays
123
+ *
124
+ * @example
125
+ * ```javascript
126
+ * const { embedTexts } = require('ruvector');
127
+ *
128
+ * // Batch embed 8000 documents in ~30 seconds (vs 53 min sequentially)
129
+ * const vectors = await embedTexts(documents);
130
+ *
131
+ * // With options for very large batches
132
+ * const vectors = await embedTexts(documents, { batchSize: 256 });
133
+ *
134
+ * // Bulk insert into PostgreSQL
135
+ * for (let i = 0; i < documents.length; i++) {
136
+ * await pool.query(
137
+ * 'INSERT INTO docs (content, embedding) VALUES ($1, $2)',
138
+ * [documents[i], JSON.stringify(vectors[i])]
139
+ * );
140
+ * }
141
+ * ```
142
+ */
143
+ export declare function embedTexts(texts: string[], options?: {
144
+ batchSize?: number;
145
+ }): Promise<number[][]>;
62
146
  /**
63
147
  * Calculate cosine similarity between two texts
64
148
  */
@@ -1 +1 @@
1
- {"version":3,"file":"onnx-embedder.d.ts","sourceRoot":"","sources":["../../src/core/onnx-embedder.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;GAcG;AAQH,OAAO,CAAC,MAAM,CAAC;IAEb,IAAI,kBAAkB,EAAE,WAAW,GAAG,SAAS,CAAC;CACjD;AAuBD,MAAM,WAAW,kBAAkB;IACjC,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB;;;;;OAKG;IACH,cAAc,CAAC,EAAE,OAAO,GAAG,MAAM,CAAC;IAClC,wDAAwD;IACxD,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,iEAAiE;IACjE,iBAAiB,CAAC,EAAE,MAAM,CAAC;CAC5B;AAMD,MAAM,WAAW,eAAe;IAC9B,SAAS,EAAE,MAAM,EAAE,CAAC;IACpB,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,gBAAgB;IAC/B,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,MAAM,CAAC;CAChB;AAeD;;GAEG;AACH,wBAAgB,eAAe,IAAI,OAAO,CAOzC;AA6DD;;GAEG;AACH,wBAAsB,gBAAgB,CAAC,MAAM,GAAE,kBAAuB,GAAG,OAAO,CAAC,OAAO,CAAC,CA+FxF;AAED;;GAEG;AACH,wBAAsB,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,eAAe,CAAC,CAiBlE;AAED;;;GAGG;AACH,wBAAsB,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,eAAe,EAAE,CAAC,CAwC5E;AAED;;GAEG;AACH,wBAAsB,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,gBAAgB,CAAC,CAaxF;AAED;;GAEG;AACH,wBAAgB,gBAAgB,CAAC,CAAC,EAAE,MAAM,EAAE,EAAE,CAAC,EAAE,MAAM,EAAE,GAAG,MAAM,CAiBjE;AAED;;GAEG;AACH,wBAAgB,YAAY,IAAI,MAAM,CAErC;AAED;;GAEG;AACH,wBAAgB,OAAO,IAAI,OAAO,CAEjC;AAED;;GAEG;AACH,wBAAgB,QAAQ,IAAI;IAC1B,KAAK,EAAE,OAAO,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,OAAO,CAAC;IACd,QAAQ,EAAE,OAAO,CAAC;IAClB,eAAe,EAAE,MAAM,CAAC;IACxB,iBAAiB,EAAE,MAAM,CAAC;CAC3B,CAUA;AAED;;GAEG;AACH,wBAAsB,QAAQ,IAAI,OAAO,CAAC,IAAI,CAAC,CAM9C;AAGD,qBAAa,YAAY;IACvB,OAAO,CAAC,MAAM,CAAqB;gBAEvB,MAAM,GAAE,kBAAuB;IAIrC,IAAI,IAAI,OAAO,CAAC,OAAO,CAAC;IAIxB,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IAKtC,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC;IAKhD,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;IAK/D,IAAI,SAAS,IAAI,MAAM,CAEtB;IAED,IAAI,KAAK,IAAI,OAAO,CAEnB;CACF;AAED,eAAe,YAAY,CAAC"}
1
+ {"version":3,"file":"onnx-embedder.d.ts","sourceRoot":"","sources":["../../src/core/onnx-embedder.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAkCG;AAQH,OAAO,CAAC,MAAM,CAAC;IAEb,IAAI,kBAAkB,EAAE,WAAW,GAAG,SAAS,CAAC;CACjD;AAsCD,MAAM,WAAW,kBAAkB;IACjC,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB;;;;;OAKG;IACH,cAAc,CAAC,EAAE,OAAO,GAAG,MAAM,CAAC;IAClC,wDAAwD;IACxD,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,iEAAiE;IACjE,iBAAiB,CAAC,EAAE,MAAM,CAAC;CAC5B;AAMD,MAAM,WAAW,eAAe;IAC9B,SAAS,EAAE,MAAM,EAAE,CAAC;IACpB,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,gBAAgB;IAC/B,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,MAAM,CAAC;CAChB;AAeD;;GAEG;AACH,wBAAgB,eAAe,IAAI,OAAO,CAOzC;AA6DD;;GAEG;AACH,wBAAsB,gBAAgB,CAAC,MAAM,GAAE,kBAAuB,GAAG,OAAO,CAAC,OAAO,CAAC,CAyGxF;AAED;;GAEG;AACH,wBAAsB,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,eAAe,CAAC,CAiBlE;AAED;;;GAGG;AACH,wBAAsB,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,eAAe,EAAE,CAAC,CAwC5E;AAED;;;;GAIG;AAEH;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AACH,wBAAsB,SAAS,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,CAU/D;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA6BG;AACH,wBAAsB,UAAU,CAC9B,KAAK,EAAE,MAAM,EAAE,EACf,OAAO,CAAC,EAAE;IAAE,SAAS,CAAC,EAAE,MAAM,CAAA;CAAE,GAC/B,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CA4CrB;AAED;;GAEG;AACH,wBAAsB,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,gBAAgB,CAAC,CAaxF;AAED;;GAEG;AACH,wBAAgB,gBAAgB,CAAC,CAAC,EAAE,MAAM,EAAE,EAAE,CAAC,EAAE,MAAM,EAAE,GAAG,MAAM,CAiBjE;AAED;;GAEG;AACH,wBAAgB,YAAY,IAAI,MAAM,CAErC;AAED;;GAEG;AACH,wBAAgB,OAAO,IAAI,OAAO,CAEjC;AAED;;GAEG;AACH,wBAAgB,QAAQ,IAAI;IAC1B,KAAK,EAAE,OAAO,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,OAAO,CAAC;IACd,QAAQ,EAAE,OAAO,CAAC;IAClB,eAAe,EAAE,MAAM,CAAC;IACxB,iBAAiB,EAAE,MAAM,CAAC;CAC3B,CAUA;AAED;;GAEG;AACH,wBAAsB,QAAQ,IAAI,OAAO,CAAC,IAAI,CAAC,CAM9C;AAGD,qBAAa,YAAY;IACvB,OAAO,CAAC,MAAM,CAAqB;gBAEvB,MAAM,GAAE,kBAAuB;IAIrC,IAAI,IAAI,OAAO,CAAC,OAAO,
CAAC;IAIxB,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IAKtC,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC;IAKhD,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;IAK/D,IAAI,SAAS,IAAI,MAAM,CAEtB;IAED,IAAI,KAAK,IAAI,OAAO,CAEnB;CACF;AAED,eAAe,YAAY,CAAC"}
@@ -13,6 +13,26 @@
13
13
  * - Cached model loading (downloads from HuggingFace on first use)
14
14
  * - Batch embedding support
15
15
  * - Optional parallel workers for 3.8x batch speedup
16
+ * - CommonJS-compatible: No --experimental-wasm-modules flag required
17
+ *
18
+ * Quick Start (Simple API - returns arrays directly):
19
+ * ```javascript
20
+ * const { embedText, embedTexts } = require('ruvector');
21
+ *
22
+ * // Single embedding - returns number[]
23
+ * const vector = await embedText("hello world");
24
+ *
25
+ * // Batch embeddings - returns number[][]
26
+ * const vectors = await embedTexts(["hello", "world"]);
27
+ * ```
28
+ *
29
+ * Full API (returns metadata):
30
+ * ```javascript
31
+ * const { embed, embedBatch } = require('ruvector');
32
+ *
33
+ * // Returns { embedding: number[], dimension: number, timeMs: number }
34
+ * const result = await embed("hello world");
35
+ * ```
16
36
  */
17
37
  var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
18
38
  if (k2 === undefined) k2 = k;
@@ -53,6 +73,8 @@ exports.isOnnxAvailable = isOnnxAvailable;
53
73
  exports.initOnnxEmbedder = initOnnxEmbedder;
54
74
  exports.embed = embed;
55
75
  exports.embedBatch = embedBatch;
76
+ exports.embedText = embedText;
77
+ exports.embedTexts = embedTexts;
56
78
  exports.similarity = similarity;
57
79
  exports.cosineSimilarity = cosineSimilarity;
58
80
  exports.getDimension = getDimension;
@@ -83,6 +105,21 @@ if (typeof globalThis !== 'undefined' && !globalThis.__ruvector_require) {
83
105
  // Force native dynamic import (avoids TypeScript transpiling to require)
84
106
  // eslint-disable-next-line @typescript-eslint/no-implied-eval
85
107
  const dynamicImport = new Function('specifier', 'return import(specifier)');
108
+ // Try to load the CommonJS-compatible WASM loader (no experimental flags needed)
109
+ function tryLoadCjsModule() {
110
+ try {
111
+ // Use require for CJS module which doesn't need experimental flags
112
+ const cjsPath = path.join(__dirname, 'onnx', 'pkg', 'ruvector_onnx_embeddings_wasm_cjs.js');
113
+ if (fs.existsSync(cjsPath)) {
114
+ // eslint-disable-next-line @typescript-eslint/no-var-requires
115
+ return require(cjsPath);
116
+ }
117
+ }
118
+ catch {
119
+ // CJS loader not available
120
+ }
121
+ return null;
122
+ }
86
123
  // Capability detection
87
124
  let simdAvailable = false;
88
125
  let parallelAvailable = false;
@@ -183,21 +220,32 @@ async function initOnnxEmbedder(config = {}) {
183
220
  // Paths to bundled ONNX files
184
221
  const pkgPath = path.join(__dirname, 'onnx', 'pkg', 'ruvector_onnx_embeddings_wasm.js');
185
222
  const loaderPath = path.join(__dirname, 'onnx', 'loader.js');
186
- if (!fs.existsSync(pkgPath)) {
223
+ const wasmPath = path.join(__dirname, 'onnx', 'pkg', 'ruvector_onnx_embeddings_wasm_bg.wasm');
224
+ if (!fs.existsSync(wasmPath)) {
187
225
  throw new Error('ONNX WASM files not bundled. The onnx/ directory is missing.');
188
226
  }
189
- // Convert paths to file:// URLs for cross-platform ESM compatibility (Windows fix)
190
- const pkgUrl = (0, url_1.pathToFileURL)(pkgPath).href;
191
- const loaderUrl = (0, url_1.pathToFileURL)(loaderPath).href;
192
- // Dynamic import of bundled modules using file:// URLs
193
- wasmModule = await dynamicImport(pkgUrl);
194
- // Initialize WASM module (loads the .wasm file)
195
- const wasmPath = path.join(__dirname, 'onnx', 'pkg', 'ruvector_onnx_embeddings_wasm_bg.wasm');
196
- if (wasmModule.default && typeof wasmModule.default === 'function') {
197
- // For bundler-style initialization, pass the wasm buffer
198
- const wasmBytes = fs.readFileSync(wasmPath);
199
- await wasmModule.default(wasmBytes);
227
+ // Try CJS loader first (no experimental flags needed)
228
+ const cjsModule = tryLoadCjsModule();
229
+ if (cjsModule) {
230
+ // Use CommonJS loader - no experimental flags required!
231
+ await cjsModule.init(wasmPath);
232
+ wasmModule = cjsModule;
200
233
  }
234
+ else {
235
+ // Fall back to ESM loader (may require --experimental-wasm-modules)
236
+ // Convert paths to file:// URLs for cross-platform ESM compatibility (Windows fix)
237
+ const pkgUrl = (0, url_1.pathToFileURL)(pkgPath).href;
238
+ // Dynamic import of bundled modules using file:// URLs
239
+ wasmModule = await dynamicImport(pkgUrl);
240
+ // Initialize WASM module (loads the .wasm file)
241
+ if (wasmModule.default && typeof wasmModule.default === 'function') {
242
+ // For bundler-style initialization, pass the wasm buffer
243
+ const wasmBytes = fs.readFileSync(wasmPath);
244
+ await wasmModule.default(wasmBytes);
245
+ }
246
+ }
247
+ // Load the model loader
248
+ const loaderUrl = (0, url_1.pathToFileURL)(loaderPath).href;
201
249
  const loaderModule = await dynamicImport(loaderUrl);
202
250
  const { ModelLoader } = loaderModule;
203
251
  // Create model loader with caching
@@ -310,6 +358,113 @@ async function embedBatch(texts) {
310
358
  }
311
359
  return results;
312
360
  }
361
+ /**
362
+ * ============================================================================
363
+ * SIMPLE API - Returns arrays directly (for easy integration)
364
+ * ============================================================================
365
+ */
366
+ /**
367
+ * Generate embedding for a single text - returns array directly
368
+ *
369
+ * This is the simplified API that returns just the embedding array,
370
+ * making it easy to use for vector operations, PostgreSQL insertion,
371
+ * and similarity calculations.
372
+ *
373
+ * @param text - The text to embed
374
+ * @returns A 384-dimensional embedding array
375
+ *
376
+ * @example
377
+ * ```javascript
378
+ * const { embedText } = require('ruvector');
379
+ *
380
+ * const vector = await embedText("hello world");
381
+ * console.log(vector.length); // 384
382
+ * console.log(Array.isArray(vector)); // true
383
+ *
384
+ * // Use directly with PostgreSQL
385
+ * await pool.query(
386
+ * 'INSERT INTO docs (content, embedding) VALUES ($1, $2)',
387
+ * [text, JSON.stringify(vector)]
388
+ * );
389
+ * ```
390
+ */
391
+ async function embedText(text) {
392
+ if (!isInitialized) {
393
+ await initOnnxEmbedder();
394
+ }
395
+ if (!embedder) {
396
+ throw new Error('ONNX embedder not initialized');
397
+ }
398
+ const embedding = embedder.embedOne(text);
399
+ return Array.from(embedding);
400
+ }
401
+ /**
402
+ * Generate embeddings for multiple texts - returns array of arrays
403
+ *
404
+ * This is the simplified batch API that returns just the embedding arrays.
405
+ * Uses optimized batch processing for much faster throughput than
406
+ * calling embedText() in a loop.
407
+ *
408
+ * @param texts - Array of texts to embed
409
+ * @param options - Optional batch processing options
410
+ * @returns Array of 384-dimensional embedding arrays
411
+ *
412
+ * @example
413
+ * ```javascript
414
+ * const { embedTexts } = require('ruvector');
415
+ *
416
+ * // Batch embed 8000 documents in ~30 seconds (vs 53 min sequentially)
417
+ * const vectors = await embedTexts(documents);
418
+ *
419
+ * // With options for very large batches
420
+ * const vectors = await embedTexts(documents, { batchSize: 256 });
421
+ *
422
+ * // Bulk insert into PostgreSQL
423
+ * for (let i = 0; i < documents.length; i++) {
424
+ * await pool.query(
425
+ * 'INSERT INTO docs (content, embedding) VALUES ($1, $2)',
426
+ * [documents[i], JSON.stringify(vectors[i])]
427
+ * );
428
+ * }
429
+ * ```
430
+ */
431
+ async function embedTexts(texts, options) {
432
+ if (!isInitialized) {
433
+ await initOnnxEmbedder();
434
+ }
435
+ if (!embedder) {
436
+ throw new Error('ONNX embedder not initialized');
437
+ }
438
+ if (texts.length === 0) {
439
+ return [];
440
+ }
441
+ const batchSize = options?.batchSize || 256;
442
+ // For small batches, process all at once
443
+ if (texts.length <= batchSize) {
444
+ // Use parallel workers for large batches
445
+ if (parallelEnabled && parallelEmbedder && texts.length >= parallelThreshold) {
446
+ const batchResults = await parallelEmbedder.embedBatch(texts);
447
+ return batchResults.map((emb) => Array.from(emb));
448
+ }
449
+ // Sequential processing
450
+ const batchEmbeddings = embedder.embedBatch(texts);
451
+ const dimension = embedder.dimension();
452
+ const results = [];
453
+ for (let i = 0; i < texts.length; i++) {
454
+ const embedding = batchEmbeddings.slice(i * dimension, (i + 1) * dimension);
455
+ results.push(Array.from(embedding));
456
+ }
457
+ return results;
458
+ }
459
+ // Process in chunks for very large batches
460
+ const results = [];
461
+ for (let i = 0; i < texts.length; i += batchSize) {
462
+ const chunk = texts.slice(i, i + batchSize);
463
+ const chunkResults = await embedTexts(chunk);
464
+ results.push(...chunkResults);
465
+ }
466
+ return results;
467
+ }
313
468
  /**
314
469
  * Calculate cosine similarity between two texts
315
470
  */
@@ -0,0 +1,206 @@
1
+ /**
2
+ * ONNX LLM Text Generation for RuVector
3
+ *
4
+ * Provides real local LLM inference using ONNX Runtime via transformers.js
5
+ * Supports small models that run efficiently on CPU:
6
+ * - SmolLM 135M - Smallest, fast (~135MB)
7
+ * - SmolLM 360M - Better quality (~360MB)
8
+ * - TinyLlama 1.1B - Best small model quality (~1GB quantized)
9
+ * - Qwen2.5 0.5B - Good balance (~500MB)
10
+ *
11
+ * Features:
12
+ * - Automatic model downloading and caching
13
+ * - Quantized INT4/INT8 models for efficiency
14
+ * - Streaming generation support
15
+ * - Temperature, top-k, top-p sampling
16
+ * - KV cache for efficient multi-turn conversations
17
+ */
18
+ export interface OnnxLLMConfig {
19
+ /** Model ID (default: 'Xenova/smollm-135m-instruct') */
20
+ modelId?: string;
21
+ /** Cache directory for models */
22
+ cacheDir?: string;
23
+ /** Use quantized model (default: true) */
24
+ quantized?: boolean;
25
+ /** Device: 'cpu' | 'webgpu' (default: 'cpu') */
26
+ device?: 'cpu' | 'webgpu';
27
+ /** Maximum context length */
28
+ maxLength?: number;
29
+ }
30
+ export interface GenerationConfig {
31
+ /** Maximum new tokens to generate (default: 128) */
32
+ maxNewTokens?: number;
33
+ /** Temperature for sampling (default: 0.7) */
34
+ temperature?: number;
35
+ /** Top-p nucleus sampling (default: 0.9) */
36
+ topP?: number;
37
+ /** Top-k sampling (default: 50) */
38
+ topK?: number;
39
+ /** Repetition penalty (default: 1.1) */
40
+ repetitionPenalty?: number;
41
+ /** Stop sequences */
42
+ stopSequences?: string[];
43
+ /** System prompt for chat models */
44
+ systemPrompt?: string;
45
+ /** Enable streaming (callback for each token) */
46
+ onToken?: (token: string) => void;
47
+ }
48
+ export interface GenerationResult {
49
+ /** Generated text */
50
+ text: string;
51
+ /** Number of tokens generated */
52
+ tokensGenerated: number;
53
+ /** Time taken in milliseconds */
54
+ timeMs: number;
55
+ /** Tokens per second */
56
+ tokensPerSecond: number;
57
+ /** Model used */
58
+ model: string;
59
+ /** Whether model was loaded from cache */
60
+ cached: boolean;
61
+ }
62
+ export declare const AVAILABLE_MODELS: {
63
+ readonly 'trm-tinystories': {
64
+ readonly id: "Xenova/TinyStories-33M";
65
+ readonly name: "TinyStories 33M (TRM)";
66
+ readonly size: "~65MB";
67
+ readonly description: "Ultra-tiny model for stories and basic generation";
68
+ readonly contextLength: 512;
69
+ };
70
+ readonly 'trm-gpt2-tiny': {
71
+ readonly id: "Xenova/gpt2";
72
+ readonly name: "GPT-2 124M (TRM)";
73
+ readonly size: "~250MB";
74
+ readonly description: "Classic GPT-2 tiny for general text";
75
+ readonly contextLength: 1024;
76
+ };
77
+ readonly 'trm-distilgpt2': {
78
+ readonly id: "Xenova/distilgpt2";
79
+ readonly name: "DistilGPT-2 (TRM)";
80
+ readonly size: "~82MB";
81
+ readonly description: "Distilled GPT-2, fastest general model";
82
+ readonly contextLength: 1024;
83
+ };
84
+ readonly 'smollm-135m': {
85
+ readonly id: "HuggingFaceTB/SmolLM-135M-Instruct";
86
+ readonly name: "SmolLM 135M";
87
+ readonly size: "~135MB";
88
+ readonly description: "Smallest instruct model, very fast";
89
+ readonly contextLength: 2048;
90
+ };
91
+ readonly 'smollm-360m': {
92
+ readonly id: "HuggingFaceTB/SmolLM-360M-Instruct";
93
+ readonly name: "SmolLM 360M";
94
+ readonly size: "~360MB";
95
+ readonly description: "Small model, fast, better quality";
96
+ readonly contextLength: 2048;
97
+ };
98
+ readonly 'smollm2-135m': {
99
+ readonly id: "HuggingFaceTB/SmolLM2-135M-Instruct";
100
+ readonly name: "SmolLM2 135M";
101
+ readonly size: "~135MB";
102
+ readonly description: "Latest SmolLM v2, improved capabilities";
103
+ readonly contextLength: 2048;
104
+ };
105
+ readonly 'smollm2-360m': {
106
+ readonly id: "HuggingFaceTB/SmolLM2-360M-Instruct";
107
+ readonly name: "SmolLM2 360M";
108
+ readonly size: "~360MB";
109
+ readonly description: "Latest SmolLM v2, better quality";
110
+ readonly contextLength: 2048;
111
+ };
112
+ readonly 'qwen2.5-0.5b': {
113
+ readonly id: "Qwen/Qwen2.5-0.5B-Instruct";
114
+ readonly name: "Qwen2.5 0.5B";
115
+ readonly size: "~300MB quantized";
116
+ readonly description: "Good balance of speed and quality, multilingual";
117
+ readonly contextLength: 4096;
118
+ };
119
+ readonly tinyllama: {
120
+ readonly id: "TinyLlama/TinyLlama-1.1B-Chat-v1.0";
121
+ readonly name: "TinyLlama 1.1B";
122
+ readonly size: "~600MB quantized";
123
+ readonly description: "Best small model quality, slower";
124
+ readonly contextLength: 2048;
125
+ };
126
+ readonly 'codegemma-2b': {
127
+ readonly id: "google/codegemma-2b";
128
+ readonly name: "CodeGemma 2B";
129
+ readonly size: "~1GB quantized";
130
+ readonly description: "Code generation specialist";
131
+ readonly contextLength: 8192;
132
+ };
133
+ readonly 'deepseek-coder-1.3b': {
134
+ readonly id: "deepseek-ai/deepseek-coder-1.3b-instruct";
135
+ readonly name: "DeepSeek Coder 1.3B";
136
+ readonly size: "~700MB quantized";
137
+ readonly description: "Excellent for code tasks";
138
+ readonly contextLength: 4096;
139
+ };
140
+ readonly 'phi-2': {
141
+ readonly id: "microsoft/phi-2";
142
+ readonly name: "Phi-2 2.7B";
143
+ readonly size: "~1.5GB quantized";
144
+ readonly description: "High quality small model";
145
+ readonly contextLength: 2048;
146
+ };
147
+ readonly 'phi-3-mini': {
148
+ readonly id: "microsoft/Phi-3-mini-4k-instruct";
149
+ readonly name: "Phi-3 Mini";
150
+ readonly size: "~2GB quantized";
151
+ readonly description: "Best quality tiny model";
152
+ readonly contextLength: 4096;
153
+ };
154
+ };
155
+ export type ModelKey = keyof typeof AVAILABLE_MODELS;
156
+ /**
157
+ * Check if transformers.js is available
158
+ */
159
+ export declare function isTransformersAvailable(): Promise<boolean>;
160
+ /**
161
+ * Initialize the ONNX LLM with specified model
162
+ */
163
+ export declare function initOnnxLLM(config?: OnnxLLMConfig): Promise<boolean>;
164
+ /**
165
+ * Generate text using ONNX LLM
166
+ */
167
+ export declare function generate(prompt: string, config?: GenerationConfig): Promise<GenerationResult>;
168
+ /**
169
+ * Generate with streaming (token by token)
170
+ */
171
+ export declare function generateStream(prompt: string, config?: GenerationConfig): Promise<AsyncGenerator<string, GenerationResult, undefined>>;
172
+ /**
173
+ * Chat completion with conversation history
174
+ */
175
+ export declare function chat(messages: Array<{
176
+ role: 'system' | 'user' | 'assistant';
177
+ content: string;
178
+ }>, config?: GenerationConfig): Promise<GenerationResult>;
179
+ /**
180
+ * Get model information
181
+ */
182
+ export declare function getModelInfo(): {
183
+ model: string | null;
184
+ ready: boolean;
185
+ availableModels: typeof AVAILABLE_MODELS;
186
+ };
187
+ /**
188
+ * Unload the current model to free memory
189
+ */
190
+ export declare function unload(): Promise<void>;
191
+ export declare class OnnxLLM {
192
+ private config;
193
+ private initialized;
194
+ constructor(config?: OnnxLLMConfig);
195
+ init(): Promise<boolean>;
196
+ generate(prompt: string, config?: GenerationConfig): Promise<GenerationResult>;
197
+ chat(messages: Array<{
198
+ role: 'system' | 'user' | 'assistant';
199
+ content: string;
200
+ }>, config?: GenerationConfig): Promise<GenerationResult>;
201
+ unload(): Promise<void>;
202
+ get ready(): boolean;
203
+ get model(): string | null;
204
+ }
205
+ export default OnnxLLM;
206
+ //# sourceMappingURL=onnx-llm.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"onnx-llm.d.ts","sourceRoot":"","sources":["../../src/core/onnx-llm.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;GAgBG;AAaH,MAAM,WAAW,aAAa;IAC5B,wDAAwD;IACxD,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,iCAAiC;IACjC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,0CAA0C;IAC1C,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,gDAAgD;IAChD,MAAM,CAAC,EAAE,KAAK,GAAG,QAAQ,CAAC;IAC1B,6BAA6B;IAC7B,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,gBAAgB;IAC/B,oDAAoD;IACpD,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,8CAA8C;IAC9C,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,4CAA4C;IAC5C,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,mCAAmC;IACnC,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,wCAAwC;IACxC,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B,qBAAqB;IACrB,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC;IACzB,oCAAoC;IACpC,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,iDAAiD;IACjD,OAAO,CAAC,EAAE,CAAC,KAAK,EAAE,MAAM,KAAK,IAAI,CAAC;CACnC;AAED,MAAM,WAAW,gBAAgB;IAC/B,qBAAqB;IACrB,IAAI,EAAE,MAAM,CAAC;IACb,iCAAiC;IACjC,eAAe,EAAE,MAAM,CAAC;IACxB,iCAAiC;IACjC,MAAM,EAAE,MAAM,CAAC;IACf,wBAAwB;IACxB,eAAe,EAAE,MAAM,CAAC;IACxB,iBAAiB;IACjB,KAAK,EAAE,MAAM,CAAC;IACd,0CAA0C;IAC1C,MAAM,EAAE,OAAO,CAAC;CACjB;AAMD,eAAO,MAAM,gBAAgB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAmHnB,CAAC;AAEX,MAAM,MAAM,QAAQ,GAAG,MAAM,OAAO,gBAAgB,CAAC;AAYrD;;GAEG;AACH,wBAAsB,uBAAuB,IAAI,OAAO,CAAC,OAAO,CAAC,CAOhE;AAED;;GAEG;AACH,wBAAsB,WAAW,CAAC,MAAM,GAAE,aAAkB,GAAG,OAAO,CAAC,OAAO,CAAC,CAqD9E;AAED;;GAEG;AACH,wBAAsB,QAAQ,CAC5B,MAAM,EAAE,MAAM,EACd,MAAM,GAAE,gBAAqB,GAC5B,OAAO,CAAC,gBAAgB,CAAC,CA0C3B;AAED;;GAEG;AACH,wBAAsB,cAAc,CAClC,MAAM,EAAE,MAAM,EACd,MAAM,GAAE,gBAAqB,GAC5B,OAAO,CAAC,cAAc,CAAC,MAAM,EAAE,gBAAgB,EAAE,SAAS,CAAC,CAAC,CA0D9D;AAED;;GAEG;AACH,wBAAsB,IAAI,CACxB,QAAQ,EAAE,KAAK,CAAC;IAAE,IAAI,EAAE,QAAQ,GAAG,MAAM,GAAG,WAAW,CAAC;IAAC,OAAO,EAAE,MAAM,CAAA;CAAE,CAAC,EAC3E,MAAM,GAAE,gBAAqB,GAC5B,OAAO,CAAC,gBAAgB,CAAC,CAsB3B;AAED;;GAEG;AACH,wBAAgB,YAAY,IAAI;IAC9B,KAAK,EAAE,MAAM,GAAG,IAAI,CAAC;IACrB,KAAK,EAAE,OAAO,CAAC;IACf,eAAe,EAAE,OAAO,gBAAgB,CAAC;CAC1C,CAMA;AAED;;GAEG;AACH,wBAAsB,MAAM,IAAI,OAAO,C
AAC,IAAI,CAAC,CAQ5C;AAMD,qBAAa,OAAO;IAClB,OAAO,CAAC,MAAM,CAAgB;IAC9B,OAAO,CAAC,WAAW,CAAS;gBAEhB,MAAM,GAAE,aAAkB;IAIhC,IAAI,IAAI,OAAO,CAAC,OAAO,CAAC;IAMxB,QAAQ,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,gBAAgB,GAAG,OAAO,CAAC,gBAAgB,CAAC;IAK9E,IAAI,CACR,QAAQ,EAAE,KAAK,CAAC;QAAE,IAAI,EAAE,QAAQ,GAAG,MAAM,GAAG,WAAW,CAAC;QAAC,OAAO,EAAE,MAAM,CAAA;KAAE,CAAC,EAC3E,MAAM,CAAC,EAAE,gBAAgB,GACxB,OAAO,CAAC,gBAAgB,CAAC;IAKtB,MAAM,IAAI,OAAO,CAAC,IAAI,CAAC;IAK7B,IAAI,KAAK,IAAI,OAAO,CAEnB;IAED,IAAI,KAAK,IAAI,MAAM,GAAG,IAAI,CAEzB;CACF;AAED,eAAe,OAAO,CAAC"}
@@ -0,0 +1,430 @@
1
+ "use strict";
2
+ /**
3
+ * ONNX LLM Text Generation for RuVector
4
+ *
5
+ * Provides real local LLM inference using ONNX Runtime via transformers.js
6
+ * Supports small models that run efficiently on CPU:
7
+ * - SmolLM 135M - Smallest, fast (~135MB)
8
+ * - SmolLM 360M - Better quality (~360MB)
9
+ * - TinyLlama 1.1B - Best small model quality (~1GB quantized)
10
+ * - Qwen2.5 0.5B - Good balance (~500MB)
11
+ *
12
+ * Features:
13
+ * - Automatic model downloading and caching
14
+ * - Quantized INT4/INT8 models for efficiency
15
+ * - Streaming generation support
16
+ * - Temperature, top-k, top-p sampling
17
+ * - KV cache for efficient multi-turn conversations
18
+ */
19
// ----------------------------------------------------------------------------
// CommonJS interop helpers emitted by the TypeScript compiler (tslib-style).
// They re-export members of a required module onto a namespace object so that
// `import * as x` semantics work under CommonJS. Do not edit by hand.
// ----------------------------------------------------------------------------
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    var desc = Object.getOwnPropertyDescriptor(m, k);
    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
        desc = { enumerable: true, get: function() { return m[k]; } };
    }
    Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    o[k2] = m[k];
}));
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
    Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
    o["default"] = v;
});
var __importStar = (this && this.__importStar) || (function () {
    var ownKeys = function(o) {
        ownKeys = Object.getOwnPropertyNames || function (o) {
            var ar = [];
            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
            return ar;
        };
        return ownKeys(o);
    };
    return function (mod) {
        if (mod && mod.__esModule) return mod;
        var result = {};
        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
        __setModuleDefault(result, mod);
        return result;
    };
})();
52
// CommonJS module wiring (emitted by the TypeScript compiler).
Object.defineProperty(exports, "__esModule", { value: true });
exports.OnnxLLM = exports.AVAILABLE_MODELS = void 0;
exports.isTransformersAvailable = isTransformersAvailable;
exports.initOnnxLLM = initOnnxLLM;
exports.generate = generate;
exports.generateStream = generateStream;
exports.chat = chat;
exports.getModelInfo = getModelInfo;
exports.unload = unload;
const path = __importStar(require("path"));
const fs = __importStar(require("fs"));
// Force native dynamic import (avoids TypeScript transpiling to require).
// Deliberate use of `new Function`; the specifier is always a hard-coded
// package name, never user input.
// eslint-disable-next-line @typescript-eslint/no-implied-eval
const dynamicImport = new Function('specifier', 'return import(specifier)');
66
// ============================================================================
// Available Models
// ============================================================================
// Catalog of ONNX-converted models known to work with this module.
// Keys are the short names accepted as `config.modelId` lookups by callers;
// `id` is the Hugging Face repository identifier actually downloaded.
// Sizes are approximate download sizes (some entries note quantized size).
exports.AVAILABLE_MODELS = {
    // =========================================================================
    // TRM - Tiny Random Models (smallest, fastest)
    // =========================================================================
    'trm-tinystories': {
        id: 'Xenova/TinyStories-33M',
        name: 'TinyStories 33M (TRM)',
        size: '~65MB',
        description: 'Ultra-tiny model for stories and basic generation',
        contextLength: 512,
    },
    'trm-gpt2-tiny': {
        id: 'Xenova/gpt2',
        name: 'GPT-2 124M (TRM)',
        size: '~250MB',
        description: 'Classic GPT-2 tiny for general text',
        contextLength: 1024,
    },
    'trm-distilgpt2': {
        id: 'Xenova/distilgpt2',
        name: 'DistilGPT-2 (TRM)',
        size: '~82MB',
        description: 'Distilled GPT-2, fastest general model',
        contextLength: 1024,
    },
    // =========================================================================
    // SmolLM - Smallest production-ready models
    // =========================================================================
    'smollm-135m': {
        id: 'HuggingFaceTB/SmolLM-135M-Instruct',
        name: 'SmolLM 135M',
        size: '~135MB',
        description: 'Smallest instruct model, very fast',
        contextLength: 2048,
    },
    'smollm-360m': {
        id: 'HuggingFaceTB/SmolLM-360M-Instruct',
        name: 'SmolLM 360M',
        size: '~360MB',
        description: 'Small model, fast, better quality',
        contextLength: 2048,
    },
    'smollm2-135m': {
        id: 'HuggingFaceTB/SmolLM2-135M-Instruct',
        name: 'SmolLM2 135M',
        size: '~135MB',
        description: 'Latest SmolLM v2, improved capabilities',
        contextLength: 2048,
    },
    'smollm2-360m': {
        id: 'HuggingFaceTB/SmolLM2-360M-Instruct',
        name: 'SmolLM2 360M',
        size: '~360MB',
        description: 'Latest SmolLM v2, better quality',
        contextLength: 2048,
    },
    // =========================================================================
    // Qwen - Chinese/English bilingual models
    // =========================================================================
    'qwen2.5-0.5b': {
        id: 'Qwen/Qwen2.5-0.5B-Instruct',
        name: 'Qwen2.5 0.5B',
        size: '~300MB quantized',
        description: 'Good balance of speed and quality, multilingual',
        contextLength: 4096,
    },
    // =========================================================================
    // TinyLlama - Llama architecture in tiny form
    // =========================================================================
    'tinyllama': {
        id: 'TinyLlama/TinyLlama-1.1B-Chat-v1.0',
        name: 'TinyLlama 1.1B',
        size: '~600MB quantized',
        description: 'Best small model quality, slower',
        contextLength: 2048,
    },
    // =========================================================================
    // Code-specialized models
    // =========================================================================
    'codegemma-2b': {
        id: 'google/codegemma-2b',
        name: 'CodeGemma 2B',
        size: '~1GB quantized',
        description: 'Code generation specialist',
        contextLength: 8192,
    },
    'deepseek-coder-1.3b': {
        id: 'deepseek-ai/deepseek-coder-1.3b-instruct',
        name: 'DeepSeek Coder 1.3B',
        size: '~700MB quantized',
        description: 'Excellent for code tasks',
        contextLength: 4096,
    },
    // =========================================================================
    // Phi models - Microsoft's tiny powerhouses
    // =========================================================================
    'phi-2': {
        id: 'microsoft/phi-2',
        name: 'Phi-2 2.7B',
        size: '~1.5GB quantized',
        description: 'High quality small model',
        contextLength: 2048,
    },
    'phi-3-mini': {
        id: 'microsoft/Phi-3-mini-4k-instruct',
        name: 'Phi-3 Mini',
        size: '~2GB quantized',
        description: 'Best quality tiny model',
        contextLength: 4096,
    },
};
180
// ============================================================================
// ONNX LLM Generator
// ============================================================================
// Module-level singleton state shared by the functional API below and by the
// OnnxLLM class wrapper at the bottom of this file.
let pipeline = null;     // transformers.js text-generation pipeline (null until init)
let transformers = null; // dynamically imported '@xenova/transformers' module
let loadedModel = null;  // model id currently loaded into `pipeline`
let loadPromise = null;  // in-flight initialization promise (dedupes concurrent init)
let loadError = null;    // cached init failure (rethrown by initOnnxLLM; see unload)
188
/**
 * Check if transformers.js is available.
 *
 * Probes '@xenova/transformers' with a dynamic import; resolves true on
 * success and false on any failure. Never rejects.
 *
 * @returns {Promise<boolean>} whether the optional dependency can be loaded
 */
async function isTransformersAvailable() {
    // Promise-combinator form of a try/catch around the awaited import.
    // Starting from Promise.resolve() also captures synchronous throws.
    const probe = Promise.resolve().then(() => dynamicImport('@xenova/transformers'));
    return probe.then(
        () => true,
        () => false,
    );
}
200
/**
 * Initialize the ONNX LLM with the specified model.
 *
 * Idempotent: resolves immediately when the requested model is already
 * loaded, and deduplicates concurrent calls via a shared load promise.
 * A cached load failure is rethrown on subsequent calls until unload().
 *
 * @param {object} [config] - OnnxLLMConfig: { modelId, cacheDir, quantized, device }
 * @returns {Promise<boolean>} true when the text-generation pipeline is ready
 * @throws {Error} if transformers.js or the model fails to load
 */
async function initOnnxLLM(config = {}) {
    // Resolve the default id up front so a default-config call compares equal
    // to a default-loaded model. (Previously the comparison used the raw,
    // possibly-undefined config.modelId, so requesting model B while model A
    // was loaded silently returned true without ever loading B.)
    const modelId = config.modelId || 'HuggingFaceTB/SmolLM-135M-Instruct';
    if (pipeline && loadedModel === modelId) {
        return true;
    }
    if (loadError)
        throw loadError;
    if (loadPromise) {
        await loadPromise;
        // The in-flight load finished; if it produced the requested model, done.
        if (loadedModel === modelId) {
            return pipeline !== null;
        }
        // A different model was loaded: discard it and load the requested one.
        pipeline = null;
        loadedModel = null;
        loadPromise = null;
    }
    loadPromise = (async () => {
        try {
            // Progress/log output goes to stderr so stdout stays clean.
            console.error(`Loading ONNX LLM: ${modelId}...`);
            // Import transformers.js via native dynamic import
            transformers = await dynamicImport('@xenova/transformers');
            const { pipeline: createPipeline, env } = transformers;
            // Configure the on-disk model cache directory
            env.cacheDir = config.cacheDir
                || path.join(process.env.HOME || '/tmp', '.ruvector', 'models', 'onnx-llm');
            // Ensure the cache directory exists
            if (!fs.existsSync(env.cacheDir)) {
                fs.mkdirSync(env.cacheDir, { recursive: true });
            }
            // Allow both remote download and local cache hits
            env.allowRemoteModels = true;
            env.allowLocalModels = true;
            // Create text generation pipeline (downloads on first run)
            console.error(`Downloading model (first run may take a while)...`);
            pipeline = await createPipeline('text-generation', modelId, {
                quantized: config.quantized !== false, // quantized unless explicitly disabled
                device: config.device || 'cpu',
            });
            loadedModel = modelId;
            console.error(`ONNX LLM ready: ${modelId}`);
        }
        catch (e) {
            // Cache the failure for subsequent callers, but clear the stale
            // promise so module state is not left half-initialized.
            loadError = new Error(`Failed to initialize ONNX LLM: ${e.message}`);
            loadPromise = null;
            throw loadError;
        }
    })();
    await loadPromise;
    return pipeline !== null;
}
251
/**
 * Generate text using ONNX LLM.
 *
 * Lazily initializes the default model if initOnnxLLM() was not called.
 *
 * @param {string} prompt - User prompt text
 * @param {object} [config] - GenerationConfig: { maxNewTokens, temperature,
 *   topP, topK, repetitionPenalty, systemPrompt, ... }
 * @returns {Promise<object>} GenerationResult: { text, tokensGenerated,
 *   timeMs, tokensPerSecond, model, cached }
 * @throws {Error} when the pipeline cannot be initialized
 */
async function generate(prompt, config = {}) {
    if (!pipeline) {
        await initOnnxLLM();
    }
    if (!pipeline) {
        throw new Error('ONNX LLM not initialized');
    }
    const start = performance.now();
    // Build the input text (apply a simple chat template when a system prompt is given)
    let inputText = prompt;
    if (config.systemPrompt) {
        inputText = `<|system|>\n${config.systemPrompt}<|end|>\n<|user|>\n${prompt}<|end|>\n<|assistant|>\n`;
    }
    // Use ?? (not ||) for defaults so explicit 0 values are honored. In
    // particular, temperature: 0 now selects greedy decoding (do_sample:
    // false) instead of being silently replaced by 0.7.
    const temperature = config.temperature ?? 0.7;
    const outputs = await pipeline(inputText, {
        max_new_tokens: config.maxNewTokens ?? 128,
        temperature,
        top_p: config.topP ?? 0.9,
        top_k: config.topK ?? 50,
        repetition_penalty: config.repetitionPenalty ?? 1.1,
        do_sample: temperature > 0,
        return_full_text: false, // only the completion, not the echoed prompt
    });
    const timeMs = performance.now() - start;
    const generatedText = outputs[0]?.generated_text || '';
    // Estimate tokens (rough word-count heuristic; exact counts are not exposed here)
    const tokensGenerated = Math.ceil(generatedText.split(/\s+/).length * 1.3);
    return {
        text: generatedText.trim(),
        tokensGenerated,
        timeMs,
        tokensPerSecond: tokensGenerated / (timeMs / 1000),
        model: loadedModel || 'unknown',
        cached: true,
    };
}
291
/**
 * Generate with streaming (token by token).
 *
 * Tokens are delivered in real time through `config.onToken`. Note that the
 * returned async generator resolves only AFTER generation has completed: it
 * yields the full collected text once, then returns the final
 * GenerationResult — so use onToken for true incremental output.
 *
 * @param {string} prompt - User prompt text
 * @param {object} [config] - GenerationConfig (see generate()); `onToken`
 *   receives each decoded text chunk as it is produced
 * @returns {Promise<AsyncGenerator<string, object, undefined>>}
 * @throws {Error} when the pipeline cannot be initialized
 */
async function generateStream(prompt, config = {}) {
    if (!pipeline) {
        await initOnnxLLM();
    }
    if (!pipeline) {
        throw new Error('ONNX LLM not initialized');
    }
    const start = performance.now();
    let fullText = '';
    let tokenCount = 0;
    // Build input text (same simple chat template as generate())
    let inputText = prompt;
    if (config.systemPrompt) {
        inputText = `<|system|>\n${config.systemPrompt}<|end|>\n<|user|>\n${prompt}<|end|>\n<|assistant|>\n`;
    }
    // Create a streamer that collects chunks and forwards them to the caller
    const { TextStreamer } = transformers;
    const streamer = new TextStreamer(pipeline.tokenizer, {
        skip_prompt: true, // don't re-emit the prompt tokens
        callback_function: (text) => {
            fullText += text;
            tokenCount++;
            config.onToken?.(text);
        },
    });
    // Use ?? (not ||) so an explicit temperature of 0 selects greedy decoding
    // instead of being silently replaced by the 0.7 default.
    const temperature = config.temperature ?? 0.7;
    await pipeline(inputText, {
        max_new_tokens: config.maxNewTokens ?? 128,
        temperature,
        top_p: config.topP ?? 0.9,
        top_k: config.topK ?? 50,
        repetition_penalty: config.repetitionPenalty ?? 1.1,
        do_sample: temperature > 0,
        streamer,
    });
    const timeMs = performance.now() - start;
    // Return a generator that yields the collected text once, then the result
    async function* generator() {
        yield fullText;
        return {
            text: fullText.trim(),
            tokensGenerated: tokenCount,
            timeMs,
            tokensPerSecond: tokenCount / (timeMs / 1000),
            model: loadedModel || 'unknown',
            cached: true,
        };
    }
    return generator();
}
346
/**
 * Chat completion with conversation history.
 *
 * Flattens the message list into a single chat-template string
 * (<|role|>\n...<|end|>\n per message) and delegates to generate().
 * Messages with unrecognized roles are silently skipped.
 *
 * @param {Array<{role: string, content: string}>} messages - conversation turns
 * @param {object} [config] - GenerationConfig forwarded to generate()
 * @returns {Promise<object>} GenerationResult from generate()
 */
async function chat(messages, config = {}) {
    if (!pipeline) {
        await initOnnxLLM();
    }
    if (!pipeline) {
        throw new Error('ONNX LLM not initialized');
    }
    // Build the conversation transcript, one templated segment per message.
    const parts = [];
    for (const msg of messages) {
        switch (msg.role) {
            case 'system':
                parts.push(`<|system|>\n${msg.content}<|end|>\n`);
                break;
            case 'user':
                parts.push(`<|user|>\n${msg.content}<|end|>\n`);
                break;
            case 'assistant':
                parts.push(`<|assistant|>\n${msg.content}<|end|>\n`);
                break;
            default:
                break; // unknown roles are ignored
        }
    }
    // Trailing assistant tag cues the model to produce the next reply.
    parts.push('<|assistant|>\n');
    // systemPrompt is cleared so generate() does not re-wrap the transcript.
    return generate(parts.join(''), { ...config, systemPrompt: undefined });
}
372
/**
 * Get model information.
 *
 * @returns {{model: string|null, ready: boolean, availableModels: object}}
 *   the currently loaded model id (null if none), whether the pipeline is
 *   ready, and the full model catalog.
 */
function getModelInfo() {
    const model = loadedModel;
    const ready = pipeline !== null;
    const availableModels = exports.AVAILABLE_MODELS;
    return { model, ready, availableModels };
}
382
/**
 * Unload the current model to free memory.
 *
 * Resets ALL module state unconditionally. The previous version only reset
 * state when a pipeline existed, which meant a FAILED initialization
 * (pipeline null but loadError set) could never be cleared — initOnnxLLM
 * would rethrow the cached error forever. Clearing unconditionally makes
 * unload() the documented way to recover from a failed load as well.
 *
 * Note: transformers.js has no explicit dispose; nulling the reference lets
 * the pipeline be garbage-collected.
 */
async function unload() {
    pipeline = null;
    loadedModel = null;
    loadPromise = null;
    loadError = null;
}
394
// ============================================================================
// Class wrapper for OOP usage
// ============================================================================
/**
 * Thin object-oriented facade over the module-level functional API.
 *
 * Holds its construction config and lazily initializes the shared pipeline
 * on first use; generation itself is delegated to the module functions.
 */
class OnnxLLM {
    /** @param {object} [config] - OnnxLLMConfig passed through to initOnnxLLM() */
    constructor(config = {}) {
        this.config = config;
        this.initialized = false;
    }
    /** Load the configured model (no-op when already initialized). */
    async init() {
        if (!this.initialized) {
            this.initialized = await initOnnxLLM(this.config);
        }
        return this.initialized;
    }
    /** Generate a completion for `prompt`; initializes lazily. */
    async generate(prompt, config) {
        if (!this.initialized) {
            await this.init();
        }
        return generate(prompt, config);
    }
    /** Run a chat completion over `messages`; initializes lazily. */
    async chat(messages, config) {
        if (!this.initialized) {
            await this.init();
        }
        return chat(messages, config);
    }
    /** Release the shared pipeline and mark this instance uninitialized. */
    async unload() {
        await unload();
        this.initialized = false;
    }
    /** Whether init() has completed successfully on this instance. */
    get ready() {
        return this.initialized;
    }
    /** Model id currently loaded into the shared pipeline (or null). */
    get model() {
        return loadedModel;
    }
}
429
// Export the class as both a named export and the CommonJS default export.
exports.OnnxLLM = OnnxLLM;
exports.default = OnnxLLM;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ruvector",
3
- "version": "0.1.93",
3
+ "version": "0.1.95",
4
4
  "description": "High-performance vector database for Node.js with automatic native/WASM fallback",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",
@@ -8,7 +8,7 @@
8
8
  "ruvector": "./bin/cli.js"
9
9
  },
10
10
  "scripts": {
11
- "build": "tsc && cp src/core/onnx/pkg/package.json dist/core/onnx/pkg/",
11
+ "build": "tsc && cp src/core/onnx/pkg/package.json dist/core/onnx/pkg/ && cp src/core/onnx/pkg/ruvector_onnx_embeddings_wasm_cjs.js dist/core/onnx/pkg/",
12
12
  "prepublishOnly": "npm run build",
13
13
  "test": "node test/integration.js"
14
14
  },
@@ -60,6 +60,7 @@
60
60
  "@ruvector/core": "^0.1.25",
61
61
  "@ruvector/gnn": "^0.1.22",
62
62
  "@ruvector/sona": "^0.1.4",
63
+ "@xenova/transformers": "^2.17.2",
63
64
  "chalk": "^4.1.2",
64
65
  "commander": "^11.1.0",
65
66
  "ora": "^5.4.1"