ruvector 0.1.93 → 0.1.95
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/core/index.d.ts +2 -0
- package/dist/core/index.d.ts.map +1 -1
- package/dist/core/index.js +4 -1
- package/dist/core/onnx/pkg/ruvector_onnx_embeddings_wasm_cjs.js +127 -0
- package/dist/core/onnx-embedder.d.ts +84 -0
- package/dist/core/onnx-embedder.d.ts.map +1 -1
- package/dist/core/onnx-embedder.js +167 -12
- package/dist/core/onnx-llm.d.ts +206 -0
- package/dist/core/onnx-llm.d.ts.map +1 -0
- package/dist/core/onnx-llm.js +430 -0
- package/package.json +3 -2
package/dist/core/index.d.ts
CHANGED
@@ -25,6 +25,7 @@ export * from './learning-engine';
 export * from './adaptive-embedder';
 export * from './neural-embeddings';
 export * from './neural-perf';
+export * from './onnx-llm';
 export * from '../analysis';
 export { default as gnnWrapper } from './gnn-wrapper';
 export { default as attentionFallbacks } from './attention-fallbacks';
@@ -44,4 +45,5 @@ export { default as TensorCompress } from './tensor-compress';
 export { default as LearningEngine } from './learning-engine';
 export { default as AdaptiveEmbedder } from './adaptive-embedder';
 export { default as NeuralSubstrate } from './neural-embeddings';
+export { default as OnnxLLM } from './onnx-llm';
 //# sourceMappingURL=index.d.ts.map
package/dist/core/index.d.ts.map
CHANGED
@@ -1 +1 @@
-{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/core/index.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,cAAc,eAAe,CAAC;AAC9B,cAAc,uBAAuB,CAAC;AACtC,cAAc,gBAAgB,CAAC;AAC/B,cAAc,gBAAgB,CAAC;AAC/B,cAAc,uBAAuB,CAAC;AACtC,cAAc,iBAAiB,CAAC;AAChC,cAAc,kBAAkB,CAAC;AACjC,cAAc,yBAAyB,CAAC;AACxC,cAAc,oBAAoB,CAAC;AACnC,cAAc,kBAAkB,CAAC;AACjC,cAAc,iBAAiB,CAAC;AAChC,cAAc,mBAAmB,CAAC;AAClC,cAAc,cAAc,CAAC;AAC7B,cAAc,mBAAmB,CAAC;AAClC,cAAc,mBAAmB,CAAC;AAClC,cAAc,oBAAoB,CAAC;AACnC,cAAc,mBAAmB,CAAC;AAClC,cAAc,mBAAmB,CAAC;AAClC,cAAc,qBAAqB,CAAC;AACpC,cAAc,qBAAqB,CAAC;AACpC,cAAc,eAAe,CAAC;
+{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/core/index.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,cAAc,eAAe,CAAC;AAC9B,cAAc,uBAAuB,CAAC;AACtC,cAAc,gBAAgB,CAAC;AAC/B,cAAc,gBAAgB,CAAC;AAC/B,cAAc,uBAAuB,CAAC;AACtC,cAAc,iBAAiB,CAAC;AAChC,cAAc,kBAAkB,CAAC;AACjC,cAAc,yBAAyB,CAAC;AACxC,cAAc,oBAAoB,CAAC;AACnC,cAAc,kBAAkB,CAAC;AACjC,cAAc,iBAAiB,CAAC;AAChC,cAAc,mBAAmB,CAAC;AAClC,cAAc,cAAc,CAAC;AAC7B,cAAc,mBAAmB,CAAC;AAClC,cAAc,mBAAmB,CAAC;AAClC,cAAc,oBAAoB,CAAC;AACnC,cAAc,mBAAmB,CAAC;AAClC,cAAc,mBAAmB,CAAC;AAClC,cAAc,qBAAqB,CAAC;AACpC,cAAc,qBAAqB,CAAC;AACpC,cAAc,eAAe,CAAC;AAC9B,cAAc,YAAY,CAAC;AAG3B,cAAc,aAAa,CAAC;AAG5B,OAAO,EAAE,OAAO,IAAI,UAAU,EAAE,MAAM,eAAe,CAAC;AACtD,OAAO,EAAE,OAAO,IAAI,kBAAkB,EAAE,MAAM,uBAAuB,CAAC;AACtE,OAAO,EAAE,OAAO,IAAI,WAAW,EAAE,MAAM,gBAAgB,CAAC;AACxD,OAAO,EAAE,OAAO,IAAI,IAAI,EAAE,MAAM,gBAAgB,CAAC;AACjD,OAAO,EAAE,OAAO,IAAI,kBAAkB,EAAE,MAAM,uBAAuB,CAAC;AACtE,OAAO,EAAE,OAAO,IAAI,YAAY,EAAE,MAAM,iBAAiB,CAAC;AAC1D,OAAO,EAAE,OAAO,IAAI,qBAAqB,EAAE,MAAM,kBAAkB,CAAC;AACpE,OAAO,EAAE,OAAO,IAAI,oBAAoB,EAAE,MAAM,yBAAyB,CAAC;AAC1E,OAAO,EAAE,OAAO,IAAI,kBAAkB,EAAE,MAAM,oBAAoB,CAAC;AACnE,OAAO,EAAE,OAAO,IAAI,cAAc,EAAE,MAAM,kBAAkB,CAAC;AAC7D,OAAO,EAAE,OAAO,IAAI,SAAS,EAAE,MAAM,iBAAiB,CAAC;AACvD,OAAO,EAAE,OAAO,IAAI,eAAe,EAAE,MAAM,mBAAmB,CAAC;AAC/D,OAAO,EAAE,OAAO,IAAI,UAAU,EAAE,MAAM,cAAc,CAAC;AAGrD,OAAO,EAAE,UAAU,IAAI,SAAS,EAAE,MAAM,cAAc,CAAC;AAGvD,OAAO,EAAE,OAAO,IAAI,cAAc,EAAE,MAAM,mBAAmB,CAAC;AAC9D,OAAO,EAAE,OAAO,IAAI,cAAc,EAAE,MAAM,mBAAmB,CAAC;AAC9D,OAAO,EAAE,OAAO,IAAI,gBAAgB,EAAE,MAAM,qBAAqB,CAAC;AAClE,OAAO,EAAE,OAAO,IAAI,eAAe,EAAE,MAAM,qBAAqB,CAAC;AACjE,OAAO,EAAE,OAAO,IAAI,OAAO,EAAE,MAAM,YAAY,CAAC"}
package/dist/core/index.js
CHANGED
@@ -23,7 +23,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
     return (mod && mod.__esModule) ? mod : { "default": mod };
 };
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.NeuralSubstrate = exports.AdaptiveEmbedder = exports.LearningEngine = exports.TensorCompress = exports.ASTParser = exports.CodeParser = exports.RuvectorCluster = exports.CodeGraph = exports.SemanticRouter = exports.ExtendedWorkerPool = exports.ParallelIntelligence = exports.OptimizedOnnxEmbedder = exports.OnnxEmbedder = exports.IntelligenceEngine = exports.Sona = exports.agentdbFast = exports.attentionFallbacks = exports.gnnWrapper = void 0;
+exports.OnnxLLM = exports.NeuralSubstrate = exports.AdaptiveEmbedder = exports.LearningEngine = exports.TensorCompress = exports.ASTParser = exports.CodeParser = exports.RuvectorCluster = exports.CodeGraph = exports.SemanticRouter = exports.ExtendedWorkerPool = exports.ParallelIntelligence = exports.OptimizedOnnxEmbedder = exports.OnnxEmbedder = exports.IntelligenceEngine = exports.Sona = exports.agentdbFast = exports.attentionFallbacks = exports.gnnWrapper = void 0;
 __exportStar(require("./gnn-wrapper"), exports);
 __exportStar(require("./attention-fallbacks"), exports);
 __exportStar(require("./agentdb-fast"), exports);
@@ -45,6 +45,7 @@ __exportStar(require("./learning-engine"), exports);
 __exportStar(require("./adaptive-embedder"), exports);
 __exportStar(require("./neural-embeddings"), exports);
 __exportStar(require("./neural-perf"), exports);
+__exportStar(require("./onnx-llm"), exports);
 // Analysis module (consolidated security, complexity, patterns)
 __exportStar(require("../analysis"), exports);
 // Re-export default objects for convenience
@@ -86,3 +87,5 @@ var adaptive_embedder_1 = require("./adaptive-embedder");
 Object.defineProperty(exports, "AdaptiveEmbedder", { enumerable: true, get: function () { return __importDefault(adaptive_embedder_1).default; } });
 var neural_embeddings_1 = require("./neural-embeddings");
 Object.defineProperty(exports, "NeuralSubstrate", { enumerable: true, get: function () { return __importDefault(neural_embeddings_1).default; } });
+var onnx_llm_1 = require("./onnx-llm");
+Object.defineProperty(exports, "OnnxLLM", { enumerable: true, get: function () { return __importDefault(onnx_llm_1).default; } });
package/dist/core/onnx/pkg/ruvector_onnx_embeddings_wasm_cjs.js
ADDED
@@ -0,0 +1,127 @@
+/**
+ * CommonJS-compatible WASM loader for Node.js
+ *
+ * This file provides a way to load the WASM module without requiring
+ * the --experimental-wasm-modules flag by manually loading the WASM bytes.
+ *
+ * Usage:
+ *   const wasm = require('./ruvector_onnx_embeddings_wasm_cjs.js');
+ *   await wasm.init(); // or wasm.initSync(wasmBytes)
+ */
+
+const fs = require('fs');
+const path = require('path');
+
+// Re-export everything from the JS bindings
+const bindings = require('./ruvector_onnx_embeddings_wasm_bg.js');
+
+// Track initialization state
+let initialized = false;
+let initPromise = null;
+
+/**
+ * Initialize the WASM module asynchronously
+ * Automatically loads the .wasm file from the same directory
+ */
+async function init(wasmInput) {
+  if (initialized) return bindings;
+
+  if (initPromise) {
+    await initPromise;
+    return bindings;
+  }
+
+  initPromise = (async () => {
+    let wasmBytes;
+
+    if (wasmInput instanceof WebAssembly.Module) {
+      // Already compiled module
+      const instance = await WebAssembly.instantiate(wasmInput, getImports());
+      bindings.__wbg_set_wasm(instance.exports);
+      finishInit();
+      return;
+    } else if (wasmInput instanceof ArrayBuffer || wasmInput instanceof Uint8Array) {
+      // Raw bytes provided
+      wasmBytes = wasmInput;
+    } else if (typeof wasmInput === 'string') {
+      // Path to WASM file
+      wasmBytes = fs.readFileSync(wasmInput);
+    } else {
+      // Auto-detect WASM file location
+      const wasmPath = path.join(__dirname, 'ruvector_onnx_embeddings_wasm_bg.wasm');
+      wasmBytes = fs.readFileSync(wasmPath);
+    }
+
+    const wasmModule = await WebAssembly.compile(wasmBytes);
+    const instance = await WebAssembly.instantiate(wasmModule, getImports());
+
+    bindings.__wbg_set_wasm(instance.exports);
+    finishInit();
+  })();
+
+  await initPromise;
+  return bindings;
+}
+
+/**
+ * Initialize the WASM module synchronously
+ * Requires the WASM bytes to be provided
+ */
+function initSync(wasmBytes) {
+  if (initialized) return bindings;
+
+  if (!wasmBytes) {
+    const wasmPath = path.join(__dirname, 'ruvector_onnx_embeddings_wasm_bg.wasm');
+    wasmBytes = fs.readFileSync(wasmPath);
+  }
+
+  const wasmModule = new WebAssembly.Module(wasmBytes);
+  const instance = new WebAssembly.Instance(wasmModule, getImports());
+
+  bindings.__wbg_set_wasm(instance.exports);
+  finishInit();
+
+  return bindings;
+}
+
+/**
+ * Get the WASM import object
+ */
+function getImports() {
+  return {
+    './ruvector_onnx_embeddings_wasm_bg.js': bindings,
+  };
+}
+
+/**
+ * Finalize initialization
+ */
+function finishInit() {
+  if (typeof bindings.__wbindgen_init_externref_table === 'function') {
+    bindings.__wbindgen_init_externref_table();
+  }
+  initialized = true;
+}
+
+/**
+ * Check if initialized
+ */
+function isInitialized() {
+  return initialized;
+}
+
+// Export init functions and all bindings
+module.exports = {
+  init,
+  initSync,
+  isInitialized,
+  default: init,
+  // Re-export all bindings
+  WasmEmbedder: bindings.WasmEmbedder,
+  WasmEmbedderConfig: bindings.WasmEmbedderConfig,
+  PoolingStrategy: bindings.PoolingStrategy,
+  cosineSimilarity: bindings.cosineSimilarity,
+  normalizeL2: bindings.normalizeL2,
+  simd_available: bindings.simd_available,
+  version: bindings.version,
+};
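The new loader's export surface suggests the following minimal usage. This is a sketch based only on the functions shown in this file (init, initSync, isInitialized), not an official example; the relative paths assume the dist/core/onnx/pkg/ layout the build script creates.

// Sketch: loading the new CJS bindings without --experimental-wasm-modules.
const wasm = require('./ruvector_onnx_embeddings_wasm_cjs.js');

(async () => {
  await wasm.init();                 // async path: compiles the bundled .wasm
  console.log(wasm.isInitialized()); // true once finishInit() has run
  // Synchronous alternative when you already hold the bytes:
  // wasm.initSync(require('fs').readFileSync('./ruvector_onnx_embeddings_wasm_bg.wasm'));
})();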
package/dist/core/onnx-embedder.d.ts
CHANGED
@@ -12,6 +12,26 @@
  * - Cached model loading (downloads from HuggingFace on first use)
  * - Batch embedding support
  * - Optional parallel workers for 3.8x batch speedup
+ * - CommonJS-compatible: No --experimental-wasm-modules flag required
+ *
+ * Quick Start (Simple API - returns arrays directly):
+ * ```javascript
+ * const { embedText, embedTexts } = require('ruvector');
+ *
+ * // Single embedding - returns number[]
+ * const vector = await embedText("hello world");
+ *
+ * // Batch embeddings - returns number[][]
+ * const vectors = await embedTexts(["hello", "world"]);
+ * ```
+ *
+ * Full API (returns metadata):
+ * ```javascript
+ * const { embed, embedBatch } = require('ruvector');
+ *
+ * // Returns { embedding: number[], dimension: number, timeMs: number }
+ * const result = await embed("hello world");
+ * ```
  */
 declare global {
     var __ruvector_require: NodeRequire | undefined;
@@ -59,6 +79,70 @@ export declare function embed(text: string): Promise<EmbeddingResult>;
  * Uses parallel workers automatically for batches >= parallelThreshold
  */
 export declare function embedBatch(texts: string[]): Promise<EmbeddingResult[]>;
+/**
+ * ============================================================================
+ * SIMPLE API - Returns arrays directly (for easy integration)
+ * ============================================================================
+ */
+/**
+ * Generate embedding for a single text - returns array directly
+ *
+ * This is the simplified API that returns just the embedding array,
+ * making it easy to use for vector operations, PostgreSQL insertion,
+ * and similarity calculations.
+ *
+ * @param text - The text to embed
+ * @returns A 384-dimensional embedding array
+ *
+ * @example
+ * ```javascript
+ * const { embedText } = require('ruvector');
+ *
+ * const vector = await embedText("hello world");
+ * console.log(vector.length); // 384
+ * console.log(Array.isArray(vector)); // true
+ *
+ * // Use directly with PostgreSQL
+ * await pool.query(
+ *   'INSERT INTO docs (content, embedding) VALUES ($1, $2)',
+ *   [text, JSON.stringify(vector)]
+ * );
+ * ```
+ */
+export declare function embedText(text: string): Promise<number[]>;
+/**
+ * Generate embeddings for multiple texts - returns array of arrays
+ *
+ * This is the simplified batch API that returns just the embedding arrays.
+ * Uses optimized batch processing for much faster throughput than
+ * calling embedText() in a loop.
+ *
+ * @param texts - Array of texts to embed
+ * @param options - Optional batch processing options
+ * @returns Array of 384-dimensional embedding arrays
+ *
+ * @example
+ * ```javascript
+ * const { embedTexts } = require('ruvector');
+ *
+ * // Batch embed 8000 documents in ~30 seconds (vs 53 min sequentially)
+ * const vectors = await embedTexts(documents);
+ *
+ * // With options for very large batches
+ * const vectors = await embedTexts(documents, { batchSize: 256 });
+ *
+ * // Bulk insert into PostgreSQL
+ * for (let i = 0; i < documents.length; i++) {
+ *   await pool.query(
+ *     'INSERT INTO docs (content, embedding) VALUES ($1, $2)',
+ *     [documents[i], JSON.stringify(vectors[i])]
+ *   );
+ * }
+ * ```
+ */
+export declare function embedTexts(texts: string[], options?: {
+    batchSize?: number;
+}): Promise<number[][]>;
 /**
  * Calculate cosine similarity between two texts
  */
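For quick reference, the two surfaces declared above differ only in return shape. A minimal sketch of the contrast, assuming the 384-dimension default documented in the JSDoc:

// Sketch: simple API returns bare arrays, full API wraps them in metadata.
const { embed, embedText } = require('ruvector');

(async () => {
  const vector = await embedText('hello world'); // number[] of length 384
  const result = await embed('hello world');     // { embedding, dimension, timeMs }
  console.log(vector.length === result.dimension); // true
})();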
package/dist/core/onnx-embedder.d.ts.map
CHANGED
@@ -1 +1 @@
-{"version":3,"file":"onnx-embedder.d.ts","sourceRoot":"","sources":["../../src/core/onnx-embedder.ts"],"names":[],"mappings":"AAAA
+{"version":3,"file":"onnx-embedder.d.ts","sourceRoot":"","sources":["../../src/core/onnx-embedder.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAkCG;AAQH,OAAO,CAAC,MAAM,CAAC;IAEb,IAAI,kBAAkB,EAAE,WAAW,GAAG,SAAS,CAAC;CACjD;AAsCD,MAAM,WAAW,kBAAkB;IACjC,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB;;;;;OAKG;IACH,cAAc,CAAC,EAAE,OAAO,GAAG,MAAM,CAAC;IAClC,wDAAwD;IACxD,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,iEAAiE;IACjE,iBAAiB,CAAC,EAAE,MAAM,CAAC;CAC5B;AAMD,MAAM,WAAW,eAAe;IAC9B,SAAS,EAAE,MAAM,EAAE,CAAC;IACpB,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,gBAAgB;IAC/B,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,MAAM,CAAC;CAChB;AAeD;;GAEG;AACH,wBAAgB,eAAe,IAAI,OAAO,CAOzC;AA6DD;;GAEG;AACH,wBAAsB,gBAAgB,CAAC,MAAM,GAAE,kBAAuB,GAAG,OAAO,CAAC,OAAO,CAAC,CAyGxF;AAED;;GAEG;AACH,wBAAsB,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,eAAe,CAAC,CAiBlE;AAED;;;GAGG;AACH,wBAAsB,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,eAAe,EAAE,CAAC,CAwC5E;AAED;;;;GAIG;AAEH;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AACH,wBAAsB,SAAS,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,CAU/D;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA6BG;AACH,wBAAsB,UAAU,CAC9B,KAAK,EAAE,MAAM,EAAE,EACf,OAAO,CAAC,EAAE;IAAE,SAAS,CAAC,EAAE,MAAM,CAAA;CAAE,GAC/B,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CA4CrB;AAED;;GAEG;AACH,wBAAsB,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,gBAAgB,CAAC,CAaxF;AAED;;GAEG;AACH,wBAAgB,gBAAgB,CAAC,CAAC,EAAE,MAAM,EAAE,EAAE,CAAC,EAAE,MAAM,EAAE,GAAG,MAAM,CAiBjE;AAED;;GAEG;AACH,wBAAgB,YAAY,IAAI,MAAM,CAErC;AAED;;GAEG;AACH,wBAAgB,OAAO,IAAI,OAAO,CAEjC;AAED;;GAEG;AACH,wBAAgB,QAAQ,IAAI;IAC1B,KAAK,EAAE,OAAO,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,OAAO,CAAC;IACd,QAAQ,EAAE,OAAO,CAAC;IAClB,eAAe,EAAE,MAAM,CAAC;IACxB,iBAAiB,EAAE,MAAM,CAAC;CAC3B,CAUA;AAED;;GAEG;AACH,wBAAsB,QAAQ,IAAI,OAAO,CAAC,IAAI,CAAC,CAM9C;AAGD,qBAAa,YAAY;IACvB,OAAO,CAAC,MAAM,CAAqB;gBAEvB,MAAM,GAAE,kBAAuB;IAIrC,IAAI,IAAI,OAAO,CAAC,OAAO,CAAC;IAIxB,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IAKtC,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC;IAKhD,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;IAK/D,IAAI,SAAS,IAAI,MAAM,CAEtB;IAED,IAAI,KAAK,IAAI,OAAO,CAEnB;CACF;AAED,eAAe,YAAY,CAAC"}
package/dist/core/onnx-embedder.js
CHANGED
@@ -13,6 +13,26 @@
  * - Cached model loading (downloads from HuggingFace on first use)
  * - Batch embedding support
  * - Optional parallel workers for 3.8x batch speedup
+ * - CommonJS-compatible: No --experimental-wasm-modules flag required
+ *
+ * Quick Start (Simple API - returns arrays directly):
+ * ```javascript
+ * const { embedText, embedTexts } = require('ruvector');
+ *
+ * // Single embedding - returns number[]
+ * const vector = await embedText("hello world");
+ *
+ * // Batch embeddings - returns number[][]
+ * const vectors = await embedTexts(["hello", "world"]);
+ * ```
+ *
+ * Full API (returns metadata):
+ * ```javascript
+ * const { embed, embedBatch } = require('ruvector');
+ *
+ * // Returns { embedding: number[], dimension: number, timeMs: number }
+ * const result = await embed("hello world");
+ * ```
  */
 var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
     if (k2 === undefined) k2 = k;
@@ -53,6 +73,8 @@ exports.isOnnxAvailable = isOnnxAvailable;
 exports.initOnnxEmbedder = initOnnxEmbedder;
 exports.embed = embed;
 exports.embedBatch = embedBatch;
+exports.embedText = embedText;
+exports.embedTexts = embedTexts;
 exports.similarity = similarity;
 exports.cosineSimilarity = cosineSimilarity;
 exports.getDimension = getDimension;
@@ -83,6 +105,21 @@ if (typeof globalThis !== 'undefined' && !globalThis.__ruvector_require) {
 // Force native dynamic import (avoids TypeScript transpiling to require)
 // eslint-disable-next-line @typescript-eslint/no-implied-eval
 const dynamicImport = new Function('specifier', 'return import(specifier)');
+// Try to load the CommonJS-compatible WASM loader (no experimental flags needed)
+function tryLoadCjsModule() {
+    try {
+        // Use require for CJS module which doesn't need experimental flags
+        const cjsPath = path.join(__dirname, 'onnx', 'pkg', 'ruvector_onnx_embeddings_wasm_cjs.js');
+        if (fs.existsSync(cjsPath)) {
+            // eslint-disable-next-line @typescript-eslint/no-var-requires
+            return require(cjsPath);
+        }
+    }
+    catch {
+        // CJS loader not available
+    }
+    return null;
+}
 // Capability detection
 let simdAvailable = false;
 let parallelAvailable = false;
@@ -183,21 +220,32 @@ async function initOnnxEmbedder(config = {}) {
     // Paths to bundled ONNX files
     const pkgPath = path.join(__dirname, 'onnx', 'pkg', 'ruvector_onnx_embeddings_wasm.js');
     const loaderPath = path.join(__dirname, 'onnx', 'loader.js');
-
+    const wasmPath = path.join(__dirname, 'onnx', 'pkg', 'ruvector_onnx_embeddings_wasm_bg.wasm');
+    if (!fs.existsSync(wasmPath)) {
         throw new Error('ONNX WASM files not bundled. The onnx/ directory is missing.');
     }
-    //
-    const
-
-
-
-
-    const wasmPath = path.join(__dirname, 'onnx', 'pkg', 'ruvector_onnx_embeddings_wasm_bg.wasm');
-    if (wasmModule.default && typeof wasmModule.default === 'function') {
-        // For bundler-style initialization, pass the wasm buffer
-        const wasmBytes = fs.readFileSync(wasmPath);
-        await wasmModule.default(wasmBytes);
+    // Try CJS loader first (no experimental flags needed)
+    const cjsModule = tryLoadCjsModule();
+    if (cjsModule) {
+        // Use CommonJS loader - no experimental flags required!
+        await cjsModule.init(wasmPath);
+        wasmModule = cjsModule;
     }
+    else {
+        // Fall back to ESM loader (may require --experimental-wasm-modules)
+        // Convert paths to file:// URLs for cross-platform ESM compatibility (Windows fix)
+        const pkgUrl = (0, url_1.pathToFileURL)(pkgPath).href;
+        // Dynamic import of bundled modules using file:// URLs
+        wasmModule = await dynamicImport(pkgUrl);
+        // Initialize WASM module (loads the .wasm file)
+        if (wasmModule.default && typeof wasmModule.default === 'function') {
+            // For bundler-style initialization, pass the wasm buffer
+            const wasmBytes = fs.readFileSync(wasmPath);
+            await wasmModule.default(wasmBytes);
+        }
+    }
+    // Load the model loader
+    const loaderUrl = (0, url_1.pathToFileURL)(loaderPath).href;
     const loaderModule = await dynamicImport(loaderUrl);
     const { ModelLoader } = loaderModule;
     // Create model loader with caching
@@ -310,6 +358,113 @@ async function embedBatch(texts) {
     }
     return results;
 }
+/**
+ * ============================================================================
+ * SIMPLE API - Returns arrays directly (for easy integration)
+ * ============================================================================
+ */
+/**
+ * Generate embedding for a single text - returns array directly
+ *
+ * This is the simplified API that returns just the embedding array,
+ * making it easy to use for vector operations, PostgreSQL insertion,
+ * and similarity calculations.
+ *
+ * @param text - The text to embed
+ * @returns A 384-dimensional embedding array
+ *
+ * @example
+ * ```javascript
+ * const { embedText } = require('ruvector');
+ *
+ * const vector = await embedText("hello world");
+ * console.log(vector.length); // 384
+ * console.log(Array.isArray(vector)); // true
+ *
+ * // Use directly with PostgreSQL
+ * await pool.query(
+ *   'INSERT INTO docs (content, embedding) VALUES ($1, $2)',
+ *   [text, JSON.stringify(vector)]
+ * );
+ * ```
+ */
+async function embedText(text) {
+    if (!isInitialized) {
+        await initOnnxEmbedder();
+    }
+    if (!embedder) {
+        throw new Error('ONNX embedder not initialized');
+    }
+    const embedding = embedder.embedOne(text);
+    return Array.from(embedding);
+}
+/**
+ * Generate embeddings for multiple texts - returns array of arrays
+ *
+ * This is the simplified batch API that returns just the embedding arrays.
+ * Uses optimized batch processing for much faster throughput than
+ * calling embedText() in a loop.
+ *
+ * @param texts - Array of texts to embed
+ * @param options - Optional batch processing options
+ * @returns Array of 384-dimensional embedding arrays
+ *
+ * @example
+ * ```javascript
+ * const { embedTexts } = require('ruvector');
+ *
+ * // Batch embed 8000 documents in ~30 seconds (vs 53 min sequentially)
+ * const vectors = await embedTexts(documents);
+ *
+ * // With options for very large batches
+ * const vectors = await embedTexts(documents, { batchSize: 256 });
+ *
+ * // Bulk insert into PostgreSQL
+ * for (let i = 0; i < documents.length; i++) {
+ *   await pool.query(
+ *     'INSERT INTO docs (content, embedding) VALUES ($1, $2)',
+ *     [documents[i], JSON.stringify(vectors[i])]
+ *   );
+ * }
+ * ```
+ */
+async function embedTexts(texts, options) {
+    if (!isInitialized) {
+        await initOnnxEmbedder();
+    }
+    if (!embedder) {
+        throw new Error('ONNX embedder not initialized');
+    }
+    if (texts.length === 0) {
+        return [];
+    }
+    const batchSize = options?.batchSize || 256;
+    // For small batches, process all at once
+    if (texts.length <= batchSize) {
+        // Use parallel workers for large batches
+        if (parallelEnabled && parallelEmbedder && texts.length >= parallelThreshold) {
+            const batchResults = await parallelEmbedder.embedBatch(texts);
+            return batchResults.map((emb) => Array.from(emb));
+        }
+        // Sequential processing
+        const batchEmbeddings = embedder.embedBatch(texts);
+        const dimension = embedder.dimension();
+        const results = [];
+        for (let i = 0; i < texts.length; i++) {
+            const embedding = batchEmbeddings.slice(i * dimension, (i + 1) * dimension);
+            results.push(Array.from(embedding));
+        }
+        return results;
+    }
+    // Process in chunks for very large batches
+    const results = [];
+    for (let i = 0; i < texts.length; i += batchSize) {
+        const chunk = texts.slice(i, i + batchSize);
+        const chunkResults = await embedTexts(chunk);
+        results.push(...chunkResults);
+    }
+    return results;
+}
 /**
  * Calculate cosine similarity between two texts
  */
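Note how embedTexts handles oversized inputs in the implementation above: anything beyond batchSize is sliced into chunks and each chunk re-enters the same function through the small-batch path, so results come back in input order. A sketch of the observable behavior (the document count is illustrative):

// Sketch: 1000 texts with batchSize 256 -> chunks of 256/256/256/232,
// each embedded via the small-batch path, concatenated in order.
const { embedTexts } = require('ruvector');

(async () => {
  const documents = Array.from({ length: 1000 }, (_, i) => `doc ${i}`);
  const vectors = await embedTexts(documents, { batchSize: 256 });
  console.log(vectors.length); // 1000, one 384-dim array per input
})();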
package/dist/core/onnx-llm.d.ts
ADDED
@@ -0,0 +1,206 @@
+/**
+ * ONNX LLM Text Generation for RuVector
+ *
+ * Provides real local LLM inference using ONNX Runtime via transformers.js
+ * Supports small models that run efficiently on CPU:
+ * - SmolLM 135M - Smallest, fast (~135MB)
+ * - SmolLM 360M - Better quality (~360MB)
+ * - TinyLlama 1.1B - Best small model quality (~1GB quantized)
+ * - Qwen2.5 0.5B - Good balance (~500MB)
+ *
+ * Features:
+ * - Automatic model downloading and caching
+ * - Quantized INT4/INT8 models for efficiency
+ * - Streaming generation support
+ * - Temperature, top-k, top-p sampling
+ * - KV cache for efficient multi-turn conversations
+ */
+export interface OnnxLLMConfig {
+    /** Model ID (default: 'Xenova/smollm-135m-instruct') */
+    modelId?: string;
+    /** Cache directory for models */
+    cacheDir?: string;
+    /** Use quantized model (default: true) */
+    quantized?: boolean;
+    /** Device: 'cpu' | 'webgpu' (default: 'cpu') */
+    device?: 'cpu' | 'webgpu';
+    /** Maximum context length */
+    maxLength?: number;
+}
+export interface GenerationConfig {
+    /** Maximum new tokens to generate (default: 128) */
+    maxNewTokens?: number;
+    /** Temperature for sampling (default: 0.7) */
+    temperature?: number;
+    /** Top-p nucleus sampling (default: 0.9) */
+    topP?: number;
+    /** Top-k sampling (default: 50) */
+    topK?: number;
+    /** Repetition penalty (default: 1.1) */
+    repetitionPenalty?: number;
+    /** Stop sequences */
+    stopSequences?: string[];
+    /** System prompt for chat models */
+    systemPrompt?: string;
+    /** Enable streaming (callback for each token) */
+    onToken?: (token: string) => void;
+}
+export interface GenerationResult {
+    /** Generated text */
+    text: string;
+    /** Number of tokens generated */
+    tokensGenerated: number;
+    /** Time taken in milliseconds */
+    timeMs: number;
+    /** Tokens per second */
+    tokensPerSecond: number;
+    /** Model used */
+    model: string;
+    /** Whether model was loaded from cache */
+    cached: boolean;
+}
+export declare const AVAILABLE_MODELS: {
+    readonly 'trm-tinystories': {
+        readonly id: "Xenova/TinyStories-33M";
+        readonly name: "TinyStories 33M (TRM)";
+        readonly size: "~65MB";
+        readonly description: "Ultra-tiny model for stories and basic generation";
+        readonly contextLength: 512;
+    };
+    readonly 'trm-gpt2-tiny': {
+        readonly id: "Xenova/gpt2";
+        readonly name: "GPT-2 124M (TRM)";
+        readonly size: "~250MB";
+        readonly description: "Classic GPT-2 tiny for general text";
+        readonly contextLength: 1024;
+    };
+    readonly 'trm-distilgpt2': {
+        readonly id: "Xenova/distilgpt2";
+        readonly name: "DistilGPT-2 (TRM)";
+        readonly size: "~82MB";
+        readonly description: "Distilled GPT-2, fastest general model";
+        readonly contextLength: 1024;
+    };
+    readonly 'smollm-135m': {
+        readonly id: "HuggingFaceTB/SmolLM-135M-Instruct";
+        readonly name: "SmolLM 135M";
+        readonly size: "~135MB";
+        readonly description: "Smallest instruct model, very fast";
+        readonly contextLength: 2048;
+    };
+    readonly 'smollm-360m': {
+        readonly id: "HuggingFaceTB/SmolLM-360M-Instruct";
+        readonly name: "SmolLM 360M";
+        readonly size: "~360MB";
+        readonly description: "Small model, fast, better quality";
+        readonly contextLength: 2048;
+    };
+    readonly 'smollm2-135m': {
+        readonly id: "HuggingFaceTB/SmolLM2-135M-Instruct";
+        readonly name: "SmolLM2 135M";
+        readonly size: "~135MB";
+        readonly description: "Latest SmolLM v2, improved capabilities";
+        readonly contextLength: 2048;
+    };
+    readonly 'smollm2-360m': {
+        readonly id: "HuggingFaceTB/SmolLM2-360M-Instruct";
+        readonly name: "SmolLM2 360M";
+        readonly size: "~360MB";
+        readonly description: "Latest SmolLM v2, better quality";
+        readonly contextLength: 2048;
+    };
+    readonly 'qwen2.5-0.5b': {
+        readonly id: "Qwen/Qwen2.5-0.5B-Instruct";
+        readonly name: "Qwen2.5 0.5B";
+        readonly size: "~300MB quantized";
+        readonly description: "Good balance of speed and quality, multilingual";
+        readonly contextLength: 4096;
+    };
+    readonly tinyllama: {
+        readonly id: "TinyLlama/TinyLlama-1.1B-Chat-v1.0";
+        readonly name: "TinyLlama 1.1B";
+        readonly size: "~600MB quantized";
+        readonly description: "Best small model quality, slower";
+        readonly contextLength: 2048;
+    };
+    readonly 'codegemma-2b': {
+        readonly id: "google/codegemma-2b";
+        readonly name: "CodeGemma 2B";
+        readonly size: "~1GB quantized";
+        readonly description: "Code generation specialist";
+        readonly contextLength: 8192;
+    };
+    readonly 'deepseek-coder-1.3b': {
+        readonly id: "deepseek-ai/deepseek-coder-1.3b-instruct";
+        readonly name: "DeepSeek Coder 1.3B";
+        readonly size: "~700MB quantized";
+        readonly description: "Excellent for code tasks";
+        readonly contextLength: 4096;
+    };
+    readonly 'phi-2': {
+        readonly id: "microsoft/phi-2";
+        readonly name: "Phi-2 2.7B";
+        readonly size: "~1.5GB quantized";
+        readonly description: "High quality small model";
+        readonly contextLength: 2048;
+    };
+    readonly 'phi-3-mini': {
+        readonly id: "microsoft/Phi-3-mini-4k-instruct";
+        readonly name: "Phi-3 Mini";
+        readonly size: "~2GB quantized";
+        readonly description: "Best quality tiny model";
+        readonly contextLength: 4096;
+    };
+};
+export type ModelKey = keyof typeof AVAILABLE_MODELS;
+/**
+ * Check if transformers.js is available
+ */
+export declare function isTransformersAvailable(): Promise<boolean>;
+/**
+ * Initialize the ONNX LLM with specified model
+ */
+export declare function initOnnxLLM(config?: OnnxLLMConfig): Promise<boolean>;
+/**
+ * Generate text using ONNX LLM
+ */
+export declare function generate(prompt: string, config?: GenerationConfig): Promise<GenerationResult>;
+/**
+ * Generate with streaming (token by token)
+ */
+export declare function generateStream(prompt: string, config?: GenerationConfig): Promise<AsyncGenerator<string, GenerationResult, undefined>>;
+/**
+ * Chat completion with conversation history
+ */
+export declare function chat(messages: Array<{
+    role: 'system' | 'user' | 'assistant';
+    content: string;
+}>, config?: GenerationConfig): Promise<GenerationResult>;
+/**
+ * Get model information
+ */
+export declare function getModelInfo(): {
+    model: string | null;
+    ready: boolean;
+    availableModels: typeof AVAILABLE_MODELS;
+};
+/**
+ * Unload the current model to free memory
+ */
+export declare function unload(): Promise<void>;
+export declare class OnnxLLM {
+    private config;
+    private initialized;
+    constructor(config?: OnnxLLMConfig);
+    init(): Promise<boolean>;
+    generate(prompt: string, config?: GenerationConfig): Promise<GenerationResult>;
+    chat(messages: Array<{
+        role: 'system' | 'user' | 'assistant';
+        content: string;
+    }>, config?: GenerationConfig): Promise<GenerationResult>;
+    unload(): Promise<void>;
+    get ready(): boolean;
+    get model(): string | null;
+}
+export default OnnxLLM;
+//# sourceMappingURL=onnx-llm.d.ts.map
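Putting the declared surface together, a hedged usage sketch (the model key is taken from AVAILABLE_MODELS above, which is re-exported through dist/core/index.js; generation defaults per the JSDoc):

// Sketch: class-based usage of the new OnnxLLM API declared above.
const { OnnxLLM, AVAILABLE_MODELS } = require('ruvector');

(async () => {
  const llm = new OnnxLLM({ modelId: AVAILABLE_MODELS['smollm2-135m'].id });
  await llm.init(); // downloads and caches the ONNX model on first run
  const out = await llm.generate('Summarize vector search in one line.', { maxNewTokens: 64 });
  console.log(out.text, `(${out.tokensPerSecond.toFixed(1)} tok/s)`);
  await llm.unload(); // drop the pipeline reference to free memory
})();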
package/dist/core/onnx-llm.d.ts.map
ADDED
@@ -0,0 +1 @@
+{"version":3,"file":"onnx-llm.d.ts","sourceRoot":"","sources":["../../src/core/onnx-llm.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;GAgBG;AAaH,MAAM,WAAW,aAAa;IAC5B,wDAAwD;IACxD,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,iCAAiC;IACjC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,0CAA0C;IAC1C,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,gDAAgD;IAChD,MAAM,CAAC,EAAE,KAAK,GAAG,QAAQ,CAAC;IAC1B,6BAA6B;IAC7B,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,gBAAgB;IAC/B,oDAAoD;IACpD,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,8CAA8C;IAC9C,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,4CAA4C;IAC5C,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,mCAAmC;IACnC,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,wCAAwC;IACxC,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B,qBAAqB;IACrB,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC;IACzB,oCAAoC;IACpC,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,iDAAiD;IACjD,OAAO,CAAC,EAAE,CAAC,KAAK,EAAE,MAAM,KAAK,IAAI,CAAC;CACnC;AAED,MAAM,WAAW,gBAAgB;IAC/B,qBAAqB;IACrB,IAAI,EAAE,MAAM,CAAC;IACb,iCAAiC;IACjC,eAAe,EAAE,MAAM,CAAC;IACxB,iCAAiC;IACjC,MAAM,EAAE,MAAM,CAAC;IACf,wBAAwB;IACxB,eAAe,EAAE,MAAM,CAAC;IACxB,iBAAiB;IACjB,KAAK,EAAE,MAAM,CAAC;IACd,0CAA0C;IAC1C,MAAM,EAAE,OAAO,CAAC;CACjB;AAMD,eAAO,MAAM,gBAAgB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAmHnB,CAAC;AAEX,MAAM,MAAM,QAAQ,GAAG,MAAM,OAAO,gBAAgB,CAAC;AAYrD;;GAEG;AACH,wBAAsB,uBAAuB,IAAI,OAAO,CAAC,OAAO,CAAC,CAOhE;AAED;;GAEG;AACH,wBAAsB,WAAW,CAAC,MAAM,GAAE,aAAkB,GAAG,OAAO,CAAC,OAAO,CAAC,CAqD9E;AAED;;GAEG;AACH,wBAAsB,QAAQ,CAC5B,MAAM,EAAE,MAAM,EACd,MAAM,GAAE,gBAAqB,GAC5B,OAAO,CAAC,gBAAgB,CAAC,CA0C3B;AAED;;GAEG;AACH,wBAAsB,cAAc,CAClC,MAAM,EAAE,MAAM,EACd,MAAM,GAAE,gBAAqB,GAC5B,OAAO,CAAC,cAAc,CAAC,MAAM,EAAE,gBAAgB,EAAE,SAAS,CAAC,CAAC,CA0D9D;AAED;;GAEG;AACH,wBAAsB,IAAI,CACxB,QAAQ,EAAE,KAAK,CAAC;IAAE,IAAI,EAAE,QAAQ,GAAG,MAAM,GAAG,WAAW,CAAC;IAAC,OAAO,EAAE,MAAM,CAAA;CAAE,CAAC,EAC3E,MAAM,GAAE,gBAAqB,GAC5B,OAAO,CAAC,gBAAgB,CAAC,CAsB3B;AAED;;GAEG;AACH,wBAAgB,YAAY,IAAI;IAC9B,KAAK,EAAE,MAAM,GAAG,IAAI,CAAC;IACrB,KAAK,EAAE,OAAO,CAAC;IACf,eAAe,EAAE,OAAO,gBAAgB,CAAC;CAC1C,CAMA;AAED;;GAEG;AACH,wBAAsB,MAAM,IAAI,OAAO,CAAC,IAAI,CAAC,CAQ5C;AAMD,qBAAa,OAAO;IAClB,OAAO,CAAC,MAAM,CAAgB;IAC9B,OAAO,CAAC,WAAW,CAAS;gBAEhB,MAAM,GAAE,aAAkB;IAIhC,IAAI,IAAI,OAAO,CAAC,OAAO,CAAC;IAMxB,QAAQ,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,gBAAgB,GAAG,OAAO,CAAC,gBAAgB,CAAC;IAK9E,IAAI,CACR,QAAQ,EAAE,KAAK,CAAC;QAAE,IAAI,EAAE,QAAQ,GAAG,MAAM,GAAG,WAAW,CAAC;QAAC,OAAO,EAAE,MAAM,CAAA;KAAE,CAAC,EAC3E,MAAM,CAAC,EAAE,gBAAgB,GACxB,OAAO,CAAC,gBAAgB,CAAC;IAKtB,MAAM,IAAI,OAAO,CAAC,IAAI,CAAC;IAK7B,IAAI,KAAK,IAAI,OAAO,CAEnB;IAED,IAAI,KAAK,IAAI,MAAM,GAAG,IAAI,CAEzB;CACF;AAED,eAAe,OAAO,CAAC"}
package/dist/core/onnx-llm.js
ADDED
@@ -0,0 +1,430 @@
+"use strict";
+/**
+ * ONNX LLM Text Generation for RuVector
+ *
+ * Provides real local LLM inference using ONNX Runtime via transformers.js
+ * Supports small models that run efficiently on CPU:
+ * - SmolLM 135M - Smallest, fast (~135MB)
+ * - SmolLM 360M - Better quality (~360MB)
+ * - TinyLlama 1.1B - Best small model quality (~1GB quantized)
+ * - Qwen2.5 0.5B - Good balance (~500MB)
+ *
+ * Features:
+ * - Automatic model downloading and caching
+ * - Quantized INT4/INT8 models for efficiency
+ * - Streaming generation support
+ * - Temperature, top-k, top-p sampling
+ * - KV cache for efficient multi-turn conversations
+ */
+var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
+    if (k2 === undefined) k2 = k;
+    var desc = Object.getOwnPropertyDescriptor(m, k);
+    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
+        desc = { enumerable: true, get: function() { return m[k]; } };
+    }
+    Object.defineProperty(o, k2, desc);
+}) : (function(o, m, k, k2) {
+    if (k2 === undefined) k2 = k;
+    o[k2] = m[k];
+}));
+var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
+    Object.defineProperty(o, "default", { enumerable: true, value: v });
+}) : function(o, v) {
+    o["default"] = v;
+});
+var __importStar = (this && this.__importStar) || (function () {
+    var ownKeys = function(o) {
+        ownKeys = Object.getOwnPropertyNames || function (o) {
+            var ar = [];
+            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
+            return ar;
+        };
+        return ownKeys(o);
+    };
+    return function (mod) {
+        if (mod && mod.__esModule) return mod;
+        var result = {};
+        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
+        __setModuleDefault(result, mod);
+        return result;
+    };
+})();
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.OnnxLLM = exports.AVAILABLE_MODELS = void 0;
+exports.isTransformersAvailable = isTransformersAvailable;
+exports.initOnnxLLM = initOnnxLLM;
+exports.generate = generate;
+exports.generateStream = generateStream;
+exports.chat = chat;
+exports.getModelInfo = getModelInfo;
+exports.unload = unload;
+const path = __importStar(require("path"));
+const fs = __importStar(require("fs"));
+// Force native dynamic import (avoids TypeScript transpiling to require)
+// eslint-disable-next-line @typescript-eslint/no-implied-eval
+const dynamicImport = new Function('specifier', 'return import(specifier)');
+// ============================================================================
+// Available Models
+// ============================================================================
+exports.AVAILABLE_MODELS = {
+    // =========================================================================
+    // TRM - Tiny Random Models (smallest, fastest)
+    // =========================================================================
+    'trm-tinystories': {
+        id: 'Xenova/TinyStories-33M',
+        name: 'TinyStories 33M (TRM)',
+        size: '~65MB',
+        description: 'Ultra-tiny model for stories and basic generation',
+        contextLength: 512,
+    },
+    'trm-gpt2-tiny': {
+        id: 'Xenova/gpt2',
+        name: 'GPT-2 124M (TRM)',
+        size: '~250MB',
+        description: 'Classic GPT-2 tiny for general text',
+        contextLength: 1024,
+    },
+    'trm-distilgpt2': {
+        id: 'Xenova/distilgpt2',
+        name: 'DistilGPT-2 (TRM)',
+        size: '~82MB',
+        description: 'Distilled GPT-2, fastest general model',
+        contextLength: 1024,
+    },
+    // =========================================================================
+    // SmolLM - Smallest production-ready models
+    // =========================================================================
+    'smollm-135m': {
+        id: 'HuggingFaceTB/SmolLM-135M-Instruct',
+        name: 'SmolLM 135M',
+        size: '~135MB',
+        description: 'Smallest instruct model, very fast',
+        contextLength: 2048,
+    },
+    'smollm-360m': {
+        id: 'HuggingFaceTB/SmolLM-360M-Instruct',
+        name: 'SmolLM 360M',
+        size: '~360MB',
+        description: 'Small model, fast, better quality',
+        contextLength: 2048,
+    },
+    'smollm2-135m': {
+        id: 'HuggingFaceTB/SmolLM2-135M-Instruct',
+        name: 'SmolLM2 135M',
+        size: '~135MB',
+        description: 'Latest SmolLM v2, improved capabilities',
+        contextLength: 2048,
+    },
+    'smollm2-360m': {
+        id: 'HuggingFaceTB/SmolLM2-360M-Instruct',
+        name: 'SmolLM2 360M',
+        size: '~360MB',
+        description: 'Latest SmolLM v2, better quality',
+        contextLength: 2048,
+    },
+    // =========================================================================
+    // Qwen - Chinese/English bilingual models
+    // =========================================================================
+    'qwen2.5-0.5b': {
+        id: 'Qwen/Qwen2.5-0.5B-Instruct',
+        name: 'Qwen2.5 0.5B',
+        size: '~300MB quantized',
+        description: 'Good balance of speed and quality, multilingual',
+        contextLength: 4096,
+    },
+    // =========================================================================
+    // TinyLlama - Llama architecture in tiny form
+    // =========================================================================
+    'tinyllama': {
+        id: 'TinyLlama/TinyLlama-1.1B-Chat-v1.0',
+        name: 'TinyLlama 1.1B',
+        size: '~600MB quantized',
+        description: 'Best small model quality, slower',
+        contextLength: 2048,
+    },
+    // =========================================================================
+    // Code-specialized models
+    // =========================================================================
+    'codegemma-2b': {
+        id: 'google/codegemma-2b',
+        name: 'CodeGemma 2B',
+        size: '~1GB quantized',
+        description: 'Code generation specialist',
+        contextLength: 8192,
+    },
+    'deepseek-coder-1.3b': {
+        id: 'deepseek-ai/deepseek-coder-1.3b-instruct',
+        name: 'DeepSeek Coder 1.3B',
+        size: '~700MB quantized',
+        description: 'Excellent for code tasks',
+        contextLength: 4096,
+    },
+    // =========================================================================
+    // Phi models - Microsoft's tiny powerhouses
+    // =========================================================================
+    'phi-2': {
+        id: 'microsoft/phi-2',
+        name: 'Phi-2 2.7B',
+        size: '~1.5GB quantized',
+        description: 'High quality small model',
+        contextLength: 2048,
+    },
+    'phi-3-mini': {
+        id: 'microsoft/Phi-3-mini-4k-instruct',
+        name: 'Phi-3 Mini',
+        size: '~2GB quantized',
+        description: 'Best quality tiny model',
+        contextLength: 4096,
+    },
+};
+// ============================================================================
+// ONNX LLM Generator
+// ============================================================================
+let pipeline = null;
+let transformers = null;
+let loadedModel = null;
+let loadPromise = null;
+let loadError = null;
+/**
+ * Check if transformers.js is available
+ */
+async function isTransformersAvailable() {
+    try {
+        await dynamicImport('@xenova/transformers');
+        return true;
+    }
+    catch {
+        return false;
+    }
+}
+/**
+ * Initialize the ONNX LLM with specified model
+ */
+async function initOnnxLLM(config = {}) {
+    if (pipeline && loadedModel === config.modelId) {
+        return true;
+    }
+    if (loadError)
+        throw loadError;
+    if (loadPromise) {
+        await loadPromise;
+        return pipeline !== null;
+    }
+    const modelId = config.modelId || 'HuggingFaceTB/SmolLM-135M-Instruct';
+    loadPromise = (async () => {
+        try {
+            console.error(`Loading ONNX LLM: ${modelId}...`);
+            // Import transformers.js
+            transformers = await dynamicImport('@xenova/transformers');
+            const { pipeline: createPipeline, env } = transformers;
+            // Configure cache directory
+            if (config.cacheDir) {
+                env.cacheDir = config.cacheDir;
+            }
+            else {
+                env.cacheDir = path.join(process.env.HOME || '/tmp', '.ruvector', 'models', 'onnx-llm');
+            }
+            // Ensure cache directory exists
+            if (!fs.existsSync(env.cacheDir)) {
+                fs.mkdirSync(env.cacheDir, { recursive: true });
+            }
+            // Disable remote model fetching warnings
+            env.allowRemoteModels = true;
+            env.allowLocalModels = true;
+            // Create text generation pipeline
+            console.error(`Downloading model (first run may take a while)...`);
+            pipeline = await createPipeline('text-generation', modelId, {
+                quantized: config.quantized !== false,
+                device: config.device || 'cpu',
+            });
+            loadedModel = modelId;
+            console.error(`ONNX LLM ready: ${modelId}`);
+        }
+        catch (e) {
+            loadError = new Error(`Failed to initialize ONNX LLM: ${e.message}`);
+            throw loadError;
+        }
+    })();
+    await loadPromise;
+    return pipeline !== null;
+}
+/**
+ * Generate text using ONNX LLM
+ */
+async function generate(prompt, config = {}) {
+    if (!pipeline) {
+        await initOnnxLLM();
+    }
+    if (!pipeline) {
+        throw new Error('ONNX LLM not initialized');
+    }
+    const start = performance.now();
+    // Build the input text (apply chat template if needed)
+    let inputText = prompt;
+    if (config.systemPrompt) {
+        // Apply simple chat format
+        inputText = `<|system|>\n${config.systemPrompt}<|end|>\n<|user|>\n${prompt}<|end|>\n<|assistant|>\n`;
+    }
+    // Generate
+    const outputs = await pipeline(inputText, {
+        max_new_tokens: config.maxNewTokens || 128,
+        temperature: config.temperature || 0.7,
+        top_p: config.topP || 0.9,
+        top_k: config.topK || 50,
+        repetition_penalty: config.repetitionPenalty || 1.1,
+        do_sample: (config.temperature || 0.7) > 0,
+        return_full_text: false,
+    });
+    const timeMs = performance.now() - start;
+    const generatedText = outputs[0]?.generated_text || '';
+    // Estimate tokens (rough approximation)
+    const tokensGenerated = Math.ceil(generatedText.split(/\s+/).length * 1.3);
+    return {
+        text: generatedText.trim(),
+        tokensGenerated,
+        timeMs,
+        tokensPerSecond: tokensGenerated / (timeMs / 1000),
+        model: loadedModel || 'unknown',
+        cached: true,
+    };
+}
+/**
+ * Generate with streaming (token by token)
+ */
+async function generateStream(prompt, config = {}) {
+    if (!pipeline) {
+        await initOnnxLLM();
+    }
+    if (!pipeline) {
+        throw new Error('ONNX LLM not initialized');
+    }
+    const start = performance.now();
+    let fullText = '';
+    let tokenCount = 0;
+    // Build input text
+    let inputText = prompt;
+    if (config.systemPrompt) {
+        inputText = `<|system|>\n${config.systemPrompt}<|end|>\n<|user|>\n${prompt}<|end|>\n<|assistant|>\n`;
+    }
+    // Create streamer
+    const { TextStreamer } = transformers;
+    const streamer = new TextStreamer(pipeline.tokenizer, {
+        skip_prompt: true,
+        callback_function: (text) => {
+            fullText += text;
+            tokenCount++;
+            if (config.onToken) {
+                config.onToken(text);
+            }
+        },
+    });
+    // Generate with streamer
+    await pipeline(inputText, {
+        max_new_tokens: config.maxNewTokens || 128,
+        temperature: config.temperature || 0.7,
+        top_p: config.topP || 0.9,
+        top_k: config.topK || 50,
+        repetition_penalty: config.repetitionPenalty || 1.1,
+        do_sample: (config.temperature || 0.7) > 0,
+        streamer,
+    });
+    const timeMs = performance.now() - start;
+    // Return generator that yields the collected text
+    async function* generator() {
+        yield fullText;
+        return {
+            text: fullText.trim(),
+            tokensGenerated: tokenCount,
+            timeMs,
+            tokensPerSecond: tokenCount / (timeMs / 1000),
+            model: loadedModel || 'unknown',
+            cached: true,
+        };
+    }
+    return generator();
+}
+/**
+ * Chat completion with conversation history
+ */
+async function chat(messages, config = {}) {
+    if (!pipeline) {
+        await initOnnxLLM();
+    }
+    if (!pipeline) {
+        throw new Error('ONNX LLM not initialized');
+    }
+    // Build conversation text from messages
+    let conversationText = '';
+    for (const msg of messages) {
+        if (msg.role === 'system') {
+            conversationText += `<|system|>\n${msg.content}<|end|>\n`;
+        }
+        else if (msg.role === 'user') {
+            conversationText += `<|user|>\n${msg.content}<|end|>\n`;
+        }
+        else if (msg.role === 'assistant') {
+            conversationText += `<|assistant|>\n${msg.content}<|end|>\n`;
+        }
+    }
+    conversationText += '<|assistant|>\n';
+    return generate(conversationText, { ...config, systemPrompt: undefined });
+}
+/**
+ * Get model information
+ */
+function getModelInfo() {
+    return {
+        model: loadedModel,
+        ready: pipeline !== null,
+        availableModels: exports.AVAILABLE_MODELS,
+    };
+}
+/**
+ * Unload the current model to free memory
+ */
+async function unload() {
+    if (pipeline) {
+        // Note: transformers.js doesn't have explicit dispose, but we can null the reference
+        pipeline = null;
+        loadedModel = null;
+        loadPromise = null;
+        loadError = null;
+    }
+}
+// ============================================================================
+// Class wrapper for OOP usage
+// ============================================================================
+class OnnxLLM {
+    constructor(config = {}) {
+        this.initialized = false;
+        this.config = config;
+    }
+    async init() {
+        if (this.initialized)
+            return true;
+        this.initialized = await initOnnxLLM(this.config);
+        return this.initialized;
+    }
+    async generate(prompt, config) {
+        if (!this.initialized)
+            await this.init();
+        return generate(prompt, config);
+    }
+    async chat(messages, config) {
+        if (!this.initialized)
+            await this.init();
+        return chat(messages, config);
+    }
+    async unload() {
+        await unload();
+        this.initialized = false;
+    }
+    get ready() {
+        return this.initialized;
+    }
+    get model() {
+        return loadedModel;
+    }
+}
+exports.OnnxLLM = OnnxLLM;
+exports.default = OnnxLLM;
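One caveat visible in the implementation above: generateStream awaits the whole pipeline run before its generator yields, so live token output comes from the onToken callback (wired to TextStreamer) rather than from iterating the generator. A sketch of that behavior:

// Sketch: tokens stream through onToken; the returned generator yields the
// collected text only after generation has finished.
const { generateStream } = require('ruvector');

(async () => {
  const gen = await generateStream('Explain ONNX Runtime briefly.', {
    maxNewTokens: 48,
    onToken: (t) => process.stdout.write(t), // fires as chunks decode
  });
  for await (const full of gen) { /* single yield: the full text */ }
})();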
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "ruvector",
-  "version": "0.1.93",
+  "version": "0.1.95",
   "description": "High-performance vector database for Node.js with automatic native/WASM fallback",
   "main": "dist/index.js",
   "types": "dist/index.d.ts",
@@ -8,7 +8,7 @@
     "ruvector": "./bin/cli.js"
   },
   "scripts": {
-    "build": "tsc && cp src/core/onnx/pkg/package.json dist/core/onnx/pkg/",
+    "build": "tsc && cp src/core/onnx/pkg/package.json dist/core/onnx/pkg/ && cp src/core/onnx/pkg/ruvector_onnx_embeddings_wasm_cjs.js dist/core/onnx/pkg/",
     "prepublishOnly": "npm run build",
     "test": "node test/integration.js"
   },
@@ -60,6 +60,7 @@
     "@ruvector/core": "^0.1.25",
     "@ruvector/gnn": "^0.1.22",
     "@ruvector/sona": "^0.1.4",
+    "@xenova/transformers": "^2.17.2",
     "chalk": "^4.1.2",
     "commander": "^11.1.0",
     "ora": "^5.4.1"