ruvector 0.2.21 → 0.2.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (123) hide show
  1. package/README.md +2 -2
  2. package/bin/cli.js +160 -0
  3. package/package.json +9 -5
  4. package/src/decompiler/api-prober.js +302 -0
  5. package/src/decompiler/model-decompiler.js +423 -0
  6. package/dist/analysis/complexity.d.ts +0 -52
  7. package/dist/analysis/complexity.d.ts.map +0 -1
  8. package/dist/analysis/complexity.js +0 -146
  9. package/dist/analysis/index.d.ts +0 -15
  10. package/dist/analysis/index.d.ts.map +0 -1
  11. package/dist/analysis/index.js +0 -38
  12. package/dist/analysis/patterns.d.ts +0 -71
  13. package/dist/analysis/patterns.d.ts.map +0 -1
  14. package/dist/analysis/patterns.js +0 -243
  15. package/dist/analysis/security.d.ts +0 -51
  16. package/dist/analysis/security.d.ts.map +0 -1
  17. package/dist/analysis/security.js +0 -139
  18. package/dist/core/adaptive-embedder.d.ts +0 -156
  19. package/dist/core/adaptive-embedder.d.ts.map +0 -1
  20. package/dist/core/adaptive-embedder.js +0 -838
  21. package/dist/core/agentdb-fast.d.ts +0 -149
  22. package/dist/core/agentdb-fast.d.ts.map +0 -1
  23. package/dist/core/agentdb-fast.js +0 -301
  24. package/dist/core/ast-parser.d.ts +0 -108
  25. package/dist/core/ast-parser.d.ts.map +0 -1
  26. package/dist/core/ast-parser.js +0 -602
  27. package/dist/core/attention-fallbacks.d.ts +0 -321
  28. package/dist/core/attention-fallbacks.d.ts.map +0 -1
  29. package/dist/core/attention-fallbacks.js +0 -552
  30. package/dist/core/cluster-wrapper.d.ts +0 -148
  31. package/dist/core/cluster-wrapper.d.ts.map +0 -1
  32. package/dist/core/cluster-wrapper.js +0 -271
  33. package/dist/core/coverage-router.d.ts +0 -88
  34. package/dist/core/coverage-router.d.ts.map +0 -1
  35. package/dist/core/coverage-router.js +0 -315
  36. package/dist/core/diff-embeddings.d.ts +0 -93
  37. package/dist/core/diff-embeddings.d.ts.map +0 -1
  38. package/dist/core/diff-embeddings.js +0 -334
  39. package/dist/core/gnn-wrapper.d.ts +0 -143
  40. package/dist/core/gnn-wrapper.d.ts.map +0 -1
  41. package/dist/core/gnn-wrapper.js +0 -213
  42. package/dist/core/graph-algorithms.d.ts +0 -83
  43. package/dist/core/graph-algorithms.d.ts.map +0 -1
  44. package/dist/core/graph-algorithms.js +0 -514
  45. package/dist/core/graph-wrapper.d.ts +0 -147
  46. package/dist/core/graph-wrapper.d.ts.map +0 -1
  47. package/dist/core/graph-wrapper.js +0 -299
  48. package/dist/core/index.d.ts +0 -48
  49. package/dist/core/index.d.ts.map +0 -1
  50. package/dist/core/index.js +0 -89
  51. package/dist/core/intelligence-engine.d.ts +0 -258
  52. package/dist/core/intelligence-engine.d.ts.map +0 -1
  53. package/dist/core/intelligence-engine.js +0 -1030
  54. package/dist/core/learning-engine.d.ts +0 -160
  55. package/dist/core/learning-engine.d.ts.map +0 -1
  56. package/dist/core/learning-engine.js +0 -589
  57. package/dist/core/neural-embeddings.d.ts +0 -393
  58. package/dist/core/neural-embeddings.d.ts.map +0 -1
  59. package/dist/core/neural-embeddings.js +0 -1091
  60. package/dist/core/neural-perf.d.ts +0 -331
  61. package/dist/core/neural-perf.d.ts.map +0 -1
  62. package/dist/core/neural-perf.js +0 -704
  63. package/dist/core/onnx/loader.js +0 -348
  64. package/dist/core/onnx/pkg/LICENSE +0 -21
  65. package/dist/core/onnx/pkg/loader.js +0 -348
  66. package/dist/core/onnx/pkg/package.json +0 -3
  67. package/dist/core/onnx/pkg/ruvector_onnx_embeddings_wasm.d.ts +0 -112
  68. package/dist/core/onnx/pkg/ruvector_onnx_embeddings_wasm.js +0 -5
  69. package/dist/core/onnx/pkg/ruvector_onnx_embeddings_wasm_bg.js +0 -638
  70. package/dist/core/onnx/pkg/ruvector_onnx_embeddings_wasm_bg.wasm +0 -0
  71. package/dist/core/onnx/pkg/ruvector_onnx_embeddings_wasm_bg.wasm.d.ts +0 -29
  72. package/dist/core/onnx/pkg/ruvector_onnx_embeddings_wasm_cjs.js +0 -127
  73. package/dist/core/onnx-embedder.d.ts +0 -105
  74. package/dist/core/onnx-embedder.d.ts.map +0 -1
  75. package/dist/core/onnx-embedder.js +0 -410
  76. package/dist/core/onnx-llm.d.ts +0 -206
  77. package/dist/core/onnx-llm.d.ts.map +0 -1
  78. package/dist/core/onnx-llm.js +0 -430
  79. package/dist/core/onnx-optimized.d.ts +0 -109
  80. package/dist/core/onnx-optimized.d.ts.map +0 -1
  81. package/dist/core/onnx-optimized.js +0 -419
  82. package/dist/core/parallel-intelligence.d.ts +0 -109
  83. package/dist/core/parallel-intelligence.d.ts.map +0 -1
  84. package/dist/core/parallel-intelligence.js +0 -340
  85. package/dist/core/parallel-workers.d.ts +0 -177
  86. package/dist/core/parallel-workers.d.ts.map +0 -1
  87. package/dist/core/parallel-workers.js +0 -671
  88. package/dist/core/router-wrapper.d.ts +0 -62
  89. package/dist/core/router-wrapper.d.ts.map +0 -1
  90. package/dist/core/router-wrapper.js +0 -209
  91. package/dist/core/rvf-wrapper.d.ts +0 -86
  92. package/dist/core/rvf-wrapper.d.ts.map +0 -1
  93. package/dist/core/rvf-wrapper.js +0 -102
  94. package/dist/core/sona-wrapper.d.ts +0 -226
  95. package/dist/core/sona-wrapper.d.ts.map +0 -1
  96. package/dist/core/sona-wrapper.js +0 -282
  97. package/dist/core/tensor-compress.d.ts +0 -134
  98. package/dist/core/tensor-compress.d.ts.map +0 -1
  99. package/dist/core/tensor-compress.js +0 -432
  100. package/dist/index.d.ts +0 -105
  101. package/dist/index.d.ts.map +0 -1
  102. package/dist/index.js +0 -221
  103. package/dist/services/embedding-service.d.ts +0 -136
  104. package/dist/services/embedding-service.d.ts.map +0 -1
  105. package/dist/services/embedding-service.js +0 -294
  106. package/dist/services/index.d.ts +0 -6
  107. package/dist/services/index.d.ts.map +0 -1
  108. package/dist/services/index.js +0 -26
  109. package/dist/types.d.ts +0 -145
  110. package/dist/types.d.ts.map +0 -1
  111. package/dist/types.js +0 -2
  112. package/dist/workers/benchmark.d.ts +0 -44
  113. package/dist/workers/benchmark.d.ts.map +0 -1
  114. package/dist/workers/benchmark.js +0 -230
  115. package/dist/workers/index.d.ts +0 -10
  116. package/dist/workers/index.d.ts.map +0 -1
  117. package/dist/workers/index.js +0 -25
  118. package/dist/workers/native-worker.d.ts +0 -76
  119. package/dist/workers/native-worker.d.ts.map +0 -1
  120. package/dist/workers/native-worker.js +0 -490
  121. package/dist/workers/types.d.ts +0 -69
  122. package/dist/workers/types.d.ts.map +0 -1
  123. package/dist/workers/types.js +0 -7
@@ -1,206 +0,0 @@
1
- /**
2
- * ONNX LLM Text Generation for RuVector
3
- *
4
- * Provides real local LLM inference using ONNX Runtime via transformers.js
5
- * Supports small models that run efficiently on CPU:
6
- * - SmolLM 135M - Smallest, fast (~135MB)
7
- * - SmolLM 360M - Better quality (~360MB)
8
- * - TinyLlama 1.1B - Best small model quality (~1GB quantized)
9
- * - Qwen2.5 0.5B - Good balance (~500MB)
10
- *
11
- * Features:
12
- * - Automatic model downloading and caching
13
- * - Quantized INT4/INT8 models for efficiency
14
- * - Streaming generation support
15
- * - Temperature, top-k, top-p sampling
16
- * - KV cache for efficient multi-turn conversations
17
- */
18
- export interface OnnxLLMConfig {
19
- /** Model ID (default: 'Xenova/smollm-135m-instruct') */
20
- modelId?: string;
21
- /** Cache directory for models */
22
- cacheDir?: string;
23
- /** Use quantized model (default: true) */
24
- quantized?: boolean;
25
- /** Device: 'cpu' | 'webgpu' (default: 'cpu') */
26
- device?: 'cpu' | 'webgpu';
27
- /** Maximum context length */
28
- maxLength?: number;
29
- }
30
- export interface GenerationConfig {
31
- /** Maximum new tokens to generate (default: 128) */
32
- maxNewTokens?: number;
33
- /** Temperature for sampling (default: 0.7) */
34
- temperature?: number;
35
- /** Top-p nucleus sampling (default: 0.9) */
36
- topP?: number;
37
- /** Top-k sampling (default: 50) */
38
- topK?: number;
39
- /** Repetition penalty (default: 1.1) */
40
- repetitionPenalty?: number;
41
- /** Stop sequences */
42
- stopSequences?: string[];
43
- /** System prompt for chat models */
44
- systemPrompt?: string;
45
- /** Enable streaming (callback for each token) */
46
- onToken?: (token: string) => void;
47
- }
48
- export interface GenerationResult {
49
- /** Generated text */
50
- text: string;
51
- /** Number of tokens generated */
52
- tokensGenerated: number;
53
- /** Time taken in milliseconds */
54
- timeMs: number;
55
- /** Tokens per second */
56
- tokensPerSecond: number;
57
- /** Model used */
58
- model: string;
59
- /** Whether model was loaded from cache */
60
- cached: boolean;
61
- }
62
- export declare const AVAILABLE_MODELS: {
63
- readonly 'trm-tinystories': {
64
- readonly id: "Xenova/TinyStories-33M";
65
- readonly name: "TinyStories 33M (TRM)";
66
- readonly size: "~65MB";
67
- readonly description: "Ultra-tiny model for stories and basic generation";
68
- readonly contextLength: 512;
69
- };
70
- readonly 'trm-gpt2-tiny': {
71
- readonly id: "Xenova/gpt2";
72
- readonly name: "GPT-2 124M (TRM)";
73
- readonly size: "~250MB";
74
- readonly description: "Classic GPT-2 tiny for general text";
75
- readonly contextLength: 1024;
76
- };
77
- readonly 'trm-distilgpt2': {
78
- readonly id: "Xenova/distilgpt2";
79
- readonly name: "DistilGPT-2 (TRM)";
80
- readonly size: "~82MB";
81
- readonly description: "Distilled GPT-2, fastest general model";
82
- readonly contextLength: 1024;
83
- };
84
- readonly 'smollm-135m': {
85
- readonly id: "HuggingFaceTB/SmolLM-135M-Instruct";
86
- readonly name: "SmolLM 135M";
87
- readonly size: "~135MB";
88
- readonly description: "Smallest instruct model, very fast";
89
- readonly contextLength: 2048;
90
- };
91
- readonly 'smollm-360m': {
92
- readonly id: "HuggingFaceTB/SmolLM-360M-Instruct";
93
- readonly name: "SmolLM 360M";
94
- readonly size: "~360MB";
95
- readonly description: "Small model, fast, better quality";
96
- readonly contextLength: 2048;
97
- };
98
- readonly 'smollm2-135m': {
99
- readonly id: "HuggingFaceTB/SmolLM2-135M-Instruct";
100
- readonly name: "SmolLM2 135M";
101
- readonly size: "~135MB";
102
- readonly description: "Latest SmolLM v2, improved capabilities";
103
- readonly contextLength: 2048;
104
- };
105
- readonly 'smollm2-360m': {
106
- readonly id: "HuggingFaceTB/SmolLM2-360M-Instruct";
107
- readonly name: "SmolLM2 360M";
108
- readonly size: "~360MB";
109
- readonly description: "Latest SmolLM v2, better quality";
110
- readonly contextLength: 2048;
111
- };
112
- readonly 'qwen2.5-0.5b': {
113
- readonly id: "Qwen/Qwen2.5-0.5B-Instruct";
114
- readonly name: "Qwen2.5 0.5B";
115
- readonly size: "~300MB quantized";
116
- readonly description: "Good balance of speed and quality, multilingual";
117
- readonly contextLength: 4096;
118
- };
119
- readonly tinyllama: {
120
- readonly id: "TinyLlama/TinyLlama-1.1B-Chat-v1.0";
121
- readonly name: "TinyLlama 1.1B";
122
- readonly size: "~600MB quantized";
123
- readonly description: "Best small model quality, slower";
124
- readonly contextLength: 2048;
125
- };
126
- readonly 'codegemma-2b': {
127
- readonly id: "google/codegemma-2b";
128
- readonly name: "CodeGemma 2B";
129
- readonly size: "~1GB quantized";
130
- readonly description: "Code generation specialist";
131
- readonly contextLength: 8192;
132
- };
133
- readonly 'deepseek-coder-1.3b': {
134
- readonly id: "deepseek-ai/deepseek-coder-1.3b-instruct";
135
- readonly name: "DeepSeek Coder 1.3B";
136
- readonly size: "~700MB quantized";
137
- readonly description: "Excellent for code tasks";
138
- readonly contextLength: 4096;
139
- };
140
- readonly 'phi-2': {
141
- readonly id: "microsoft/phi-2";
142
- readonly name: "Phi-2 2.7B";
143
- readonly size: "~1.5GB quantized";
144
- readonly description: "High quality small model";
145
- readonly contextLength: 2048;
146
- };
147
- readonly 'phi-3-mini': {
148
- readonly id: "microsoft/Phi-3-mini-4k-instruct";
149
- readonly name: "Phi-3 Mini";
150
- readonly size: "~2GB quantized";
151
- readonly description: "Best quality tiny model";
152
- readonly contextLength: 4096;
153
- };
154
- };
155
- export type ModelKey = keyof typeof AVAILABLE_MODELS;
156
- /**
157
- * Check if transformers.js is available
158
- */
159
- export declare function isTransformersAvailable(): Promise<boolean>;
160
- /**
161
- * Initialize the ONNX LLM with specified model
162
- */
163
- export declare function initOnnxLLM(config?: OnnxLLMConfig): Promise<boolean>;
164
- /**
165
- * Generate text using ONNX LLM
166
- */
167
- export declare function generate(prompt: string, config?: GenerationConfig): Promise<GenerationResult>;
168
- /**
169
- * Generate with streaming (token by token)
170
- */
171
- export declare function generateStream(prompt: string, config?: GenerationConfig): Promise<AsyncGenerator<string, GenerationResult, undefined>>;
172
- /**
173
- * Chat completion with conversation history
174
- */
175
- export declare function chat(messages: Array<{
176
- role: 'system' | 'user' | 'assistant';
177
- content: string;
178
- }>, config?: GenerationConfig): Promise<GenerationResult>;
179
- /**
180
- * Get model information
181
- */
182
- export declare function getModelInfo(): {
183
- model: string | null;
184
- ready: boolean;
185
- availableModels: typeof AVAILABLE_MODELS;
186
- };
187
- /**
188
- * Unload the current model to free memory
189
- */
190
- export declare function unload(): Promise<void>;
191
- export declare class OnnxLLM {
192
- private config;
193
- private initialized;
194
- constructor(config?: OnnxLLMConfig);
195
- init(): Promise<boolean>;
196
- generate(prompt: string, config?: GenerationConfig): Promise<GenerationResult>;
197
- chat(messages: Array<{
198
- role: 'system' | 'user' | 'assistant';
199
- content: string;
200
- }>, config?: GenerationConfig): Promise<GenerationResult>;
201
- unload(): Promise<void>;
202
- get ready(): boolean;
203
- get model(): string | null;
204
- }
205
- export default OnnxLLM;
206
- //# sourceMappingURL=onnx-llm.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"onnx-llm.d.ts","sourceRoot":"","sources":["../../src/core/onnx-llm.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;GAgBG;AAaH,MAAM,WAAW,aAAa;IAC5B,wDAAwD;IACxD,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,iCAAiC;IACjC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,0CAA0C;IAC1C,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,gDAAgD;IAChD,MAAM,CAAC,EAAE,KAAK,GAAG,QAAQ,CAAC;IAC1B,6BAA6B;IAC7B,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,gBAAgB;IAC/B,oDAAoD;IACpD,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,8CAA8C;IAC9C,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,4CAA4C;IAC5C,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,mCAAmC;IACnC,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,wCAAwC;IACxC,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B,qBAAqB;IACrB,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC;IACzB,oCAAoC;IACpC,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,iDAAiD;IACjD,OAAO,CAAC,EAAE,CAAC,KAAK,EAAE,MAAM,KAAK,IAAI,CAAC;CACnC;AAED,MAAM,WAAW,gBAAgB;IAC/B,qBAAqB;IACrB,IAAI,EAAE,MAAM,CAAC;IACb,iCAAiC;IACjC,eAAe,EAAE,MAAM,CAAC;IACxB,iCAAiC;IACjC,MAAM,EAAE,MAAM,CAAC;IACf,wBAAwB;IACxB,eAAe,EAAE,MAAM,CAAC;IACxB,iBAAiB;IACjB,KAAK,EAAE,MAAM,CAAC;IACd,0CAA0C;IAC1C,MAAM,EAAE,OAAO,CAAC;CACjB;AAMD,eAAO,MAAM,gBAAgB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAmHnB,CAAC;AAEX,MAAM,MAAM,QAAQ,GAAG,MAAM,OAAO,gBAAgB,CAAC;AAYrD;;GAEG;AACH,wBAAsB,uBAAuB,IAAI,OAAO,CAAC,OAAO,CAAC,CAOhE;AAED;;GAEG;AACH,wBAAsB,WAAW,CAAC,MAAM,GAAE,aAAkB,GAAG,OAAO,CAAC,OAAO,CAAC,CAqD9E;AAED;;GAEG;AACH,wBAAsB,QAAQ,CAC5B,MAAM,EAAE,MAAM,EACd,MAAM,GAAE,gBAAqB,GAC5B,OAAO,CAAC,gBAAgB,CAAC,CA0C3B;AAED;;GAEG;AACH,wBAAsB,cAAc,CAClC,MAAM,EAAE,MAAM,EACd,MAAM,GAAE,gBAAqB,GAC5B,OAAO,CAAC,cAAc,CAAC,MAAM,EAAE,gBAAgB,EAAE,SAAS,CAAC,CAAC,CA0D9D;AAED;;GAEG;AACH,wBAAsB,IAAI,CACxB,QAAQ,EAAE,KAAK,CAAC;IAAE,IAAI,EAAE,QAAQ,GAAG,MAAM,GAAG,WAAW,CAAC;IAAC,OAAO,EAAE,MAAM,CAAA;CAAE,CAAC,EAC3E,MAAM,GAAE,gBAAqB,GAC5B,OAAO,CAAC,gBAAgB,CAAC,CAsB3B;AAED;;GAEG;AACH,wBAAgB,YAAY,IAAI;IAC9B,KAAK,EAAE,MAAM,GAAG,IAAI,CAAC;IACrB,KAAK,EAAE,OAAO,CAAC;IACf,eAAe,EAAE,OAAO,gBAAgB,CAAC;CAC1C,CAMA;AAED;;GAEG;AACH,wBAAsB,MAAM,IAAI,OAAO,CAAC,IAAI,CAAC,CAQ5C;AAMD,qBAAa,OAAO;IAClB,OAAO,CAAC,MAAM,CAAgB;IAC9B,OAAO,CAAC,WAAW,CAAS;gBAEhB,MAAM,GAAE,aAAkB;IAIhC,IAAI,IAAI,OAAO,CAAC,OAAO,CAAC;IAMxB,QAAQ,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,gBAAgB,GAAG,OAAO,CAAC,gBAAgB,CAAC;IAK9E,IAAI,CACR,QAAQ,EAAE,KAAK,CAAC;QAAE,IAAI,EAAE,QAAQ,GAAG,MAAM,GAAG,WAAW,CAAC;QAAC,OAAO,EAAE,MAAM,CAAA;KAAE,CAAC,EAC3E,MAAM,CAAC,EAAE,gBAAgB,GACxB,OAAO,CAAC,gBAAgB,CAAC;IAKtB,MAAM,IAAI,OAAO,CAAC,IAAI,CAAC;IAK7B,IAAI,KAAK,IAAI,OAAO,CAEnB;IAED,IAAI,KAAK,IAAI,MAAM,GAAG,IAAI,CAEzB;CACF;AAED,eAAe,OAAO,CAAC"}
@@ -1,430 +0,0 @@
1
- "use strict";
2
- /**
3
- * ONNX LLM Text Generation for RuVector
4
- *
5
- * Provides real local LLM inference using ONNX Runtime via transformers.js
6
- * Supports small models that run efficiently on CPU:
7
- * - SmolLM 135M - Smallest, fast (~135MB)
8
- * - SmolLM 360M - Better quality (~360MB)
9
- * - TinyLlama 1.1B - Best small model quality (~1GB quantized)
10
- * - Qwen2.5 0.5B - Good balance (~500MB)
11
- *
12
- * Features:
13
- * - Automatic model downloading and caching
14
- * - Quantized INT4/INT8 models for efficiency
15
- * - Streaming generation support
16
- * - Temperature, top-k, top-p sampling
17
- * - KV cache for efficient multi-turn conversations
18
- */
19
- var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
20
- if (k2 === undefined) k2 = k;
21
- var desc = Object.getOwnPropertyDescriptor(m, k);
22
- if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
23
- desc = { enumerable: true, get: function() { return m[k]; } };
24
- }
25
- Object.defineProperty(o, k2, desc);
26
- }) : (function(o, m, k, k2) {
27
- if (k2 === undefined) k2 = k;
28
- o[k2] = m[k];
29
- }));
30
- var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
31
- Object.defineProperty(o, "default", { enumerable: true, value: v });
32
- }) : function(o, v) {
33
- o["default"] = v;
34
- });
35
- var __importStar = (this && this.__importStar) || (function () {
36
- var ownKeys = function(o) {
37
- ownKeys = Object.getOwnPropertyNames || function (o) {
38
- var ar = [];
39
- for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
40
- return ar;
41
- };
42
- return ownKeys(o);
43
- };
44
- return function (mod) {
45
- if (mod && mod.__esModule) return mod;
46
- var result = {};
47
- if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
48
- __setModuleDefault(result, mod);
49
- return result;
50
- };
51
- })();
52
- Object.defineProperty(exports, "__esModule", { value: true });
53
- exports.OnnxLLM = exports.AVAILABLE_MODELS = void 0;
54
- exports.isTransformersAvailable = isTransformersAvailable;
55
- exports.initOnnxLLM = initOnnxLLM;
56
- exports.generate = generate;
57
- exports.generateStream = generateStream;
58
- exports.chat = chat;
59
- exports.getModelInfo = getModelInfo;
60
- exports.unload = unload;
61
- const path = __importStar(require("path"));
62
- const fs = __importStar(require("fs"));
63
- // Force native dynamic import (avoids TypeScript transpiling to require)
64
- // eslint-disable-next-line @typescript-eslint/no-implied-eval
65
- const dynamicImport = new Function('specifier', 'return import(specifier)');
66
- // ============================================================================
67
- // Available Models
68
- // ============================================================================
69
- exports.AVAILABLE_MODELS = {
70
- // =========================================================================
71
- // TRM - Tiny Random Models (smallest, fastest)
72
- // =========================================================================
73
- 'trm-tinystories': {
74
- id: 'Xenova/TinyStories-33M',
75
- name: 'TinyStories 33M (TRM)',
76
- size: '~65MB',
77
- description: 'Ultra-tiny model for stories and basic generation',
78
- contextLength: 512,
79
- },
80
- 'trm-gpt2-tiny': {
81
- id: 'Xenova/gpt2',
82
- name: 'GPT-2 124M (TRM)',
83
- size: '~250MB',
84
- description: 'Classic GPT-2 tiny for general text',
85
- contextLength: 1024,
86
- },
87
- 'trm-distilgpt2': {
88
- id: 'Xenova/distilgpt2',
89
- name: 'DistilGPT-2 (TRM)',
90
- size: '~82MB',
91
- description: 'Distilled GPT-2, fastest general model',
92
- contextLength: 1024,
93
- },
94
- // =========================================================================
95
- // SmolLM - Smallest production-ready models
96
- // =========================================================================
97
- 'smollm-135m': {
98
- id: 'HuggingFaceTB/SmolLM-135M-Instruct',
99
- name: 'SmolLM 135M',
100
- size: '~135MB',
101
- description: 'Smallest instruct model, very fast',
102
- contextLength: 2048,
103
- },
104
- 'smollm-360m': {
105
- id: 'HuggingFaceTB/SmolLM-360M-Instruct',
106
- name: 'SmolLM 360M',
107
- size: '~360MB',
108
- description: 'Small model, fast, better quality',
109
- contextLength: 2048,
110
- },
111
- 'smollm2-135m': {
112
- id: 'HuggingFaceTB/SmolLM2-135M-Instruct',
113
- name: 'SmolLM2 135M',
114
- size: '~135MB',
115
- description: 'Latest SmolLM v2, improved capabilities',
116
- contextLength: 2048,
117
- },
118
- 'smollm2-360m': {
119
- id: 'HuggingFaceTB/SmolLM2-360M-Instruct',
120
- name: 'SmolLM2 360M',
121
- size: '~360MB',
122
- description: 'Latest SmolLM v2, better quality',
123
- contextLength: 2048,
124
- },
125
- // =========================================================================
126
- // Qwen - Chinese/English bilingual models
127
- // =========================================================================
128
- 'qwen2.5-0.5b': {
129
- id: 'Qwen/Qwen2.5-0.5B-Instruct',
130
- name: 'Qwen2.5 0.5B',
131
- size: '~300MB quantized',
132
- description: 'Good balance of speed and quality, multilingual',
133
- contextLength: 4096,
134
- },
135
- // =========================================================================
136
- // TinyLlama - Llama architecture in tiny form
137
- // =========================================================================
138
- 'tinyllama': {
139
- id: 'TinyLlama/TinyLlama-1.1B-Chat-v1.0',
140
- name: 'TinyLlama 1.1B',
141
- size: '~600MB quantized',
142
- description: 'Best small model quality, slower',
143
- contextLength: 2048,
144
- },
145
- // =========================================================================
146
- // Code-specialized models
147
- // =========================================================================
148
- 'codegemma-2b': {
149
- id: 'google/codegemma-2b',
150
- name: 'CodeGemma 2B',
151
- size: '~1GB quantized',
152
- description: 'Code generation specialist',
153
- contextLength: 8192,
154
- },
155
- 'deepseek-coder-1.3b': {
156
- id: 'deepseek-ai/deepseek-coder-1.3b-instruct',
157
- name: 'DeepSeek Coder 1.3B',
158
- size: '~700MB quantized',
159
- description: 'Excellent for code tasks',
160
- contextLength: 4096,
161
- },
162
- // =========================================================================
163
- // Phi models - Microsoft's tiny powerhouses
164
- // =========================================================================
165
- 'phi-2': {
166
- id: 'microsoft/phi-2',
167
- name: 'Phi-2 2.7B',
168
- size: '~1.5GB quantized',
169
- description: 'High quality small model',
170
- contextLength: 2048,
171
- },
172
- 'phi-3-mini': {
173
- id: 'microsoft/Phi-3-mini-4k-instruct',
174
- name: 'Phi-3 Mini',
175
- size: '~2GB quantized',
176
- description: 'Best quality tiny model',
177
- contextLength: 4096,
178
- },
179
- };
180
- // ============================================================================
181
- // ONNX LLM Generator
182
- // ============================================================================
183
- let pipeline = null;
184
- let transformers = null;
185
- let loadedModel = null;
186
- let loadPromise = null;
187
- let loadError = null;
188
- /**
189
- * Check if transformers.js is available
190
- */
191
- async function isTransformersAvailable() {
192
- try {
193
- await dynamicImport('@xenova/transformers');
194
- return true;
195
- }
196
- catch {
197
- return false;
198
- }
199
- }
200
- /**
201
- * Initialize the ONNX LLM with specified model
202
- */
203
- async function initOnnxLLM(config = {}) {
204
- if (pipeline && loadedModel === config.modelId) {
205
- return true;
206
- }
207
- if (loadError)
208
- throw loadError;
209
- if (loadPromise) {
210
- await loadPromise;
211
- return pipeline !== null;
212
- }
213
- const modelId = config.modelId || 'HuggingFaceTB/SmolLM-135M-Instruct';
214
- loadPromise = (async () => {
215
- try {
216
- console.error(`Loading ONNX LLM: ${modelId}...`);
217
- // Import transformers.js
218
- transformers = await dynamicImport('@xenova/transformers');
219
- const { pipeline: createPipeline, env } = transformers;
220
- // Configure cache directory
221
- if (config.cacheDir) {
222
- env.cacheDir = config.cacheDir;
223
- }
224
- else {
225
- env.cacheDir = path.join(process.env.HOME || '/tmp', '.ruvector', 'models', 'onnx-llm');
226
- }
227
- // Ensure cache directory exists
228
- if (!fs.existsSync(env.cacheDir)) {
229
- fs.mkdirSync(env.cacheDir, { recursive: true });
230
- }
231
- // Disable remote model fetching warnings
232
- env.allowRemoteModels = true;
233
- env.allowLocalModels = true;
234
- // Create text generation pipeline
235
- console.error(`Downloading model (first run may take a while)...`);
236
- pipeline = await createPipeline('text-generation', modelId, {
237
- quantized: config.quantized !== false,
238
- device: config.device || 'cpu',
239
- });
240
- loadedModel = modelId;
241
- console.error(`ONNX LLM ready: ${modelId}`);
242
- }
243
- catch (e) {
244
- loadError = new Error(`Failed to initialize ONNX LLM: ${e.message}`);
245
- throw loadError;
246
- }
247
- })();
248
- await loadPromise;
249
- return pipeline !== null;
250
- }
251
- /**
252
- * Generate text using ONNX LLM
253
- */
254
- async function generate(prompt, config = {}) {
255
- if (!pipeline) {
256
- await initOnnxLLM();
257
- }
258
- if (!pipeline) {
259
- throw new Error('ONNX LLM not initialized');
260
- }
261
- const start = performance.now();
262
- // Build the input text (apply chat template if needed)
263
- let inputText = prompt;
264
- if (config.systemPrompt) {
265
- // Apply simple chat format
266
- inputText = `<|system|>\n${config.systemPrompt}<|end|>\n<|user|>\n${prompt}<|end|>\n<|assistant|>\n`;
267
- }
268
- // Generate
269
- const outputs = await pipeline(inputText, {
270
- max_new_tokens: config.maxNewTokens || 128,
271
- temperature: config.temperature || 0.7,
272
- top_p: config.topP || 0.9,
273
- top_k: config.topK || 50,
274
- repetition_penalty: config.repetitionPenalty || 1.1,
275
- do_sample: (config.temperature || 0.7) > 0,
276
- return_full_text: false,
277
- });
278
- const timeMs = performance.now() - start;
279
- const generatedText = outputs[0]?.generated_text || '';
280
- // Estimate tokens (rough approximation)
281
- const tokensGenerated = Math.ceil(generatedText.split(/\s+/).length * 1.3);
282
- return {
283
- text: generatedText.trim(),
284
- tokensGenerated,
285
- timeMs,
286
- tokensPerSecond: tokensGenerated / (timeMs / 1000),
287
- model: loadedModel || 'unknown',
288
- cached: true,
289
- };
290
- }
291
- /**
292
- * Generate with streaming (token by token)
293
- */
294
- async function generateStream(prompt, config = {}) {
295
- if (!pipeline) {
296
- await initOnnxLLM();
297
- }
298
- if (!pipeline) {
299
- throw new Error('ONNX LLM not initialized');
300
- }
301
- const start = performance.now();
302
- let fullText = '';
303
- let tokenCount = 0;
304
- // Build input text
305
- let inputText = prompt;
306
- if (config.systemPrompt) {
307
- inputText = `<|system|>\n${config.systemPrompt}<|end|>\n<|user|>\n${prompt}<|end|>\n<|assistant|>\n`;
308
- }
309
- // Create streamer
310
- const { TextStreamer } = transformers;
311
- const streamer = new TextStreamer(pipeline.tokenizer, {
312
- skip_prompt: true,
313
- callback_function: (text) => {
314
- fullText += text;
315
- tokenCount++;
316
- if (config.onToken) {
317
- config.onToken(text);
318
- }
319
- },
320
- });
321
- // Generate with streamer
322
- await pipeline(inputText, {
323
- max_new_tokens: config.maxNewTokens || 128,
324
- temperature: config.temperature || 0.7,
325
- top_p: config.topP || 0.9,
326
- top_k: config.topK || 50,
327
- repetition_penalty: config.repetitionPenalty || 1.1,
328
- do_sample: (config.temperature || 0.7) > 0,
329
- streamer,
330
- });
331
- const timeMs = performance.now() - start;
332
- // Return generator that yields the collected text
333
- async function* generator() {
334
- yield fullText;
335
- return {
336
- text: fullText.trim(),
337
- tokensGenerated: tokenCount,
338
- timeMs,
339
- tokensPerSecond: tokenCount / (timeMs / 1000),
340
- model: loadedModel || 'unknown',
341
- cached: true,
342
- };
343
- }
344
- return generator();
345
- }
346
- /**
347
- * Chat completion with conversation history
348
- */
349
- async function chat(messages, config = {}) {
350
- if (!pipeline) {
351
- await initOnnxLLM();
352
- }
353
- if (!pipeline) {
354
- throw new Error('ONNX LLM not initialized');
355
- }
356
- // Build conversation text from messages
357
- let conversationText = '';
358
- for (const msg of messages) {
359
- if (msg.role === 'system') {
360
- conversationText += `<|system|>\n${msg.content}<|end|>\n`;
361
- }
362
- else if (msg.role === 'user') {
363
- conversationText += `<|user|>\n${msg.content}<|end|>\n`;
364
- }
365
- else if (msg.role === 'assistant') {
366
- conversationText += `<|assistant|>\n${msg.content}<|end|>\n`;
367
- }
368
- }
369
- conversationText += '<|assistant|>\n';
370
- return generate(conversationText, { ...config, systemPrompt: undefined });
371
- }
372
- /**
373
- * Get model information
374
- */
375
- function getModelInfo() {
376
- return {
377
- model: loadedModel,
378
- ready: pipeline !== null,
379
- availableModels: exports.AVAILABLE_MODELS,
380
- };
381
- }
382
- /**
383
- * Unload the current model to free memory
384
- */
385
- async function unload() {
386
- if (pipeline) {
387
- // Note: transformers.js doesn't have explicit dispose, but we can null the reference
388
- pipeline = null;
389
- loadedModel = null;
390
- loadPromise = null;
391
- loadError = null;
392
- }
393
- }
394
- // ============================================================================
395
- // Class wrapper for OOP usage
396
- // ============================================================================
397
- class OnnxLLM {
398
- constructor(config = {}) {
399
- this.initialized = false;
400
- this.config = config;
401
- }
402
- async init() {
403
- if (this.initialized)
404
- return true;
405
- this.initialized = await initOnnxLLM(this.config);
406
- return this.initialized;
407
- }
408
- async generate(prompt, config) {
409
- if (!this.initialized)
410
- await this.init();
411
- return generate(prompt, config);
412
- }
413
- async chat(messages, config) {
414
- if (!this.initialized)
415
- await this.init();
416
- return chat(messages, config);
417
- }
418
- async unload() {
419
- await unload();
420
- this.initialized = false;
421
- }
422
- get ready() {
423
- return this.initialized;
424
- }
425
- get model() {
426
- return loadedModel;
427
- }
428
- }
429
- exports.OnnxLLM = OnnxLLM;
430
- exports.default = OnnxLLM;