@ruvector/edge-net 0.1.6 → 0.1.7

This diff shows the changes between publicly released versions of the package, as published to their respective public registries, and is provided for informational purposes only.
Files changed (2)
  1. package/package.json +3 -2
  2. package/real-agents.js +252 -39
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@ruvector/edge-net",
-  "version": "0.1.6",
+  "version": "0.1.7",
   "type": "module",
   "description": "Distributed compute intelligence network with AI agents and workers - contribute browser compute, spawn distributed AI agents, earn credits. Features Time Crystal coordination, Neural DAG attention, P2P swarm intelligence, and multi-agent workflows.",
   "main": "ruvector_edge_net.js",
@@ -116,6 +116,7 @@
     "history": "node join.js --history"
   },
   "dependencies": {
-    "@ruvector/ruvllm": "^0.2.3"
+    "@ruvector/ruvllm": "^0.2.3",
+    "@xenova/transformers": "^2.17.2"
   }
 }
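The only functional change here is the new @xenova/transformers dependency, which backs the ONNX inference path added to real-agents.js below. A minimal sketch of how that dependency is brought in, mirroring the lazy dynamic import and cache configuration used in the diff (the snippet itself is illustrative, not part of the package):

// Lazy-load transformers.js the same way real-agents.js does, so the ONNX
// runtime and model downloads are only paid for when local inference is requested.
const { pipeline, env } = await import('@xenova/transformers');

// Cache models under ~/.ruvector/models/onnx by default; ONNX_CACHE_DIR overrides it.
env.cacheDir = process.env.ONNX_CACHE_DIR || `${process.env.HOME}/.ruvector/models/onnx`;
env.allowRemoteModels = true; // permit first-time downloads from the Hugging Face Hub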
package/real-agents.js CHANGED
@@ -21,19 +21,57 @@ import { join } from 'path';
 // ============================================
 
 const LLM_PROVIDERS = {
-  // LOCAL LLM - Default, no API key needed
+  // ONNX LLM via transformers.js - Default, no API key needed
+  // Uses real ONNX models (SmolLM, TinyLlama, etc.)
   local: {
-    name: 'RuvLLM Local',
+    name: 'ONNX Local',
     type: 'local',
+    backend: 'onnx', // Primary: transformers.js ONNX
     models: {
-      fast: 'ruvllm-fast',
-      balanced: 'ruvllm-balanced',
-      powerful: 'ruvllm-powerful',
+      // TRM (Tiny Random Models) - Fastest
+      fast: process.env.ONNX_MODEL_FAST || 'Xenova/distilgpt2',
+      // SmolLM - Better quality
+      balanced: process.env.ONNX_MODEL || 'HuggingFaceTB/SmolLM-135M-Instruct',
+      // TinyLlama - Best small model
+      powerful: process.env.ONNX_MODEL_POWERFUL || 'HuggingFaceTB/SmolLM-360M-Instruct',
+    },
+  },
+  onnx: {
+    name: 'ONNX Transformers.js',
+    type: 'local',
+    backend: 'onnx',
+    models: {
+      // TRM - Ultra tiny models
+      'trm-tinystories': 'Xenova/TinyStories-33M',
+      'trm-gpt2': 'Xenova/gpt2',
+      'trm-distilgpt2': 'Xenova/distilgpt2',
+      // SmolLM series
+      fast: 'HuggingFaceTB/SmolLM-135M-Instruct',
+      balanced: 'HuggingFaceTB/SmolLM-360M-Instruct',
+      powerful: 'TinyLlama/TinyLlama-1.1B-Chat-v1.0',
+      // Named models
+      'smollm-135m': 'HuggingFaceTB/SmolLM-135M-Instruct',
+      'smollm-360m': 'HuggingFaceTB/SmolLM-360M-Instruct',
+      'smollm2-135m': 'HuggingFaceTB/SmolLM2-135M-Instruct',
+      'tinyllama': 'TinyLlama/TinyLlama-1.1B-Chat-v1.0',
+      'qwen2.5-0.5b': 'Qwen/Qwen2.5-0.5B-Instruct',
+    },
+  },
+  ollama: {
+    name: 'Ollama',
+    type: 'local',
+    backend: 'ollama',
+    baseUrl: process.env.OLLAMA_HOST || 'http://localhost:11434',
+    models: {
+      fast: process.env.OLLAMA_MODEL_FAST || 'qwen2.5:0.5b',
+      balanced: process.env.OLLAMA_MODEL || 'qwen2.5:1.5b',
+      powerful: process.env.OLLAMA_MODEL_POWERFUL || 'qwen2.5:3b',
     },
   },
   ruvllm: {
-    name: 'RuvLLM',
+    name: 'RuvLLM (Legacy)',
     type: 'local',
+    backend: 'ruvllm',
     models: {
       fast: 'ruvllm-fast',
       balanced: 'ruvllm-balanced',
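The reworked LLM_PROVIDERS table gives each local backend (local, onnx, ollama, and the legacy ruvllm) fast/balanced/powerful tiers, with environment variables overriding the defaults. A hedged sketch of how a concrete model ID could be resolved from that table (pickModel is a hypothetical helper, not part of the package):

// Resolve a model ID for a provider/tier pair, falling back to the balanced tier.
// Assumes the LLM_PROVIDERS object shown above is in scope.
function pickModel(provider = 'local', tier = 'balanced') {
  const config = LLM_PROVIDERS[provider];
  if (!config) throw new Error(`Unknown LLM provider: ${provider}`);
  return config.models[tier] || config.models.balanced;
}

// With no env overrides set:
pickModel('local', 'fast');      // 'Xenova/distilgpt2' (ONNX_MODEL_FAST overrides this)
pickModel('ollama', 'powerful'); // 'qwen2.5:3b' (OLLAMA_MODEL_POWERFUL overrides this)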
@@ -124,13 +162,92 @@ export class LLMClient {
       throw new Error(`Unknown LLM provider: ${this.provider}`);
     }
 
-    // Initialize local LLM if using local provider
+    // Initialize local LLM backends
     this.ruvllm = null;
     this.ruvllmInitialized = false;
+    this.onnxPipeline = null;
+    this.onnxInitialized = false;
+    this.onnxModel = null;
+  }
+
+  /**
+   * Initialize ONNX LLM via transformers.js
+   * This is the primary local inference method
+   */
+  async initOnnx(modelId) {
+    if (this.onnxInitialized && this.onnxModel === modelId) return true;
+
+    try {
+      console.log(`[LLM] Loading ONNX model: ${modelId}...`);
+      console.log('[LLM] First load may take a few minutes to download the model...');
+
+      const transformers = await import('@xenova/transformers');
+      const { pipeline, env } = transformers;
+
+      // Configure cache
+      env.cacheDir = process.env.ONNX_CACHE_DIR ||
+        (process.env.HOME ? `${process.env.HOME}/.ruvector/models/onnx` : '/tmp/.ruvector/models/onnx');
+      env.allowRemoteModels = true;
+      env.allowLocalModels = true;
+
+      // Create text generation pipeline
+      this.onnxPipeline = await pipeline('text-generation', modelId, {
+        quantized: true,
+        device: 'cpu',
+      });
+
+      this.onnxModel = modelId;
+      this.onnxInitialized = true;
+      console.log(`[LLM] ONNX model ready: ${modelId}`);
+      return true;
+    } catch (error) {
+      console.warn('[LLM] ONNX init failed:', error.message);
+      return false;
+    }
+  }
+
+  /**
+   * Call ONNX LLM for text generation
+   */
+  async callOnnx(modelId, systemPrompt, userMessage, options = {}) {
+    await this.initOnnx(modelId);
+    if (!this.onnxPipeline) {
+      throw new Error('ONNX pipeline not initialized');
+    }
+
+    // Build prompt (simple format for small models)
+    const prompt = systemPrompt
+      ? `${systemPrompt}\n\nUser: ${userMessage}\n\nAssistant:`
+      : userMessage;
+
+    const start = Date.now();
+
+    const outputs = await this.onnxPipeline(prompt, {
+      max_new_tokens: options.maxTokens || 256,
+      temperature: options.temperature || 0.7,
+      top_p: options.topP || 0.9,
+      top_k: options.topK || 50,
+      repetition_penalty: 1.1,
+      do_sample: (options.temperature || 0.7) > 0,
+      return_full_text: false,
+    });
+
+    const timeMs = Date.now() - start;
+    const generatedText = outputs[0]?.generated_text || '';
+
+    return {
+      content: generatedText.trim(),
+      model: modelId,
+      timeMs,
+      usage: {
+        input_tokens: Math.ceil(prompt.length / 4),
+        output_tokens: Math.ceil(generatedText.length / 4),
+      },
+    };
   }
 
   /**
-   * Initialize local ruvllm
+   * Initialize legacy ruvllm
    */
   async initLocal() {
     if (this.ruvllmInitialized) return;
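The new initOnnx()/callOnnx() pair wraps transformers.js: the first call downloads and caches the ONNX model, and later calls reuse the pipeline. The generation path can be reproduced standalone as below; the model and prompt are illustrative, but the pipeline options mirror the defaults in the diff:

// Standalone sketch of the generation path used by callOnnx() above.
import { pipeline } from '@xenova/transformers';

const generate = await pipeline('text-generation', 'HuggingFaceTB/SmolLM-135M-Instruct', {
  quantized: true,  // quantized ONNX weights, the transformers.js default
  device: 'cpu',    // CPU execution, as requested in the diff
});

const prompt = 'You are a helpful agent.\n\nUser: Summarize ONNX Runtime in one sentence.\n\nAssistant:';
const [output] = await generate(prompt, {
  max_new_tokens: 64,
  temperature: 0.7,
  top_p: 0.9,
  top_k: 50,
  repetition_penalty: 1.1,
  do_sample: true,
  return_full_text: false, // return only the completion, as callOnnx() expects
});
console.log(output.generated_text.trim());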
@@ -172,56 +289,152 @@ export class LLMClient {
   }
 
   /**
-   * Call local RuvLLM
+   * Call local LLM (ONNX primary, Ollama fallback)
    */
   async callLocal(systemPrompt, userMessage, options = {}) {
-    await this.initLocal();
-
     const modelTier = options.model || this.model;
-    const prompt = `${systemPrompt}\n\n${userMessage}`;
+    const modelName = this.config.models[modelTier] || this.config.models.balanced;
+    const backend = this.config.backend || 'onnx';
 
-    if (this.ruvllm) {
-      // Use ruvllm engine
-      const response = this.ruvllm.query(prompt, {
-        maxTokens: options.maxTokens || this.maxTokens,
-        temperature: options.temperature || 0.7,
-      });
+    // ========================================
+    // 1. ONNX via transformers.js (Primary - REAL AI)
+    // ========================================
+    if (backend === 'onnx' || this.provider === 'local' || this.provider === 'onnx') {
+      try {
+        const onnxModelId = this.config.models[modelTier] || modelName;
+        const response = await this.callOnnx(onnxModelId, systemPrompt, userMessage, options);
+
+        // Validate response is meaningful
+        if (response.content && response.content.length > 5) {
+          return {
+            content: response.content,
+            model: response.model,
+            usage: response.usage,
+            stopReason: 'end',
+            local: true,
+            onnx: true,
+            timeMs: response.timeMs,
+          };
+        }
+      } catch (error) {
+        console.log(`[LLM] ONNX not available: ${error.message}`);
+      }
+    }
 
-      // Check if response is valid (not garbage/simulation output)
-      const isValidResponse = response.text &&
-        response.text.length > 10 &&
-        /[a-zA-Z]{3,}/.test(response.text) &&
-        !/^[>A-Z~|%#@\\+]+/.test(response.text);
-
-      if (isValidResponse) {
-        return {
-          content: response.text,
-          model: `ruvllm-${modelTier}`,
-          usage: { input_tokens: prompt.length, output_tokens: response.text.length },
-          stopReason: 'end',
-          confidence: response.confidence,
-          local: true,
-        };
+    // ========================================
+    // 2. Ollama (Fallback if ONNX unavailable)
+    // ========================================
+    if (backend === 'ollama' || this.config.baseUrl) {
+      const baseUrl = this.config.baseUrl || 'http://localhost:11434';
+      const ollamaModel = this.config.models[modelTier] || 'qwen2.5:0.5b';
+
+      try {
+        const response = await this.callOllama(baseUrl, ollamaModel, systemPrompt, userMessage, options);
+        if (response) {
+          return {
+            content: response.content,
+            model: ollamaModel,
+            usage: response.usage || { input_tokens: 0, output_tokens: 0 },
+            stopReason: 'end',
+            local: true,
+            ollama: true,
+          };
+        }
+      } catch (error) {
+        console.log(`[LLM] Ollama not available: ${error.message}`);
       }
+    }
 
-      // RuvLLM returned simulation output, use smart fallback
-      console.log('[LLM] RuvLLM returned simulation output, using smart fallback');
+    // ========================================
+    // 3. Legacy RuvLLM (if explicitly selected)
+    // ========================================
+    if (backend === 'ruvllm' || this.provider === 'ruvllm') {
+      await this.initLocal();
+      if (this.ruvllm) {
+        const prompt = `${systemPrompt}\n\n${userMessage}`;
+        const response = this.ruvllm.query(prompt, {
+          maxTokens: options.maxTokens || this.maxTokens,
+          temperature: options.temperature || 0.7,
+        });
+
+        // Check if response is valid (not garbage)
+        const isValidResponse = response.text &&
+          response.text.length > 10 &&
+          /[a-zA-Z]{3,}/.test(response.text) &&
+          !/^[>A-Z~|%#@\\+]+/.test(response.text);
+
+        if (isValidResponse) {
+          return {
+            content: response.text,
+            model: `ruvllm-${modelTier}`,
+            usage: { input_tokens: prompt.length, output_tokens: response.text.length },
+            stopReason: 'end',
+            confidence: response.confidence,
+            local: true,
+          };
+        }
+      }
     }
 
-    // Smart fallback: Generate contextual response
-    console.log('[LLM] Using smart local generation');
+    // ========================================
+    // 4. Smart Template Fallback (Last resort)
+    // ========================================
+    console.log('[LLM] Using smart template generation');
+    console.log('[LLM] Install @xenova/transformers for real ONNX AI inference');
     const fallbackResponse = this.generateSmartResponse(systemPrompt, userMessage);
 
     return {
       content: fallbackResponse,
-      model: `ruvllm-${modelTier}-local`,
-      usage: { input_tokens: prompt.length, output_tokens: fallbackResponse.length },
+      model: `template-${modelTier}`,
+      usage: { input_tokens: systemPrompt.length + userMessage.length, output_tokens: fallbackResponse.length },
       stopReason: 'end',
       local: true,
       fallback: true,
     };
   }
 
+  /**
+   * Call Ollama API
+   */
+  async callOllama(baseUrl, model, systemPrompt, userMessage, options = {}) {
+    const url = `${baseUrl}/api/chat`;
+
+    const body = {
+      model,
+      messages: [
+        { role: 'system', content: systemPrompt },
+        { role: 'user', content: userMessage },
+      ],
+      stream: false,
+      options: {
+        temperature: options.temperature || 0.7,
+        num_predict: options.maxTokens || this.maxTokens,
+      },
+    };
+
+    const response = await fetch(url, {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify(body),
+      signal: AbortSignal.timeout(options.timeout || 120000), // 2 min timeout
+    });
+
+    if (!response.ok) {
+      const errorText = await response.text();
+      throw new Error(`Ollama error ${response.status}: ${errorText}`);
+    }
+
+    const result = await response.json();
+
+    return {
+      content: result.message?.content || '',
+      usage: {
+        input_tokens: result.prompt_eval_count || 0,
+        output_tokens: result.eval_count || 0,
+      },
+    };
+  }
+
   /**
    * Generate smart contextual response based on task type
    */
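The new callOllama() posts a non-streaming request to Ollama's /api/chat endpoint and maps prompt_eval_count / eval_count into the usage fields. A quick way to confirm a local daemon will satisfy that request (assumes Ollama is running on the default port and qwen2.5:0.5b has been pulled; the prompt is illustrative):

// Issue the same non-streaming chat request that callOllama() builds.
const res = await fetch('http://localhost:11434/api/chat', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({
    model: 'qwen2.5:0.5b',
    messages: [
      { role: 'system', content: 'You are a terse assistant.' },
      { role: 'user', content: 'Say hello in five words.' },
    ],
    stream: false,
    options: { temperature: 0.7, num_predict: 64 },
  }),
});

if (!res.ok) throw new Error(`Ollama error ${res.status}`);
const data = await res.json();
console.log(data.message?.content);                    // assistant reply
console.log(data.prompt_eval_count, data.eval_count);  // token counts mapped to usage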