@ruvector/edge-net 0.1.6 → 0.1.7

This diff shows the changes between publicly released versions of the package, as published to their respective public registries, and is provided for informational purposes only.
Files changed (2)
  1. package/package.json +3 -2
  2. package/real-agents.js +252 -39
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@ruvector/edge-net",
-  "version": "0.1.6",
+  "version": "0.1.7",
   "type": "module",
   "description": "Distributed compute intelligence network with AI agents and workers - contribute browser compute, spawn distributed AI agents, earn credits. Features Time Crystal coordination, Neural DAG attention, P2P swarm intelligence, and multi-agent workflows.",
   "main": "ruvector_edge_net.js",
@@ -116,6 +116,7 @@
     "history": "node join.js --history"
   },
   "dependencies": {
-    "@ruvector/ruvllm": "^0.2.3"
+    "@ruvector/ruvllm": "^0.2.3",
+    "@xenova/transformers": "^2.17.2"
   }
 }
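The only functional change here is the new @xenova/transformers dependency, which backs the ONNX inference path added to real-agents.js below. A minimal sketch of how that dependency is brought in, mirroring the lazy dynamic import and cache configuration used in the diff (the snippet itself is illustrative, not part of the package):

// Lazy-load transformers.js the same way real-agents.js does, so the ONNX
// runtime and model downloads are only paid for when local inference is requested.
const { pipeline, env } = await import('@xenova/transformers');

// Cache models under ~/.ruvector/models/onnx by default; ONNX_CACHE_DIR overrides it.
env.cacheDir = process.env.ONNX_CACHE_DIR || `${process.env.HOME}/.ruvector/models/onnx`;
env.allowRemoteModels = true; // permit first-time downloads from the Hugging Face Hub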
package/real-agents.js CHANGED
@@ -21,19 +21,57 @@ import { join } from 'path';
 // ============================================
 
 const LLM_PROVIDERS = {
-  // LOCAL LLM - Default, no API key needed
+  // ONNX LLM via transformers.js - Default, no API key needed
+  // Uses real ONNX models (SmolLM, TinyLlama, etc.)
   local: {
-    name: 'RuvLLM Local',
+    name: 'ONNX Local',
     type: 'local',
+    backend: 'onnx', // Primary: transformers.js ONNX
     models: {
-      fast: 'ruvllm-fast',
-      balanced: 'ruvllm-balanced',
-      powerful: 'ruvllm-powerful',
+      // TRM (Tiny Random Models) - Fastest
+      fast: process.env.ONNX_MODEL_FAST || 'Xenova/distilgpt2',
+      // SmolLM - Better quality
+      balanced: process.env.ONNX_MODEL || 'HuggingFaceTB/SmolLM-135M-Instruct',
+      // TinyLlama - Best small model
+      powerful: process.env.ONNX_MODEL_POWERFUL || 'HuggingFaceTB/SmolLM-360M-Instruct',
+    },
+  },
+  onnx: {
+    name: 'ONNX Transformers.js',
+    type: 'local',
+    backend: 'onnx',
+    models: {
+      // TRM - Ultra tiny models
+      'trm-tinystories': 'Xenova/TinyStories-33M',
+      'trm-gpt2': 'Xenova/gpt2',
+      'trm-distilgpt2': 'Xenova/distilgpt2',
+      // SmolLM series
+      fast: 'HuggingFaceTB/SmolLM-135M-Instruct',
+      balanced: 'HuggingFaceTB/SmolLM-360M-Instruct',
+      powerful: 'TinyLlama/TinyLlama-1.1B-Chat-v1.0',
+      // Named models
+      'smollm-135m': 'HuggingFaceTB/SmolLM-135M-Instruct',
+      'smollm-360m': 'HuggingFaceTB/SmolLM-360M-Instruct',
+      'smollm2-135m': 'HuggingFaceTB/SmolLM2-135M-Instruct',
+      'tinyllama': 'TinyLlama/TinyLlama-1.1B-Chat-v1.0',
+      'qwen2.5-0.5b': 'Qwen/Qwen2.5-0.5B-Instruct',
+    },
+  },
+  ollama: {
+    name: 'Ollama',
+    type: 'local',
+    backend: 'ollama',
+    baseUrl: process.env.OLLAMA_HOST || 'http://localhost:11434',
+    models: {
+      fast: process.env.OLLAMA_MODEL_FAST || 'qwen2.5:0.5b',
+      balanced: process.env.OLLAMA_MODEL || 'qwen2.5:1.5b',
+      powerful: process.env.OLLAMA_MODEL_POWERFUL || 'qwen2.5:3b',
     },
   },
   ruvllm: {
-    name: 'RuvLLM',
+    name: 'RuvLLM (Legacy)',
     type: 'local',
+    backend: 'ruvllm',
     models: {
       fast: 'ruvllm-fast',
       balanced: 'ruvllm-balanced',
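The reworked LLM_PROVIDERS table gives each local backend (local, onnx, ollama, and the legacy ruvllm) fast/balanced/powerful tiers, with environment variables overriding the defaults. A hedged sketch of how a concrete model ID could be resolved from that table (pickModel is a hypothetical helper, not part of the package):

// Resolve a model ID for a provider/tier pair, falling back to the balanced tier.
// Assumes the LLM_PROVIDERS object shown above is in scope.
function pickModel(provider = 'local', tier = 'balanced') {
  const config = LLM_PROVIDERS[provider];
  if (!config) throw new Error(`Unknown LLM provider: ${provider}`);
  return config.models[tier] || config.models.balanced;
}

// With no env overrides set:
pickModel('local', 'fast');      // 'Xenova/distilgpt2' (ONNX_MODEL_FAST overrides this)
pickModel('ollama', 'powerful'); // 'qwen2.5:3b' (OLLAMA_MODEL_POWERFUL overrides this)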
@@ -124,13 +162,92 @@ export class LLMClient {
       throw new Error(`Unknown LLM provider: ${this.provider}`);
     }
 
-    // Initialize local LLM if using local provider
+    // Initialize local LLM backends
     this.ruvllm = null;
     this.ruvllmInitialized = false;
+    this.onnxPipeline = null;
+    this.onnxInitialized = false;
+    this.onnxModel = null;
+  }
+
+  /**
+   * Initialize ONNX LLM via transformers.js
+   * This is the primary local inference method
+   */
+  async initOnnx(modelId) {
+    if (this.onnxInitialized && this.onnxModel === modelId) return true;
+
+    try {
+      console.log(`[LLM] Loading ONNX model: ${modelId}...`);
+      console.log('[LLM] First load may take a few minutes to download the model...');
+
+      const transformers = await import('@xenova/transformers');
+      const { pipeline, env } = transformers;
+
+      // Configure cache
+      env.cacheDir = process.env.ONNX_CACHE_DIR ||
+        (process.env.HOME ? `${process.env.HOME}/.ruvector/models/onnx` : '/tmp/.ruvector/models/onnx');
+      env.allowRemoteModels = true;
+      env.allowLocalModels = true;
+
+      // Create text generation pipeline
+      this.onnxPipeline = await pipeline('text-generation', modelId, {
+        quantized: true,
+        device: 'cpu',
+      });
+
+      this.onnxModel = modelId;
+      this.onnxInitialized = true;
+      console.log(`[LLM] ONNX model ready: ${modelId}`);
+      return true;
+    } catch (error) {
+      console.warn('[LLM] ONNX init failed:', error.message);
+      return false;
+    }
+  }
+
+  /**
+   * Call ONNX LLM for text generation
+   */
+  async callOnnx(modelId, systemPrompt, userMessage, options = {}) {
+    await this.initOnnx(modelId);
+    if (!this.onnxPipeline) {
+      throw new Error('ONNX pipeline not initialized');
+    }
+
+    // Build prompt (simple format for small models)
+    const prompt = systemPrompt
+      ? `${systemPrompt}\n\nUser: ${userMessage}\n\nAssistant:`
+      : userMessage;
+
+    const start = Date.now();
+
+    const outputs = await this.onnxPipeline(prompt, {
+      max_new_tokens: options.maxTokens || 256,
+      temperature: options.temperature || 0.7,
+      top_p: options.topP || 0.9,
+      top_k: options.topK || 50,
+      repetition_penalty: 1.1,
+      do_sample: (options.temperature || 0.7) > 0,
+      return_full_text: false,
+    });
+
+    const timeMs = Date.now() - start;
+    const generatedText = outputs[0]?.generated_text || '';
+
+    return {
+      content: generatedText.trim(),
+      model: modelId,
+      timeMs,
+      usage: {
+        input_tokens: Math.ceil(prompt.length / 4),
+        output_tokens: Math.ceil(generatedText.length / 4),
+      },
+    };
   }
 
   /**
-   * Initialize local ruvllm
+   * Initialize legacy ruvllm
    */
   async initLocal() {
     if (this.ruvllmInitialized) return;
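The new initOnnx()/callOnnx() pair wraps transformers.js: the first call downloads and caches the ONNX model, and later calls reuse the pipeline. The generation path can be reproduced standalone as below; the model and prompt are illustrative, but the pipeline options mirror the defaults in the diff:

// Standalone sketch of the generation path used by callOnnx() above.
import { pipeline } from '@xenova/transformers';

const generate = await pipeline('text-generation', 'HuggingFaceTB/SmolLM-135M-Instruct', {
  quantized: true,  // quantized ONNX weights, the transformers.js default
  device: 'cpu',    // CPU execution, as requested in the diff
});

const prompt = 'You are a helpful agent.\n\nUser: Summarize ONNX Runtime in one sentence.\n\nAssistant:';
const [output] = await generate(prompt, {
  max_new_tokens: 64,
  temperature: 0.7,
  top_p: 0.9,
  top_k: 50,
  repetition_penalty: 1.1,
  do_sample: true,
  return_full_text: false, // return only the completion, as callOnnx() expects
});
console.log(output.generated_text.trim());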
@@ -172,56 +289,152 @@ export class LLMClient {
   }
 
   /**
-   * Call local RuvLLM
+   * Call local LLM (ONNX primary, Ollama fallback)
    */
   async callLocal(systemPrompt, userMessage, options = {}) {
-    await this.initLocal();
-
     const modelTier = options.model || this.model;
-    const prompt = `${systemPrompt}\n\n${userMessage}`;
+    const modelName = this.config.models[modelTier] || this.config.models.balanced;
+    const backend = this.config.backend || 'onnx';
 
-    if (this.ruvllm) {
-      // Use ruvllm engine
-      const response = this.ruvllm.query(prompt, {
-        maxTokens: options.maxTokens || this.maxTokens,
-        temperature: options.temperature || 0.7,
-      });
+    // ========================================
+    // 1. ONNX via transformers.js (Primary - REAL AI)
+    // ========================================
+    if (backend === 'onnx' || this.provider === 'local' || this.provider === 'onnx') {
+      try {
+        const onnxModelId = this.config.models[modelTier] || modelName;
+        const response = await this.callOnnx(onnxModelId, systemPrompt, userMessage, options);
+
+        // Validate response is meaningful
+        if (response.content && response.content.length > 5) {
+          return {
+            content: response.content,
+            model: response.model,
+            usage: response.usage,
+            stopReason: 'end',
+            local: true,
+            onnx: true,
+            timeMs: response.timeMs,
+          };
+        }
+      } catch (error) {
+        console.log(`[LLM] ONNX not available: ${error.message}`);
+      }
+    }
 
-      // Check if response is valid (not garbage/simulation output)
-      const isValidResponse = response.text &&
-        response.text.length > 10 &&
-        /[a-zA-Z]{3,}/.test(response.text) &&
-        !/^[>A-Z~|%#@\\+]+/.test(response.text);
-
-      if (isValidResponse) {
-        return {
-          content: response.text,
-          model: `ruvllm-${modelTier}`,
-          usage: { input_tokens: prompt.length, output_tokens: response.text.length },
-          stopReason: 'end',
-          confidence: response.confidence,
-          local: true,
-        };
+    // ========================================
+    // 2. Ollama (Fallback if ONNX unavailable)
+    // ========================================
+    if (backend === 'ollama' || this.config.baseUrl) {
+      const baseUrl = this.config.baseUrl || 'http://localhost:11434';
+      const ollamaModel = this.config.models[modelTier] || 'qwen2.5:0.5b';
+
+      try {
+        const response = await this.callOllama(baseUrl, ollamaModel, systemPrompt, userMessage, options);
+        if (response) {
+          return {
+            content: response.content,
+            model: ollamaModel,
+            usage: response.usage || { input_tokens: 0, output_tokens: 0 },
+            stopReason: 'end',
+            local: true,
+            ollama: true,
+          };
+        }
+      } catch (error) {
+        console.log(`[LLM] Ollama not available: ${error.message}`);
       }
+    }
 
-      // RuvLLM returned simulation output, use smart fallback
-      console.log('[LLM] RuvLLM returned simulation output, using smart fallback');
+    // ========================================
+    // 3. Legacy RuvLLM (if explicitly selected)
+    // ========================================
+    if (backend === 'ruvllm' || this.provider === 'ruvllm') {
+      await this.initLocal();
+      if (this.ruvllm) {
+        const prompt = `${systemPrompt}\n\n${userMessage}`;
+        const response = this.ruvllm.query(prompt, {
+          maxTokens: options.maxTokens || this.maxTokens,
+          temperature: options.temperature || 0.7,
+        });
+
+        // Check if response is valid (not garbage)
+        const isValidResponse = response.text &&
+          response.text.length > 10 &&
+          /[a-zA-Z]{3,}/.test(response.text) &&
+          !/^[>A-Z~|%#@\\+]+/.test(response.text);
+
+        if (isValidResponse) {
+          return {
+            content: response.text,
+            model: `ruvllm-${modelTier}`,
+            usage: { input_tokens: prompt.length, output_tokens: response.text.length },
+            stopReason: 'end',
+            confidence: response.confidence,
+            local: true,
+          };
+        }
+      }
     }
 
-    // Smart fallback: Generate contextual response
-    console.log('[LLM] Using smart local generation');
+    // ========================================
+    // 4. Smart Template Fallback (Last resort)
+    // ========================================
+    console.log('[LLM] Using smart template generation');
+    console.log('[LLM] Install @xenova/transformers for real ONNX AI inference');
     const fallbackResponse = this.generateSmartResponse(systemPrompt, userMessage);
 
     return {
       content: fallbackResponse,
-      model: `ruvllm-${modelTier}-local`,
-      usage: { input_tokens: prompt.length, output_tokens: fallbackResponse.length },
+      model: `template-${modelTier}`,
+      usage: { input_tokens: systemPrompt.length + userMessage.length, output_tokens: fallbackResponse.length },
       stopReason: 'end',
       local: true,
       fallback: true,
     };
   }
 
+  /**
+   * Call Ollama API
+   */
+  async callOllama(baseUrl, model, systemPrompt, userMessage, options = {}) {
+    const url = `${baseUrl}/api/chat`;
+
+    const body = {
+      model,
+      messages: [
+        { role: 'system', content: systemPrompt },
+        { role: 'user', content: userMessage },
+      ],
+      stream: false,
+      options: {
+        temperature: options.temperature || 0.7,
+        num_predict: options.maxTokens || this.maxTokens,
+      },
+    };
+
+    const response = await fetch(url, {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify(body),
+      signal: AbortSignal.timeout(options.timeout || 120000), // 2 min timeout
+    });
+
+    if (!response.ok) {
+      const errorText = await response.text();
+      throw new Error(`Ollama error ${response.status}: ${errorText}`);
+    }
+
+    const result = await response.json();
+
+    return {
+      content: result.message?.content || '',
+      usage: {
+        input_tokens: result.prompt_eval_count || 0,
+        output_tokens: result.eval_count || 0,
+      },
+    };
+  }
+
   /**
    * Generate smart contextual response based on task type
    */
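The new callOllama() posts a non-streaming request to Ollama's /api/chat endpoint and maps prompt_eval_count / eval_count into the usage fields. A quick way to confirm a local daemon will satisfy that request (assumes Ollama is running on the default port and qwen2.5:0.5b has been pulled; the prompt is illustrative):

// Issue the same non-streaming chat request that callOllama() builds.
const res = await fetch('http://localhost:11434/api/chat', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({
    model: 'qwen2.5:0.5b',
    messages: [
      { role: 'system', content: 'You are a terse assistant.' },
      { role: 'user', content: 'Say hello in five words.' },
    ],
    stream: false,
    options: { temperature: 0.7, num_predict: 64 },
  }),
});

if (!res.ok) throw new Error(`Ollama error ${res.status}`);
const data = await res.json();
console.log(data.message?.content);                    // assistant reply
console.log(data.prompt_eval_count, data.eval_count);  // token counts mapped to usage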