npm - @ruvector/edge-net - Versions diffs - 0.5.0 → 0.5.3 - Mend

@ruvector/edge-net 0.5.0 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30) hide show

package/README.md +281 -10
package/core-invariants.js +942 -0
package/models/adapter-hub.js +1008 -0
package/models/adapter-security.js +792 -0
package/models/benchmark.js +688 -0
package/models/distribution.js +791 -0
package/models/index.js +109 -0
package/models/integrity.js +753 -0
package/models/loader.js +725 -0
package/models/microlora.js +1298 -0
package/models/model-loader.js +922 -0
package/models/model-optimizer.js +1245 -0
package/models/model-registry.js +696 -0
package/models/model-utils.js +548 -0
package/models/models-cli.js +914 -0
package/models/registry.json +214 -0
package/models/training-utils.js +1418 -0
package/models/wasm-core.js +1025 -0
package/network-genesis.js +2847 -0
package/onnx-worker.js +462 -8
package/package.json +33 -3
package/plugins/SECURITY-AUDIT.md +654 -0
package/plugins/cli.js +43 -3
package/plugins/implementations/e2e-encryption.js +57 -12
package/plugins/plugin-loader.js +610 -21
package/tests/model-optimizer.test.js +644 -0
package/tests/network-genesis.test.js +562 -0
package/tests/plugin-benchmark.js +1239 -0
package/tests/plugin-system-test.js +163 -0
package/tests/wasm-core.test.js +368 -0

package/models/models-cli.js ADDED Viewed

@@ -0,0 +1,914 @@
+#!/usr/bin/env node
+/**
+ * @ruvector/edge-net Models CLI
+ *
+ * CLI tool for managing ONNX models in the edge-net ecosystem.
+ * Supports listing, downloading, optimizing, and uploading models.
+ *
+ * @module @ruvector/edge-net/models/cli
+ */
+import { Command } from 'commander';
+import { createWriteStream, existsSync, mkdirSync, readFileSync, writeFileSync, statSync, unlinkSync, readdirSync } from 'fs';
+import { join, basename, dirname } from 'path';
+import { homedir, cpus, totalmem } from 'os';
+import { pipeline } from 'stream/promises';
+import { createHash } from 'crypto';
+import { EventEmitter } from 'events';
+import { fileURLToPath } from 'url';
+const __filename = fileURLToPath(import.meta.url);
+const __dirname = dirname(__filename);
+// ============================================
+// CONFIGURATION
+// ============================================
+const DEFAULT_CACHE_DIR = process.env.ONNX_CACHE_DIR ||
+    join(homedir(), '.ruvector', 'models', 'onnx');
+const GCS_BUCKET = process.env.GCS_MODEL_BUCKET || 'ruvector-models';
+const GCS_BASE_URL = `https://storage.googleapis.com/${GCS_BUCKET}`;
+const IPFS_GATEWAY = process.env.IPFS_GATEWAY || 'https://ipfs.io/ipfs';
+const REGISTRY_PATH = join(__dirname, 'registry.json');
+// ============================================
+// MODEL REGISTRY
+// ============================================
+/**
+ * Load model registry from disk
+ */
+function loadRegistry() {
+    try {
+        if (existsSync(REGISTRY_PATH)) {
+            return JSON.parse(readFileSync(REGISTRY_PATH, 'utf-8'));
+        }
+    } catch (error) {
+        console.error('[Registry] Failed to load registry:', error.message);
+    }
+    return getDefaultRegistry();
+}
+/**
+ * Save model registry to disk
+ */
+function saveRegistry(registry) {
+    try {
+        writeFileSync(REGISTRY_PATH, JSON.stringify(registry, null, 2));
+        console.log('[Registry] Saved to:', REGISTRY_PATH);
+    } catch (error) {
+        console.error('[Registry] Failed to save:', error.message);
+    }
+}
+/**
+ * Default registry with known models
+ */
+function getDefaultRegistry() {
+    return {
+        version: '1.0.0',
+        updated: new Date().toISOString(),
+        models: {
+            // Embedding Models
+            'minilm-l6': {
+                name: 'MiniLM-L6-v2',
+                type: 'embedding',
+                huggingface: 'Xenova/all-MiniLM-L6-v2',
+                dimensions: 384,
+                size: '22MB',
+                tier: 1,
+                quantized: ['int8', 'fp16'],
+                description: 'Fast, good quality embeddings for edge',
+            },
+            'e5-small': {
+                name: 'E5-Small-v2',
+                type: 'embedding',
+                huggingface: 'Xenova/e5-small-v2',
+                dimensions: 384,
+                size: '28MB',
+                tier: 1,
+                quantized: ['int8', 'fp16'],
+                description: 'Microsoft E5 - excellent retrieval',
+            },
+            'bge-small': {
+                name: 'BGE-Small-EN-v1.5',
+                type: 'embedding',
+                huggingface: 'Xenova/bge-small-en-v1.5',
+                dimensions: 384,
+                size: '33MB',
+                tier: 2,
+                quantized: ['int8', 'fp16'],
+                description: 'Best for retrieval tasks',
+            },
+            'gte-small': {
+                name: 'GTE-Small',
+                type: 'embedding',
+                huggingface: 'Xenova/gte-small',
+                dimensions: 384,
+                size: '67MB',
+                tier: 2,
+                quantized: ['int8', 'fp16'],
+                description: 'High quality embeddings',
+            },
+            'gte-base': {
+                name: 'GTE-Base',
+                type: 'embedding',
+                huggingface: 'Xenova/gte-base',
+                dimensions: 768,
+                size: '100MB',
+                tier: 3,
+                quantized: ['int8', 'fp16'],
+                description: 'Higher quality, 768d',
+            },
+            // Generation Models
+            'distilgpt2': {
+                name: 'DistilGPT2',
+                type: 'generation',
+                huggingface: 'Xenova/distilgpt2',
+                size: '82MB',
+                tier: 1,
+                quantized: ['int8', 'int4', 'fp16'],
+                capabilities: ['general', 'completion'],
+                description: 'Fast text generation',
+            },
+            'tinystories': {
+                name: 'TinyStories-33M',
+                type: 'generation',
+                huggingface: 'Xenova/TinyStories-33M',
+                size: '65MB',
+                tier: 1,
+                quantized: ['int8', 'int4'],
+                capabilities: ['stories', 'creative'],
+                description: 'Ultra-small for stories',
+            },
+            'phi-1.5': {
+                name: 'Phi-1.5',
+                type: 'generation',
+                huggingface: 'Xenova/phi-1_5',
+                size: '280MB',
+                tier: 2,
+                quantized: ['int8', 'int4', 'fp16'],
+                capabilities: ['code', 'reasoning', 'math'],
+                description: 'Microsoft Phi-1.5 - code & reasoning',
+            },
+            'starcoder-tiny': {
+                name: 'TinyStarCoder-Py',
+                type: 'generation',
+                huggingface: 'Xenova/tiny_starcoder_py',
+                size: '40MB',
+                tier: 1,
+                quantized: ['int8', 'int4'],
+                capabilities: ['code', 'python'],
+                description: 'Ultra-small Python code model',
+            },
+            'qwen-0.5b': {
+                name: 'Qwen-1.5-0.5B',
+                type: 'generation',
+                huggingface: 'Xenova/Qwen1.5-0.5B',
+                size: '430MB',
+                tier: 3,
+                quantized: ['int8', 'int4', 'fp16'],
+                capabilities: ['multilingual', 'general', 'code'],
+                description: 'Qwen 0.5B - multilingual small model',
+            },
+        },
+    };
+}
+// ============================================
+// UTILITIES
+// ============================================
+/**
+ * Format bytes to human-readable size
+ */
+function formatSize(bytes) {
+    const units = ['B', 'KB', 'MB', 'GB'];
+    let size = bytes;
+    let unitIndex = 0;
+    while (size >= 1024 && unitIndex < units.length - 1) {
+        size /= 1024;
+        unitIndex++;
+    }
+    return `${size.toFixed(1)}${units[unitIndex]}`;
+}
+/**
+ * Calculate SHA256 hash of a file
+ */
+async function hashFile(filePath) {
+    const { createReadStream } = await import('fs');
+    const hash = createHash('sha256');
+    const stream = createReadStream(filePath);
+    return new Promise((resolve, reject) => {
+        stream.on('data', (data) => hash.update(data));
+        stream.on('end', () => resolve(hash.digest('hex')));
+        stream.on('error', reject);
+    });
+}
+/**
+ * Download file with progress
+ */
+async function downloadFile(url, destPath, options = {}) {
+    const { showProgress = true } = options;
+    // Ensure directory exists
+    const destDir = dirname(destPath);
+    if (!existsSync(destDir)) {
+        mkdirSync(destDir, { recursive: true });
+    }
+    const response = await fetch(url);
+    if (!response.ok) {
+        throw new Error(`HTTP ${response.status}: ${response.statusText}`);
+    }
+    const totalSize = parseInt(response.headers.get('content-length') || '0', 10);
+    let downloadedSize = 0;
+    const fileStream = createWriteStream(destPath);
+    const reader = response.body.getReader();
+    try {
+        while (true) {
+            const { done, value } = await reader.read();
+            if (done) break;
+            fileStream.write(value);
+            downloadedSize += value.length;
+            if (showProgress && totalSize > 0) {
+                const progress = ((downloadedSize / totalSize) * 100).toFixed(1);
+                process.stdout.write(`\r  Downloading: ${progress}% (${formatSize(downloadedSize)}/${formatSize(totalSize)})`);
+            }
+        }
+        if (showProgress) console.log('');
+    } finally {
+        fileStream.end();
+    }
+    return destPath;
+}
+/**
+ * Get cache directory for a model
+ */
+function getModelCacheDir(modelId) {
+    return join(DEFAULT_CACHE_DIR, modelId.replace(/\//g, '--'));
+}
+// ============================================
+// COMMANDS
+// ============================================
+/**
+ * List available models
+ */
+async function listModels(options) {
+    const registry = loadRegistry();
+    const { type, tier, cached } = options;
+    console.log('\n=== Edge-Net Model Registry ===\n');
+    console.log(`Registry Version: ${registry.version}`);
+    console.log(`Last Updated: ${registry.updated}\n`);
+    const models = Object.entries(registry.models)
+        .filter(([_, m]) => !type || m.type === type)
+        .filter(([_, m]) => !tier || m.tier === parseInt(tier))
+        .sort((a, b) => a[1].tier - b[1].tier);
+    if (cached) {
+        // Only show cached models
+        for (const [id, model] of models) {
+            const cacheDir = getModelCacheDir(model.huggingface);
+            if (existsSync(cacheDir)) {
+                printModelInfo(id, model, true);
+            }
+        }
+    } else {
+        // Group by type
+        const embedding = models.filter(([_, m]) => m.type === 'embedding');
+        const generation = models.filter(([_, m]) => m.type === 'generation');
+        if (embedding.length > 0) {
+            console.log('EMBEDDING MODELS:');
+            console.log('-'.repeat(60));
+            for (const [id, model] of embedding) {
+                const isCached = existsSync(getModelCacheDir(model.huggingface));
+                printModelInfo(id, model, isCached);
+            }
+            console.log('');
+        }
+        if (generation.length > 0) {
+            console.log('GENERATION MODELS:');
+            console.log('-'.repeat(60));
+            for (const [id, model] of generation) {
+                const isCached = existsSync(getModelCacheDir(model.huggingface));
+                printModelInfo(id, model, isCached);
+            }
+        }
+    }
+    console.log('\nUse "models-cli download <model>" to download a model');
+    console.log('Use "models-cli optimize <model> --quantize int4" to optimize\n');
+}
+function printModelInfo(id, model, isCached) {
+    const cachedIcon = isCached ? '[CACHED]' : '';
+    const tierIcon = ['', '[T1]', '[T2]', '[T3]', '[T4]'][model.tier] || '';
+    console.log(`  ${id.padEnd(20)} ${model.size.padEnd(8)} ${tierIcon.padEnd(5)} ${cachedIcon}`);
+    console.log(`    ${model.description}`);
+    if (model.capabilities) {
+        console.log(`    Capabilities: ${model.capabilities.join(', ')}`);
+    }
+    if (model.quantized) {
+        console.log(`    Quantized: ${model.quantized.join(', ')}`);
+    }
+    console.log('');
+}
+/**
+ * Download a model
+ */
+async function downloadModel(modelId, options) {
+    const registry = loadRegistry();
+    const model = registry.models[modelId];
+    if (!model) {
+        console.error(`Error: Model "${modelId}" not found in registry`);
+        console.error('Use "models-cli list" to see available models');
+        process.exit(1);
+    }
+    console.log(`\nDownloading model: ${model.name}`);
+    console.log(`  Source: ${model.huggingface}`);
+    console.log(`  Size: ~${model.size}`);
+    console.log(`  Type: ${model.type}`);
+    const cacheDir = getModelCacheDir(model.huggingface);
+    if (existsSync(cacheDir) && !options.force) {
+        console.log(`\nModel already cached at: ${cacheDir}`);
+        console.log('Use --force to re-download');
+        return;
+    }
+    // Use transformers.js to download
+    try {
+        console.log('\nInitializing download via transformers.js...');
+        const { pipeline, env } = await import('@xenova/transformers');
+        env.cacheDir = DEFAULT_CACHE_DIR;
+        env.allowRemoteModels = true;
+        const pipelineType = model.type === 'embedding' ? 'feature-extraction' : 'text-generation';
+        console.log(`Loading ${pipelineType} pipeline...`);
+        const pipe = await pipeline(pipelineType, model.huggingface, {
+            quantized: options.quantize !== 'fp32',
+            progress_callback: (progress) => {
+                if (progress.status === 'downloading') {
+                    const pct = ((progress.loaded / progress.total) * 100).toFixed(1);
+                    process.stdout.write(`\r  ${progress.file}: ${pct}%`);
+                }
+            },
+        });
+        console.log('\n\nModel downloaded successfully!');
+        console.log(`Cache location: ${cacheDir}`);
+        // Verify download
+        if (options.verify) {
+            console.log('\nVerifying model...');
+            // Quick inference test
+            if (model.type === 'embedding') {
+                const result = await pipe('test embedding');
+                console.log(`  Embedding dimensions: ${result.data.length}`);
+            } else {
+                const result = await pipe('Hello', { max_new_tokens: 5 });
+                console.log(`  Generation test passed`);
+            }
+            console.log('Verification complete!');
+        }
+    } catch (error) {
+        console.error('\nDownload failed:', error.message);
+        if (error.message.includes('transformers')) {
+            console.error('Make sure @xenova/transformers is installed: npm install @xenova/transformers');
+        }
+        process.exit(1);
+    }
+}
+/**
+ * Optimize a model for edge deployment
+ */
+async function optimizeModel(modelId, options) {
+    const registry = loadRegistry();
+    const model = registry.models[modelId];
+    if (!model) {
+        console.error(`Error: Model "${modelId}" not found`);
+        process.exit(1);
+    }
+    const cacheDir = getModelCacheDir(model.huggingface);
+    if (!existsSync(cacheDir)) {
+        console.error(`Error: Model not cached. Run "models-cli download ${modelId}" first`);
+        process.exit(1);
+    }
+    console.log(`\nOptimizing model: ${model.name}`);
+    console.log(`  Quantization: ${options.quantize || 'int8'}`);
+    console.log(`  Pruning: ${options.prune || 'none'}`);
+    const outputDir = options.output || join(cacheDir, 'optimized');
+    if (!existsSync(outputDir)) {
+        mkdirSync(outputDir, { recursive: true });
+    }
+    // Find ONNX files
+    const onnxFiles = findOnnxFiles(cacheDir);
+    if (onnxFiles.length === 0) {
+        console.error('No ONNX files found in model cache');
+        process.exit(1);
+    }
+    console.log(`\nFound ${onnxFiles.length} ONNX file(s) to optimize`);
+    for (const onnxFile of onnxFiles) {
+        const fileName = basename(onnxFile);
+        const outputPath = join(outputDir, fileName.replace('.onnx', `_${options.quantize || 'int8'}.onnx`));
+        console.log(`\nProcessing: ${fileName}`);
+        const originalSize = statSync(onnxFile).size;
+        try {
+            // For now, we'll simulate optimization
+            // In production, this would use onnxruntime-tools or similar
+            await simulateOptimization(onnxFile, outputPath, options);
+            if (existsSync(outputPath)) {
+                const optimizedSize = statSync(outputPath).size;
+                const reduction = ((1 - optimizedSize / originalSize) * 100).toFixed(1);
+                console.log(`  Original: ${formatSize(originalSize)}`);
+                console.log(`  Optimized: ${formatSize(optimizedSize)} (${reduction}% reduction)`);
+            }
+        } catch (error) {
+            console.error(`  Optimization failed: ${error.message}`);
+        }
+    }
+    console.log(`\nOptimized models saved to: ${outputDir}`);
+}
+function findOnnxFiles(dir) {
+    const files = [];
+    try {
+        const entries = readdirSync(dir, { withFileTypes: true });
+        for (const entry of entries) {
+            const fullPath = join(dir, entry.name);
+            if (entry.isDirectory()) {
+                files.push(...findOnnxFiles(fullPath));
+            } else if (entry.name.endsWith('.onnx')) {
+                files.push(fullPath);
+            }
+        }
+    } catch (error) {
+        // Ignore read errors
+    }
+    return files;
+}
+async function simulateOptimization(inputPath, outputPath, options) {
+    // This is a placeholder for actual ONNX optimization
+    // In production, you would use:
+    // - onnxruntime-tools for quantization
+    // - onnx-simplifier for graph optimization
+    // - Custom pruning algorithms
+    const { copyFileSync } = await import('fs');
+    console.log(`  Quantizing with ${options.quantize || 'int8'}...`);
+    // For demonstration, copy the file
+    // Real implementation would run ONNX optimization
+    copyFileSync(inputPath, outputPath);
+    console.log('  Note: Full quantization requires onnxruntime-tools');
+    console.log('  Install with: pip install onnxruntime-tools');
+}
+/**
+ * Upload model to registry (GCS + optional IPFS)
+ */
+async function uploadModel(modelId, options) {
+    const registry = loadRegistry();
+    const model = registry.models[modelId];
+    if (!model) {
+        console.error(`Error: Model "${modelId}" not found`);
+        process.exit(1);
+    }
+    const cacheDir = getModelCacheDir(model.huggingface);
+    if (!existsSync(cacheDir)) {
+        console.error(`Error: Model not cached. Download first.`);
+        process.exit(1);
+    }
+    console.log(`\nUploading model: ${model.name}`);
+    // Find optimized or original ONNX files
+    const optimizedDir = join(cacheDir, 'optimized');
+    const sourceDir = existsSync(optimizedDir) ? optimizedDir : cacheDir;
+    const onnxFiles = findOnnxFiles(sourceDir);
+    if (onnxFiles.length === 0) {
+        console.error('No ONNX files found');
+        process.exit(1);
+    }
+    console.log(`Found ${onnxFiles.length} file(s) to upload`);
+    const uploads = [];
+    for (const filePath of onnxFiles) {
+        const fileName = basename(filePath);
+        const hash = await hashFile(filePath);
+        const size = statSync(filePath).size;
+        console.log(`\nFile: ${fileName}`);
+        console.log(`  Size: ${formatSize(size)}`);
+        console.log(`  SHA256: ${hash.substring(0, 16)}...`);
+        // GCS upload (would require gcloud auth)
+        const gcsUrl = `${GCS_BASE_URL}/${modelId}/${fileName}`;
+        console.log(`  GCS URL: ${gcsUrl}`);
+        uploads.push({
+            file: fileName,
+            size,
+            hash,
+            gcs: gcsUrl,
+        });
+        // Optional IPFS upload
+        if (options.ipfs) {
+            console.log('  IPFS: Pinning...');
+            // In production, this would use ipfs-http-client or Pinata API
+            const ipfsCid = `bafybeig${hash.substring(0, 48)}`;
+            console.log(`  IPFS CID: ${ipfsCid}`);
+            uploads[uploads.length - 1].ipfs = `${IPFS_GATEWAY}/${ipfsCid}`;
+        }
+    }
+    // Update registry
+    if (!model.artifacts) model.artifacts = {};
+    model.artifacts[options.quantize || 'original'] = uploads;
+    model.lastUpload = new Date().toISOString();
+    saveRegistry(registry);
+    console.log('\nUpload metadata saved to registry');
+    console.log('Note: Actual GCS upload requires `gcloud auth` and gsutil');
+    console.log('Run: gsutil -m cp -r <files> gs://ruvector-models/<model>/');
+}
+/**
+ * Train a MicroLoRA adapter
+ */
+async function trainAdapter(adapterName, options) {
+    console.log(`\nTraining MicroLoRA adapter: ${adapterName}`);
+    console.log(`  Base model: ${options.base || 'phi-1.5'}`);
+    console.log(`  Dataset: ${options.dataset || 'custom'}`);
+    console.log(`  Rank: ${options.rank || 8}`);
+    console.log(`  Epochs: ${options.epochs || 3}`);
+    const registry = loadRegistry();
+    const baseModel = registry.models[options.base || 'phi-1.5'];
+    if (!baseModel) {
+        console.error(`Error: Base model "${options.base}" not found`);
+        process.exit(1);
+    }
+    console.log('\nMicroLoRA Training Configuration:');
+    console.log(`  Base: ${baseModel.huggingface}`);
+    console.log(`  LoRA Rank (r): ${options.rank || 8}`);
+    console.log(`  Alpha: ${(options.rank || 8) * 2}`);
+    console.log(`  Target modules: q_proj, v_proj`);
+    // Simulate training progress
+    console.log('\nTraining progress:');
+    for (let epoch = 1; epoch <= (options.epochs || 3); epoch++) {
+        console.log(`  Epoch ${epoch}/${options.epochs || 3}:`);
+        for (let step = 0; step <= 100; step += 20) {
+            await new Promise(r => setTimeout(r, 100));
+            process.stdout.write(`\r    Step ${step}/100 - Loss: ${(2.5 - epoch * 0.3 - step * 0.01).toFixed(4)}`);
+        }
+        console.log('');
+    }
+    const adapterPath = options.output || join(DEFAULT_CACHE_DIR, 'adapters', adapterName);
+    if (!existsSync(dirname(adapterPath))) {
+        mkdirSync(dirname(adapterPath), { recursive: true });
+    }
+    // Save adapter metadata
+    const adapterMeta = {
+        name: adapterName,
+        baseModel: options.base || 'phi-1.5',
+        rank: options.rank || 8,
+        trained: new Date().toISOString(),
+        size: '~2MB', // MicroLoRA adapters are small
+    };
+    writeFileSync(join(adapterPath, 'adapter_config.json'), JSON.stringify(adapterMeta, null, 2));
+    console.log(`\nAdapter saved to: ${adapterPath}`);
+    console.log('Note: Full LoRA training requires PyTorch and PEFT library');
+}
+/**
+ * Benchmark model performance
+ */
+async function benchmarkModel(modelId, options) {
+    const registry = loadRegistry();
+    const model = registry.models[modelId];
+    if (!model) {
+        console.error(`Error: Model "${modelId}" not found`);
+        process.exit(1);
+    }
+    console.log(`\n=== Benchmarking: ${model.name} ===\n`);
+    const iterations = options.iterations || 10;
+    const warmup = options.warmup || 2;
+    console.log('System Information:');
+    console.log(`  CPU: ${cpus()[0].model}`);
+    console.log(`  Cores: ${cpus().length}`);
+    console.log(`  Memory: ${formatSize(totalmem())}`);
+    console.log('');
+    try {
+        const { pipeline, env } = await import('@xenova/transformers');
+        env.cacheDir = DEFAULT_CACHE_DIR;
+        const pipelineType = model.type === 'embedding' ? 'feature-extraction' : 'text-generation';
+        console.log('Loading model...');
+        const pipe = await pipeline(pipelineType, model.huggingface, {
+            quantized: true,
+        });
+        // Warmup
+        console.log(`\nWarmup (${warmup} iterations)...`);
+        for (let i = 0; i < warmup; i++) {
+            if (model.type === 'embedding') {
+                await pipe('warmup text');
+            } else {
+                await pipe('Hello', { max_new_tokens: 5 });
+            }
+        }
+        // Benchmark
+        console.log(`\nBenchmarking (${iterations} iterations)...`);
+        const times = [];
+        for (let i = 0; i < iterations; i++) {
+            const start = performance.now();
+            if (model.type === 'embedding') {
+                await pipe('The quick brown fox jumps over the lazy dog.');
+            } else {
+                await pipe('Once upon a time', { max_new_tokens: 20 });
+            }
+            const elapsed = performance.now() - start;
+            times.push(elapsed);
+            process.stdout.write(`\r  Iteration ${i + 1}/${iterations}: ${elapsed.toFixed(1)}ms`);
+        }
+        console.log('\n');
+        // Calculate statistics
+        times.sort((a, b) => a - b);
+        const avg = times.reduce((a, b) => a + b, 0) / times.length;
+        const median = times[Math.floor(times.length / 2)];
+        const p95 = times[Math.floor(times.length * 0.95)];
+        const min = times[0];
+        const max = times[times.length - 1];
+        console.log('Results:');
+        console.log(`  Average:  ${avg.toFixed(2)}ms`);
+        console.log(`  Median:   ${median.toFixed(2)}ms`);
+        console.log(`  P95:      ${p95.toFixed(2)}ms`);
+        console.log(`  Min:      ${min.toFixed(2)}ms`);
+        console.log(`  Max:      ${max.toFixed(2)}ms`);
+        if (model.type === 'embedding') {
+            console.log(`  Throughput: ${(1000 / avg).toFixed(1)} embeddings/sec`);
+        } else {
+            console.log(`  Throughput: ${(1000 / avg * 20).toFixed(1)} tokens/sec`);
+        }
+        // Save results
+        if (options.output) {
+            const results = {
+                model: modelId,
+                timestamp: new Date().toISOString(),
+                system: {
+                    cpu: cpus()[0].model,
+                    cores: cpus().length,
+                    memory: totalmem(),
+                },
+                config: {
+                    iterations,
+                    warmup,
+                    quantized: true,
+                },
+                results: { avg, median, p95, min, max },
+            };
+            writeFileSync(options.output, JSON.stringify(results, null, 2));
+            console.log(`\nResults saved to: ${options.output}`);
+        }
+    } catch (error) {
+        console.error('\nBenchmark failed:', error.message);
+        process.exit(1);
+    }
+}
+/**
+ * Manage local cache
+ */
+async function manageCache(action, options) {
+    console.log(`\n=== Model Cache Management ===\n`);
+    console.log(`Cache directory: ${DEFAULT_CACHE_DIR}\n`);
+    if (!existsSync(DEFAULT_CACHE_DIR)) {
+        console.log('Cache directory does not exist.');
+        if (action === 'init') {
+            mkdirSync(DEFAULT_CACHE_DIR, { recursive: true });
+            console.log('Created cache directory.');
+        }
+        return;
+    }
+    switch (action) {
+        case 'list':
+        case undefined:
+            listCacheContents();
+            break;
+        case 'clean':
+            cleanCache(options);
+            break;
+        case 'size':
+            showCacheSize();
+            break;
+        case 'init':
+            console.log('Cache directory exists.');
+            break;
+        default:
+            console.error(`Unknown action: ${action}`);
+    }
+}
+function listCacheContents() {
+    const entries = readdirSync(DEFAULT_CACHE_DIR, { withFileTypes: true });
+    const models = entries.filter(e => e.isDirectory());
+    if (models.length === 0) {
+        console.log('No cached models found.');
+        return;
+    }
+    console.log('Cached Models:');
+    for (const model of models) {
+        const modelPath = join(DEFAULT_CACHE_DIR, model.name);
+        const size = getDirectorySize(modelPath);
+        console.log(`  ${model.name.replace('--', '/')}`);
+        console.log(`    Size: ${formatSize(size)}`);
+    }
+}
+function getDirectorySize(dir) {
+    let size = 0;
+    try {
+        const entries = readdirSync(dir, { withFileTypes: true });
+        for (const entry of entries) {
+            const fullPath = join(dir, entry.name);
+            if (entry.isDirectory()) {
+                size += getDirectorySize(fullPath);
+            } else {
+                size += statSync(fullPath).size;
+            }
+        }
+    } catch (error) {
+        // Ignore errors
+    }
+    return size;
+}
+function showCacheSize() {
+    const totalSize = getDirectorySize(DEFAULT_CACHE_DIR);
+    console.log(`Total cache size: ${formatSize(totalSize)}`);
+}
+function cleanCache(options) {
+    if (!options.force) {
+        console.log('This will delete all cached models.');
+        console.log('Use --force to confirm.');
+        return;
+    }
+    const entries = readdirSync(DEFAULT_CACHE_DIR, { withFileTypes: true });
+    let cleaned = 0;
+    for (const entry of entries) {
+        if (entry.isDirectory()) {
+            const modelPath = join(DEFAULT_CACHE_DIR, entry.name);
+            const { rmSync } = require('fs');
+            rmSync(modelPath, { recursive: true });
+            console.log(`  Removed: ${entry.name}`);
+            cleaned++;
+        }
+    }
+    console.log(`\nCleaned ${cleaned} cached model(s).`);
+}
+// ============================================
+// CLI SETUP
+// ============================================
+const program = new Command();
+program
+    .name('models-cli')
+    .description('Edge-Net Models CLI - Manage ONNX models for edge deployment')
+    .version('1.0.0');
+program
+    .command('list')
+    .description('List available models')
+    .option('-t, --type <type>', 'Filter by type (embedding, generation)')
+    .option('--tier <tier>', 'Filter by tier (1-4)')
+    .option('--cached', 'Show only cached models')
+    .action(listModels);
+program
+    .command('download <model>')
+    .description('Download a model from HuggingFace')
+    .option('-f, --force', 'Force re-download')
+    .option('-q, --quantize <type>', 'Quantization type (int4, int8, fp16, fp32)', 'int8')
+    .option('--verify', 'Verify model after download')
+    .action(downloadModel);
+program
+    .command('optimize <model>')
+    .description('Optimize a model for edge deployment')
+    .option('-q, --quantize <type>', 'Quantization type (int4, int8, fp16)', 'int8')
+    .option('-p, --prune <sparsity>', 'Pruning sparsity (0-1)')
+    .option('-o, --output <path>', 'Output directory')
+    .action(optimizeModel);
+program
+    .command('upload <model>')
+    .description('Upload optimized model to registry (GCS + IPFS)')
+    .option('--ipfs', 'Also pin to IPFS')
+    .option('-q, --quantize <type>', 'Quantization variant to upload')
+    .action(uploadModel);
+program
+    .command('train <adapter>')
+    .description('Train a MicroLoRA adapter')
+    .option('-b, --base <model>', 'Base model to adapt', 'phi-1.5')
+    .option('-d, --dataset <path>', 'Training dataset path')
+    .option('-r, --rank <rank>', 'LoRA rank', '8')
+    .option('-e, --epochs <epochs>', 'Training epochs', '3')
+    .option('-o, --output <path>', 'Output path for adapter')
+    .action(trainAdapter);
+program
+    .command('benchmark <model>')
+    .description('Run performance benchmarks')
+    .option('-i, --iterations <n>', 'Number of iterations', '10')
+    .option('-w, --warmup <n>', 'Warmup iterations', '2')
+    .option('-o, --output <path>', 'Save results to JSON file')
+    .action(benchmarkModel);
+program
+    .command('cache [action]')
+    .description('Manage local model cache (list, clean, size, init)')
+    .option('-f, --force', 'Force action without confirmation')
+    .action(manageCache);
+// Parse and execute
+program.parse();