@auxot/worker-cli 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +5897 -297
- package/dist/index.js.map +7 -0
- package/package.json +4 -4
- package/dist/capabilities.js +0 -125
- package/dist/debug.js +0 -54
- package/dist/gpu-detection.js +0 -171
- package/dist/gpu-id.js +0 -48
- package/dist/llama-binary.js +0 -287
- package/dist/llama-process.js +0 -203
- package/dist/llama.js +0 -207
- package/dist/model-downloader.js +0 -145
- package/dist/model-resolver.js +0 -80
- package/dist/policy-validator.js +0 -242
- package/dist/types.js +0 -4
- package/dist/websocket.js +0 -433
package/dist/model-downloader.js
DELETED
|
@@ -1,145 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Model Downloader
|
|
3
|
-
*
|
|
4
|
-
* Downloads GGUF model files from Hugging Face.
|
|
5
|
-
*
|
|
6
|
-
* Features:
|
|
7
|
-
* - Progress reporting
|
|
8
|
-
* - Resumable downloads (HTTP Range requests)
|
|
9
|
-
* - Integrity verification (file size)
|
|
10
|
-
* - Caching (checks if file exists)
|
|
11
|
-
*/
|
|
12
|
-
import { createWriteStream, existsSync, statSync } from 'node:fs';
|
|
13
|
-
import { mkdir } from 'node:fs/promises';
|
|
14
|
-
import { dirname } from 'node:path';
|
|
15
|
-
/**
 * Download a GGUF model file from Hugging Face.
 *
 * Supports resumable downloads via HTTP Range requests, simple caching
 * (a file whose size matches the registry entry is reused as-is), and
 * progress reporting through an optional callback.
 *
 * @param entry Model registry entry (reads .huggingface_id, .file_name, .file_size_bytes)
 * @param outputPath Full path where the file should be saved
 * @param onProgress Optional progress callback (bytes downloaded, total bytes)
 * @returns Path to downloaded file
 * @throws Error when the HTTP request fails or the byte count is incomplete
 */
export async function downloadModel(entry, outputPath, onProgress) {
    // Create output directory if it doesn't exist
    const outputDir = dirname(outputPath);
    if (!existsSync(outputDir)) {
        await mkdir(outputDir, { recursive: true });
    }
    if (existsSync(outputPath)) {
        const stats = statSync(outputPath);
        // If file size matches expected size, skip download
        if (entry.file_size_bytes && stats.size === entry.file_size_bytes) {
            console.log(` ✓ Model already downloaded (${formatBytes(stats.size)})`);
            return outputPath;
        }
        // A file LARGER than expected is corrupt: delete and re-download.
        // A SMALLER file is a partial download and is kept so the Range
        // request below can resume it. (Previously any mismatch was
        // deleted, which made the resume path unreachable whenever the
        // expected size was known.)
        if (entry.file_size_bytes && stats.size > entry.file_size_bytes) {
            console.log(` ⊘ Existing file size mismatch (${formatBytes(stats.size)} vs ${formatBytes(entry.file_size_bytes)})`);
            console.log(` ⊘ Re-downloading...`);
            const { unlink } = await import('node:fs/promises');
            await unlink(outputPath);
        }
    }
    // Build Hugging Face download URL
    // Format: https://huggingface.co/{repo_id}/resolve/main/{file_name}
    const downloadUrl = `https://huggingface.co/${entry.huggingface_id}/resolve/main/${entry.file_name}`;
    console.log(` Downloading from: ${entry.huggingface_id}`);
    console.log(` File: ${entry.file_name}`);
    if (entry.file_size_bytes) {
        console.log(` Size: ${formatBytes(entry.file_size_bytes)}`);
    }
    const totalBytes = entry.file_size_bytes || 0;
    // Resume from an existing partial file if present
    let startByte = 0;
    if (existsSync(outputPath)) {
        startByte = statSync(outputPath).size;
        if (startByte > 0) {
            console.log(` Resuming from ${formatBytes(startByte)}...`);
        }
    }
    // Seed the counter with the resumed offset even when the total size is
    // unknown, so the completeness check below doesn't under-count.
    let downloadedBytes = startByte;
    const response = await fetch(downloadUrl, {
        headers: startByte > 0 ? {
            'Range': `bytes=${startByte}-`,
        } : {},
    });
    if (!response.ok) {
        if (response.status === 416) {
            // Range not satisfiable - file already fully downloaded
            if (existsSync(outputPath)) {
                console.log(` ✓ Download complete`);
                return outputPath;
            }
        }
        throw new Error(`Download failed: ${response.status} ${response.statusText}`);
    }
    // If we asked for a range but the server sent the whole file (200
    // instead of 206 Partial Content), appending would corrupt the output;
    // restart from scratch instead.
    if (startByte > 0 && response.status !== 206) {
        startByte = 0;
        downloadedBytes = 0;
    }
    const contentLength = response.headers.get('content-length');
    const totalSize = contentLength ? parseInt(contentLength, 10) + startByte : totalBytes;
    // Open file for writing (append only when genuinely resuming)
    const fileStream = createWriteStream(outputPath, { flags: startByte > 0 ? 'a' : 'w' });
    // Stream response to file
    const reader = response.body?.getReader();
    if (!reader) {
        throw new Error('Response body is not readable');
    }
    try {
        while (true) {
            const { done, value } = await reader.read();
            if (done) {
                break;
            }
            // Respect backpressure: wait for 'drain' when the stream's
            // buffer is full instead of buffering the whole file in memory.
            if (!fileStream.write(value)) {
                await new Promise((resolve) => fileStream.once('drain', resolve));
            }
            downloadedBytes += value.length;
            // Report progress
            if (onProgress) {
                onProgress(downloadedBytes, totalSize);
            }
            else if (totalSize > 0) {
                // Simple progress log every 10MB
                if (downloadedBytes % (10 * 1024 * 1024) < value.length) {
                    const percent = ((downloadedBytes / totalSize) * 100).toFixed(1);
                    process.stdout.write(`\r Progress: ${percent}% (${formatBytes(downloadedBytes)} / ${formatBytes(totalSize)})`);
                }
            }
        }
        fileStream.end();
        // Wait for file stream to finish
        await new Promise((resolve, reject) => {
            fileStream.on('finish', resolve);
            fileStream.on('error', reject);
        });
        if (totalSize > 0 && downloadedBytes !== totalSize) {
            throw new Error(`Download incomplete: ${downloadedBytes} bytes downloaded, expected ${totalSize}`);
        }
        // Clear the in-place progress line before the final message
        // (the original had identical if/else branches here).
        process.stdout.write('\r');
        console.log(` ✓ Download complete (${formatBytes(downloadedBytes)})`);
        return outputPath;
    }
    catch (error) {
        fileStream.destroy();
        throw error;
    }
}
|
135
|
-
/**
 * Render a byte count as a human-readable string (e.g. "1.5 KB").
 */
function formatBytes(bytes) {
    if (bytes === 0) {
        return '0 B';
    }
    const UNIT = 1024;
    const LABELS = ['B', 'KB', 'MB', 'GB', 'TB'];
    const exponent = Math.floor(Math.log(bytes) / Math.log(UNIT));
    const scaled = bytes / UNIT ** exponent;
    return `${scaled.toFixed(1)} ${LABELS[exponent]}`;
}
|
package/dist/model-resolver.js
DELETED
|
@@ -1,80 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Model Path Resolver
|
|
3
|
-
*
|
|
4
|
-
* Resolves model paths from policy using the model registry.
|
|
5
|
-
* Downloads models if not cached.
|
|
6
|
-
*/
|
|
7
|
-
import { loadRegistry, getModels } from '@auxot/model-registry';
|
|
8
|
-
import { join } from 'path';
|
|
9
|
-
import { homedir } from 'os';
|
|
10
|
-
import { downloadModel } from './model-downloader.js';
|
|
11
|
-
import { existsSync } from 'node:fs';
|
|
12
|
-
/**
 * Look up the registry entry matching a policy's model_name and
 * quantization (quantization is compared case-insensitively).
 *
 * @param policy GPU key policy
 * @returns The matching registry entry, or null when none exists
 */
export function getModelFromPolicy(policy) {
    const wantedQuant = policy.quantization.toLowerCase();
    const candidates = getModels(loadRegistry(), {
        model_name: policy.model_name,
    });
    const entry = candidates.find((candidate) => candidate.quantization.toLowerCase() === wantedQuant);
    return entry ?? null;
}
|
|
25
|
-
/**
 * Resolve the local path for the model required by a policy, downloading
 * it first when the cached copy is missing or has an unexpected size.
 *
 * @param policy GPU key policy
 * @param onProgress Optional progress callback (downloaded, total)
 * @returns Model file path, or null if model not found in registry
 * @throws Rethrows any download failure after logging it
 */
export async function ensureModelDownloaded(policy, onProgress) {
    const entry = getModelFromPolicy(policy);
    if (!entry) {
        console.error(` ✗ Model not found in registry: ${policy.model_name} (${policy.quantization})`);
        return null;
    }
    // Models live under $AUXOT_MODELS_DIR (default ~/.auxot/models),
    // one directory per Hugging Face repo ("/" flattened to "_").
    const baseDir = process.env.AUXOT_MODELS_DIR || join(homedir(), '.auxot', 'models');
    const repoDir = join(baseDir, entry.huggingface_id.replace('/', '_'));
    const targetPath = join(repoDir, entry.file_name);
    // Reuse the cached file only when its size matches the registry entry.
    if (existsSync(targetPath)) {
        const { statSync } = await import('node:fs');
        const onDisk = statSync(targetPath);
        if (entry.file_size_bytes && onDisk.size === entry.file_size_bytes) {
            return targetPath;
        }
    }
    console.log(` Downloading model: ${entry.model_name} (${entry.quantization})`);
    try {
        await downloadModel(entry, targetPath, onProgress);
        return targetPath;
    }
    catch (error) {
        console.error(` ✗ Download failed:`, error);
        throw error;
    }
}
|
|
65
|
-
/**
 * Compute the local path for a policy's model without downloading it
 * (useful for existence checks).
 *
 * @param policy GPU key policy
 * @returns Model file path, or null if model not found in registry
 */
export function getModelPath(policy) {
    const entry = getModelFromPolicy(policy);
    if (!entry) {
        return null;
    }
    const baseDir = process.env.AUXOT_MODELS_DIR || join(homedir(), '.auxot', 'models');
    return join(baseDir, entry.huggingface_id.replace('/', '_'), entry.file_name);
}
|
package/dist/policy-validator.js
DELETED
|
@@ -1,242 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Policy Validation
|
|
3
|
-
*
|
|
4
|
-
* Validates worker capabilities against GPU key policy.
|
|
5
|
-
*/
|
|
6
|
-
/**
 * Normalize a model name for comparison.
 *
 * Reduces an identifier to "Base Version[-Variant]" by stripping paths,
 * "-GGUF"/".gguf" suffixes, multi-file shard markers, quantization tags,
 * parameter/expert counts, then canonicalizing the base family name.
 *
 * Fix: quantization/parameter/expert/date suffixes are now stripped
 * BEFORE variant detection. Previously a discovered name such as
 * "Qwen2.5-7B-Instruct-Q4_K_M.gguf" never matched "-Instruct$" (the
 * quant tag hid the suffix) and normalized to "Qwen 2.5", while the
 * policy's "Qwen2.5-7B-Instruct" normalized to "Qwen 2.5-Instruct",
 * producing a spurious mismatch.
 */
function normalizeModelName(name) {
    // Remove common registry/file suffixes
    let normalized = name
        .replace(/-GGUF$/i, '')
        .replace(/\.gguf$/i, '');
    // Remove path components (keep only filename)
    const parts = normalized.split('/');
    if (parts.length > 1) {
        normalized = parts[parts.length - 1];
    }
    // Remove multi-GGUF file patterns (e.g., "-00001-of-00003")
    normalized = normalized.replace(/-\d{5}-of-\d{5}$/i, '');
    // Extract version number (e.g., "3", "2.5", "4", "3.3")
    let version = '';
    const versionMatch = normalized.match(/^(Qwen|Llama|Ministral|Devstral|Gemma|DeepSeek|Granite|GPT-OSS)[-_]?(\d+(?:\.\d+)?)/i);
    if (versionMatch) {
        version = versionMatch[2];
        normalized = normalized.replace(new RegExp(`^${versionMatch[1]}[-_]?${versionMatch[2]}`, 'i'), versionMatch[1]);
    }
    // Strip quantization tags (Q4_K_M, ...), parameter counts (7B, ...),
    // expert counts (A22B, 128E, ...) and trailing 4+ digit release
    // numbers FIRST, so variant suffixes like "-Instruct" become terminal
    // and can be recognized even on quantized file names.
    normalized = normalized.replace(/[-_]Q\d+[_\w]*/i, '');
    normalized = normalized.replace(/[-_](\d+(?:\.\d+)?[BMK])(?![0-9])/i, '');
    normalized = normalized.replace(/[-_]([AE]\d+[BMK])/i, '');
    normalized = normalized.replace(/[-_](\d+[AE])(?![0-9])/i, '');
    normalized = normalized.replace(/[-_](\d{4,})$/i, '');
    // Extract variant: composite patterns first so "-VL-Instruct" is not
    // reduced to just "-Instruct".
    let variant = '';
    const compositePatterns = [
        /-VL-Instruct$/i,
        /-VL-Thinking$/i,
        /-VL-Chat$/i,
        /-VL-Coder$/i,
        /-VL-Code$/i,
    ];
    for (const pattern of compositePatterns) {
        const match = normalized.match(pattern);
        if (match) {
            variant = match[0].replace(/^-/i, '');
            normalized = normalized.replace(pattern, '');
            break;
        }
    }
    if (!variant) {
        const singlePatterns = [
            /-Instruct$/i,
            /-Thinking$/i,
            /-Chat$/i,
            /-Coder$/i,
            /-Code$/i,
            /-VL$/i,
            /-Vision$/i,
            /-Maverick$/i,
            /-Scout$/i,
            /-Reasoning$/i,
        ];
        for (const pattern of singlePatterns) {
            const match = normalized.match(pattern);
            if (match) {
                variant = match[0].replace(/^-/i, '');
                normalized = normalized.replace(pattern, '');
                break;
            }
        }
    }
    // Canonicalize the base family name
    const nameLower = normalized.toLowerCase();
    let base = '';
    if (nameLower.startsWith('qwen')) {
        base = 'Qwen';
    }
    else if (nameLower.startsWith('llama') || nameLower.startsWith('meta-llama')) {
        base = 'Llama';
    }
    else if (nameLower.startsWith('ministral') || nameLower.startsWith('devstral')) {
        base = 'Ministral';
    }
    else if (nameLower.startsWith('gemma')) {
        base = 'Gemma';
    }
    else if (nameLower.startsWith('deepseek')) {
        base = 'DeepSeek';
    }
    else if (nameLower.startsWith('granite')) {
        base = 'Granite';
    }
    else if (nameLower.startsWith('gpt-oss') || nameLower.startsWith('gptoss')) {
        base = 'GPT-OSS';
    }
    else {
        base = normalized.trim();
    }
    // Combine base + version + variant
    let result = base;
    if (version) {
        result += ` ${version}`;
    }
    if (variant) {
        result += `-${variant}`;
    }
    return result.trim();
}
|
|
114
|
-
/**
 * Extract the quantization tag from discovered capabilities.
 *
 * Scans the model name for known GGUF quantization markers. The list is
 * ordered most-specific-first so e.g. "BF16" is not misreported as "F16"
 * (the original list checked F16 first, and was also missing the common
 * Q4_K_M / Q5_K_M / Q2_K and legacy Q*_0 / Q*_1 formats entirely, which
 * made the quantization check silently pass for those models).
 *
 * @param capabilities Discovered capabilities (reads .model)
 * @returns The quantization tag, or null when none is found
 */
function extractQuantization(capabilities) {
    // Try to extract from model name
    const model = capabilities.model || '';
    // Ordered most-specific-first to avoid substring false positives.
    const quantPatterns = [
        'Q3_K_L', 'Q3_K_M', 'Q3_K_S',
        'Q4_K_M', 'Q4_K_S', 'Q5_K_M', 'Q5_K_S',
        'Q2_K', 'Q6_K', 'Q8_0', 'Q8_K',
        'Q4_0', 'Q4_1', 'Q5_0', 'Q5_1',
        'BF16', 'F16', 'F32',
    ];
    for (const pattern of quantPatterns) {
        if (model.includes(pattern)) {
            return pattern;
        }
    }
    return null;
}
|
|
132
|
-
/**
 * Infer worker capabilities ('vision', 'code', 'embedding') from keywords
 * in the model name; falls back to ['chat'] when none are present.
 */
function inferCapabilitiesFromModel(modelName) {
    const lowered = modelName.toLowerCase();
    const hasAny = (...needles) => needles.some((needle) => lowered.includes(needle));
    const found = [];
    if (hasAny('vision', 'multimodal', 'vl-')) {
        found.push('vision');
    }
    if (hasAny('code', 'coder', 'starcoder')) {
        found.push('code');
    }
    if (hasAny('embed', 'embedding')) {
        found.push('embedding');
    }
    if (found.length === 0) {
        found.push('chat');
    }
    return [...new Set(found)];
}
|
|
152
|
-
/**
|
|
153
|
-
* Validate worker capabilities against policy
|
|
154
|
-
*/
|
|
155
|
-
/**
 * Parse a parameter-count string into a number (e.g., "7B" -> 7e9,
 * "1.5M" -> 1.5e6). Returns 0 for unrecognized formats.
 */
function parseParameters(parameters) {
    const match = parameters.match(/^(\d+(?:\.\d+)?)(B|M|K)$/i);
    if (!match) {
        return 0;
    }
    // The regex guarantees the unit is one of B/M/K.
    const multipliers = { B: 1e9, M: 1e6, K: 1e3 };
    return parseFloat(match[1]) * multipliers[match[2].toUpperCase()];
}
|
|
172
|
-
/**
 * Validate discovered worker capabilities against a GPU key policy.
 *
 * Checks, in order: normalized model-name match, minimum context size,
 * quantization tag (only when one can be extracted from the discovered
 * model name), required capabilities inferred from the model name,
 * parameter count (only when both sides declare one), and model family
 * (Dense vs MoE, inferred heuristically from the model name).
 *
 * @param discoveredCapabilities Capabilities reported by the worker
 *        (reads .model, .ctx_size, .parameters)
 * @param policy GPU key policy (reads .model_name, .context_size,
 *        .quantization, .capabilities, .parameters, .family)
 * @returns { valid, errors, warnings? } - valid is true when no errors
 *          were collected; warnings is currently always undefined.
 */
export function validatePolicy(discoveredCapabilities, policy) {
    const errors = [];
    const warnings = []; // Declared but currently unused (reserved for future validation warnings)
    // 0. Check model size limit for CPU mode (warning only - binary download already handles this)
    // Note: This is just a warning since the policy comes from the server
    // The actual binary selection (GPU vs CPU) happens in llama-binary.ts
    // TODO: Add warnings here if needed in the future
    // 1. Model name match (normalized comparison)
    // Normalize both discovered and policy model names to base + version + variant
    const discoveredNormalized = normalizeModelName(discoveredCapabilities.model || '');
    const policyNormalized = normalizeModelName(policy.model_name);
    if (discoveredNormalized !== policyNormalized) {
        errors.push(`Model name mismatch: discovered "${discoveredCapabilities.model}" (normalized: "${discoveredNormalized}") ` +
            `does not match policy "${policy.model_name}" (normalized: "${policyNormalized}")`);
    }
    // 2. Context size >= policy.context_size
    // Missing ctx_size is treated as 0 and therefore fails any positive requirement.
    const discoveredCtxSize = discoveredCapabilities.ctx_size || 0;
    if (discoveredCtxSize < policy.context_size) {
        errors.push(`Context size insufficient: discovered ${discoveredCtxSize} < required ${policy.context_size}`);
    }
    // 3. Quantization match (if specified in policy)
    // NOTE(review): extractQuantization returns null for unrecognized tags,
    // in which case this check is silently skipped.
    const discoveredQuant = extractQuantization(discoveredCapabilities);
    if (discoveredQuant && discoveredQuant !== policy.quantization) {
        errors.push(`Quantization mismatch: discovered "${discoveredQuant}" does not match policy "${policy.quantization}"`);
    }
    // 4. Capabilities match (worker must have all required capabilities)
    const discoveredCaps = inferCapabilitiesFromModel(discoveredCapabilities.model || '');
    const missingCaps = policy.capabilities.filter((requiredCap) => !discoveredCaps.includes(requiredCap));
    if (missingCaps.length > 0) {
        errors.push(`Missing required capabilities: ${missingCaps.join(', ')}. ` +
            `Discovered: ${discoveredCaps.join(', ')}. ` +
            `Required: ${policy.capabilities.join(', ')}`);
    }
    // 5. Parameters match (if specified in policy)
    // Compared as raw strings (e.g. "7B"); skipped when the worker reports none.
    if (policy.parameters) {
        const discoveredParams = discoveredCapabilities.parameters;
        if (discoveredParams && discoveredParams !== policy.parameters) {
            errors.push(`Parameters mismatch: discovered "${discoveredParams}" does not match policy "${policy.parameters}"`);
        }
    }
    // 6. Family match (if specified in policy)
    if (policy.family) {
        // Infer family from model name or capabilities
        // MoE models have:
        // 1. "MoE" or "mixture-of-experts" in name
        // 2. Expert notation like "A22B", "E22B" (e.g., "235B-A22B" = 235B total, 22B experts)
        // 3. Multiple parameter counts separated by dashes (total-expert pattern)
        const modelName = (discoveredCapabilities.model || '').toLowerCase();
        // Check for explicit MoE indicators
        const hasMoEKeyword = modelName.includes('moe') || modelName.includes('mixture-of-experts');
        // Check for expert notation (A22B, E22B, etc.)
        // NOTE(review): loose heuristic - any [ae]<digits>[bmk] substring
        // matches, so some dense model names may false-positive; confirm
        // against real registry names before tightening.
        const hasExpertNotation = /[ae]\d+[bmk]/i.test(modelName);
        // Check for total-expert parameter pattern (e.g., "235B-A22B", "70B-E2B")
        const hasTotalExpertPattern = /\d+[bmk]-[ae]\d+[bmk]/i.test(modelName);
        // Check for multiple large parameter counts separated by dashes
        const paramPatterns = modelName.match(/\d+[bmk]/gi) || [];
        const hasMultipleLargeParams = paramPatterns.length >= 2 &&
            paramPatterns.some(p => /^(\d{2,}|[0-9]+0)[bmk]$/i.test(p)); // 2+ digits or ends in 0
        // Any single indicator classifies the model as MoE.
        const isMoE = hasMoEKeyword || hasExpertNotation || hasTotalExpertPattern || hasMultipleLargeParams;
        const discoveredFamily = isMoE ? 'MoE' : 'Dense';
        if (discoveredFamily !== policy.family) {
            errors.push(`Family mismatch: discovered "${discoveredFamily}" does not match policy "${policy.family}". ` +
                `Model name: "${discoveredCapabilities.model}"`);
        }
    }
    return {
        valid: errors.length === 0,
        errors,
        warnings: warnings.length > 0 ? warnings : undefined,
    };
}
|
package/dist/types.js
DELETED