@auxot/worker-cli 0.1.2 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@auxot/worker-cli",
-  "version": "0.1.2",
+  "version": "0.1.4",
   "type": "module",
   "description": "Auxot GPU worker CLI - connects local GPU resources to Auxot platform",
   "license": "UNLICENSED",
@@ -34,20 +34,20 @@
     "dev": "tsx src/index.ts",
     "dev:debug": "tsx src/index.ts --debug 1",
     "dev:debug2": "tsx src/index.ts --debug 2",
-    "build": "tsc",
+    "build": "node build.mjs",
     "start": "node dist/index.js",
     "prepublishOnly": "npm run build"
   },
   "dependencies": {
-    "@auxot/model-registry": "*",
-    "@auxot/shared": "*",
     "eventsource-parser": "^1.1.2",
     "ws": "^8.16.0",
     "uuid": "^9.0.1"
   },
   "devDependencies": {
+    "@auxot/model-registry": "*",
     "@types/node": "^20.10.6",
     "@types/ws": "^8.5.10",
+    "esbuild": "^0.20.0",
     "tsx": "^4.7.0",
     "typescript": "^5.3.3"
   },
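The build step changes from plain tsc to node build.mjs, esbuild is added as a devDependency, @auxot/model-registry moves from dependencies to devDependencies, and @auxot/shared is removed from the manifest. Together with the deleted dist modules below, this suggests the worker is now bundled into a single dist/index.js rather than shipped as individually compiled files. The build.mjs script itself is not part of this diff; the following is only a sketch of what an esbuild-based bundling script of that shape might look like, with the entry point, target, and externals assumed rather than taken from the package:

// build.mjs -- illustrative sketch only; the published script may differ.
import { build } from 'esbuild';

await build({
  entryPoints: ['src/index.ts'], // assumed entry point
  bundle: true,                  // inlines workspace deps such as @auxot/model-registry
  platform: 'node',
  format: 'esm',                 // matches "type": "module"
  target: 'node20',
  outfile: 'dist/index.js',      // matches the package's "start" script
  external: ['ws'],              // runtime deps left as regular imports (assumption)
});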
@@ -1,125 +0,0 @@
-/**
- * llama.cpp Capability Discovery
- *
- * Queries llama.cpp OpenAI-compatible API to discover:
- * - Available models
- * - Context size
- * - Model parameters
- * - VRAM/GPU info (if available)
- */
-/**
- * Discover llama.cpp capabilities via OpenAI-compatible API
- */
-/**
- * Normalize model name from file path
- *
- * Examples:
- * - "/Users/kminkler/.models/Qwen3-Coder-30B-A3B-Instruct-Q4_K_S.gguf"
- *   -> "Qwen3-Coder-30B-A3B-Instruct"
- * - "llama-2-7b-chat.Q4_K_M.gguf"
- *   -> "llama-2-7b-chat"
- */
-function normalizeModelName(filePath) {
-    // Extract filename from path
-    const filename = filePath.split('/').pop() || filePath;
-    // Remove .gguf extension
-    let name = filename.replace(/\.gguf$/i, '');
-    // Remove common quantization suffixes (Q4_K_M, Q4_K_S, Q5_K_M, etc.)
-    name = name.replace(/-?(Q\d+_[KM]_[SM]|Q\d+_[KM]|Q\d+)$/i, '');
-    // Clean up any trailing hyphens or underscores
-    name = name.replace(/[-_]+$/, '');
-    return name;
-}
-export async function discoverCapabilities(llamaUrl) {
-    try {
-        // Query /v1/models endpoint
-        const modelsResponse = await fetch(`${llamaUrl}/v1/models`);
-        if (!modelsResponse.ok) {
-            throw new Error(`Failed to fetch models: ${modelsResponse.status} ${modelsResponse.statusText}`);
-        }
-        const modelsData = await modelsResponse.json();
-        if (!modelsData.data || modelsData.data.length === 0) {
-            throw new Error('No models found in llama.cpp server');
-        }
-        // Use first model (typically only one model loaded)
-        const model = modelsData.data[0];
-        // Normalize model name (remove path and quantization suffix)
-        const modelName = normalizeModelName(model.id);
-        const capabilities = {
-            backend: 'llama.cpp',
-            model: modelName,
-            ctx_size: 4096, // Default, will try to get actual from /props
-        };
-        // Extract parameter count if available
-        if (model.meta?.n_params) {
-            const params = model.meta.n_params;
-            if (params >= 1e9) {
-                capabilities.parameters = `${Math.round(params / 1e9)}B`;
-            }
-            else if (params >= 1e6) {
-                capabilities.parameters = `${Math.round(params / 1e6)}M`;
-            }
-        }
-        // Try to get runtime context size from /props endpoint
-        // This gives us the actual --ctx-size value, not n_ctx_train
-        try {
-            const propsResponse = await fetch(`${llamaUrl}/props`);
-            if (propsResponse.ok) {
-                const props = await propsResponse.json();
-                // Extract runtime context size (this is the --ctx-size value)
-                if (props.default_generation_settings?.n_ctx) {
-                    capabilities.ctx_size = props.default_generation_settings.n_ctx;
-                }
-                // Extract default max_tokens (for unlimited generation)
-                // llama.cpp defaults to -1 (unlimited) but OpenAI API compat layer defaults to 2048
-                // We want to explicitly send the server's max to override the API layer default
-                // Path is: default_generation_settings.params.n_predict
-                if (props.default_generation_settings?.params?.n_predict !== undefined) {
-                    capabilities.max_tokens_default = props.default_generation_settings.params.n_predict;
-                }
-                // Extract total_slots (parallel job capacity)
-                if (props.total_slots) {
-                    capabilities.total_slots = props.total_slots;
-                }
-                // Extract VRAM info if available
-                if (props.total_vram_mb) {
-                    capabilities.vram_gb = Math.round(props.total_vram_mb / 1024);
-                }
-                // If we got the context size from /props, we're done
-                if (capabilities.ctx_size !== 4096) {
-                    console.log('Discovered capabilities:', capabilities);
-                    return capabilities;
-                }
-            }
-        }
-        catch (propsError) {
-            console.warn('/props endpoint not available, trying /health');
-        }
-        // Try /health endpoint as fallback
-        try {
-            const healthResponse = await fetch(`${llamaUrl}/health`);
-            if (healthResponse.ok) {
-                const health = await healthResponse.json();
-                // Some versions expose n_ctx in health endpoint
-                if (health.n_ctx) {
-                    capabilities.ctx_size = health.n_ctx;
-                    console.log(`Runtime context size from /health: ${capabilities.ctx_size}`);
-                }
-            }
-        }
-        catch {
-            console.warn('/health endpoint not available');
-        }
-        // Last resort: use n_ctx_train as estimate
-        if (capabilities.ctx_size === 4096 && model.meta?.n_ctx_train) {
-            console.warn('Could not determine runtime context size, using n_ctx_train as fallback');
-            capabilities.ctx_size = model.meta.n_ctx_train;
-        }
-        console.log('Discovered capabilities:', capabilities);
-        return capabilities;
-    }
-    catch (error) {
-        console.error('Failed to discover capabilities:', error);
-        throw error;
-    }
-}
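The deleted module above (its file name is not shown in this diff) probed a local llama.cpp server through its OpenAI-compatible API: /v1/models for the model list, /props for the runtime context size, slot count, and VRAM, /health as a fallback, and finally n_ctx_train as a last-resort estimate. For reference, a minimal usage sketch of the exported helper; the import path and server URL are illustrative:

// Illustrative sketch only; mirrors the exported API of the deleted module.
import { discoverCapabilities } from './capabilities.js'; // hypothetical file name

const caps = await discoverCapabilities('http://127.0.0.1:8080'); // example llama.cpp URL
// e.g. { backend: 'llama.cpp', model: 'Qwen3-Coder-30B-A3B-Instruct', ctx_size: 4096, ... }
console.log(caps.model, caps.ctx_size, caps.total_slots ?? 1);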
package/dist/debug.js DELETED
@@ -1,54 +0,0 @@
-/**
- * Debug logging utilities
- *
- * Controlled by --debug flag with optional level (1 or 2)
- * Level 1: WebSocket messages between CLI and server
- * Level 2: Level 1 + llama.cpp messages
- */
-let debugLevel = 0;
-export function setDebugLevel(level) {
-    debugLevel = level;
-}
-export function getDebugLevel() {
-    return debugLevel;
-}
-/**
- * Log message from WebSocket server to CLI
- */
-export function logServerToClient(message) {
-    if (debugLevel < 1)
-        return;
-    console.log('cli <<< server');
-    console.log(JSON.stringify(message, null, 2));
-    console.log('');
-}
-/**
- * Log message from CLI to WebSocket server
- */
-export function logClientToServer(message) {
-    if (debugLevel < 1)
-        return;
-    console.log('cli >>> server');
-    console.log(JSON.stringify(message, null, 2));
-    console.log('');
-}
-/**
- * Log request from CLI to llama.cpp
- */
-export function logClientToLlama(request) {
-    if (debugLevel < 2)
-        return;
-    console.log('llama.cpp << cli');
-    console.log(JSON.stringify(request, null, 2));
-    console.log('');
-}
-/**
- * Log response chunk from llama.cpp to CLI
- */
-export function logLlamaToClient(chunk) {
-    if (debugLevel < 2)
-        return;
-    console.log('llama.cpp >> cli');
-    console.log(chunk);
-    console.log('');
-}
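The deleted debug module gates its output on a numeric level: level 1 prints WebSocket traffic between the CLI and the server, level 2 additionally prints llama.cpp requests and response chunks. A minimal sketch of how those exports fit together (the payloads are made up):

// Illustrative sketch only; mirrors the exported API of the deleted dist/debug.js.
import { setDebugLevel, logClientToServer, logClientToLlama } from './debug.js';

setDebugLevel(2); // level 1 logging (WebSocket) plus level 2 logging (llama.cpp)
logClientToServer({ type: 'register', gpu_id: 'example-id' }); // printed at level >= 1
logClientToLlama({ model: 'llama-2-7b-chat', stream: true });  // printed at level >= 2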
@@ -1,171 +0,0 @@
-/**
- * GPU Hardware Detection
- *
- * Detects available GPU hardware to determine which llama.cpp binary variant to use.
- *
- * Detection order:
- * - Windows: NVIDIA CUDA → CPU (with warning)
- * - Linux: NVIDIA CUDA → Vulkan → CPU (with warning)
- * - macOS: Metal (built-in) → CPU (with warning, rare)
- */
-import { exec } from 'child_process';
-import { promisify } from 'util';
-import { platform } from 'os';
-const execAsync = promisify(exec);
-/**
- * Detect GPU hardware and return appropriate backend
- */
-export async function detectGpuBackend() {
-    const os = platform();
-    if (os === 'darwin') {
-        return await detectMacOSGpu();
-    }
-    else if (os === 'linux') {
-        return await detectLinuxGpu();
-    }
-    else if (os === 'win32') {
-        return await detectWindowsGpu();
-    }
-    // Unknown platform - default to CPU with warning
-    return {
-        backend: 'cpu',
-        detected: false,
-        warning: `Unknown platform: ${os}. Defaulting to CPU variant.`,
-    };
-}
-/**
- * Detect GPU on macOS
- *
- * Apple Silicon (M1/M2/M3) always has Metal GPU.
- * Intel Macs may or may not have Metal (depends on GPU model).
- */
-async function detectMacOSGpu() {
-    try {
-        // Check if Metal is available (should be on all modern macOS)
-        // Metal is built into macOS, so we assume it's available
-        // The standard binaries include Metal support
-        return {
-            backend: 'metal',
-            detected: true,
-        };
-    }
-    catch (error) {
-        // Fallback to CPU
-        return {
-            backend: 'cpu',
-            detected: false,
-            warning: 'Metal GPU not detected. Using CPU variant (performance will be limited).',
-        };
-    }
-}
-/**
- * Detect GPU on Linux
- *
- * Detection order:
- * 1. NVIDIA CUDA (nvidia-smi)
- * 2. Vulkan (vulkaninfo or lspci)
- * 3. CPU (fallback with warning)
- */
-async function detectLinuxGpu() {
-    // Try NVIDIA CUDA first
-    try {
-        await execAsync('nvidia-smi --query-gpu=name --format=csv,noheader', {
-            timeout: 5000,
-        });
-        // NVIDIA GPU detected
-        // Note: llama.cpp releases don't include CUDA binaries for Linux
-        // Users need to build from source for CUDA on Linux
-        // For now, use Vulkan (works with NVIDIA GPUs too)
-        return {
-            backend: 'vulkan',
-            detected: true,
-        };
-    }
-    catch (error) {
-        // nvidia-smi not found or failed - continue to Vulkan check
-    }
-    // Try Vulkan (works with AMD, NVIDIA, Intel GPUs)
-    try {
-        await execAsync('vulkaninfo --summary 2>/dev/null', {
-            timeout: 5000,
-        });
-        // Vulkan GPU detected
-        return {
-            backend: 'vulkan',
-            detected: true,
-        };
-    }
-    catch (error) {
-        // vulkaninfo not found - try lspci as fallback
-        try {
-            const { stdout } = await execAsync('lspci | grep -i vga', {
-                timeout: 5000,
-            });
-            if (stdout && stdout.trim()) {
-                // Some GPU detected (AMD/NVIDIA/Intel) - try Vulkan anyway
-                // Vulkan binaries should work even if vulkaninfo isn't installed
-                return {
-                    backend: 'vulkan',
-                    detected: true,
-                };
-            }
-        }
-        catch (error) {
-            // lspci failed or no GPU found
-        }
-    }
-    // No GPU detected - use CPU with warning
-    return {
-        backend: 'cpu',
-        detected: false,
-        warning: 'No GPU detected. Using CPU variant (performance will be severely limited). Consider using models <= 7B.',
-    };
-}
-/**
- * Detect GPU on Windows
- *
- * Detection order:
- * 1. NVIDIA CUDA (nvidia-smi)
- * 2. CPU (fallback with warning)
- */
-async function detectWindowsGpu() {
-    // Try NVIDIA CUDA
-    try {
-        await execAsync('nvidia-smi --query-gpu=name --format=csv,noheader', {
-            timeout: 5000,
-            shell: 'cmd.exe',
-        });
-        // NVIDIA GPU detected
-        return {
-            backend: 'cuda',
-            detected: true,
-        };
-    }
-    catch (error) {
-        // nvidia-smi not found or failed - no NVIDIA GPU
-    }
-    // Try to check for other GPUs via WMI (AMD, Intel)
-    try {
-        const { stdout } = await execAsync('wmic path win32_VideoController get name', {
-            timeout: 5000,
-            shell: 'cmd.exe',
-        });
-        if (stdout && stdout.includes('AMD') || stdout.includes('Radeon') || stdout.includes('NVIDIA')) {
-            // Some GPU detected but not NVIDIA - use CPU (Windows releases don't have Vulkan binaries easily available)
-            return {
-                backend: 'cpu',
-                detected: false,
-                warning: 'Non-NVIDIA GPU detected. Using CPU variant (CUDA binaries require NVIDIA GPUs). Consider using models <= 7B.',
-            };
-        }
-    }
-    catch (error) {
-        // WMI failed
-    }
-    // No GPU detected - use CPU with warning
-    return {
-        backend: 'cpu',
-        detected: false,
-        warning: 'No GPU detected. Using CPU variant (performance will be severely limited). Consider using models <= 7B.',
-    };
-}
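The deleted GPU-detection module above (its file name is not shown in this diff) resolves to one of the backends 'metal', 'cuda', 'vulkan', or 'cpu' and attaches a warning string when it has to fall back. A minimal caller sketch; the import path is hypothetical:

// Illustrative sketch only; mirrors the exported API of the deleted module.
import { detectGpuBackend } from './gpu-detect.js'; // hypothetical file name

const gpu = await detectGpuBackend();
if (gpu.warning) {
    console.warn(gpu.warning);
}
console.log(`Selected llama.cpp variant: ${gpu.backend} (detected: ${gpu.detected})`);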
package/dist/gpu-id.js DELETED
@@ -1,48 +0,0 @@
-/**
- * GPU ID Management
- *
- * Generates and persists a stable UUID for this GPU worker.
- * Stored in ~/.auxot/gpu-id (or %USERPROFILE%\.auxot\gpu-id on Windows)
- */
-import { randomUUID } from 'crypto';
-import { readFile, writeFile, mkdir } from 'fs/promises';
-import { homedir } from 'os';
-import { join } from 'path';
-const AUXOT_DIR = join(homedir(), '.auxot');
-const GPU_ID_FILE = join(AUXOT_DIR, 'gpu-id');
-/**
- * Get or create a stable GPU ID
- *
- * Returns the same UUID across restarts of the worker CLI.
- */
-export async function getOrCreateGpuId() {
-    try {
-        // Try to read existing GPU ID
-        const existingId = await readFile(GPU_ID_FILE, 'utf-8');
-        const trimmed = existingId.trim();
-        // Validate it's a UUID
-        const uuidRegex = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
-        if (uuidRegex.test(trimmed)) {
-            return trimmed;
-        }
-        console.warn('Invalid GPU ID found, generating new one');
-    }
-    catch (error) {
-        // File doesn't exist or can't be read - generate new ID
-    }
-    // Generate new UUID
-    const newId = randomUUID();
-    try {
-        // Ensure directory exists
-        await mkdir(AUXOT_DIR, { recursive: true });
-        // Write GPU ID
-        await writeFile(GPU_ID_FILE, newId, 'utf-8');
-        console.log(`Generated new GPU ID: ${newId}`);
-        console.log(`Stored in: ${GPU_ID_FILE}`);
-    }
-    catch (error) {
-        console.error('Failed to save GPU ID:', error);
-        throw error;
-    }
-    return newId;
-}
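The deleted gpu-id module persists a single UUID under ~/.auxot/gpu-id so the worker keeps the same identity across restarts. Typical use of the exported helper (illustrative only):

// Illustrative sketch only; mirrors the exported API of the deleted dist/gpu-id.js.
import { getOrCreateGpuId } from './gpu-id.js';

const gpuId = await getOrCreateGpuId(); // reads ~/.auxot/gpu-id, or generates and stores a new UUID
console.log(`Registering worker as ${gpuId}`);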