@auxot/worker-cli 0.1.5 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +16 -26
- package/dist/index.js +332 -283
- package/dist/index.js.map +4 -4
- package/dist/registry.json +10386 -0
- package/package.json +2 -3
package/dist/index.js
CHANGED
@@ -5,37 +5,6 @@ var __export = (target, all) => {
     __defProp(target, name, { get: all[name], enumerable: true });
 };
 
-// src/gpu-id.ts
-import { randomUUID } from "crypto";
-import { readFile, writeFile, mkdir } from "fs/promises";
-import { homedir } from "os";
-import { join } from "path";
-var AUXOT_DIR = join(homedir(), ".auxot");
-var GPU_ID_FILE = join(AUXOT_DIR, "gpu-id");
-async function getOrCreateGpuId() {
-  try {
-    const existingId = await readFile(GPU_ID_FILE, "utf-8");
-    const trimmed = existingId.trim();
-    const uuidRegex2 = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
-    if (uuidRegex2.test(trimmed)) {
-      return trimmed;
-    }
-    console.warn("Invalid GPU ID found, generating new one");
-  } catch (error) {
-  }
-  const newId = randomUUID();
-  try {
-    await mkdir(AUXOT_DIR, { recursive: true });
-    await writeFile(GPU_ID_FILE, newId, "utf-8");
-    console.log(`Generated new GPU ID: ${newId}`);
-    console.log(`Stored in: ${GPU_ID_FILE}`);
-  } catch (error) {
-    console.error("Failed to save GPU ID:", error);
-    throw error;
-  }
-  return newId;
-}
-
 // src/capabilities.ts
 function normalizeModelName(filePath) {
   const filename = filePath.split("/").pop() || filePath;
@@ -87,12 +56,10 @@ async function discoverCapabilities(llamaUrl) {
         capabilities.vram_gb = Math.round(props.total_vram_mb / 1024);
       }
       if (capabilities.ctx_size !== 4096) {
-        console.log("Discovered capabilities:", capabilities);
         return capabilities;
       }
     }
   } catch (propsError) {
-    console.warn("/props endpoint not available, trying /health");
   }
   try {
     const healthResponse = await fetch(`${llamaUrl}/health`);
@@ -100,17 +67,13 @@ async function discoverCapabilities(llamaUrl) {
       const health = await healthResponse.json();
       if (health.n_ctx) {
         capabilities.ctx_size = health.n_ctx;
-        console.log(`Runtime context size from /health: ${capabilities.ctx_size}`);
       }
     }
   } catch {
-    console.warn("/health endpoint not available");
   }
   if (capabilities.ctx_size === 4096 && model.meta?.n_ctx_train) {
-    console.warn("Could not determine runtime context size, using n_ctx_train as fallback");
     capabilities.ctx_size = model.meta.n_ctx_train;
   }
-  console.log("Discovered capabilities:", capabilities);
   return capabilities;
 } catch (error) {
   console.error("Failed to discover capabilities:", error);
@@ -282,7 +245,8 @@ function validatePolicy(discoveredCapabilities, policy) {
   const warnings = [];
   const discoveredNormalized = normalizeModelName2(discoveredCapabilities.model || "");
   const policyNormalized = normalizeModelName2(policy.model_name);
-
+  const isModelMatch = discoveredNormalized === policyNormalized || discoveredNormalized.startsWith(policyNormalized) || policyNormalized.startsWith(discoveredNormalized);
+  if (!isModelMatch) {
     errors.push(
       `Model name mismatch: discovered "${discoveredCapabilities.model}" (normalized: "${discoveredNormalized}") does not match policy "${policy.model_name}" (normalized: "${policyNormalized}")`
     );
@@ -348,6 +312,8 @@ var WebSocketConnection = class {
   heartbeatTimer = null;
   reconnectTimer = null;
   gpuKey;
+  gpuId = null;
+  // Server-assigned GPU ID
   capabilities;
   onJobCallback = null;
   onCancelCallback = null;
@@ -359,10 +325,19 @@ var WebSocketConnection = class {
   shouldReconnect = true;
   isReconnecting = false;
   policy = null;
+  silentDisconnect = false;
+  // Suppress disconnect messages
   constructor(gpuKey, capabilities) {
     this.gpuKey = gpuKey;
     this.capabilities = capabilities;
   }
+  /**
+   * Set silent mode (suppress disconnect messages)
+   * Used during download phase to avoid jarring messages
+   */
+  setSilentMode(silent) {
+    this.silentDisconnect = silent;
+  }
   /**
    * Connect to WebSocket server and send hello message
    */
@@ -380,7 +355,7 @@ var WebSocketConnection = class {
         reject(new Error("No WebSocket URL configured"));
         return;
       }
-      if (!this.isReconnecting) {
+      if (!this.isReconnecting && !this.silentDisconnect) {
         console.log(`Connecting to ${this.wsUrl}...`);
       }
       try {
@@ -414,6 +389,9 @@ var WebSocketConnection = class {
         if (message.type === "hello_ack") {
           clearTimeout(connectionTimeout);
           if (message.success) {
+            if (message.gpu_id) {
+              this.gpuId = message.gpu_id;
+            }
             if (!message.policy) {
               const errorMsg = "Server did not send policy in hello_ack";
               console.error(`\u2717 ${errorMsg}`);
@@ -427,16 +405,14 @@ var WebSocketConnection = class {
               try {
                 await this.onPolicyCallback(message.policy);
               } catch (error) {
-
+                if (!this.silentDisconnect) {
+                  console.error("[Policy Callback] Error:", error);
+                }
                 this.shouldReconnect = false;
                 this.ws?.close();
                 reject(error);
                 return;
               }
-              console.log("\u2713 Successfully authenticated with server");
-              console.log(` Policy: ${message.policy.model_name} (${message.policy.quantization})`);
-              console.log(" Spawning llama.cpp process...");
-              console.log(" (Capabilities validation will happen via config message)");
             } else {
               const validation = await validatePolicy(this.capabilities, message.policy);
               if (validation.warnings && validation.warnings.length > 0) {
@@ -545,12 +521,12 @@ var WebSocketConnection = class {
       this.isConnected = false;
       this.stopHeartbeat();
       if (this.shouldReconnect) {
-        if (!this.isReconnecting) {
+        if (!this.isReconnecting && !this.silentDisconnect) {
          console.log("WebSocket disconnected, will continue to retry...");
          this.isReconnecting = true;
        }
        this.scheduleReconnect();
-      } else {
+      } else if (!this.silentDisconnect) {
        console.log("WebSocket disconnected");
      }
    });
@@ -694,6 +670,12 @@ var WebSocketConnection = class {
   getPolicy() {
     return this.policy;
   }
+  /**
+   * Get GPU ID assigned by server
+   */
+  getGpuId() {
+    return this.gpuId;
+  }
   /**
    * Get current capabilities
    */
@@ -771,6 +753,12 @@ async function processJob(job, llamaUrl, capabilities, abortSignal, onToken) {
   const reader = response.body.getReader();
   const decoder = new TextDecoder();
   const toolCallsMap = /* @__PURE__ */ new Map();
+  let hasReceivedFirstToken = false;
+  const keepaliveInterval = setInterval(() => {
+    if (!hasReceivedFirstToken) {
+      onToken("");
+    }
+  }, 1e4);
   const parser = createParser((event) => {
     if (event.type === "reconnect-interval")
       return;
@@ -783,6 +771,7 @@ async function processJob(job, llamaUrl, capabilities, abortSignal, onToken) {
     }
     const content = chunk.choices[0]?.delta?.content;
     if (content) {
+      hasReceivedFirstToken = true;
       fullResponse += content;
       onToken(content);
     }
@@ -834,6 +823,7 @@ async function processJob(job, llamaUrl, capabilities, abortSignal, onToken) {
       throw error;
     }
   } finally {
+    clearInterval(keepaliveInterval);
     reader.releaseLock();
   }
   let durationMs;
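
The three hunks above add a keepalive: until the first real token arrives, an empty token is pushed through the onToken callback every 10 seconds, and the timer is always cleared in the finally block. Below is a minimal TypeScript sketch of that pattern; streamWithKeepalive and the simplified stream source are illustrative, only the onToken callback name comes from the diff.

// Sketch: keep a streaming callback "warm" until the first real token arrives.
async function streamWithKeepalive(
  stream: AsyncIterable<string>,
  onToken: (token: string) => void,
  keepaliveMs = 10_000
): Promise<void> {
  let hasReceivedFirstToken = false;
  const keepalive = setInterval(() => {
    // Empty tokens signal liveness without altering the visible output.
    if (!hasReceivedFirstToken) onToken("");
  }, keepaliveMs);
  try {
    for await (const token of stream) {
      hasReceivedFirstToken = true;
      onToken(token);
    }
  } finally {
    clearInterval(keepalive); // always stop the timer, even on error or cancellation
  }
}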
@@ -879,9 +869,9 @@ import { spawn } from "child_process";
 
 // src/llama-binary.ts
 import { existsSync, chmodSync, statSync } from "node:fs";
-import { mkdir
-import { join
-import { homedir
+import { mkdir, unlink } from "node:fs/promises";
+import { join } from "node:path";
+import { homedir } from "os";
 import { platform as platform2, arch } from "os";
 import { createWriteStream } from "node:fs";
 import { exec as exec2 } from "child_process";
@@ -1038,18 +1028,18 @@ async function getArchiveName() {
 function getCacheDir() {
   const os = platform2();
   const architecture = arch();
-  const cacheDir = process.env.AUXOT_LLAMA_CACHE_DIR ||
-  return
+  const cacheDir = process.env.AUXOT_LLAMA_CACHE_DIR || join(homedir(), ".auxot", "llama-server");
+  return join(cacheDir, `${os}-${architecture}`);
 }
 function getBinaryPath() {
   const cacheDir = getCacheDir();
-  return
+  return join(cacheDir, `llama-${LLAMA_CPP_VERSION}`, "llama-server");
 }
 async function downloadLlamaBinary(onProgress) {
   const { archiveName, warning } = await getArchiveName();
   const binaryPath = getBinaryPath();
   const cacheDir = getCacheDir();
-  const archivePath =
+  const archivePath = join(cacheDir, archiveName);
   if (warning) {
     console.warn(` \u26A0 ${warning}`);
   }
@@ -1061,7 +1051,7 @@ async function downloadLlamaBinary(onProgress) {
     }
   }
   if (!existsSync(cacheDir)) {
-    await
+    await mkdir(cacheDir, { recursive: true });
   }
   const downloadUrl = `https://github.com/${LLAMA_CPP_REPO}/releases/download/${LLAMA_CPP_VERSION}/${archiveName}`;
   console.log(` Downloading llama.cpp binary...`);
@@ -1235,37 +1225,27 @@ async function spawnLlamaCpp(options) {
   let stdoutBuffer = "";
   childProcess.stdout?.on("data", (data) => {
     stdoutBuffer += data.toString();
-
-
-    for (const line of lines) {
-      const trimmed = line.trim();
-      if (trimmed) {
-        console.log(`[llama.cpp stdout] ${trimmed}`);
-      }
+    if (stdoutBuffer.length > 1e4) {
+      stdoutBuffer = stdoutBuffer.slice(-5e3);
     }
   });
   let stderrBuffer = "";
   childProcess.stderr?.on("data", (data) => {
-
-
-
+    const chunk = data.toString();
+    stderrBuffer += chunk;
+    const lines = chunk.split("\n");
     for (const line of lines) {
-      const
-      if (
-        console.error(`[llama.cpp
+      const lower = line.toLowerCase();
+      if (lower.includes("error") || lower.includes("fatal") || lower.includes("crash") || lower.includes("failed")) {
+        console.error(`[llama.cpp] ${line.trim()}`);
      }
    }
+    if (stderrBuffer.length > 1e4) {
+      stderrBuffer = stderrBuffer.slice(-5e3);
+    }
   });
   childProcess.on("exit", (code, signal) => {
     isRunning = false;
-    if (stdoutBuffer.trim()) {
-      console.log(`[llama.cpp stdout] ${stdoutBuffer.trim()}`);
-      stdoutBuffer = "";
-    }
-    if (stderrBuffer.trim()) {
-      console.error(`[llama.cpp stderr] ${stderrBuffer.trim()}`);
-      stderrBuffer = "";
-    }
     if (code !== null) {
       console.log(`[llama.cpp] Process exited with code ${code}`);
       if (code !== 0) {
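
The rewritten stdout/stderr handlers above stop echoing every line and instead cap each buffer at 10,000 characters, keeping only the last 5,000 for crash diagnostics. A small sketch of that bounded-buffer idea, detached from the child_process wiring; the BoundedBuffer class name is illustrative.

// Sketch: a bounded text buffer that keeps only the most recent output,
// mirroring the 10k/5k cap applied to llama.cpp stdout/stderr in the diff.
class BoundedBuffer {
  private text = "";
  constructor(private maxLength = 10_000, private keepLength = 5_000) {}

  append(chunk: string): void {
    this.text += chunk;
    if (this.text.length > this.maxLength) {
      // Drop the oldest output; recent lines are what matter when the process dies.
      this.text = this.text.slice(-this.keepLength);
    }
  }

  contents(): string {
    return this.text;
  }
}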
@@ -1344,15 +1324,23 @@ async function spawnLlamaCpp(options) {
     }
   };
 }
-async function waitForLlamaReady(url, timeoutMs =
+async function waitForLlamaReady(url, timeoutMs = 6e4) {
   const startTime = Date.now();
-  const checkInterval =
+  const checkInterval = 1e3;
   while (Date.now() - startTime < timeoutMs) {
     try {
-      const response = await fetch(`${url}/v1/models
+      const response = await fetch(`${url}/v1/models`, {
+        signal: AbortSignal.timeout(5e3)
+        // 5 second timeout per request
+      });
       if (response.ok) {
-
-
+        const contentType = response.headers.get("content-type");
+        if (contentType && contentType.includes("application/json")) {
+          const data = await response.json();
+          if (data && (data.data || data.object)) {
+            return;
+          }
+        }
       }
     } catch (error) {
     }
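
The reworked waitForLlamaReady polls the local /v1/models endpoint once per second with a 5-second per-request timeout and only returns once the response is JSON that looks like a model list. A standalone TypeScript sketch of that loop; waitForServerReady is an illustrative name, the timings and field checks follow the diff.

// Sketch: poll an OpenAI-compatible /v1/models endpoint until the server is ready.
async function waitForServerReady(url: string, timeoutMs = 60_000): Promise<void> {
  const startTime = Date.now();
  const checkIntervalMs = 1_000;
  while (Date.now() - startTime < timeoutMs) {
    try {
      const response = await fetch(`${url}/v1/models`, {
        signal: AbortSignal.timeout(5_000), // give up on a hung request, keep polling
      });
      if (response.ok) {
        const contentType = response.headers.get("content-type") ?? "";
        if (contentType.includes("application/json")) {
          const data = await response.json();
          if (data && (data.data || data.object)) return; // looks like a real model list
        }
      }
    } catch {
      // Server not up yet (connection refused, timeout); fall through and retry.
    }
    await new Promise((resolve) => setTimeout(resolve, checkIntervalMs));
  }
  throw new Error(`Server at ${url} did not become ready within ${timeoutMs} ms`);
}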
@@ -5413,10 +5401,11 @@ var ModelRegistryEntrySchema = external_exports.object({
   family: ModelFamilySchema,
   parameters: external_exports.string(),
   default_context_size: external_exports.number().int().positive(),
+  max_context_size: external_exports.number().int().positive(),
   vram_requirements_gb: external_exports.number().positive(),
   capabilities: external_exports.array(ModelCapabilitySchema).min(1),
   file_name: external_exports.string(),
-  file_size_bytes: external_exports.number().int().positive().optional()
+  file_size_bytes: external_exports.number().int().positive().nullable().optional()
 });
 var ModelRegistrySchema = external_exports.object({
   version: external_exports.string(),
@@ -5429,7 +5418,7 @@ function validateModelRegistry(data) {
 
 // ../../packages/model-registry/dist/src/loader.js
 import { readFileSync, statSync as statSync2 } from "node:fs";
-import { join as
+import { join as join2, dirname as dirname2 } from "node:path";
 import { fileURLToPath } from "node:url";
 var __filename = fileURLToPath(import.meta.url);
 var __dirname = dirname2(__filename);
@@ -5438,11 +5427,11 @@ var cachedRegistryPath = null;
 var cachedRegistryMtime = null;
 function loadRegistry() {
   const registryPaths = [
-
+    join2(__dirname, "..", "..", "registry.json"),
     // From dist/src/ -> package root
-
+    join2(__dirname, "..", "registry.json"),
     // From dist/ -> package root (if running from dist/)
-
+    join2(__dirname, "registry.json")
     // Same directory (if copied there)
   ];
   let registryPath = null;
@@ -5509,17 +5498,17 @@ function getModels(registry, filters) {
 }
 
 // src/model-resolver.ts
-import { join as
-import { homedir as
+import { join as join4 } from "path";
+import { homedir as homedir2 } from "os";
 
 // src/model-downloader.ts
 import { createWriteStream as createWriteStream2, existsSync as existsSync2, statSync as statSync3 } from "node:fs";
-import { mkdir as
+import { mkdir as mkdir2 } from "node:fs/promises";
 import { dirname as dirname3 } from "node:path";
 async function downloadModel(entry, outputPath, onProgress) {
   const outputDir = dirname3(outputPath);
   if (!existsSync2(outputDir)) {
-    await
+    await mkdir2(outputDir, { recursive: true });
   }
   if (existsSync2(outputPath)) {
     const stats = statSync3(outputPath);
@@ -5527,9 +5516,11 @@ async function downloadModel(entry, outputPath, onProgress) {
       console.log(` \u2713 Model already downloaded (${formatBytes2(stats.size)})`);
       return outputPath;
     }
-    if (entry.file_size_bytes && stats.size
-      console.log(` \
-      console.log(` \
+    if (entry.file_size_bytes && stats.size < entry.file_size_bytes) {
+      console.log(` \u2299 Partial download found (${formatBytes2(stats.size)} / ${formatBytes2(entry.file_size_bytes)})`);
+      console.log(` \u2299 Resuming download...`);
+    } else if (entry.file_size_bytes && stats.size > entry.file_size_bytes) {
+      console.log(` \u2298 File is larger than expected, restarting download...`);
       const { unlink: unlink2 } = await import("node:fs/promises");
       await unlink2(outputPath);
     }
@@ -5546,10 +5537,6 @@ async function downloadModel(entry, outputPath, onProgress) {
   if (existsSync2(outputPath)) {
     const stats = statSync3(outputPath);
     startByte = stats.size;
-    if (startByte > 0 && startByte < totalBytes) {
-      console.log(` Resuming from ${formatBytes2(startByte)}...`);
-      downloadedBytes = startByte;
-    }
   }
   const response = await fetch(downloadUrl, {
     headers: startByte > 0 ? {
@@ -5567,11 +5554,29 @@ async function downloadModel(entry, outputPath, onProgress) {
   }
   const contentLength = response.headers.get("content-length");
   const totalSize = contentLength ? parseInt(contentLength, 10) + startByte : totalBytes;
+  if (startByte > 0) {
+    if (startByte < totalSize) {
+      console.log(` \u2299 Resuming from ${formatBytes2(startByte)}...`);
+      downloadedBytes = startByte;
+    } else if (startByte === totalSize) {
+      console.log(` \u2713 Model already downloaded (${formatBytes2(startByte)})`);
+      return outputPath;
+    } else {
+      console.log(` \u2298 File is larger than expected (${formatBytes2(startByte)} > ${formatBytes2(totalSize)}), restarting...`);
+      const { unlink: unlink2 } = await import("node:fs/promises");
+      await unlink2(outputPath);
+      startByte = 0;
+      downloadedBytes = 0;
+    }
+  }
   const fileStream = createWriteStream2(outputPath, { flags: startByte > 0 ? "a" : "w" });
   const reader = response.body?.getReader();
   if (!reader) {
     throw new Error("Response body is not readable");
   }
+  let lastProgressUpdate = Date.now();
+  const startTime = Date.now();
+  const bytesAtStart = downloadedBytes;
   try {
     while (true) {
       const { done, value } = await reader.read();
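
The resume path above now decides, after reading Content-Length, whether a partial file should be resumed, accepted as already complete, or deleted and restarted. A compact TypeScript sketch of the Range-request side of that logic; downloadWithResume is an illustrative name, and progress reporting and size verification are omitted.

import { statSync, existsSync, createWriteStream } from "node:fs";

// Sketch: resume a download with an HTTP Range header, appending to the partial file.
async function downloadWithResume(url: string, outputPath: string): Promise<void> {
  const startByte = existsSync(outputPath) ? statSync(outputPath).size : 0;
  const response = await fetch(url, {
    headers: startByte > 0 ? { Range: `bytes=${startByte}-` } : {},
  });
  if (!response.ok) {
    throw new Error(`Download failed: HTTP ${response.status}`);
  }
  // Append when resuming, truncate when starting fresh.
  const file = createWriteStream(outputPath, { flags: startByte > 0 ? "a" : "w" });
  const reader = response.body?.getReader();
  if (!reader) throw new Error("Response body is not readable");
  try {
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;
      file.write(value);
    }
  } finally {
    file.end();
    reader.releaseLock();
  }
}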
@@ -5580,13 +5585,21 @@ async function downloadModel(entry, outputPath, onProgress) {
       }
       fileStream.write(value);
       downloadedBytes += value.length;
-
+      const now = Date.now();
+      if (onProgress && now - lastProgressUpdate > 1e3) {
         onProgress(downloadedBytes, totalSize);
-
-
-
-
-
+        lastProgressUpdate = now;
+      } else if (totalSize > 0 && now - lastProgressUpdate > 1e3) {
+        const elapsedSeconds = (now - startTime) / 1e3;
+        const bytesDownloadedThisSession = downloadedBytes - bytesAtStart;
+        const bytesPerSecond = bytesDownloadedThisSession / elapsedSeconds;
+        const remainingBytes = totalSize - downloadedBytes;
+        const etaSeconds = remainingBytes / bytesPerSecond;
+        const percent = (downloadedBytes / totalSize * 100).toFixed(1);
+        const speed = formatBytes2(bytesPerSecond);
+        const eta = formatTime(etaSeconds);
+        process.stdout.write(`\r ${formatBytes2(downloadedBytes)} / ${formatBytes2(totalSize)} (${percent}%) ${speed}/s ETA ~${eta}`);
+        lastProgressUpdate = now;
       }
     }
     fileStream.end();
@@ -5597,12 +5610,7 @@ async function downloadModel(entry, outputPath, onProgress) {
     if (totalSize > 0 && downloadedBytes !== totalSize) {
       throw new Error(`Download incomplete: ${downloadedBytes} bytes downloaded, expected ${totalSize}`);
     }
-
-      process.stdout.write("\r");
-    } else {
-      process.stdout.write("\r");
-    }
-    console.log(` \u2713 Download complete (${formatBytes2(downloadedBytes)})`);
+    process.stdout.write("\r" + " ".repeat(80) + "\r");
     return outputPath;
   } catch (error) {
     fileStream.destroy();
@@ -5617,6 +5625,19 @@ function formatBytes2(bytes) {
   const i = Math.floor(Math.log(bytes) / Math.log(k));
   return `${(bytes / Math.pow(k, i)).toFixed(1)} ${sizes[i]}`;
 }
+function formatTime(seconds) {
+  if (seconds < 60) {
+    return `${Math.round(seconds)}s`;
+  }
+  const minutes = Math.floor(seconds / 60);
+  const remainingSeconds = Math.round(seconds % 60);
+  if (minutes < 60) {
+    return `${minutes}m ${remainingSeconds}s`;
+  }
+  const hours = Math.floor(minutes / 60);
+  const remainingMinutes = minutes % 60;
+  return `${hours}h ${remainingMinutes}m`;
+}
 
 // src/model-resolver.ts
 import { existsSync as existsSync3 } from "node:fs";
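
The new progress line computes throughput from the bytes transferred in the current session only (excluding any resumed prefix) and derives an ETA from the remaining bytes, which formatTime then renders. The arithmetic in isolation, as a TypeScript sketch; downloadEta is an illustrative helper name.

// Sketch: the throughput/ETA arithmetic behind the new progress line.
// bytesAtStart is the size of any resumed prefix, so speed reflects only this session.
function downloadEta(
  downloadedBytes: number,
  bytesAtStart: number,
  totalBytes: number,
  elapsedMs: number
): { percent: number; bytesPerSecond: number; etaSeconds: number } {
  const bytesThisSession = downloadedBytes - bytesAtStart;
  const bytesPerSecond = bytesThisSession / (elapsedMs / 1_000);
  const remainingBytes = totalBytes - downloadedBytes;
  return {
    percent: (downloadedBytes / totalBytes) * 100,
    bytesPerSecond,
    etaSeconds: remainingBytes / bytesPerSecond,
  };
}

// Example: 2 GB of 10 GB done after 60 s with no resumed prefix
// -> roughly 33 MB/s and about 4 minutes remaining.
const { bytesPerSecond, etaSeconds } = downloadEta(2e9, 0, 10e9, 60_000);
console.log(`${(bytesPerSecond / 1e6).toFixed(1)} MB/s, ETA ${Math.round(etaSeconds)}s`);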
@@ -5636,24 +5657,83 @@ async function ensureModelDownloaded(policy, onProgress) {
     console.error(` \u2717 Model not found in registry: ${policy.model_name} (${policy.quantization})`);
     return null;
   }
-  const modelsDir = process.env.AUXOT_MODELS_DIR ||
-  const modelDir =
-  const modelPath =
-
-
-  const
-
+  const modelsDir = process.env.AUXOT_MODELS_DIR || join4(homedir2(), ".auxot", "models");
+  const modelDir = join4(modelsDir, model.huggingface_id.replace("/", "_"));
+  const modelPath = join4(modelDir, model.file_name);
+  const shardMatch = model.file_name.match(/-(\d+)-of-(\d+)\.gguf$/);
+  if (shardMatch) {
+    const totalShards = parseInt(shardMatch[2], 10);
+    const fileBaseName = model.file_name.replace(/-\d+-of-\d+\.gguf$/, "");
+    const fileExtension = ".gguf";
+    console.log(` Model has ${totalShards} shards, downloading all...`);
+    for (let shardNum = 1; shardNum <= totalShards; shardNum++) {
+      const paddedNum = String(shardNum).padStart(5, "0");
+      const shardFileName = `${fileBaseName}-${paddedNum}-of-${String(totalShards).padStart(5, "0")}${fileExtension}`;
+      const shardPath = join4(modelDir, shardFileName);
+      if (existsSync3(shardPath)) {
+        const { statSync: statSync4 } = await import("node:fs");
+        const stats = statSync4(shardPath);
+        console.log(` \u2713 Shard ${shardNum}/${totalShards} already downloaded`);
+        continue;
+      }
+      const shardEntry = {
+        ...model,
+        file_name: shardFileName,
+        file_size_bytes: null
+        // Don't know the size, will get from Content-Length
+      };
+      console.log(` Downloading shard ${shardNum}/${totalShards}...`);
+      try {
+        await downloadModel(shardEntry, shardPath, onProgress);
+        console.log(` \u2713 Shard ${shardNum}/${totalShards} complete`);
+      } catch (error) {
+        console.error(` \u2717 Shard ${shardNum}/${totalShards} failed:`, error);
+        throw error;
+      }
+    }
+    return modelPath;
+  } else {
+    if (existsSync3(modelPath)) {
+      const { statSync: statSync4 } = await import("node:fs");
+      const stats = statSync4(modelPath);
+      if (model.file_size_bytes && stats.size === model.file_size_bytes) {
+        return modelPath;
+      }
+    }
+    try {
+      await downloadModel(model, modelPath, onProgress);
       return modelPath;
+    } catch (error) {
+      console.error(` \u2717 Download failed:`, error);
+      throw error;
     }
   }
-
-
-
-
-
-
-
+}
+
+// src/port-finder.ts
+import { createServer } from "net";
+async function findAvailablePort(minPort = 1e4, maxPort = 65535, maxAttempts = 100) {
+  for (let i = 0; i < maxAttempts; i++) {
+    const port = Math.floor(Math.random() * (maxPort - minPort + 1)) + minPort;
+    const isAvailable = await isPortAvailable(port);
+    if (isAvailable) {
+      return port;
+    }
   }
+  throw new Error(`No available ports found after ${maxAttempts} attempts in range ${minPort}-${maxPort}`);
+}
+function isPortAvailable(port) {
+  return new Promise((resolve) => {
+    const server = createServer();
+    server.once("error", () => {
+      resolve(false);
+    });
+    server.once("listening", () => {
+      server.close();
+      resolve(true);
+    });
+    server.listen(port, "127.0.0.1");
+  });
+}
 }
 
 // src/index.ts
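
The new src/port-finder.ts, added in the hunk above, picks a random port in a range and confirms it is free by briefly binding a throwaway server to 127.0.0.1 before the port is handed to llama.cpp. A self-contained TypeScript sketch of the same check (the function names mirror the diff):

import { createServer } from "node:net";

// Sketch: probe whether a TCP port is free by briefly binding to it.
function isPortAvailable(port: number): Promise<boolean> {
  return new Promise((resolve) => {
    const server = createServer();
    server.once("error", () => resolve(false)); // EADDRINUSE or similar
    server.once("listening", () => {
      server.close();
      resolve(true);
    });
    server.listen(port, "127.0.0.1");
  });
}

async function findAvailablePort(minPort = 10_000, maxPort = 65_535, maxAttempts = 100): Promise<number> {
  for (let i = 0; i < maxAttempts; i++) {
    const port = Math.floor(Math.random() * (maxPort - minPort + 1)) + minPort;
    if (await isPortAvailable(port)) return port;
  }
  throw new Error(`No available ports found after ${maxAttempts} attempts`);
}

// Usage: reserve a local port before spawning the inference server.
findAvailablePort().then((port) => console.log(`Using port ${port}`));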
@@ -5717,28 +5797,79 @@ if (!config.gpuKey.startsWith("gpu.")) {
 }
 async function main() {
   setDebugLevel(config.debugLevel);
-  console.log("Auxot
-  console.log("
-  if (config.debugLevel > 0) {
-    console.log(`Debug Level: ${config.debugLevel}`);
-  }
-  console.log();
+  console.log("Auxot Worker");
+  console.log("============\n");
   try {
-    console.log("[1/4] Loading GPU ID...");
-    const gpuId = await getOrCreateGpuId();
-    console.log(`GPU ID: ${gpuId}`);
-    console.log();
-    console.log("[2/4] Connecting to Auxot platform...");
     const baseUrl = config.auxotUrl.replace(/^http/, "ws").replace(/^https/, "wss");
     const wsUrl = `${baseUrl}/api/gpu/client`;
-
-
-
-
-
-
-
-
+    console.log("\u25B6 Control Plane");
+    console.log(` \u2713 Connected ${wsUrl}`);
+    const policy = await fetchPolicy();
+    console.log(` \u2713 Authenticated`);
+    console.log();
+    console.log("\u25B6 Downloading Model");
+    console.log(` ${policy.model_name} (${policy.quantization})`);
+    const modelPath = await ensureModelDownloaded(policy);
+    if (!modelPath) {
+      throw new Error(`Model not found in registry: ${policy.model_name} (${policy.quantization})`);
+    }
+    console.log(` \u2713 Model ready`);
+    console.log();
+    console.log("\u25B6 Control Plane");
+    const binaryPath = await ensureLlamaBinary();
+    const gpuLayers = 9999;
+    const llamaPort = await findAvailablePort();
+    const llamaUrl = `http://127.0.0.1:${llamaPort}`;
+    const llamaProcess = await spawnLlamaCpp({
+      binaryPath,
+      modelPath,
+      contextSize: policy.context_size,
+      parallelism: policy.max_parallelism,
+      port: llamaPort,
+      host: "127.0.0.1",
+      gpuLayers
+    });
+    await waitForLlamaReady(llamaUrl);
+    await new Promise((resolve) => setTimeout(resolve, 2e3));
+    try {
+      const warmupResponse = await fetch(`${llamaUrl}/v1/chat/completions`, {
+        method: "POST",
+        headers: { "Content-Type": "application/json" },
+        body: JSON.stringify({
+          model: "placeholder",
+          messages: [{ role: "user", content: "Hi" }],
+          max_tokens: 1,
+          stream: false
+        })
+      });
+      if (warmupResponse.ok)
+        await warmupResponse.json();
+    } catch (error) {
+    }
+    let capabilities = null;
+    for (let attempt = 1; attempt <= 3; attempt++) {
+      try {
+        capabilities = await discoverCapabilities(llamaUrl);
+        break;
+      } catch (error) {
+        if (attempt === 3) {
+          throw error;
+        }
+        await new Promise((resolve) => setTimeout(resolve, 2e3));
+      }
+    }
+    if (!capabilities) {
+      throw new Error("Failed to discover capabilities after 3 attempts");
+    }
+    console.log(` \u2713 llama.cpp Running locally`);
+    console.log();
+    console.log("\u25B6 Runtime");
+    console.log(` \u2022 Model ${policy.model_name} (${policy.quantization})`);
+    console.log(` \u2022 Context ${policy.context_size.toLocaleString()}`);
+    console.log(` \u2022 Parallelism ${policy.max_parallelism}`);
+    console.log(` \u2022 Backend llama.cpp (${capabilities.backend})`);
+    console.log();
+    const wsConnection = new WebSocketConnection(config.gpuKey, capabilities);
     const activeJobs = /* @__PURE__ */ new Map();
     wsConnection.onJob(async (job) => {
       const abortController = new AbortController();
@@ -5747,12 +5878,9 @@ async function main() {
         const currentCapabilities = wsConnection.getCapabilities();
         const result = await processJob(
           job,
-
-          // Always use local llama.cpp (spawned by worker-cli)
+          llamaUrl,
           currentCapabilities,
-          // Pass capabilities for max_tokens_default
          abortController.signal,
-          // Pass abort signal
          (token) => {
            wsConnection.sendToken(job.job_id, token);
          }
@@ -5766,11 +5894,7 @@ async function main() {
           result.outputTokens,
           result.tool_calls
         );
-        if (wasCancelled) {
-          console.log(`\u2713 Job ${job.job_id} cancelled - sent partial response`);
-          wsConnection.sendError(job.job_id, "Job cancelled by user");
-        } else {
-          console.log(`\u2713 Job ${job.job_id} completed successfully`);
+        if (!wasCancelled) {
         }
       } catch (error) {
         console.error(`\u2717 Job ${job.job_id} failed:`, error);
@@ -5782,151 +5906,50 @@ async function main() {
         activeJobs.delete(job.job_id);
       }
     });
-    wsConnection.onPolicy(async (policy) => {
-      console.log("[3/4] Setting up llama.cpp...");
-      console.log(` Policy: ${policy.model_name} (${policy.quantization})`);
-      console.log(` Context size: ${policy.context_size}`);
-      console.log(` Max parallelism: ${policy.max_parallelism}`);
-      try {
-        console.log(" Downloading/checking model...");
-        const modelPath = await ensureModelDownloaded(policy);
-        if (!modelPath) {
-          throw new Error(`Model not found in registry: ${policy.model_name} (${policy.quantization})`);
-        }
-        console.log(` \u2713 Model ready: ${modelPath}`);
-        console.log(" Downloading/checking llama.cpp binary...");
-        const binaryPath = await ensureLlamaBinary();
-        console.log(` \u2713 Binary ready: ${binaryPath}`);
-        const gpuLayers = 9999;
-        console.log(" Spawning llama.cpp process...");
-        llamaProcess = await spawnLlamaCpp({
-          binaryPath,
-          modelPath,
-          contextSize: policy.context_size,
-          parallelism: policy.max_parallelism,
-          port: 9002,
-          host: "127.0.0.1",
-          gpuLayers
-          // Enable GPU acceleration
-        });
-        const setupCrashHandler = (proc) => {
-          proc.onCrash(async (code, signal) => {
-            console.error(`
-[llama.cpp] Process crashed (code: ${code}, signal: ${signal})`);
-            console.log("[llama.cpp] Attempting to restart...");
-            try {
-              await new Promise((resolve) => setTimeout(resolve, 2e3));
-              if (llamaProcess) {
-                const restarted = await llamaProcess.restart();
-                llamaProcess = restarted;
-                setupCrashHandler(restarted);
-              }
-              await waitForLlamaReady("http://127.0.0.1:9002");
-              console.log("[llama.cpp] \u2713 Restarted successfully");
-              const capabilities2 = await discoverCapabilities("http://127.0.0.1:9002");
-              wsConnection.updateCapabilities(capabilities2);
-              wsConnection.sendConfig(capabilities2);
-              console.log("[llama.cpp] \u2713 Capabilities updated after restart");
-            } catch (restartError) {
-              console.error("[llama.cpp] \u2717 Failed to restart:", restartError);
-              console.error("[llama.cpp] Worker will continue but may not process jobs correctly");
-            }
-          });
-        };
-        setupCrashHandler(llamaProcess);
-        console.log(" Waiting for llama.cpp to be ready...");
-        await waitForLlamaReady("http://127.0.0.1:9002");
-        console.log(" \u2713 llama.cpp is ready");
-        console.log(" Warming up model...");
-        try {
-          const warmupResponse = await fetch("http://127.0.0.1:9002/v1/chat/completions", {
-            method: "POST",
-            headers: { "Content-Type": "application/json" },
-            body: JSON.stringify({
-              model: "placeholder",
-              // Will use default model
-              messages: [{ role: "user", content: "Hi" }],
-              max_tokens: 1,
-              // Just 1 token to warm up
-              stream: false
-            })
-          });
-          if (warmupResponse.ok) {
-            await warmupResponse.json();
-            console.log(" \u2713 Model warmed up");
-          }
-        } catch (error) {
-          console.warn(" \u26A0 Model warm-up failed (non-fatal):", error);
-        }
-        console.log(" Discovering capabilities...");
-        const capabilities = await discoverCapabilities("http://127.0.0.1:9002");
-        wsConnection.updateCapabilities(capabilities);
-        wsConnection.sendConfig(capabilities);
-        console.log(" \u2713 Capabilities discovered and sent to server");
-      } catch (error) {
-        console.error(" \u2717 Failed to setup llama.cpp:", error);
-        if (llamaProcess) {
-          try {
-            llamaProcess.stop();
-          } catch (cleanupError) {
-          }
-        }
-        throw error;
-      }
-    });
-    wsConnection.onConfigAck((success, error) => {
-      if (!success) {
-        console.error(" \u2717 Server rejected configuration:", error);
-        wsConnection.close();
-        if (llamaProcess) {
-          try {
-            llamaProcess.stop();
-          } catch (cleanupError) {
-          }
-        }
-        process.exit(1);
-      }
-      console.log(" \u2713 Configuration validated by server");
-    });
     wsConnection.onCancel((cancelMessage) => {
-      console.log(`
-=== Cancelling job ${cancelMessage.job_id} ===`);
       const abortController = activeJobs.get(cancelMessage.job_id);
       if (abortController) {
         abortController.abort();
-
-
-
+      }
+    });
+    llamaProcess.onCrash(async (code, signal) => {
+      console.error(`
+\u2717 llama.cpp crashed (code: ${code}, signal: ${signal})`);
+      console.log(" Restarting...");
+      try {
+        await new Promise((resolve) => setTimeout(resolve, 2e3));
+        const restarted = await llamaProcess.restart();
+        await waitForLlamaReady(llamaUrl);
+        const newCapabilities = await discoverCapabilities(llamaUrl);
+        wsConnection.updateCapabilities(newCapabilities);
+        wsConnection.sendConfig(newCapabilities);
+        console.log(" \u2713 Recovered\n");
+      } catch (restartError) {
+        console.error(" \u2717 Failed to restart:", restartError);
       }
     });
     try {
       await wsConnection.connect(wsUrl);
-      console.log();
     } catch (error) {
       const errorMsg = error instanceof Error ? error.message : "Unknown error";
-
-
-        console.error(" See error details above for specific mismatches.");
-      } else if (errorMsg.includes("GPU key policy not configured")) {
-        console.error("\n\u2717 GPU key policy is not configured.");
-        console.error(" Please configure the policy in the web UI before connecting workers.");
-      } else {
-        console.error("\n\u2717 Connection failed:", errorMsg);
-      }
+      console.error("\u2717 Connection failed:", errorMsg);
+      llamaProcess.stop();
       process.exit(1);
     }
-
-
-
-
+    wsConnection.sendConfig(capabilities);
+    const gpuId = wsConnection.getGpuId();
+    if (gpuId) {
+      console.log("\u25B6 GPU");
+      console.log(` \u2713 ID assigned ${gpuId}`);
+      console.log();
+    }
+    console.log("\u2713 Worker ready");
+    console.log(" Listening for jobs\u2026\n");
     const shutdown = () => {
-      console.log("\
-
-
-
-      } catch (error) {
-        console.error("Error stopping llama.cpp:", error);
-      }
+      console.log("\n\u2713 Shutting down gracefully...");
+      try {
+        llamaProcess.stop();
+      } catch (error) {
      }
      wsConnection.close();
      process.exit(0);
@@ -5934,9 +5957,35 @@ async function main() {
     process.on("SIGINT", shutdown);
     process.on("SIGTERM", shutdown);
   } catch (error) {
-    console.error("Fatal error:", error);
+    console.error("\u2717 Fatal error:", error);
     process.exit(1);
   }
 }
+async function fetchPolicy() {
+  return new Promise((resolve, reject) => {
+    const baseUrl = config.auxotUrl.replace(/^http/, "ws").replace(/^https/, "wss");
+    const wsUrl = `${baseUrl}/api/gpu/client`;
+    const placeholderCapabilities = {
+      model: "pending",
+      ctx_size: 0,
+      backend: "cpu"
+    };
+    const tempConnection = new WebSocketConnection(config.gpuKey, placeholderCapabilities);
+    tempConnection.setSilentMode(true);
+    const timeout = setTimeout(() => {
+      tempConnection.close();
+      reject(new Error("Timeout fetching policy"));
+    }, 3e4);
+    tempConnection.onPolicy(async (policy) => {
+      clearTimeout(timeout);
+      tempConnection.close();
+      resolve(policy);
+    });
+    tempConnection.connect(wsUrl).catch((error) => {
+      clearTimeout(timeout);
+      reject(error);
+    });
+  });
+}
 main();
 //# sourceMappingURL=index.js.map
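
The new fetchPolicy helper above opens a short-lived, silenced WebSocketConnection with placeholder capabilities purely to receive the policy from hello_ack, closes it before the model download starts, and times out after 30 seconds; the real connection is established later with the discovered capabilities. A TypeScript sketch of that connect-resolve-close pattern; the Policy shape and PolicyConnection interface here are illustrative stand-ins, only the method names mirror the diff.

// Sketch: fetch a one-off value (the policy) over a temporary connection, with a timeout.
interface Policy { model_name: string; quantization: string; context_size: number }

interface PolicyConnection {
  setSilentMode(silent: boolean): void;
  onPolicy(cb: (policy: Policy) => void | Promise<void>): void;
  connect(url: string): Promise<void>;
  close(): void;
}

function fetchPolicyOnce(conn: PolicyConnection, wsUrl: string, timeoutMs = 30_000): Promise<Policy> {
  return new Promise((resolve, reject) => {
    conn.setSilentMode(true); // suppress connect/disconnect chatter for this throwaway connection
    const timeout = setTimeout(() => {
      conn.close();
      reject(new Error("Timeout fetching policy"));
    }, timeoutMs);
    conn.onPolicy((policy) => {
      clearTimeout(timeout);
      conn.close();
      resolve(policy);
    });
    conn.connect(wsUrl).catch((error) => {
      clearTimeout(timeout);
      reject(error);
    });
  });
}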