@auxot/worker-cli 0.1.6 → 0.3.3

package/dist/index.js CHANGED
@@ -5,37 +5,6 @@ var __export = (target, all) => {
     __defProp(target, name, { get: all[name], enumerable: true });
 };
 
-// src/gpu-id.ts
-import { randomUUID } from "crypto";
-import { readFile, writeFile, mkdir } from "fs/promises";
-import { homedir } from "os";
-import { join } from "path";
-var AUXOT_DIR = join(homedir(), ".auxot");
-var GPU_ID_FILE = join(AUXOT_DIR, "gpu-id");
-async function getOrCreateGpuId() {
-  try {
-    const existingId = await readFile(GPU_ID_FILE, "utf-8");
-    const trimmed = existingId.trim();
-    const uuidRegex2 = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
-    if (uuidRegex2.test(trimmed)) {
-      return trimmed;
-    }
-    console.warn("Invalid GPU ID found, generating new one");
-  } catch (error) {
-  }
-  const newId = randomUUID();
-  try {
-    await mkdir(AUXOT_DIR, { recursive: true });
-    await writeFile(GPU_ID_FILE, newId, "utf-8");
-    console.log(`Generated new GPU ID: ${newId}`);
-    console.log(`Stored in: ${GPU_ID_FILE}`);
-  } catch (error) {
-    console.error("Failed to save GPU ID:", error);
-    throw error;
-  }
-  return newId;
-}
-
 // src/capabilities.ts
 function normalizeModelName(filePath) {
   const filename = filePath.split("/").pop() || filePath;
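
Note on the deleted src/gpu-id.ts above: through 0.1.x the worker minted a random UUID and persisted it in ~/.auxot/gpu-id. In 0.3.x there is no local identity at all; as the hello_ack hunk further down shows, the server assigns the ID and the worker reads it back via getGpuId(). A minimal sketch of the new consumption path (message shape taken from this diff; the type name is ours):

// Sketch: the GPU ID now arrives in the hello_ack frame instead of being
// read from ~/.auxot/gpu-id.
type HelloAck = { type: "hello_ack"; success: boolean; gpu_id?: string };

function handleHelloAck(message: HelloAck, state: { gpuId: string | null }): void {
  if (message.success && message.gpu_id) {
    state.gpuId = message.gpu_id; // server-assigned; nothing is persisted locally
  }
}
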
@@ -87,12 +56,10 @@ async function discoverCapabilities(llamaUrl) {
         capabilities.vram_gb = Math.round(props.total_vram_mb / 1024);
       }
       if (capabilities.ctx_size !== 4096) {
-        console.log("Discovered capabilities:", capabilities);
         return capabilities;
       }
     }
   } catch (propsError) {
-    console.warn("/props endpoint not available, trying /health");
   }
   try {
     const healthResponse = await fetch(`${llamaUrl}/health`);
@@ -100,17 +67,13 @@ async function discoverCapabilities(llamaUrl) {
       const health = await healthResponse.json();
       if (health.n_ctx) {
         capabilities.ctx_size = health.n_ctx;
-        console.log(`Runtime context size from /health: ${capabilities.ctx_size}`);
       }
     }
   } catch {
-    console.warn("/health endpoint not available");
   }
   if (capabilities.ctx_size === 4096 && model.meta?.n_ctx_train) {
-    console.warn("Could not determine runtime context size, using n_ctx_train as fallback");
     capabilities.ctx_size = model.meta.n_ctx_train;
   }
-  console.log("Discovered capabilities:", capabilities);
   return capabilities;
 } catch (error) {
   console.error("Failed to discover capabilities:", error);
@@ -282,7 +245,8 @@ function validatePolicy(discoveredCapabilities, policy) {
   const warnings = [];
   const discoveredNormalized = normalizeModelName2(discoveredCapabilities.model || "");
   const policyNormalized = normalizeModelName2(policy.model_name);
-  if (discoveredNormalized !== policyNormalized) {
+  const isModelMatch = discoveredNormalized === policyNormalized || discoveredNormalized.startsWith(policyNormalized) || policyNormalized.startsWith(discoveredNormalized);
+  if (!isModelMatch) {
     errors.push(
       `Model name mismatch: discovered "${discoveredCapabilities.model}" (normalized: "${discoveredNormalized}") does not match policy "${policy.model_name}" (normalized: "${policyNormalized}")`
     );
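
The match is now prefix-tolerant in both directions, so normalized names that differ only by a suffix (quantization tags, "-instruct", and the like) still pair up. An illustrative check with hypothetical names:

// Either side may carry extra suffixes after normalization.
const discovered = "qwen2.5-7b-instruct"; // hypothetical
const policyName = "qwen2.5-7b";          // hypothetical
const isModelMatch =
  discovered === policyName ||
  discovered.startsWith(policyName) ||    // true here
  policyName.startsWith(discovered);
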
@@ -348,6 +312,8 @@ var WebSocketConnection = class {
   heartbeatTimer = null;
   reconnectTimer = null;
   gpuKey;
+  gpuId = null;
+  // Server-assigned GPU ID
   capabilities;
   onJobCallback = null;
   onCancelCallback = null;
@@ -359,10 +325,19 @@ var WebSocketConnection = class {
   shouldReconnect = true;
   isReconnecting = false;
   policy = null;
+  silentDisconnect = false;
+  // Suppress disconnect messages
   constructor(gpuKey, capabilities) {
     this.gpuKey = gpuKey;
     this.capabilities = capabilities;
   }
+  /**
+   * Set silent mode (suppress disconnect messages)
+   * Used during download phase to avoid jarring messages
+   */
+  setSilentMode(silent) {
+    this.silentDisconnect = silent;
+  }
   /**
    * Connect to WebSocket server and send hello message
    */
@@ -380,7 +355,7 @@ var WebSocketConnection = class {
       reject(new Error("No WebSocket URL configured"));
       return;
     }
-    if (!this.isReconnecting) {
+    if (!this.isReconnecting && !this.silentDisconnect) {
       console.log(`Connecting to ${this.wsUrl}...`);
     }
     try {
@@ -414,6 +389,9 @@ var WebSocketConnection = class {
       if (message.type === "hello_ack") {
         clearTimeout(connectionTimeout);
         if (message.success) {
+          if (message.gpu_id) {
+            this.gpuId = message.gpu_id;
+          }
           if (!message.policy) {
             const errorMsg = "Server did not send policy in hello_ack";
             console.error(`\u2717 ${errorMsg}`);
@@ -427,16 +405,14 @@ var WebSocketConnection = class {
           try {
             await this.onPolicyCallback(message.policy);
           } catch (error) {
-            console.error("[Policy Callback] Error:", error);
+            if (!this.silentDisconnect) {
+              console.error("[Policy Callback] Error:", error);
+            }
             this.shouldReconnect = false;
             this.ws?.close();
             reject(error);
             return;
           }
-          console.log("\u2713 Successfully authenticated with server");
-          console.log(`  Policy: ${message.policy.model_name} (${message.policy.quantization})`);
-          console.log("  Spawning llama.cpp process...");
-          console.log("  (Capabilities validation will happen via config message)");
         } else {
           const validation = await validatePolicy(this.capabilities, message.policy);
           if (validation.warnings && validation.warnings.length > 0) {
@@ -545,12 +521,12 @@ var WebSocketConnection = class {
       this.isConnected = false;
       this.stopHeartbeat();
       if (this.shouldReconnect) {
-        if (!this.isReconnecting) {
+        if (!this.isReconnecting && !this.silentDisconnect) {
          console.log("WebSocket disconnected, will continue to retry...");
          this.isReconnecting = true;
        }
        this.scheduleReconnect();
-      } else {
+      } else if (!this.silentDisconnect) {
        console.log("WebSocket disconnected");
      }
    });
@@ -694,6 +670,12 @@ var WebSocketConnection = class {
   getPolicy() {
     return this.policy;
   }
+  /**
+   * Get GPU ID assigned by server
+   */
+  getGpuId() {
+    return this.gpuId;
+  }
   /**
    * Get current capabilities
    */
@@ -771,6 +753,12 @@ async function processJob(job, llamaUrl, capabilities, abortSignal, onToken) {
   const reader = response.body.getReader();
   const decoder = new TextDecoder();
   const toolCallsMap = /* @__PURE__ */ new Map();
+  let hasReceivedFirstToken = false;
+  const keepaliveInterval = setInterval(() => {
+    if (!hasReceivedFirstToken) {
+      onToken("");
+    }
+  }, 1e4);
   const parser = createParser((event) => {
     if (event.type === "reconnect-interval")
       return;
@@ -783,6 +771,7 @@ async function processJob(job, llamaUrl, capabilities, abortSignal, onToken) {
     }
     const content = chunk.choices[0]?.delta?.content;
     if (content) {
+      hasReceivedFirstToken = true;
      fullResponse += content;
      onToken(content);
    }
@@ -834,6 +823,7 @@ async function processJob(job, llamaUrl, capabilities, abortSignal, onToken) {
       throw error;
     }
   } finally {
+    clearInterval(keepaliveInterval);
     reader.releaseLock();
   }
   let durationMs;
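
The three hunks above add a keepalive to processJob: llama.cpp can spend a long time on prompt processing before the first delta arrives, so until hasReceivedFirstToken flips, an empty token is emitted every 10 seconds, and the timer is cleared in the same finally block that releases the reader. The same idea as a standalone sketch (the helper is ours, not the package's):

// Keep the upstream stream warm until real tokens flow.
function startKeepalive(onToken: (token: string) => void, intervalMs = 10_000) {
  let seenFirstToken = false;
  const timer = setInterval(() => {
    if (!seenFirstToken) onToken(""); // empty token: a no-op for consumers, but proves liveness
  }, intervalMs);
  return {
    markFirstToken(): void { seenFirstToken = true; },
    stop(): void { clearInterval(timer); }, // call from finally, like clearInterval(keepaliveInterval)
  };
}
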
@@ -879,9 +869,9 @@ import { spawn } from "child_process";
 
 // src/llama-binary.ts
 import { existsSync, chmodSync, statSync } from "node:fs";
-import { mkdir as mkdir2, unlink } from "node:fs/promises";
-import { join as join2 } from "node:path";
-import { homedir as homedir2 } from "os";
+import { mkdir, unlink } from "node:fs/promises";
+import { join } from "node:path";
+import { homedir } from "os";
 import { platform as platform2, arch } from "os";
 import { createWriteStream } from "node:fs";
 import { exec as exec2 } from "child_process";
@@ -1038,18 +1028,18 @@ async function getArchiveName() {
 function getCacheDir() {
   const os = platform2();
   const architecture = arch();
-  const cacheDir = process.env.AUXOT_LLAMA_CACHE_DIR || join2(homedir2(), ".auxot", "llama-server");
-  return join2(cacheDir, `${os}-${architecture}`);
+  const cacheDir = process.env.AUXOT_LLAMA_CACHE_DIR || join(homedir(), ".auxot", "llama-server");
+  return join(cacheDir, `${os}-${architecture}`);
 }
 function getBinaryPath() {
   const cacheDir = getCacheDir();
-  return join2(cacheDir, `llama-${LLAMA_CPP_VERSION}`, "llama-server");
+  return join(cacheDir, `llama-${LLAMA_CPP_VERSION}`, "llama-server");
 }
 async function downloadLlamaBinary(onProgress) {
   const { archiveName, warning } = await getArchiveName();
   const binaryPath = getBinaryPath();
   const cacheDir = getCacheDir();
-  const archivePath = join2(cacheDir, archiveName);
+  const archivePath = join(cacheDir, archiveName);
   if (warning) {
     console.warn(`  \u26A0 ${warning}`);
   }
@@ -1061,7 +1051,7 @@ async function downloadLlamaBinary(onProgress) {
     }
   }
   if (!existsSync(cacheDir)) {
-    await mkdir2(cacheDir, { recursive: true });
+    await mkdir(cacheDir, { recursive: true });
   }
   const downloadUrl = `https://github.com/${LLAMA_CPP_REPO}/releases/download/${LLAMA_CPP_VERSION}/${archiveName}`;
   console.log(`  Downloading llama.cpp binary...`);
@@ -1235,37 +1225,27 @@ async function spawnLlamaCpp(options) {
   let stdoutBuffer = "";
   childProcess.stdout?.on("data", (data) => {
     stdoutBuffer += data.toString();
-    const lines = stdoutBuffer.split("\n");
-    stdoutBuffer = lines.pop() || "";
-    for (const line of lines) {
-      const trimmed = line.trim();
-      if (trimmed) {
-        console.log(`[llama.cpp stdout] ${trimmed}`);
-      }
+    if (stdoutBuffer.length > 1e4) {
+      stdoutBuffer = stdoutBuffer.slice(-5e3);
     }
   });
   let stderrBuffer = "";
   childProcess.stderr?.on("data", (data) => {
-    stderrBuffer += data.toString();
-    const lines = stderrBuffer.split("\n");
-    stderrBuffer = lines.pop() || "";
+    const chunk = data.toString();
+    stderrBuffer += chunk;
+    const lines = chunk.split("\n");
     for (const line of lines) {
-      const trimmed = line.trim();
-      if (trimmed) {
-        console.error(`[llama.cpp stderr] ${trimmed}`);
+      const lower = line.toLowerCase();
+      if (lower.includes("error") || lower.includes("fatal") || lower.includes("crash") || lower.includes("failed")) {
+        console.error(`[llama.cpp] ${line.trim()}`);
      }
    }
+    if (stderrBuffer.length > 1e4) {
+      stderrBuffer = stderrBuffer.slice(-5e3);
+    }
  });
  childProcess.on("exit", (code, signal) => {
    isRunning = false;
-    if (stdoutBuffer.trim()) {
-      console.log(`[llama.cpp stdout] ${stdoutBuffer.trim()}`);
-      stdoutBuffer = "";
-    }
-    if (stderrBuffer.trim()) {
-      console.error(`[llama.cpp stderr] ${stderrBuffer.trim()}`);
-      stderrBuffer = "";
-    }
    if (code !== null) {
      console.log(`[llama.cpp] Process exited with code ${code}`);
      if (code !== 0) {
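
Instead of mirroring every llama.cpp log line, the worker now keeps bounded rolling buffers (clamped from 10,000 down to the last 5,000 characters) and only surfaces stderr lines containing error-ish keywords. As a standalone sketch (keywords and thresholds from this diff); note that the keyword scan runs per chunk, so a line split across two chunks can in principle slip past it:

// Surface only error-ish stderr lines and clamp the retained buffer.
const ERRORISH = ["error", "fatal", "crash", "failed"];

function filterStderrChunk(buffer: string, chunk: string): string {
  for (const line of chunk.split("\n")) {
    const lower = line.toLowerCase();
    if (ERRORISH.some((kw) => lower.includes(kw))) {
      console.error(`[llama.cpp] ${line.trim()}`);
    }
  }
  buffer += chunk;
  return buffer.length > 10_000 ? buffer.slice(-5_000) : buffer; // bounded memory
}
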
@@ -1344,15 +1324,23 @@ async function spawnLlamaCpp(options) {
     }
   };
 }
-async function waitForLlamaReady(url, timeoutMs = 3e4) {
+async function waitForLlamaReady(url, timeoutMs = 6e4) {
   const startTime = Date.now();
-  const checkInterval = 500;
+  const checkInterval = 1e3;
   while (Date.now() - startTime < timeoutMs) {
     try {
-      const response = await fetch(`${url}/v1/models`);
+      const response = await fetch(`${url}/v1/models`, {
+        signal: AbortSignal.timeout(5e3)
+        // 5 second timeout per request
+      });
       if (response.ok) {
-        console.log("[llama.cpp] Server is ready");
-        return;
+        const contentType = response.headers.get("content-type");
+        if (contentType && contentType.includes("application/json")) {
+          const data = await response.json();
+          if (data && (data.data || data.object)) {
+            return;
+          }
+        }
       }
     } catch (error) {
     }
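
The readiness probe is both more patient and stricter: the overall budget doubles to 60 s, every poll carries its own 5 s AbortSignal.timeout so one hung socket cannot eat the whole budget, and a 200 response only counts once its body parses as the JSON shape /v1/models returns. The compiled loop above, restated as a sketch:

// Poll until llama.cpp answers coherently, with a per-request cap.
async function waitForReady(url: string, timeoutMs = 60_000): Promise<void> {
  const start = Date.now();
  while (Date.now() - start < timeoutMs) {
    try {
      const response = await fetch(`${url}/v1/models`, { signal: AbortSignal.timeout(5_000) });
      if (response.ok && response.headers.get("content-type")?.includes("application/json")) {
        const data = await response.json();
        if (data && (data.data || data.object)) return; // a real models payload, not a stub page
      }
    } catch {
      // not up yet, or the 5 s cap fired; poll again
    }
    await new Promise((resolve) => setTimeout(resolve, 1_000));
  }
  throw new Error(`llama.cpp not ready after ${timeoutMs} ms`);
}
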
@@ -5413,10 +5401,11 @@ var ModelRegistryEntrySchema = external_exports.object({
   family: ModelFamilySchema,
   parameters: external_exports.string(),
   default_context_size: external_exports.number().int().positive(),
+  max_context_size: external_exports.number().int().positive(),
   vram_requirements_gb: external_exports.number().positive(),
   capabilities: external_exports.array(ModelCapabilitySchema).min(1),
   file_name: external_exports.string(),
-  file_size_bytes: external_exports.number().int().positive().optional()
+  file_size_bytes: external_exports.number().int().positive().nullable().optional()
 });
 var ModelRegistrySchema = external_exports.object({
   version: external_exports.string(),
@@ -5429,7 +5418,7 @@ function validateModelRegistry(data) {
 
 // ../../packages/model-registry/dist/src/loader.js
 import { readFileSync, statSync as statSync2 } from "node:fs";
-import { join as join3, dirname as dirname2 } from "node:path";
+import { join as join2, dirname as dirname2 } from "node:path";
 import { fileURLToPath } from "node:url";
 var __filename = fileURLToPath(import.meta.url);
 var __dirname = dirname2(__filename);
@@ -5438,11 +5427,11 @@ var cachedRegistryPath = null;
 var cachedRegistryMtime = null;
 function loadRegistry() {
   const registryPaths = [
-    join3(__dirname, "..", "..", "registry.json"),
+    join2(__dirname, "..", "..", "registry.json"),
     // From dist/src/ -> package root
-    join3(__dirname, "..", "registry.json"),
+    join2(__dirname, "..", "registry.json"),
     // From dist/ -> package root (if running from dist/)
-    join3(__dirname, "registry.json")
+    join2(__dirname, "registry.json")
     // Same directory (if copied there)
   ];
   let registryPath = null;
@@ -5509,17 +5498,17 @@ function getModels(registry, filters) {
 }
 
 // src/model-resolver.ts
-import { join as join5 } from "path";
-import { homedir as homedir3 } from "os";
+import { join as join4 } from "path";
+import { homedir as homedir2 } from "os";
 
 // src/model-downloader.ts
 import { createWriteStream as createWriteStream2, existsSync as existsSync2, statSync as statSync3 } from "node:fs";
-import { mkdir as mkdir3 } from "node:fs/promises";
+import { mkdir as mkdir2 } from "node:fs/promises";
 import { dirname as dirname3 } from "node:path";
 async function downloadModel(entry, outputPath, onProgress) {
   const outputDir = dirname3(outputPath);
   if (!existsSync2(outputDir)) {
-    await mkdir3(outputDir, { recursive: true });
+    await mkdir2(outputDir, { recursive: true });
   }
   if (existsSync2(outputPath)) {
     const stats = statSync3(outputPath);
@@ -5527,9 +5516,11 @@ async function downloadModel(entry, outputPath, onProgress) {
       console.log(`  \u2713 Model already downloaded (${formatBytes2(stats.size)})`);
       return outputPath;
     }
-    if (entry.file_size_bytes && stats.size !== entry.file_size_bytes) {
-      console.log(`  \u2298 Existing file size mismatch (${formatBytes2(stats.size)} vs ${formatBytes2(entry.file_size_bytes)})`);
-      console.log(`  \u2298 Re-downloading...`);
+    if (entry.file_size_bytes && stats.size < entry.file_size_bytes) {
+      console.log(`  \u2299 Partial download found (${formatBytes2(stats.size)} / ${formatBytes2(entry.file_size_bytes)})`);
+      console.log(`  \u2299 Resuming download...`);
+    } else if (entry.file_size_bytes && stats.size > entry.file_size_bytes) {
+      console.log(`  \u2298 File is larger than expected, restarting download...`);
       const { unlink: unlink2 } = await import("node:fs/promises");
       await unlink2(outputPath);
     }
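
Together with the Range request a few hunks below, the size check becomes a three-way decision instead of "delete on any mismatch" (and file_size_bytes may now be null, in which case the local file is trusted):

// local <  expected  -> keep the file, resume with a Range request
// local == expected  -> already complete, return early
// local >  expected  -> stale or corrupt, unlink and restart from zero
function resumeHeaders(startByte: number): Record<string, string> | undefined {
  return startByte > 0 ? { Range: `bytes=${startByte}-` } : undefined;
}
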
@@ -5546,10 +5537,6 @@ async function downloadModel(entry, outputPath, onProgress) {
   if (existsSync2(outputPath)) {
     const stats = statSync3(outputPath);
     startByte = stats.size;
-    if (startByte > 0 && startByte < totalBytes) {
-      console.log(`  Resuming from ${formatBytes2(startByte)}...`);
-      downloadedBytes = startByte;
-    }
   }
   const response = await fetch(downloadUrl, {
     headers: startByte > 0 ? {
@@ -5567,11 +5554,29 @@ async function downloadModel(entry, outputPath, onProgress) {
   }
   const contentLength = response.headers.get("content-length");
   const totalSize = contentLength ? parseInt(contentLength, 10) + startByte : totalBytes;
+  if (startByte > 0) {
+    if (startByte < totalSize) {
+      console.log(`  \u2299 Resuming from ${formatBytes2(startByte)}...`);
+      downloadedBytes = startByte;
+    } else if (startByte === totalSize) {
+      console.log(`  \u2713 Model already downloaded (${formatBytes2(startByte)})`);
+      return outputPath;
+    } else {
+      console.log(`  \u2298 File is larger than expected (${formatBytes2(startByte)} > ${formatBytes2(totalSize)}), restarting...`);
+      const { unlink: unlink2 } = await import("node:fs/promises");
+      await unlink2(outputPath);
+      startByte = 0;
+      downloadedBytes = 0;
+    }
+  }
   const fileStream = createWriteStream2(outputPath, { flags: startByte > 0 ? "a" : "w" });
   const reader = response.body?.getReader();
   if (!reader) {
     throw new Error("Response body is not readable");
   }
+  let lastProgressUpdate = Date.now();
+  const startTime = Date.now();
+  const bytesAtStart = downloadedBytes;
   try {
     while (true) {
       const { done, value } = await reader.read();
@@ -5580,13 +5585,21 @@ async function downloadModel(entry, outputPath, onProgress) {
       }
       fileStream.write(value);
       downloadedBytes += value.length;
-      if (onProgress) {
+      const now = Date.now();
+      if (onProgress && now - lastProgressUpdate > 1e3) {
         onProgress(downloadedBytes, totalSize);
-      } else if (totalSize > 0) {
-        if (downloadedBytes % (10 * 1024 * 1024) < value.length) {
-          const percent = (downloadedBytes / totalSize * 100).toFixed(1);
-          process.stdout.write(`\r  Progress: ${percent}% (${formatBytes2(downloadedBytes)} / ${formatBytes2(totalSize)})`);
-        }
+        lastProgressUpdate = now;
+      } else if (totalSize > 0 && now - lastProgressUpdate > 1e3) {
+        const elapsedSeconds = (now - startTime) / 1e3;
+        const bytesDownloadedThisSession = downloadedBytes - bytesAtStart;
+        const bytesPerSecond = bytesDownloadedThisSession / elapsedSeconds;
+        const remainingBytes = totalSize - downloadedBytes;
+        const etaSeconds = remainingBytes / bytesPerSecond;
+        const percent = (downloadedBytes / totalSize * 100).toFixed(1);
+        const speed = formatBytes2(bytesPerSecond);
+        const eta = formatTime(etaSeconds);
+        process.stdout.write(`\r  ${formatBytes2(downloadedBytes)} / ${formatBytes2(totalSize)} (${percent}%) ${speed}/s ETA ~${eta}`);
+        lastProgressUpdate = now;
       }
     }
     fileStream.end();
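
Two things changed in the progress path: repaints are throttled to once per second (now - lastProgressUpdate > 1e3), and speed is measured from bytesAtStart, i.e. this session only, so a resumed download does not report an inflated lifetime rate. A worked example with made-up numbers:

// Hypothetical: resume started at 1 GiB; 30 s later we are at 2.5 GiB of 5 GiB.
const GiB = 2 ** 30;
const bytesPerSecond = (2.5 * GiB - 1.0 * GiB) / 30;          // ≈ 51.2 MiB/s, session speed only
const etaSeconds = (5.0 * GiB - 2.5 * GiB) / bytesPerSecond;  // = 50 s
console.log(Math.round(etaSeconds));                          // 50 -> formatTime renders "50s"
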
@@ -5597,12 +5610,7 @@ async function downloadModel(entry, outputPath, onProgress) {
     if (totalSize > 0 && downloadedBytes !== totalSize) {
       throw new Error(`Download incomplete: ${downloadedBytes} bytes downloaded, expected ${totalSize}`);
     }
-    if (onProgress) {
-      process.stdout.write("\r");
-    } else {
-      process.stdout.write("\r");
-    }
-    console.log(`  \u2713 Download complete (${formatBytes2(downloadedBytes)})`);
+    process.stdout.write("\r" + " ".repeat(80) + "\r");
     return outputPath;
   } catch (error) {
     fileStream.destroy();
@@ -5617,6 +5625,19 @@ function formatBytes2(bytes) {
   const i = Math.floor(Math.log(bytes) / Math.log(k));
   return `${(bytes / Math.pow(k, i)).toFixed(1)} ${sizes[i]}`;
 }
+function formatTime(seconds) {
+  if (seconds < 60) {
+    return `${Math.round(seconds)}s`;
+  }
+  const minutes = Math.floor(seconds / 60);
+  const remainingSeconds = Math.round(seconds % 60);
+  if (minutes < 60) {
+    return `${minutes}m ${remainingSeconds}s`;
+  }
+  const hours = Math.floor(minutes / 60);
+  const remainingMinutes = minutes % 60;
+  return `${hours}h ${remainingMinutes}m`;
+}
 
 // src/model-resolver.ts
 import { existsSync as existsSync3 } from "node:fs";
@@ -5636,24 +5657,83 @@ async function ensureModelDownloaded(policy, onProgress) {
     console.error(`  \u2717 Model not found in registry: ${policy.model_name} (${policy.quantization})`);
     return null;
   }
-  const modelsDir = process.env.AUXOT_MODELS_DIR || join5(homedir3(), ".auxot", "models");
-  const modelDir = join5(modelsDir, model.huggingface_id.replace("/", "_"));
-  const modelPath = join5(modelDir, model.file_name);
-  if (existsSync3(modelPath)) {
-    const { statSync: statSync4 } = await import("node:fs");
-    const stats = statSync4(modelPath);
-    if (model.file_size_bytes && stats.size === model.file_size_bytes) {
+  const modelsDir = process.env.AUXOT_MODELS_DIR || join4(homedir2(), ".auxot", "models");
+  const modelDir = join4(modelsDir, model.huggingface_id.replace("/", "_"));
+  const modelPath = join4(modelDir, model.file_name);
+  const shardMatch = model.file_name.match(/-(\d+)-of-(\d+)\.gguf$/);
+  if (shardMatch) {
+    const totalShards = parseInt(shardMatch[2], 10);
+    const fileBaseName = model.file_name.replace(/-\d+-of-\d+\.gguf$/, "");
+    const fileExtension = ".gguf";
+    console.log(`  Model has ${totalShards} shards, downloading all...`);
+    for (let shardNum = 1; shardNum <= totalShards; shardNum++) {
+      const paddedNum = String(shardNum).padStart(5, "0");
+      const shardFileName = `${fileBaseName}-${paddedNum}-of-${String(totalShards).padStart(5, "0")}${fileExtension}`;
+      const shardPath = join4(modelDir, shardFileName);
+      if (existsSync3(shardPath)) {
+        const { statSync: statSync4 } = await import("node:fs");
+        const stats = statSync4(shardPath);
+        console.log(`  \u2713 Shard ${shardNum}/${totalShards} already downloaded`);
+        continue;
+      }
+      const shardEntry = {
+        ...model,
+        file_name: shardFileName,
+        file_size_bytes: null
+        // Don't know the size, will get from Content-Length
+      };
+      console.log(`  Downloading shard ${shardNum}/${totalShards}...`);
+      try {
+        await downloadModel(shardEntry, shardPath, onProgress);
+        console.log(`  \u2713 Shard ${shardNum}/${totalShards} complete`);
+      } catch (error) {
+        console.error(`  \u2717 Shard ${shardNum}/${totalShards} failed:`, error);
+        throw error;
+      }
+    }
+    return modelPath;
+  } else {
+    if (existsSync3(modelPath)) {
+      const { statSync: statSync4 } = await import("node:fs");
+      const stats = statSync4(modelPath);
+      if (model.file_size_bytes && stats.size === model.file_size_bytes) {
+        return modelPath;
+      }
+    }
+    try {
+      await downloadModel(model, modelPath, onProgress);
       return modelPath;
+    } catch (error) {
+      console.error(`  \u2717 Download failed:`, error);
+      throw error;
     }
   }
-  console.log(`  Downloading model: ${model.model_name} (${model.quantization})`);
-  try {
-    await downloadModel(model, modelPath, onProgress);
-    return modelPath;
-  } catch (error) {
-    console.error(`  \u2717 Download failed:`, error);
-    throw error;
+}
+
+// src/port-finder.ts
+import { createServer } from "net";
+async function findAvailablePort(minPort = 1e4, maxPort = 65535, maxAttempts = 100) {
+  for (let i = 0; i < maxAttempts; i++) {
+    const port = Math.floor(Math.random() * (maxPort - minPort + 1)) + minPort;
+    const isAvailable = await isPortAvailable(port);
+    if (isAvailable) {
+      return port;
+    }
   }
+  throw new Error(`No available ports found after ${maxAttempts} attempts in range ${minPort}-${maxPort}`);
+}
+function isPortAvailable(port) {
+  return new Promise((resolve) => {
+    const server = createServer();
+    server.once("error", () => {
+      resolve(false);
+    });
+    server.once("listening", () => {
+      server.close();
+      resolve(true);
+    });
+    server.listen(port, "127.0.0.1");
+  });
 }
 
 // src/index.ts
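
The shard branch is new in 0.3.x: a registry entry only needs to name one file of a split GGUF, and the -NNNNN-of-NNNNN suffix is rewritten to enumerate the rest (shard sizes are unknown up front, hence file_size_bytes: null and reliance on Content-Length). For example, with a hypothetical file name:

// One registry file_name fans out to every shard of the set.
const fileName = "model-Q4_K_M-00001-of-00003.gguf"; // hypothetical
const shardMatch = fileName.match(/-(\d+)-of-(\d+)\.gguf$/);
if (shardMatch) {
  const totalShards = parseInt(shardMatch[2], 10); // 3
  const base = fileName.replace(/-\d+-of-\d+\.gguf$/, "");
  for (let n = 1; n <= totalShards; n++) {
    const padded = String(n).padStart(5, "0");
    console.log(`${base}-${padded}-of-${String(totalShards).padStart(5, "0")}.gguf`);
    // -> ...-00001-of-00003.gguf, ...-00002-of-00003.gguf, ...-00003-of-00003.gguf
  }
}

Returning modelPath (the first shard) matches how split GGUFs are conventionally loaded: the runtime is pointed at the first file and locates its siblings.
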
@@ -5717,28 +5797,79 @@ if (!config.gpuKey.startsWith("gpu.")) {
 }
 async function main() {
   setDebugLevel(config.debugLevel);
-  console.log("Auxot GPU Worker CLI");
-  console.log("====================");
-  if (config.debugLevel > 0) {
-    console.log(`Debug Level: ${config.debugLevel}`);
-  }
-  console.log();
+  console.log("Auxot Worker");
+  console.log("============\n");
   try {
-    console.log("[1/4] Loading GPU ID...");
-    const gpuId = await getOrCreateGpuId();
-    console.log(`GPU ID: ${gpuId}`);
-    console.log();
-    console.log("[2/4] Connecting to Auxot platform...");
     const baseUrl = config.auxotUrl.replace(/^http/, "ws").replace(/^https/, "wss");
     const wsUrl = `${baseUrl}/api/gpu/client`;
-    const placeholderCapabilities = {
-      model: "pending",
-      ctx_size: 0,
-      backend: "cpu"
-      // Placeholder - will be updated after discovery
-    };
-    const wsConnection = new WebSocketConnection(config.gpuKey, placeholderCapabilities);
-    let llamaProcess = null;
+    console.log("\u25B6 Control Plane");
+    console.log(`  \u2713 Connected ${wsUrl}`);
+    const policy = await fetchPolicy();
+    console.log(`  \u2713 Authenticated`);
+    console.log();
+    console.log("\u25B6 Downloading Model");
+    console.log(`  ${policy.model_name} (${policy.quantization})`);
+    const modelPath = await ensureModelDownloaded(policy);
+    if (!modelPath) {
+      throw new Error(`Model not found in registry: ${policy.model_name} (${policy.quantization})`);
+    }
+    console.log(`  \u2713 Model ready`);
+    console.log();
+    console.log("\u25B6 Control Plane");
+    const binaryPath = await ensureLlamaBinary();
+    const gpuLayers = 9999;
+    const llamaPort = await findAvailablePort();
+    const llamaUrl = `http://127.0.0.1:${llamaPort}`;
+    const llamaProcess = await spawnLlamaCpp({
+      binaryPath,
+      modelPath,
+      contextSize: policy.context_size,
+      parallelism: policy.max_parallelism,
+      port: llamaPort,
+      host: "127.0.0.1",
+      gpuLayers
+    });
+    await waitForLlamaReady(llamaUrl);
+    await new Promise((resolve) => setTimeout(resolve, 2e3));
+    try {
+      const warmupResponse = await fetch(`${llamaUrl}/v1/chat/completions`, {
+        method: "POST",
+        headers: { "Content-Type": "application/json" },
+        body: JSON.stringify({
+          model: "placeholder",
+          messages: [{ role: "user", content: "Hi" }],
+          max_tokens: 1,
+          stream: false
+        })
+      });
+      if (warmupResponse.ok)
+        await warmupResponse.json();
+    } catch (error) {
+    }
+    let capabilities = null;
+    for (let attempt = 1; attempt <= 3; attempt++) {
+      try {
+        capabilities = await discoverCapabilities(llamaUrl);
+        break;
+      } catch (error) {
+        if (attempt === 3) {
+          throw error;
+        }
+        await new Promise((resolve) => setTimeout(resolve, 2e3));
+      }
+    }
+    if (!capabilities) {
+      throw new Error("Failed to discover capabilities after 3 attempts");
+    }
+    console.log(`  \u2713 llama.cpp Running locally`);
+    console.log();
+    console.log("\u25B6 Runtime");
+    console.log(`  \u2022 Model ${policy.model_name} (${policy.quantization})`);
+    console.log(`  \u2022 Context ${policy.context_size.toLocaleString()}`);
+    console.log(`  \u2022 Parallelism ${policy.max_parallelism}`);
+    console.log(`  \u2022 Backend llama.cpp (${capabilities.backend})`);
+    console.log();
+    const wsConnection = new WebSocketConnection(config.gpuKey, capabilities);
     const activeJobs = /* @__PURE__ */ new Map();
     wsConnection.onJob(async (job) => {
       const abortController = new AbortController();
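
Startup is also reordered in this hunk: the policy arrives first (via fetchPolicy, defined at the bottom of the file), the model and binary are prepared, llama.cpp is spawned on a random free port, and only then is the real WebSocketConnection created with genuine capabilities instead of placeholders. Capability discovery itself is now retried, three attempts two seconds apart; the loop generalizes to (helper name is ours):

// Generic form of the retry loop used around discoverCapabilities.
async function retry<T>(fn: () => Promise<T>, attempts = 3, delayMs = 2_000): Promise<T> {
  let lastError: unknown;
  for (let attempt = 1; attempt <= attempts; attempt++) {
    try {
      return await fn();
    } catch (error) {
      lastError = error;
      if (attempt < attempts) await new Promise((resolve) => setTimeout(resolve, delayMs));
    }
  }
  throw lastError; // the last failure, after all attempts
}
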
@@ -5747,12 +5878,9 @@ async function main() {
       const currentCapabilities = wsConnection.getCapabilities();
       const result = await processJob(
         job,
-        "http://127.0.0.1:9002",
-        // Always use local llama.cpp (spawned by worker-cli)
+        llamaUrl,
         currentCapabilities,
-        // Pass capabilities for max_tokens_default
         abortController.signal,
-        // Pass abort signal
         (token) => {
           wsConnection.sendToken(job.job_id, token);
         }
@@ -5766,11 +5894,7 @@ async function main() {
         result.outputTokens,
         result.tool_calls
       );
-      if (wasCancelled) {
-        console.log(`\u2713 Job ${job.job_id} cancelled - sent partial response`);
-        wsConnection.sendError(job.job_id, "Job cancelled by user");
-      } else {
-        console.log(`\u2713 Job ${job.job_id} completed successfully`);
+      if (!wasCancelled) {
       }
     } catch (error) {
       console.error(`\u2717 Job ${job.job_id} failed:`, error);
@@ -5782,151 +5906,50 @@ async function main() {
       activeJobs.delete(job.job_id);
     }
   });
-    wsConnection.onPolicy(async (policy) => {
-      console.log("[3/4] Setting up llama.cpp...");
-      console.log(`  Policy: ${policy.model_name} (${policy.quantization})`);
-      console.log(`  Context size: ${policy.context_size}`);
-      console.log(`  Max parallelism: ${policy.max_parallelism}`);
-      try {
-        console.log("  Downloading/checking model...");
-        const modelPath = await ensureModelDownloaded(policy);
-        if (!modelPath) {
-          throw new Error(`Model not found in registry: ${policy.model_name} (${policy.quantization})`);
-        }
-        console.log(`  \u2713 Model ready: ${modelPath}`);
-        console.log("  Downloading/checking llama.cpp binary...");
-        const binaryPath = await ensureLlamaBinary();
-        console.log(`  \u2713 Binary ready: ${binaryPath}`);
-        const gpuLayers = 9999;
-        console.log("  Spawning llama.cpp process...");
-        llamaProcess = await spawnLlamaCpp({
-          binaryPath,
-          modelPath,
-          contextSize: policy.context_size,
-          parallelism: policy.max_parallelism,
-          port: 9002,
-          host: "127.0.0.1",
-          gpuLayers
-          // Enable GPU acceleration
-        });
-        const setupCrashHandler = (proc) => {
-          proc.onCrash(async (code, signal) => {
-            console.error(`
-[llama.cpp] Process crashed (code: ${code}, signal: ${signal})`);
-            console.log("[llama.cpp] Attempting to restart...");
-            try {
-              await new Promise((resolve) => setTimeout(resolve, 2e3));
-              if (llamaProcess) {
-                const restarted = await llamaProcess.restart();
-                llamaProcess = restarted;
-                setupCrashHandler(restarted);
-              }
-              await waitForLlamaReady("http://127.0.0.1:9002");
-              console.log("[llama.cpp] \u2713 Restarted successfully");
-              const capabilities2 = await discoverCapabilities("http://127.0.0.1:9002");
-              wsConnection.updateCapabilities(capabilities2);
-              wsConnection.sendConfig(capabilities2);
-              console.log("[llama.cpp] \u2713 Capabilities updated after restart");
-            } catch (restartError) {
-              console.error("[llama.cpp] \u2717 Failed to restart:", restartError);
-              console.error("[llama.cpp] Worker will continue but may not process jobs correctly");
-            }
-          });
-        };
-        setupCrashHandler(llamaProcess);
-        console.log("  Waiting for llama.cpp to be ready...");
-        await waitForLlamaReady("http://127.0.0.1:9002");
-        console.log("  \u2713 llama.cpp is ready");
-        console.log("  Warming up model...");
-        try {
-          const warmupResponse = await fetch("http://127.0.0.1:9002/v1/chat/completions", {
-            method: "POST",
-            headers: { "Content-Type": "application/json" },
-            body: JSON.stringify({
-              model: "placeholder",
-              // Will use default model
-              messages: [{ role: "user", content: "Hi" }],
-              max_tokens: 1,
-              // Just 1 token to warm up
-              stream: false
-            })
-          });
-          if (warmupResponse.ok) {
-            await warmupResponse.json();
-            console.log("  \u2713 Model warmed up");
-          }
-        } catch (error) {
-          console.warn("  \u26A0 Model warm-up failed (non-fatal):", error);
-        }
-        console.log("  Discovering capabilities...");
-        const capabilities = await discoverCapabilities("http://127.0.0.1:9002");
-        wsConnection.updateCapabilities(capabilities);
-        wsConnection.sendConfig(capabilities);
-        console.log("  \u2713 Capabilities discovered and sent to server");
-      } catch (error) {
-        console.error("  \u2717 Failed to setup llama.cpp:", error);
-        if (llamaProcess) {
-          try {
-            llamaProcess.stop();
-          } catch (cleanupError) {
-          }
-        }
-        throw error;
-      }
-    });
-    wsConnection.onConfigAck((success, error) => {
-      if (!success) {
-        console.error("  \u2717 Server rejected configuration:", error);
-        wsConnection.close();
-        if (llamaProcess) {
-          try {
-            llamaProcess.stop();
-          } catch (cleanupError) {
-          }
-        }
-        process.exit(1);
-      }
-      console.log("  \u2713 Configuration validated by server");
-    });
     wsConnection.onCancel((cancelMessage) => {
-      console.log(`
-=== Cancelling job ${cancelMessage.job_id} ===`);
       const abortController = activeJobs.get(cancelMessage.job_id);
       if (abortController) {
         abortController.abort();
-        console.log(`Sent abort signal to job ${cancelMessage.job_id}`);
-      } else {
-        console.log(`Job ${cancelMessage.job_id} not found in active jobs (may have already completed)`);
+      }
+    });
+    llamaProcess.onCrash(async (code, signal) => {
+      console.error(`
+\u2717 llama.cpp crashed (code: ${code}, signal: ${signal})`);
+      console.log("  Restarting...");
+      try {
+        await new Promise((resolve) => setTimeout(resolve, 2e3));
+        const restarted = await llamaProcess.restart();
+        await waitForLlamaReady(llamaUrl);
+        const newCapabilities = await discoverCapabilities(llamaUrl);
+        wsConnection.updateCapabilities(newCapabilities);
+        wsConnection.sendConfig(newCapabilities);
+        console.log("  \u2713 Recovered\n");
+      } catch (restartError) {
+        console.error("  \u2717 Failed to restart:", restartError);
       }
     });
     try {
       await wsConnection.connect(wsUrl);
-      console.log();
     } catch (error) {
       const errorMsg = error instanceof Error ? error.message : "Unknown error";
-      if (errorMsg.includes("Policy validation failed") || errorMsg.includes("policy")) {
-        console.error("\n\u2717 Policy validation failed. Please configure your llama.cpp server to match the GPU key policy.");
-        console.error("  See error details above for specific mismatches.");
-      } else if (errorMsg.includes("GPU key policy not configured")) {
-        console.error("\n\u2717 GPU key policy is not configured.");
-        console.error("  Please configure the policy in the web UI before connecting workers.");
-      } else {
-        console.error("\n\u2717 Connection failed:", errorMsg);
-      }
+      console.error("\u2717 Connection failed:", errorMsg);
+      llamaProcess.stop();
       process.exit(1);
     }
-    console.log("[4/4] Ready to process jobs");
-    console.log("Waiting for work assignments...");
-    console.log("Press Ctrl+C to stop");
-    console.log();
+    wsConnection.sendConfig(capabilities);
+    const gpuId = wsConnection.getGpuId();
+    if (gpuId) {
+      console.log("\u25B6 GPU");
+      console.log(`  \u2713 ID assigned ${gpuId}`);
+      console.log();
+    }
+    console.log("\u2713 Worker ready");
+    console.log("  Listening for jobs\u2026\n");
    const shutdown = () => {
-      console.log("\nShutting down...");
-      if (llamaProcess) {
-        try {
-          llamaProcess.stop();
-        } catch (error) {
-          console.error("Error stopping llama.cpp:", error);
-        }
+      console.log("\n\u2713 Shutting down gracefully...");
+      try {
+        llamaProcess.stop();
+      } catch (error) {
      }
      wsConnection.close();
      process.exit(0);
@@ -5934,9 +5957,35 @@
   process.on("SIGINT", shutdown);
   process.on("SIGTERM", shutdown);
   } catch (error) {
-    console.error("Fatal error:", error);
+    console.error("\u2717 Fatal error:", error);
     process.exit(1);
   }
 }
+async function fetchPolicy() {
+  return new Promise((resolve, reject) => {
+    const baseUrl = config.auxotUrl.replace(/^http/, "ws").replace(/^https/, "wss");
+    const wsUrl = `${baseUrl}/api/gpu/client`;
+    const placeholderCapabilities = {
+      model: "pending",
+      ctx_size: 0,
+      backend: "cpu"
+    };
+    const tempConnection = new WebSocketConnection(config.gpuKey, placeholderCapabilities);
+    tempConnection.setSilentMode(true);
+    const timeout = setTimeout(() => {
+      tempConnection.close();
+      reject(new Error("Timeout fetching policy"));
+    }, 3e4);
+    tempConnection.onPolicy(async (policy) => {
+      clearTimeout(timeout);
+      tempConnection.close();
+      resolve(policy);
+    });
+    tempConnection.connect(wsUrl).catch((error) => {
+      clearTimeout(timeout);
+      reject(error);
+    });
+  });
+}
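
fetchPolicy is what makes the reordered startup possible: a throwaway connection with placeholder capabilities and setSilentMode(true) exists only long enough to receive the policy, then closes; the real connection opens later with discovered capabilities. The 30 s timeout covers a server that never sends one. Structurally it is the usual "first event or timeout" promise wrapper (generic sketch, names ours):

// Resolve on the first event, reject on timeout or connect failure.
function firstEventOrTimeout<T>(
  subscribe: (handler: (value: T) => void) => void, // e.g. tempConnection.onPolicy
  start: () => Promise<unknown>,                    // e.g. tempConnection.connect(wsUrl)
  cleanup: () => void,                              // e.g. () => tempConnection.close()
  ms = 30_000,
): Promise<T> {
  return new Promise((resolve, reject) => {
    const timer = setTimeout(() => { cleanup(); reject(new Error("timeout")); }, ms);
    subscribe((value) => { clearTimeout(timer); cleanup(); resolve(value); });
    start().catch((error) => { clearTimeout(timer); reject(error); });
  });
}
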
 main();
 //# sourceMappingURL=index.js.map