@auxot/worker-cli 0.1.5 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +16 -26
- package/dist/index.js +332 -283
- package/dist/index.js.map +4 -4
- package/dist/registry.json +10386 -0
- package/package.json +2 -3
package/dist/index.js
CHANGED
@@ -5,37 +5,6 @@ var __export = (target, all) => {
     __defProp(target, name, { get: all[name], enumerable: true });
 };
 
-// src/gpu-id.ts
-import { randomUUID } from "crypto";
-import { readFile, writeFile, mkdir } from "fs/promises";
-import { homedir } from "os";
-import { join } from "path";
-var AUXOT_DIR = join(homedir(), ".auxot");
-var GPU_ID_FILE = join(AUXOT_DIR, "gpu-id");
-async function getOrCreateGpuId() {
-  try {
-    const existingId = await readFile(GPU_ID_FILE, "utf-8");
-    const trimmed = existingId.trim();
-    const uuidRegex2 = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
-    if (uuidRegex2.test(trimmed)) {
-      return trimmed;
-    }
-    console.warn("Invalid GPU ID found, generating new one");
-  } catch (error) {
-  }
-  const newId = randomUUID();
-  try {
-    await mkdir(AUXOT_DIR, { recursive: true });
-    await writeFile(GPU_ID_FILE, newId, "utf-8");
-    console.log(`Generated new GPU ID: ${newId}`);
-    console.log(`Stored in: ${GPU_ID_FILE}`);
-  } catch (error) {
-    console.error("Failed to save GPU ID:", error);
-    throw error;
-  }
-  return newId;
-}
-
 // src/capabilities.ts
 function normalizeModelName(filePath) {
   const filename = filePath.split("/").pop() || filePath;
@@ -87,12 +56,10 @@ async function discoverCapabilities(llamaUrl) {
         capabilities.vram_gb = Math.round(props.total_vram_mb / 1024);
       }
       if (capabilities.ctx_size !== 4096) {
-        console.log("Discovered capabilities:", capabilities);
         return capabilities;
       }
     }
   } catch (propsError) {
-    console.warn("/props endpoint not available, trying /health");
   }
   try {
     const healthResponse = await fetch(`${llamaUrl}/health`);
@@ -100,17 +67,13 @@ async function discoverCapabilities(llamaUrl) {
       const health = await healthResponse.json();
       if (health.n_ctx) {
         capabilities.ctx_size = health.n_ctx;
-        console.log(`Runtime context size from /health: ${capabilities.ctx_size}`);
       }
     }
   } catch {
-    console.warn("/health endpoint not available");
   }
   if (capabilities.ctx_size === 4096 && model.meta?.n_ctx_train) {
-    console.warn("Could not determine runtime context size, using n_ctx_train as fallback");
     capabilities.ctx_size = model.meta.n_ctx_train;
   }
-  console.log("Discovered capabilities:", capabilities);
   return capabilities;
 } catch (error) {
   console.error("Failed to discover capabilities:", error);
@@ -282,7 +245,8 @@ function validatePolicy(discoveredCapabilities, policy) {
   const warnings = [];
   const discoveredNormalized = normalizeModelName2(discoveredCapabilities.model || "");
   const policyNormalized = normalizeModelName2(policy.model_name);
-
+  const isModelMatch = discoveredNormalized === policyNormalized || discoveredNormalized.startsWith(policyNormalized) || policyNormalized.startsWith(discoveredNormalized);
+  if (!isModelMatch) {
     errors.push(
       `Model name mismatch: discovered "${discoveredCapabilities.model}" (normalized: "${discoveredNormalized}") does not match policy "${policy.model_name}" (normalized: "${policyNormalized}")`
     );
@@ -348,6 +312,8 @@ var WebSocketConnection = class {
   heartbeatTimer = null;
   reconnectTimer = null;
   gpuKey;
+  gpuId = null;
+  // Server-assigned GPU ID
   capabilities;
   onJobCallback = null;
   onCancelCallback = null;
@@ -359,10 +325,19 @@ var WebSocketConnection = class {
   shouldReconnect = true;
   isReconnecting = false;
   policy = null;
+  silentDisconnect = false;
+  // Suppress disconnect messages
   constructor(gpuKey, capabilities) {
     this.gpuKey = gpuKey;
     this.capabilities = capabilities;
   }
+  /**
+   * Set silent mode (suppress disconnect messages)
+   * Used during download phase to avoid jarring messages
+   */
+  setSilentMode(silent) {
+    this.silentDisconnect = silent;
+  }
   /**
    * Connect to WebSocket server and send hello message
    */
@@ -380,7 +355,7 @@ var WebSocketConnection = class {
         reject(new Error("No WebSocket URL configured"));
         return;
       }
-      if (!this.isReconnecting) {
+      if (!this.isReconnecting && !this.silentDisconnect) {
         console.log(`Connecting to ${this.wsUrl}...`);
       }
       try {
@@ -414,6 +389,9 @@ var WebSocketConnection = class {
         if (message.type === "hello_ack") {
           clearTimeout(connectionTimeout);
           if (message.success) {
+            if (message.gpu_id) {
+              this.gpuId = message.gpu_id;
+            }
             if (!message.policy) {
               const errorMsg = "Server did not send policy in hello_ack";
               console.error(`\u2717 ${errorMsg}`);
@@ -427,16 +405,14 @@ var WebSocketConnection = class {
               try {
                 await this.onPolicyCallback(message.policy);
               } catch (error) {
-
+                if (!this.silentDisconnect) {
+                  console.error("[Policy Callback] Error:", error);
+                }
                 this.shouldReconnect = false;
                 this.ws?.close();
                 reject(error);
                 return;
               }
-              console.log("\u2713 Successfully authenticated with server");
-              console.log(` Policy: ${message.policy.model_name} (${message.policy.quantization})`);
-              console.log(" Spawning llama.cpp process...");
-              console.log(" (Capabilities validation will happen via config message)");
             } else {
               const validation = await validatePolicy(this.capabilities, message.policy);
               if (validation.warnings && validation.warnings.length > 0) {
@@ -545,12 +521,12 @@ var WebSocketConnection = class {
       this.isConnected = false;
       this.stopHeartbeat();
       if (this.shouldReconnect) {
-        if (!this.isReconnecting) {
+        if (!this.isReconnecting && !this.silentDisconnect) {
          console.log("WebSocket disconnected, will continue to retry...");
          this.isReconnecting = true;
        }
        this.scheduleReconnect();
-      } else {
+      } else if (!this.silentDisconnect) {
        console.log("WebSocket disconnected");
      }
    });
@@ -694,6 +670,12 @@ var WebSocketConnection = class {
   getPolicy() {
     return this.policy;
   }
+  /**
+   * Get GPU ID assigned by server
+   */
+  getGpuId() {
+    return this.gpuId;
+  }
   /**
    * Get current capabilities
    */
@@ -771,6 +753,12 @@ async function processJob(job, llamaUrl, capabilities, abortSignal, onToken) {
   const reader = response.body.getReader();
   const decoder = new TextDecoder();
   const toolCallsMap = /* @__PURE__ */ new Map();
+  let hasReceivedFirstToken = false;
+  const keepaliveInterval = setInterval(() => {
+    if (!hasReceivedFirstToken) {
+      onToken("");
+    }
+  }, 1e4);
   const parser = createParser((event) => {
     if (event.type === "reconnect-interval")
       return;
@@ -783,6 +771,7 @@ async function processJob(job, llamaUrl, capabilities, abortSignal, onToken) {
     }
     const content = chunk.choices[0]?.delta?.content;
     if (content) {
+      hasReceivedFirstToken = true;
       fullResponse += content;
       onToken(content);
     }
@@ -834,6 +823,7 @@ async function processJob(job, llamaUrl, capabilities, abortSignal, onToken) {
       throw error;
     }
   } finally {
+    clearInterval(keepaliveInterval);
     reader.releaseLock();
   }
   let durationMs;
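
The three hunks above add a keepalive: until the first real token arrives, an empty token is pushed through the onToken callback every 10 seconds, and the timer is always cleared in the finally block. Below is a minimal TypeScript sketch of that pattern; streamWithKeepalive and the simplified stream source are illustrative, only the onToken callback name comes from the diff.

// Sketch: keep a streaming callback "warm" until the first real token arrives.
async function streamWithKeepalive(
  stream: AsyncIterable<string>,
  onToken: (token: string) => void,
  keepaliveMs = 10_000
): Promise<void> {
  let hasReceivedFirstToken = false;
  const keepalive = setInterval(() => {
    // Empty tokens signal liveness without altering the visible output.
    if (!hasReceivedFirstToken) onToken("");
  }, keepaliveMs);
  try {
    for await (const token of stream) {
      hasReceivedFirstToken = true;
      onToken(token);
    }
  } finally {
    clearInterval(keepalive); // always stop the timer, even on error or cancellation
  }
}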
@@ -879,9 +869,9 @@ import { spawn } from "child_process";
 
 // src/llama-binary.ts
 import { existsSync, chmodSync, statSync } from "node:fs";
-import { mkdir
-import { join
-import { homedir
+import { mkdir, unlink } from "node:fs/promises";
+import { join } from "node:path";
+import { homedir } from "os";
 import { platform as platform2, arch } from "os";
 import { createWriteStream } from "node:fs";
 import { exec as exec2 } from "child_process";
@@ -1038,18 +1028,18 @@ async function getArchiveName() {
 function getCacheDir() {
   const os = platform2();
   const architecture = arch();
-  const cacheDir = process.env.AUXOT_LLAMA_CACHE_DIR ||
-  return
+  const cacheDir = process.env.AUXOT_LLAMA_CACHE_DIR || join(homedir(), ".auxot", "llama-server");
+  return join(cacheDir, `${os}-${architecture}`);
 }
 function getBinaryPath() {
   const cacheDir = getCacheDir();
-  return
+  return join(cacheDir, `llama-${LLAMA_CPP_VERSION}`, "llama-server");
 }
 async function downloadLlamaBinary(onProgress) {
   const { archiveName, warning } = await getArchiveName();
   const binaryPath = getBinaryPath();
   const cacheDir = getCacheDir();
-  const archivePath =
+  const archivePath = join(cacheDir, archiveName);
   if (warning) {
     console.warn(` \u26A0 ${warning}`);
   }
@@ -1061,7 +1051,7 @@ async function downloadLlamaBinary(onProgress) {
     }
   }
   if (!existsSync(cacheDir)) {
-    await
+    await mkdir(cacheDir, { recursive: true });
   }
   const downloadUrl = `https://github.com/${LLAMA_CPP_REPO}/releases/download/${LLAMA_CPP_VERSION}/${archiveName}`;
   console.log(` Downloading llama.cpp binary...`);
@@ -1235,37 +1225,27 @@ async function spawnLlamaCpp(options) {
   let stdoutBuffer = "";
   childProcess.stdout?.on("data", (data) => {
     stdoutBuffer += data.toString();
-
-
-    for (const line of lines) {
-      const trimmed = line.trim();
-      if (trimmed) {
-        console.log(`[llama.cpp stdout] ${trimmed}`);
-      }
+    if (stdoutBuffer.length > 1e4) {
+      stdoutBuffer = stdoutBuffer.slice(-5e3);
     }
   });
   let stderrBuffer = "";
   childProcess.stderr?.on("data", (data) => {
-
-
-
+    const chunk = data.toString();
+    stderrBuffer += chunk;
+    const lines = chunk.split("\n");
     for (const line of lines) {
-      const
-      if (
-        console.error(`[llama.cpp
+      const lower = line.toLowerCase();
+      if (lower.includes("error") || lower.includes("fatal") || lower.includes("crash") || lower.includes("failed")) {
+        console.error(`[llama.cpp] ${line.trim()}`);
      }
    }
+    if (stderrBuffer.length > 1e4) {
+      stderrBuffer = stderrBuffer.slice(-5e3);
+    }
   });
   childProcess.on("exit", (code, signal) => {
     isRunning = false;
-    if (stdoutBuffer.trim()) {
-      console.log(`[llama.cpp stdout] ${stdoutBuffer.trim()}`);
-      stdoutBuffer = "";
-    }
-    if (stderrBuffer.trim()) {
-      console.error(`[llama.cpp stderr] ${stderrBuffer.trim()}`);
-      stderrBuffer = "";
-    }
     if (code !== null) {
       console.log(`[llama.cpp] Process exited with code ${code}`);
       if (code !== 0) {
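
The rewritten stdout/stderr handlers above stop echoing every line and instead cap each buffer at 10,000 characters, keeping only the last 5,000 for crash diagnostics. A small sketch of that bounded-buffer idea, detached from the child_process wiring; the BoundedBuffer class name is illustrative.

// Sketch: a bounded text buffer that keeps only the most recent output,
// mirroring the 10k/5k cap applied to llama.cpp stdout/stderr in the diff.
class BoundedBuffer {
  private text = "";
  constructor(private maxLength = 10_000, private keepLength = 5_000) {}

  append(chunk: string): void {
    this.text += chunk;
    if (this.text.length > this.maxLength) {
      // Drop the oldest output; recent lines are what matter when the process dies.
      this.text = this.text.slice(-this.keepLength);
    }
  }

  contents(): string {
    return this.text;
  }
}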
@@ -1344,15 +1324,23 @@ async function spawnLlamaCpp(options) {
     }
   };
 }
-async function waitForLlamaReady(url, timeoutMs =
+async function waitForLlamaReady(url, timeoutMs = 6e4) {
   const startTime = Date.now();
-  const checkInterval =
+  const checkInterval = 1e3;
   while (Date.now() - startTime < timeoutMs) {
     try {
-      const response = await fetch(`${url}/v1/models
+      const response = await fetch(`${url}/v1/models`, {
+        signal: AbortSignal.timeout(5e3)
+        // 5 second timeout per request
+      });
       if (response.ok) {
-
-
+        const contentType = response.headers.get("content-type");
+        if (contentType && contentType.includes("application/json")) {
+          const data = await response.json();
+          if (data && (data.data || data.object)) {
+            return;
+          }
+        }
       }
     } catch (error) {
     }
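
The reworked waitForLlamaReady polls the local /v1/models endpoint once per second with a 5-second per-request timeout and only returns once the response is JSON that looks like a model list. A standalone TypeScript sketch of that loop; waitForServerReady is an illustrative name, the timings and field checks follow the diff.

// Sketch: poll an OpenAI-compatible /v1/models endpoint until the server is ready.
async function waitForServerReady(url: string, timeoutMs = 60_000): Promise<void> {
  const startTime = Date.now();
  const checkIntervalMs = 1_000;
  while (Date.now() - startTime < timeoutMs) {
    try {
      const response = await fetch(`${url}/v1/models`, {
        signal: AbortSignal.timeout(5_000), // give up on a hung request, keep polling
      });
      if (response.ok) {
        const contentType = response.headers.get("content-type") ?? "";
        if (contentType.includes("application/json")) {
          const data = await response.json();
          if (data && (data.data || data.object)) return; // looks like a real model list
        }
      }
    } catch {
      // Server not up yet (connection refused, timeout); fall through and retry.
    }
    await new Promise((resolve) => setTimeout(resolve, checkIntervalMs));
  }
  throw new Error(`Server at ${url} did not become ready within ${timeoutMs} ms`);
}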
@@ -5413,10 +5401,11 @@ var ModelRegistryEntrySchema = external_exports.object({
   family: ModelFamilySchema,
   parameters: external_exports.string(),
   default_context_size: external_exports.number().int().positive(),
+  max_context_size: external_exports.number().int().positive(),
   vram_requirements_gb: external_exports.number().positive(),
   capabilities: external_exports.array(ModelCapabilitySchema).min(1),
   file_name: external_exports.string(),
-  file_size_bytes: external_exports.number().int().positive().optional()
+  file_size_bytes: external_exports.number().int().positive().nullable().optional()
 });
 var ModelRegistrySchema = external_exports.object({
   version: external_exports.string(),
@@ -5429,7 +5418,7 @@ function validateModelRegistry(data) {
 
 // ../../packages/model-registry/dist/src/loader.js
 import { readFileSync, statSync as statSync2 } from "node:fs";
-import { join as
+import { join as join2, dirname as dirname2 } from "node:path";
 import { fileURLToPath } from "node:url";
 var __filename = fileURLToPath(import.meta.url);
 var __dirname = dirname2(__filename);
@@ -5438,11 +5427,11 @@ var cachedRegistryPath = null;
 var cachedRegistryMtime = null;
 function loadRegistry() {
   const registryPaths = [
-
+    join2(__dirname, "..", "..", "registry.json"),
     // From dist/src/ -> package root
-
+    join2(__dirname, "..", "registry.json"),
     // From dist/ -> package root (if running from dist/)
-
+    join2(__dirname, "registry.json")
     // Same directory (if copied there)
   ];
   let registryPath = null;
@@ -5509,17 +5498,17 @@ function getModels(registry, filters) {
 }
 
 // src/model-resolver.ts
-import { join as
-import { homedir as
+import { join as join4 } from "path";
+import { homedir as homedir2 } from "os";
 
 // src/model-downloader.ts
 import { createWriteStream as createWriteStream2, existsSync as existsSync2, statSync as statSync3 } from "node:fs";
-import { mkdir as
+import { mkdir as mkdir2 } from "node:fs/promises";
 import { dirname as dirname3 } from "node:path";
 async function downloadModel(entry, outputPath, onProgress) {
   const outputDir = dirname3(outputPath);
   if (!existsSync2(outputDir)) {
-    await
+    await mkdir2(outputDir, { recursive: true });
   }
   if (existsSync2(outputPath)) {
     const stats = statSync3(outputPath);
@@ -5527,9 +5516,11 @@ async function downloadModel(entry, outputPath, onProgress) {
       console.log(` \u2713 Model already downloaded (${formatBytes2(stats.size)})`);
       return outputPath;
     }
-    if (entry.file_size_bytes && stats.size
-      console.log(` \
-      console.log(` \
+    if (entry.file_size_bytes && stats.size < entry.file_size_bytes) {
+      console.log(` \u2299 Partial download found (${formatBytes2(stats.size)} / ${formatBytes2(entry.file_size_bytes)})`);
+      console.log(` \u2299 Resuming download...`);
+    } else if (entry.file_size_bytes && stats.size > entry.file_size_bytes) {
+      console.log(` \u2298 File is larger than expected, restarting download...`);
       const { unlink: unlink2 } = await import("node:fs/promises");
       await unlink2(outputPath);
     }
@@ -5546,10 +5537,6 @@ async function downloadModel(entry, outputPath, onProgress) {
   if (existsSync2(outputPath)) {
     const stats = statSync3(outputPath);
     startByte = stats.size;
-    if (startByte > 0 && startByte < totalBytes) {
-      console.log(` Resuming from ${formatBytes2(startByte)}...`);
-      downloadedBytes = startByte;
-    }
   }
   const response = await fetch(downloadUrl, {
     headers: startByte > 0 ? {
@@ -5567,11 +5554,29 @@ async function downloadModel(entry, outputPath, onProgress) {
   }
   const contentLength = response.headers.get("content-length");
   const totalSize = contentLength ? parseInt(contentLength, 10) + startByte : totalBytes;
+  if (startByte > 0) {
+    if (startByte < totalSize) {
+      console.log(` \u2299 Resuming from ${formatBytes2(startByte)}...`);
+      downloadedBytes = startByte;
+    } else if (startByte === totalSize) {
+      console.log(` \u2713 Model already downloaded (${formatBytes2(startByte)})`);
+      return outputPath;
+    } else {
+      console.log(` \u2298 File is larger than expected (${formatBytes2(startByte)} > ${formatBytes2(totalSize)}), restarting...`);
+      const { unlink: unlink2 } = await import("node:fs/promises");
+      await unlink2(outputPath);
+      startByte = 0;
+      downloadedBytes = 0;
+    }
+  }
   const fileStream = createWriteStream2(outputPath, { flags: startByte > 0 ? "a" : "w" });
   const reader = response.body?.getReader();
   if (!reader) {
     throw new Error("Response body is not readable");
   }
+  let lastProgressUpdate = Date.now();
+  const startTime = Date.now();
+  const bytesAtStart = downloadedBytes;
   try {
     while (true) {
       const { done, value } = await reader.read();
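
The resume path above now decides, after reading Content-Length, whether a partial file should be resumed, accepted as already complete, or deleted and restarted. A compact TypeScript sketch of the Range-request side of that logic; downloadWithResume is an illustrative name, and progress reporting and size verification are omitted.

import { statSync, existsSync, createWriteStream } from "node:fs";

// Sketch: resume a download with an HTTP Range header, appending to the partial file.
async function downloadWithResume(url: string, outputPath: string): Promise<void> {
  const startByte = existsSync(outputPath) ? statSync(outputPath).size : 0;
  const response = await fetch(url, {
    headers: startByte > 0 ? { Range: `bytes=${startByte}-` } : {},
  });
  if (!response.ok) {
    throw new Error(`Download failed: HTTP ${response.status}`);
  }
  // Append when resuming, truncate when starting fresh.
  const file = createWriteStream(outputPath, { flags: startByte > 0 ? "a" : "w" });
  const reader = response.body?.getReader();
  if (!reader) throw new Error("Response body is not readable");
  try {
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;
      file.write(value);
    }
  } finally {
    file.end();
    reader.releaseLock();
  }
}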
@@ -5580,13 +5585,21 @@ async function downloadModel(entry, outputPath, onProgress) {
       }
       fileStream.write(value);
       downloadedBytes += value.length;
-
+      const now = Date.now();
+      if (onProgress && now - lastProgressUpdate > 1e3) {
         onProgress(downloadedBytes, totalSize);
-
-
-
-
-
+        lastProgressUpdate = now;
+      } else if (totalSize > 0 && now - lastProgressUpdate > 1e3) {
+        const elapsedSeconds = (now - startTime) / 1e3;
+        const bytesDownloadedThisSession = downloadedBytes - bytesAtStart;
+        const bytesPerSecond = bytesDownloadedThisSession / elapsedSeconds;
+        const remainingBytes = totalSize - downloadedBytes;
+        const etaSeconds = remainingBytes / bytesPerSecond;
+        const percent = (downloadedBytes / totalSize * 100).toFixed(1);
+        const speed = formatBytes2(bytesPerSecond);
+        const eta = formatTime(etaSeconds);
+        process.stdout.write(`\r ${formatBytes2(downloadedBytes)} / ${formatBytes2(totalSize)} (${percent}%) ${speed}/s ETA ~${eta}`);
+        lastProgressUpdate = now;
       }
     }
     fileStream.end();
@@ -5597,12 +5610,7 @@ async function downloadModel(entry, outputPath, onProgress) {
     if (totalSize > 0 && downloadedBytes !== totalSize) {
       throw new Error(`Download incomplete: ${downloadedBytes} bytes downloaded, expected ${totalSize}`);
     }
-
-      process.stdout.write("\r");
-    } else {
-      process.stdout.write("\r");
-    }
-    console.log(` \u2713 Download complete (${formatBytes2(downloadedBytes)})`);
+    process.stdout.write("\r" + " ".repeat(80) + "\r");
     return outputPath;
   } catch (error) {
     fileStream.destroy();
@@ -5617,6 +5625,19 @@ function formatBytes2(bytes) {
   const i = Math.floor(Math.log(bytes) / Math.log(k));
   return `${(bytes / Math.pow(k, i)).toFixed(1)} ${sizes[i]}`;
 }
+function formatTime(seconds) {
+  if (seconds < 60) {
+    return `${Math.round(seconds)}s`;
+  }
+  const minutes = Math.floor(seconds / 60);
+  const remainingSeconds = Math.round(seconds % 60);
+  if (minutes < 60) {
+    return `${minutes}m ${remainingSeconds}s`;
+  }
+  const hours = Math.floor(minutes / 60);
+  const remainingMinutes = minutes % 60;
+  return `${hours}h ${remainingMinutes}m`;
+}
 
 // src/model-resolver.ts
 import { existsSync as existsSync3 } from "node:fs";
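
The new progress line computes throughput from the bytes transferred in the current session only (excluding any resumed prefix) and derives an ETA from the remaining bytes, which formatTime then renders. The arithmetic in isolation, as a TypeScript sketch; downloadEta is an illustrative helper name.

// Sketch: the throughput/ETA arithmetic behind the new progress line.
// bytesAtStart is the size of any resumed prefix, so speed reflects only this session.
function downloadEta(
  downloadedBytes: number,
  bytesAtStart: number,
  totalBytes: number,
  elapsedMs: number
): { percent: number; bytesPerSecond: number; etaSeconds: number } {
  const bytesThisSession = downloadedBytes - bytesAtStart;
  const bytesPerSecond = bytesThisSession / (elapsedMs / 1_000);
  const remainingBytes = totalBytes - downloadedBytes;
  return {
    percent: (downloadedBytes / totalBytes) * 100,
    bytesPerSecond,
    etaSeconds: remainingBytes / bytesPerSecond,
  };
}

// Example: 2 GB of 10 GB done after 60 s with no resumed prefix
// -> roughly 33 MB/s and about 4 minutes remaining.
const { bytesPerSecond, etaSeconds } = downloadEta(2e9, 0, 10e9, 60_000);
console.log(`${(bytesPerSecond / 1e6).toFixed(1)} MB/s, ETA ${Math.round(etaSeconds)}s`);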
@@ -5636,24 +5657,83 @@ async function ensureModelDownloaded(policy, onProgress) {
     console.error(` \u2717 Model not found in registry: ${policy.model_name} (${policy.quantization})`);
     return null;
   }
-  const modelsDir = process.env.AUXOT_MODELS_DIR ||
-  const modelDir =
-  const modelPath =
-
-
-  const
-
+  const modelsDir = process.env.AUXOT_MODELS_DIR || join4(homedir2(), ".auxot", "models");
+  const modelDir = join4(modelsDir, model.huggingface_id.replace("/", "_"));
+  const modelPath = join4(modelDir, model.file_name);
+  const shardMatch = model.file_name.match(/-(\d+)-of-(\d+)\.gguf$/);
+  if (shardMatch) {
+    const totalShards = parseInt(shardMatch[2], 10);
+    const fileBaseName = model.file_name.replace(/-\d+-of-\d+\.gguf$/, "");
+    const fileExtension = ".gguf";
+    console.log(` Model has ${totalShards} shards, downloading all...`);
+    for (let shardNum = 1; shardNum <= totalShards; shardNum++) {
+      const paddedNum = String(shardNum).padStart(5, "0");
+      const shardFileName = `${fileBaseName}-${paddedNum}-of-${String(totalShards).padStart(5, "0")}${fileExtension}`;
+      const shardPath = join4(modelDir, shardFileName);
+      if (existsSync3(shardPath)) {
+        const { statSync: statSync4 } = await import("node:fs");
+        const stats = statSync4(shardPath);
+        console.log(` \u2713 Shard ${shardNum}/${totalShards} already downloaded`);
+        continue;
+      }
+      const shardEntry = {
+        ...model,
+        file_name: shardFileName,
+        file_size_bytes: null
+        // Don't know the size, will get from Content-Length
+      };
+      console.log(` Downloading shard ${shardNum}/${totalShards}...`);
+      try {
+        await downloadModel(shardEntry, shardPath, onProgress);
+        console.log(` \u2713 Shard ${shardNum}/${totalShards} complete`);
+      } catch (error) {
+        console.error(` \u2717 Shard ${shardNum}/${totalShards} failed:`, error);
+        throw error;
+      }
+    }
+    return modelPath;
+  } else {
+    if (existsSync3(modelPath)) {
+      const { statSync: statSync4 } = await import("node:fs");
+      const stats = statSync4(modelPath);
+      if (model.file_size_bytes && stats.size === model.file_size_bytes) {
+        return modelPath;
+      }
+    }
+    try {
+      await downloadModel(model, modelPath, onProgress);
       return modelPath;
+    } catch (error) {
+      console.error(` \u2717 Download failed:`, error);
+      throw error;
     }
   }
-
-
-
-
-
-
-
+}
+
+// src/port-finder.ts
+import { createServer } from "net";
+async function findAvailablePort(minPort = 1e4, maxPort = 65535, maxAttempts = 100) {
+  for (let i = 0; i < maxAttempts; i++) {
+    const port = Math.floor(Math.random() * (maxPort - minPort + 1)) + minPort;
+    const isAvailable = await isPortAvailable(port);
+    if (isAvailable) {
+      return port;
+    }
   }
+  throw new Error(`No available ports found after ${maxAttempts} attempts in range ${minPort}-${maxPort}`);
+}
+function isPortAvailable(port) {
+  return new Promise((resolve) => {
+    const server = createServer();
+    server.once("error", () => {
+      resolve(false);
+    });
+    server.once("listening", () => {
+      server.close();
+      resolve(true);
+    });
+    server.listen(port, "127.0.0.1");
+  });
+}
 }
 
 // src/index.ts
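
The new src/port-finder.ts, added in the hunk above, picks a random port in a range and confirms it is free by briefly binding a throwaway server to 127.0.0.1 before the port is handed to llama.cpp. A self-contained TypeScript sketch of the same check (the function names mirror the diff):

import { createServer } from "node:net";

// Sketch: probe whether a TCP port is free by briefly binding to it.
function isPortAvailable(port: number): Promise<boolean> {
  return new Promise((resolve) => {
    const server = createServer();
    server.once("error", () => resolve(false)); // EADDRINUSE or similar
    server.once("listening", () => {
      server.close();
      resolve(true);
    });
    server.listen(port, "127.0.0.1");
  });
}

async function findAvailablePort(minPort = 10_000, maxPort = 65_535, maxAttempts = 100): Promise<number> {
  for (let i = 0; i < maxAttempts; i++) {
    const port = Math.floor(Math.random() * (maxPort - minPort + 1)) + minPort;
    if (await isPortAvailable(port)) return port;
  }
  throw new Error(`No available ports found after ${maxAttempts} attempts`);
}

// Usage: reserve a local port before spawning the inference server.
findAvailablePort().then((port) => console.log(`Using port ${port}`));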
@@ -5717,28 +5797,79 @@ if (!config.gpuKey.startsWith("gpu.")) {
 }
 async function main() {
   setDebugLevel(config.debugLevel);
-  console.log("Auxot
-  console.log("
-  if (config.debugLevel > 0) {
-    console.log(`Debug Level: ${config.debugLevel}`);
-  }
-  console.log();
+  console.log("Auxot Worker");
+  console.log("============\n");
   try {
-    console.log("[1/4] Loading GPU ID...");
-    const gpuId = await getOrCreateGpuId();
-    console.log(`GPU ID: ${gpuId}`);
-    console.log();
-    console.log("[2/4] Connecting to Auxot platform...");
     const baseUrl = config.auxotUrl.replace(/^http/, "ws").replace(/^https/, "wss");
     const wsUrl = `${baseUrl}/api/gpu/client`;
-
-
-
-
-
-
-
-
+    console.log("\u25B6 Control Plane");
+    console.log(` \u2713 Connected ${wsUrl}`);
+    const policy = await fetchPolicy();
+    console.log(` \u2713 Authenticated`);
+    console.log();
+    console.log("\u25B6 Downloading Model");
+    console.log(` ${policy.model_name} (${policy.quantization})`);
+    const modelPath = await ensureModelDownloaded(policy);
+    if (!modelPath) {
+      throw new Error(`Model not found in registry: ${policy.model_name} (${policy.quantization})`);
+    }
+    console.log(` \u2713 Model ready`);
+    console.log();
+    console.log("\u25B6 Control Plane");
+    const binaryPath = await ensureLlamaBinary();
+    const gpuLayers = 9999;
+    const llamaPort = await findAvailablePort();
+    const llamaUrl = `http://127.0.0.1:${llamaPort}`;
+    const llamaProcess = await spawnLlamaCpp({
+      binaryPath,
+      modelPath,
+      contextSize: policy.context_size,
+      parallelism: policy.max_parallelism,
+      port: llamaPort,
+      host: "127.0.0.1",
+      gpuLayers
+    });
+    await waitForLlamaReady(llamaUrl);
+    await new Promise((resolve) => setTimeout(resolve, 2e3));
+    try {
+      const warmupResponse = await fetch(`${llamaUrl}/v1/chat/completions`, {
+        method: "POST",
+        headers: { "Content-Type": "application/json" },
+        body: JSON.stringify({
+          model: "placeholder",
+          messages: [{ role: "user", content: "Hi" }],
+          max_tokens: 1,
+          stream: false
+        })
+      });
+      if (warmupResponse.ok)
+        await warmupResponse.json();
+    } catch (error) {
+    }
+    let capabilities = null;
+    for (let attempt = 1; attempt <= 3; attempt++) {
+      try {
+        capabilities = await discoverCapabilities(llamaUrl);
+        break;
+      } catch (error) {
+        if (attempt === 3) {
+          throw error;
+        }
+        await new Promise((resolve) => setTimeout(resolve, 2e3));
+      }
+    }
+    if (!capabilities) {
+      throw new Error("Failed to discover capabilities after 3 attempts");
+    }
+    console.log(` \u2713 llama.cpp Running locally`);
+    console.log();
+    console.log("\u25B6 Runtime");
+    console.log(` \u2022 Model ${policy.model_name} (${policy.quantization})`);
+    console.log(` \u2022 Context ${policy.context_size.toLocaleString()}`);
+    console.log(` \u2022 Parallelism ${policy.max_parallelism}`);
+    console.log(` \u2022 Backend llama.cpp (${capabilities.backend})`);
+    console.log();
+    const wsConnection = new WebSocketConnection(config.gpuKey, capabilities);
     const activeJobs = /* @__PURE__ */ new Map();
     wsConnection.onJob(async (job) => {
       const abortController = new AbortController();
@@ -5747,12 +5878,9 @@ async function main() {
         const currentCapabilities = wsConnection.getCapabilities();
         const result = await processJob(
           job,
-
-          // Always use local llama.cpp (spawned by worker-cli)
+          llamaUrl,
           currentCapabilities,
-          // Pass capabilities for max_tokens_default
          abortController.signal,
-          // Pass abort signal
          (token) => {
            wsConnection.sendToken(job.job_id, token);
          }
@@ -5766,11 +5894,7 @@ async function main() {
           result.outputTokens,
           result.tool_calls
         );
-        if (wasCancelled) {
-          console.log(`\u2713 Job ${job.job_id} cancelled - sent partial response`);
-          wsConnection.sendError(job.job_id, "Job cancelled by user");
-        } else {
-          console.log(`\u2713 Job ${job.job_id} completed successfully`);
+        if (!wasCancelled) {
         }
       } catch (error) {
         console.error(`\u2717 Job ${job.job_id} failed:`, error);
@@ -5782,151 +5906,50 @@ async function main() {
         activeJobs.delete(job.job_id);
       }
     });
-    wsConnection.onPolicy(async (policy) => {
-      console.log("[3/4] Setting up llama.cpp...");
-      console.log(` Policy: ${policy.model_name} (${policy.quantization})`);
-      console.log(` Context size: ${policy.context_size}`);
-      console.log(` Max parallelism: ${policy.max_parallelism}`);
-      try {
-        console.log(" Downloading/checking model...");
-        const modelPath = await ensureModelDownloaded(policy);
-        if (!modelPath) {
-          throw new Error(`Model not found in registry: ${policy.model_name} (${policy.quantization})`);
-        }
-        console.log(` \u2713 Model ready: ${modelPath}`);
-        console.log(" Downloading/checking llama.cpp binary...");
-        const binaryPath = await ensureLlamaBinary();
-        console.log(` \u2713 Binary ready: ${binaryPath}`);
-        const gpuLayers = 9999;
-        console.log(" Spawning llama.cpp process...");
-        llamaProcess = await spawnLlamaCpp({
-          binaryPath,
-          modelPath,
-          contextSize: policy.context_size,
-          parallelism: policy.max_parallelism,
-          port: 9002,
-          host: "127.0.0.1",
-          gpuLayers
-          // Enable GPU acceleration
-        });
-        const setupCrashHandler = (proc) => {
-          proc.onCrash(async (code, signal) => {
-            console.error(`
-[llama.cpp] Process crashed (code: ${code}, signal: ${signal})`);
-            console.log("[llama.cpp] Attempting to restart...");
-            try {
-              await new Promise((resolve) => setTimeout(resolve, 2e3));
-              if (llamaProcess) {
-                const restarted = await llamaProcess.restart();
-                llamaProcess = restarted;
-                setupCrashHandler(restarted);
-              }
-              await waitForLlamaReady("http://127.0.0.1:9002");
-              console.log("[llama.cpp] \u2713 Restarted successfully");
-              const capabilities2 = await discoverCapabilities("http://127.0.0.1:9002");
-              wsConnection.updateCapabilities(capabilities2);
-              wsConnection.sendConfig(capabilities2);
-              console.log("[llama.cpp] \u2713 Capabilities updated after restart");
-            } catch (restartError) {
-              console.error("[llama.cpp] \u2717 Failed to restart:", restartError);
-              console.error("[llama.cpp] Worker will continue but may not process jobs correctly");
-            }
-          });
-        };
-        setupCrashHandler(llamaProcess);
-        console.log(" Waiting for llama.cpp to be ready...");
-        await waitForLlamaReady("http://127.0.0.1:9002");
-        console.log(" \u2713 llama.cpp is ready");
-        console.log(" Warming up model...");
-        try {
-          const warmupResponse = await fetch("http://127.0.0.1:9002/v1/chat/completions", {
-            method: "POST",
-            headers: { "Content-Type": "application/json" },
-            body: JSON.stringify({
-              model: "placeholder",
-              // Will use default model
-              messages: [{ role: "user", content: "Hi" }],
-              max_tokens: 1,
-              // Just 1 token to warm up
-              stream: false
-            })
-          });
-          if (warmupResponse.ok) {
-            await warmupResponse.json();
-            console.log(" \u2713 Model warmed up");
-          }
-        } catch (error) {
-          console.warn(" \u26A0 Model warm-up failed (non-fatal):", error);
-        }
-        console.log(" Discovering capabilities...");
-        const capabilities = await discoverCapabilities("http://127.0.0.1:9002");
-        wsConnection.updateCapabilities(capabilities);
-        wsConnection.sendConfig(capabilities);
-        console.log(" \u2713 Capabilities discovered and sent to server");
-      } catch (error) {
-        console.error(" \u2717 Failed to setup llama.cpp:", error);
-        if (llamaProcess) {
-          try {
-            llamaProcess.stop();
-          } catch (cleanupError) {
-          }
-        }
-        throw error;
-      }
-    });
-    wsConnection.onConfigAck((success, error) => {
-      if (!success) {
-        console.error(" \u2717 Server rejected configuration:", error);
-        wsConnection.close();
-        if (llamaProcess) {
-          try {
-            llamaProcess.stop();
-          } catch (cleanupError) {
-          }
-        }
-        process.exit(1);
-      }
-      console.log(" \u2713 Configuration validated by server");
-    });
     wsConnection.onCancel((cancelMessage) => {
-      console.log(`
-=== Cancelling job ${cancelMessage.job_id} ===`);
       const abortController = activeJobs.get(cancelMessage.job_id);
       if (abortController) {
         abortController.abort();
-
-
-
+      }
+    });
+    llamaProcess.onCrash(async (code, signal) => {
+      console.error(`
+\u2717 llama.cpp crashed (code: ${code}, signal: ${signal})`);
+      console.log(" Restarting...");
+      try {
+        await new Promise((resolve) => setTimeout(resolve, 2e3));
+        const restarted = await llamaProcess.restart();
+        await waitForLlamaReady(llamaUrl);
+        const newCapabilities = await discoverCapabilities(llamaUrl);
+        wsConnection.updateCapabilities(newCapabilities);
+        wsConnection.sendConfig(newCapabilities);
+        console.log(" \u2713 Recovered\n");
+      } catch (restartError) {
+        console.error(" \u2717 Failed to restart:", restartError);
       }
     });
     try {
       await wsConnection.connect(wsUrl);
-      console.log();
     } catch (error) {
       const errorMsg = error instanceof Error ? error.message : "Unknown error";
-
-
-        console.error(" See error details above for specific mismatches.");
-      } else if (errorMsg.includes("GPU key policy not configured")) {
-        console.error("\n\u2717 GPU key policy is not configured.");
-        console.error(" Please configure the policy in the web UI before connecting workers.");
-      } else {
-        console.error("\n\u2717 Connection failed:", errorMsg);
-      }
+      console.error("\u2717 Connection failed:", errorMsg);
+      llamaProcess.stop();
       process.exit(1);
     }
-
-
-
-
+    wsConnection.sendConfig(capabilities);
+    const gpuId = wsConnection.getGpuId();
+    if (gpuId) {
+      console.log("\u25B6 GPU");
+      console.log(` \u2713 ID assigned ${gpuId}`);
+      console.log();
+    }
+    console.log("\u2713 Worker ready");
+    console.log(" Listening for jobs\u2026\n");
     const shutdown = () => {
-      console.log("\
-
-
-
-      } catch (error) {
-        console.error("Error stopping llama.cpp:", error);
-      }
+      console.log("\n\u2713 Shutting down gracefully...");
+      try {
+        llamaProcess.stop();
+      } catch (error) {
      }
      wsConnection.close();
      process.exit(0);
@@ -5934,9 +5957,35 @@ async function main() {
     process.on("SIGINT", shutdown);
     process.on("SIGTERM", shutdown);
   } catch (error) {
-    console.error("Fatal error:", error);
+    console.error("\u2717 Fatal error:", error);
     process.exit(1);
   }
 }
+async function fetchPolicy() {
+  return new Promise((resolve, reject) => {
+    const baseUrl = config.auxotUrl.replace(/^http/, "ws").replace(/^https/, "wss");
+    const wsUrl = `${baseUrl}/api/gpu/client`;
+    const placeholderCapabilities = {
+      model: "pending",
+      ctx_size: 0,
+      backend: "cpu"
+    };
+    const tempConnection = new WebSocketConnection(config.gpuKey, placeholderCapabilities);
+    tempConnection.setSilentMode(true);
+    const timeout = setTimeout(() => {
+      tempConnection.close();
+      reject(new Error("Timeout fetching policy"));
+    }, 3e4);
+    tempConnection.onPolicy(async (policy) => {
+      clearTimeout(timeout);
+      tempConnection.close();
+      resolve(policy);
+    });
+    tempConnection.connect(wsUrl).catch((error) => {
+      clearTimeout(timeout);
+      reject(error);
+    });
+  });
+}
 main();
 //# sourceMappingURL=index.js.map
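
The new fetchPolicy helper above opens a short-lived, silenced WebSocketConnection with placeholder capabilities purely to receive the policy from hello_ack, closes it before the model download starts, and times out after 30 seconds; the real connection is established later with the discovered capabilities. A TypeScript sketch of that connect-resolve-close pattern; the Policy shape and PolicyConnection interface here are illustrative stand-ins, only the method names mirror the diff.

// Sketch: fetch a one-off value (the policy) over a temporary connection, with a timeout.
interface Policy { model_name: string; quantization: string; context_size: number }

interface PolicyConnection {
  setSilentMode(silent: boolean): void;
  onPolicy(cb: (policy: Policy) => void | Promise<void>): void;
  connect(url: string): Promise<void>;
  close(): void;
}

function fetchPolicyOnce(conn: PolicyConnection, wsUrl: string, timeoutMs = 30_000): Promise<Policy> {
  return new Promise((resolve, reject) => {
    conn.setSilentMode(true); // suppress connect/disconnect chatter for this throwaway connection
    const timeout = setTimeout(() => {
      conn.close();
      reject(new Error("Timeout fetching policy"));
    }, timeoutMs);
    conn.onPolicy((policy) => {
      clearTimeout(timeout);
      conn.close();
      resolve(policy);
    });
    conn.connect(wsUrl).catch((error) => {
      clearTimeout(timeout);
      reject(error);
    });
  });
}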