@auxot/worker-cli 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +156 -0
- package/dist/capabilities.js +125 -0
- package/dist/debug.js +54 -0
- package/dist/gpu-detection.js +171 -0
- package/dist/gpu-id.js +48 -0
- package/dist/index.js +341 -0
- package/dist/llama-binary.js +287 -0
- package/dist/llama-process.js +203 -0
- package/dist/llama.js +207 -0
- package/dist/model-downloader.js +145 -0
- package/dist/model-resolver.js +80 -0
- package/dist/policy-validator.js +242 -0
- package/dist/types.js +4 -0
- package/dist/websocket.js +433 -0
- package/package.json +57 -0
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Policy Validation
|
|
3
|
+
*
|
|
4
|
+
* Validates worker capabilities against GPU key policy.
|
|
5
|
+
*/
|
|
6
|
+
/**
 * Normalize a model name so a discovered model can be compared with a policy model.
 *
 * The result has the shape "<base>[ <version>][-<variant>]", e.g.
 * "Qwen2.5-7B-Instruct-Q4_K_M.gguf" -> "Qwen 2.5-Instruct". Repository path,
 * GGUF decorations, multi-file suffixes, quantization, parameter counts,
 * expert counts and trailing date-like numbers are discarded.
 *
 * @param {string} name - Model name, file name or "org/repo" style identifier.
 * @returns {string} Normalized name used for equality comparison.
 */
function normalizeModelName(name) {
    // Drop GGUF decorations, then keep only the last path component.
    let normalized = name
        .replace(/-GGUF$/i, '')
        .replace(/\.gguf$/i, '');
    normalized = normalized.slice(normalized.lastIndexOf('/') + 1);
    // Remove multi-GGUF file patterns (e.g., "-00001-of-00003")
    normalized = normalized.replace(/-\d{5}-of-\d{5}$/i, '');

    // Extract the version that directly follows a known family name (e.g., "3", "2.5").
    let version = '';
    const versionMatch = normalized.match(/^(Qwen|Llama|Ministral|Devstral|Gemma|DeepSeek|Granite|GPT-OSS)[-_]?(\d+(?:\.\d+)?)/i);
    if (versionMatch) {
        version = versionMatch[2];
        // Keep the family text as written and drop the separator + version after it.
        normalized = versionMatch[1] + normalized.slice(versionMatch[0].length);
    }

    // Strip size/quantization tokens BEFORE looking for the variant: the variant
    // patterns are anchored to the end of the string, so a trailing "-Q4_K_M",
    // "-2507" or an interleaved "-8B" would otherwise hide "-Instruct"/"-VL-Instruct".
    const trailingNumber = /[-_]\d{4,}$/;
    normalized = normalized
        .replace(/[-_]Q\d+[_\w]*/i, '') // quantization (Q4_K_M, Q5_K_S, ...)
        .replace(/[-_]\d+(?:\.\d+)?[BMK](?![0-9])/i, '') // parameter count (7B, 1.5B, ...)
        .replace(/[-_][AE]\d+[BMK]/i, '') // expert size (A22B, E2B, ...)
        .replace(/[-_]\d+[AE](?![0-9])/i, '') // expert count (16E, ...)
        .replace(trailingNumber, ''); // 4+ digit trailing numbers (e.g. release dates)

    // Composite variants come first so "-VL-Instruct" is not reported as just "Instruct".
    const variantPatterns = [
        /-VL-Instruct$/i,
        /-VL-Thinking$/i,
        /-VL-Chat$/i,
        /-VL-Coder$/i,
        /-VL-Code$/i,
        /-Instruct$/i,
        /-Thinking$/i,
        /-Chat$/i,
        /-Coder$/i,
        /-Code$/i,
        /-VL$/i,
        /-Vision$/i,
        /-Maverick$/i,
        /-Scout$/i,
        /-Reasoning$/i,
    ];
    let variant = '';
    for (const pattern of variantPatterns) {
        const match = normalized.match(pattern);
        if (match) {
            variant = match[0].slice(1); // drop the leading "-"
            normalized = normalized.slice(0, match.index);
            break;
        }
    }
    // A numeric suffix may only become trailing once the variant is gone.
    normalized = normalized.replace(trailingNumber, '');

    // Map known family prefixes to a canonical base; otherwise keep what is left.
    // NOTE(review): "devstral" is mapped to "Ministral" exactly as before — confirm this is intended.
    const knownBases = [
        ['qwen', 'Qwen'],
        ['llama', 'Llama'],
        ['meta-llama', 'Llama'],
        ['ministral', 'Ministral'],
        ['devstral', 'Ministral'],
        ['gemma', 'Gemma'],
        ['deepseek', 'DeepSeek'],
        ['granite', 'Granite'],
        ['gpt-oss', 'GPT-OSS'],
        ['gptoss', 'GPT-OSS'],
    ];
    const nameLower = normalized.toLowerCase();
    const known = knownBases.find(([prefix]) => nameLower.startsWith(prefix));
    const base = known ? known[1] : normalized.trim();

    // Combine base + version + variant
    let result = base;
    if (version) {
        result += ` ${version}`;
    }
    if (variant) {
        result += `-${variant}`;
    }
    return result.trim();
}
|
|
114
|
+
/**
 * Extract the quantization type embedded in the discovered model name.
 *
 * Recognizes llama.cpp style tokens such as Q4_K_M, Q8_0, IQ4_XS, F16, BF16
 * and F32 when they appear as a separate token (delimited by a non-alphanumeric
 * character or the string boundary), so "BF16" is never reported as "F16".
 *
 * @param {{ model?: string }} capabilities - Discovered worker capabilities.
 * @returns {string|null} Upper-cased quantization token, or null if none is found.
 */
function extractQuantization(capabilities) {
    const model = capabilities?.model;
    if (typeof model !== 'string' || model === '') {
        return null;
    }
    const match = model.match(/(?:^|[^A-Za-z0-9])(BF16|F16|F32|I?Q\d+(?:_[A-Za-z0-9]+)*)(?![A-Za-z0-9])/i);
    return match ? match[1].toUpperCase() : null;
}
|
|
132
|
+
/**
 * Infer coarse capabilities from a model name.
 *
 * Looks for vision, code and embedding markers in the lower-cased name;
 * when none is present the model is assumed to be a plain chat model.
 *
 * @param {string} modelName - Model name or file name.
 * @returns {string[]} Any of 'vision', 'code', 'embedding' (in that order), or ['chat'].
 */
function inferCapabilitiesFromModel(modelName) {
    const name = modelName.toLowerCase();
    const capabilities = [];
    // "vl-" covers names like "qwen3-vl-8b"; the regex adds names where the
    // VL token is last ("qwen2.5-vl", "qwen2-vl.gguf", "model-vl_q4").
    const isVision = name.includes('vision') ||
        name.includes('multimodal') ||
        name.includes('vl-') ||
        /[-_]vl(?:$|[._])/.test(name);
    if (isVision) {
        capabilities.push('vision');
    }
    // "coder" and "starcoder" both contain "code".
    if (name.includes('code')) {
        capabilities.push('code');
    }
    // "embedding" contains "embed".
    if (name.includes('embed')) {
        capabilities.push('embedding');
    }
    if (capabilities.length === 0) {
        capabilities.push('chat');
    }
    return capabilities;
}
|
|
152
|
+
/**
|
|
153
|
+
* Validate worker capabilities against policy
|
|
154
|
+
*/
|
|
155
|
+
/**
 * Parse a parameter count such as "7B", "1.5b" or "350M" into a number.
 *
 * @param {string} parameters - Count with a B (1e9), M (1e6) or K (1e3) suffix.
 * @returns {number} The numeric count, or 0 when the input is not a string in that format.
 */
function parseParameters(parameters) {
    if (typeof parameters !== 'string') {
        return 0;
    }
    const match = parameters.trim().match(/^(\d+(?:\.\d+)?)([BMK])$/i);
    if (!match) {
        return 0;
    }
    const multipliers = { B: 1e9, M: 1e6, K: 1e3 };
    return Number.parseFloat(match[1]) * multipliers[match[2].toUpperCase()];
}
|
|
172
|
+
/**
 * Guess whether a model is a mixture-of-experts ("MoE") or "Dense" model from its name.
 *
 * Heuristics, all on the lower-cased name:
 *  - an explicit "moe" / "mixture-of-experts" keyword;
 *  - an expert-size token such as "a22b" or "e2b" (as in "235b-a22b");
 *  - an expert-count token such as "16e" (as in "17b-16e");
 *  - an "NxM" size such as "8x7b";
 *  - two or more billion-scale sizes, at least one with 2+ integer digits.
 * Tokens must be delimited, so context lengths ("128k", "1m") and float-format
 * markers ("e4m3") are not mistaken for parameter or expert counts.
 *
 * @param {string} modelName - Discovered model name.
 * @returns {'MoE'|'Dense'} Inferred family.
 */
function inferModelFamily(modelName) {
    const name = modelName.toLowerCase();
    if (name.includes('moe') || name.includes('mixture-of-experts')) {
        return 'MoE';
    }
    const hasExpertSize = /(?:^|[^a-z0-9])[ae]\d+[bmk](?![a-z0-9])/.test(name);
    const hasExpertCount = /(?:^|[^a-z0-9])\d+e(?![a-z0-9])/.test(name);
    const hasTimesNotation = /\d+x\d+(?:\.\d+)?b(?![a-z0-9])/.test(name);
    if (hasExpertSize || hasExpertCount || hasTimesNotation) {
        return 'MoE';
    }
    const billionTokens = name.match(/(?:^|[^a-z0-9.])\d+(?:\.\d+)?b(?![a-z0-9])/g) || [];
    const hasLargeToken = billionTokens.some((token) => /(?:^|[^0-9.])\d{2,}(?:\.\d+)?b$/.test(token));
    return billionTokens.length >= 2 && hasLargeToken ? 'MoE' : 'Dense';
}

/**
 * Validate discovered worker capabilities against the GPU key policy.
 *
 * Checks, in order: normalized model name, minimum context size, quantization,
 * required capabilities, parameter count and model family. The quantization,
 * parameter and family checks only run when the policy specifies a value
 * (quantization and parameters additionally need a discovered value to compare).
 *
 * @param {{ model?: string, ctx_size?: number, parameters?: string }} discoveredCapabilities
 *   Capabilities discovered from the running model.
 * @param {{ model_name: string, context_size: number, quantization?: string,
 *   capabilities?: string[], parameters?: string, family?: string }} policy
 *   Policy received from the server.
 * @returns {{ valid: boolean, errors: string[], warnings: (string[]|undefined) }}
 *   `valid` is true when `errors` is empty; `warnings` is undefined when there are none.
 */
export function validatePolicy(discoveredCapabilities, policy) {
    const errors = [];
    const warnings = []; // Reserved for future non-fatal findings; nothing populates it yet.
    const discoveredModel = discoveredCapabilities.model || '';

    // 1. Model name match (both sides normalized to base + version + variant)
    const discoveredNormalized = normalizeModelName(discoveredModel);
    const policyNormalized = normalizeModelName(String(policy.model_name ?? ''));
    if (discoveredNormalized !== policyNormalized) {
        errors.push(`Model name mismatch: discovered "${discoveredCapabilities.model}" (normalized: "${discoveredNormalized}") ` +
            `does not match policy "${policy.model_name}" (normalized: "${policyNormalized}")`);
    }

    // 2. Context size >= policy.context_size
    const discoveredCtxSize = discoveredCapabilities.ctx_size || 0;
    if (discoveredCtxSize < policy.context_size) {
        errors.push(`Context size insufficient: discovered ${discoveredCtxSize} < required ${policy.context_size}`);
    }

    // 3. Quantization match — only when the policy specifies one and one was discovered
    const discoveredQuant = extractQuantization(discoveredCapabilities);
    if (policy.quantization && discoveredQuant &&
        discoveredQuant.toUpperCase() !== String(policy.quantization).toUpperCase()) {
        errors.push(`Quantization mismatch: discovered "${discoveredQuant}" does not match policy "${policy.quantization}"`);
    }

    // 4. Capabilities match (worker must have all required capabilities)
    // NOTE(review): 'chat' is only inferred when no other capability is detected,
    // so a policy requiring 'chat' rejects coder/vision models — confirm this is intended.
    const requiredCaps = Array.isArray(policy.capabilities) ? policy.capabilities : [];
    const discoveredCaps = inferCapabilitiesFromModel(discoveredModel);
    const missingCaps = requiredCaps.filter((requiredCap) => !discoveredCaps.includes(requiredCap));
    if (missingCaps.length > 0) {
        errors.push(`Missing required capabilities: ${missingCaps.join(', ')}. ` +
            `Discovered: ${discoveredCaps.join(', ')}. ` +
            `Required: ${requiredCaps.join(', ')}`);
    }

    // 5. Parameters match (if specified in policy); "7b" and "7B" are the same size
    if (policy.parameters) {
        const discoveredParams = discoveredCapabilities.parameters;
        const sameParams = String(discoveredParams).trim().toUpperCase() ===
            String(policy.parameters).trim().toUpperCase();
        if (discoveredParams && !sameParams) {
            errors.push(`Parameters mismatch: discovered "${discoveredParams}" does not match policy "${policy.parameters}"`);
        }
    }

    // 6. Family match (if specified in policy)
    if (policy.family) {
        const discoveredFamily = inferModelFamily(discoveredModel);
        if (discoveredFamily !== policy.family) {
            errors.push(`Family mismatch: discovered "${discoveredFamily}" does not match policy "${policy.family}". ` +
                `Model name: "${discoveredCapabilities.model}"`);
        }
    }

    return {
        valid: errors.length === 0,
        errors,
        warnings: warnings.length > 0 ? warnings : undefined,
    };
}
|
package/dist/types.js
ADDED
|
@@ -0,0 +1,433 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* WebSocket Connection Manager
|
|
3
|
+
*
|
|
4
|
+
* Handles connection to Auxot WebSocket server with:
|
|
5
|
+
* - Hello handshake
|
|
6
|
+
* - Heartbeat keepalive
|
|
7
|
+
* - Job message handling
|
|
8
|
+
* - Automatic reconnection with exponential backoff
|
|
9
|
+
*/
|
|
10
|
+
import WebSocket from 'ws';
|
|
11
|
+
import { logServerToClient, logClientToServer } from './debug.js';
|
|
12
|
+
import { validatePolicy } from './policy-validator.js';
|
|
13
|
+
const HEARTBEAT_INTERVAL = 5000; // ms between heartbeat messages sent while connected (5 seconds)
|
|
14
|
+
const INITIAL_RETRY_DELAY = 1000; // ms; starting reconnect delay, also the value retryDelay is reset to after a successful handshake (1 second)
|
|
15
|
+
const MAX_RETRY_DELAY = 60000; // ms; upper bound applied when the reconnect delay is multiplied (60 seconds)
|
|
16
|
+
const RETRY_MULTIPLIER = 2; // Exponential backoff multiplier applied to retryDelay
|
|
17
|
+
/**
 * Manages the worker's WebSocket session with the server.
 *
 * Responsibilities visible in this class:
 *  - sends a `hello` message (GPU key + capabilities) when the socket opens;
 *  - processes `hello_ack`, `config_ack`, `heartbeat_ack`, `cancel` and `job` messages;
 *  - sends a `heartbeat` message every HEARTBEAT_INTERVAL ms while connected;
 *  - reconnects with exponential backoff unless a fatal error or `close()` stopped it.
 */
export class WebSocketConnection {
    ws = null;
    heartbeatTimer = null;
    reconnectTimer = null;
    gpuKey;
    capabilities;
    onJobCallback = null;
    onCancelCallback = null;
    onPolicyCallback = null;
    onConfigAckCallback = null;
    wsUrl = '';
    retryDelay = INITIAL_RETRY_DELAY;
    isConnected = false;
    shouldReconnect = true;
    isReconnecting = false;
    policy = null;

    /**
     * @param {string} gpuKey - Key sent to the server in the hello message.
     * @param {object} capabilities - Capabilities sent in the hello message.
     */
    constructor(gpuKey, capabilities) {
        this.gpuKey = gpuKey;
        this.capabilities = capabilities;
    }

    /**
     * Connect to WebSocket server and send hello message
     *
     * @param {string} wsUrl - Server URL; remembered for later reconnects.
     * @returns {Promise<void>} See attemptConnection for how it settles.
     */
    async connect(wsUrl) {
        this.wsUrl = wsUrl;
        this.shouldReconnect = true;
        return this.attemptConnection();
    }

    /**
     * Attempt to establish WebSocket connection
     *
     * Resolves once the handshake succeeded, or when the server reported a
     * duplicate connection and a retry was scheduled. Rejects on a missing URL,
     * a constructor failure, a 10 s handshake timeout, a missing policy, a failed
     * policy callback/validation, or an authentication error.
     *
     * NOTE(review): if the socket errors/closes before `hello_ack` (e.g. connection
     * refused) the returned promise is never settled; retries continue in the
     * background. This pre-existing behaviour is kept because callers may rely on it.
     *
     * @returns {Promise<void>}
     */
    async attemptConnection() {
        return new Promise((resolve, reject) => {
            if (!this.wsUrl) {
                reject(new Error('No WebSocket URL configured'));
                return;
            }
            // Only log if not already reconnecting (first attempt or successful reconnection)
            if (!this.isReconnecting) {
                console.log(`Connecting to ${this.wsUrl}...`);
            }
            let socket;
            try {
                socket = new WebSocket(this.wsUrl);
            }
            catch (error) {
                this.scheduleReconnect();
                reject(error);
                return;
            }
            // Handlers below use the local `socket` so a late event from this
            // attempt can never act on a socket created by a later attempt.
            this.ws = socket;

            // Give up on this attempt if no hello_ack arrives in time.
            const connectionTimeout = setTimeout(() => {
                if (!this.isConnected) {
                    socket.close();
                    this.scheduleReconnect();
                    reject(new Error('Connection timeout'));
                }
            }, 10000);

            socket.on('open', () => {
                // Send hello message (server will assign GPU ID)
                this.send({
                    type: 'hello',
                    gpu_key: this.gpuKey,
                    capabilities: this.capabilities,
                });
            });

            socket.on('message', async (data) => {
                let message;
                try {
                    message = JSON.parse(data.toString());
                }
                catch (error) {
                    console.error('Error parsing message:', error);
                    return;
                }
                try {
                    if (message.type === 'hello_ack') {
                        clearTimeout(connectionTimeout);
                    }
                    await this.#handleMessage(message, socket, resolve, reject);
                }
                catch (error) {
                    // Kept separate from the parse failure above so the log is accurate.
                    console.error('Error handling message:', error);
                }
            });

            socket.on('error', () => {
                clearTimeout(connectionTimeout);
                // Connection errors are deliberately silent; reconnect scheduling
                // happens in the 'close' handler below.
            });

            socket.on('close', () => {
                clearTimeout(connectionTimeout);
                // A newer attempt owns the connection state; ignore this stale socket
                // so it cannot stop the live connection's heartbeat.
                if (this.ws !== null && this.ws !== socket) {
                    return;
                }
                this.isConnected = false;
                this.stopHeartbeat();
                if (this.shouldReconnect) {
                    if (!this.isReconnecting) {
                        console.log('WebSocket disconnected, will continue to retry...');
                        this.isReconnecting = true;
                    }
                    this.scheduleReconnect();
                }
                else {
                    console.log('WebSocket disconnected');
                }
            });
        });
    }

    /**
     * Route one parsed server message to its handler.
     * Every message except `heartbeat_ack` is passed to the debug logger first.
     */
    async #handleMessage(message, socket, resolve, reject) {
        // Debug log (skip heartbeat_ack to reduce noise)
        if (message.type !== 'heartbeat_ack') {
            logServerToClient(message);
        }
        switch (message.type) {
            case 'hello_ack':
                await this.#handleHelloAck(message, socket, resolve, reject);
                break;
            case 'config_ack':
                this.#handleConfigAck(message, socket);
                break;
            case 'cancel':
                this.#handleCancel(message);
                break;
            case 'job':
                this.#handleJob(message);
                break;
            default:
                // heartbeat_ack and unknown types need no action.
                break;
        }
    }

    /** Complete (or fail) the handshake based on the server's hello_ack. */
    async #handleHelloAck(message, socket, resolve, reject) {
        if (!message.success) {
            this.#handleHelloRejection(message, socket, resolve, reject);
            return;
        }
        if (!message.policy) {
            const errorMsg = 'Server did not send policy in hello_ack';
            console.error(`✗ ${errorMsg}`);
            this.shouldReconnect = false;
            socket.close();
            reject(new Error(errorMsg));
            return;
        }
        // Store policy from server
        this.policy = message.policy;

        if (this.onPolicyCallback) {
            // Deferred flow: the callback runs first and validation happens later via config message.
            try {
                await this.onPolicyCallback(message.policy);
            }
            catch (error) {
                console.error('[Policy Callback] Error:', error);
                this.shouldReconnect = false;
                socket.close();
                reject(error);
                return;
            }
            console.log('✓ Successfully authenticated with server');
            console.log(` Policy: ${message.policy.model_name} (${message.policy.quantization})`);
            console.log(' Spawning llama.cpp process...');
            console.log(' (Capabilities validation will happen via config message)');
        }
        else {
            // No policy callback - validate immediately (legacy flow).
            // `await` is harmless for the synchronous validator and tolerates an async one.
            const validation = await validatePolicy(this.capabilities, message.policy);
            if (validation.warnings && validation.warnings.length > 0) {
                console.warn('⚠ Policy validation warnings:');
                validation.warnings.forEach((warning) => {
                    console.warn(` - ${warning}`);
                });
            }
            if (!validation.valid) {
                this.#logPolicyMismatch(message.policy, validation);
                this.shouldReconnect = false;
                socket.close();
                reject(new Error(`Policy validation failed: ${validation.errors.join('; ')}`));
                return;
            }
            console.log('✓ Successfully authenticated with server');
            console.log(` Policy: ${message.policy.model_name} (${message.policy.quantization})`);
            console.log('✓ Capabilities validated against policy');
        }

        // The policy callback can take a while; only mark the session live if this
        // socket is still the current one and still open.
        if (this.ws === socket && socket.readyState === WebSocket.OPEN) {
            this.isConnected = true;
            this.isReconnecting = false;
            this.retryDelay = INITIAL_RETRY_DELAY; // Reset retry delay on success
            this.startHeartbeat();
        }
        resolve();
    }

    /** Print the expected-vs-discovered report for a failed legacy policy validation. */
    #logPolicyMismatch(policy, validation) {
        console.error('✗ Policy validation failed:');
        console.error(' Expected:');
        console.error(` Model: ${policy.model_name}`);
        console.error(` Quantization: ${policy.quantization}`);
        console.error(` Context Size: ${policy.context_size}`);
        console.error(` Capabilities: ${policy.capabilities.join(', ')}`);
        if (policy.parameters) {
            console.error(` Parameters: ${policy.parameters}`);
        }
        if (policy.family) {
            console.error(` Family: ${policy.family}`);
        }
        console.error(' Discovered:');
        console.error(` Model: ${this.capabilities.model}`);
        console.error(` Context Size: ${this.capabilities.ctx_size}`);
        if (this.capabilities.parameters) {
            console.error(` Parameters: ${this.capabilities.parameters}`);
        }
        console.error(' Errors:');
        validation.errors.forEach((error) => {
            console.error(` - ${error}`);
        });
        console.error('\nPlease configure your llama.cpp server to match the policy requirements.');
    }

    /**
     * Handle an unsuccessful hello_ack: either a duplicate connection (retry after
     * the server-provided delay) or a fatal authentication error (stop retrying).
     */
    #handleHelloRejection(message, socket, resolve, reject) {
        const errorMessage = message.error || 'Authentication failed';
        const reconnectIn = message.reconnect_in_seconds || message.reconnectInSeconds;
        if (reconnectIn) {
            console.error(`✗ Worker UUID already connected! Are you trying to connect the same worker ID twice?`);
            console.error(` The existing connection's presence key expires in ${reconnectIn} seconds.`);
            console.error(` Waiting ${reconnectIn} seconds and retrying automatically...`);
            socket.close();
            // Wait for the specified time before reconnecting
            this.retryDelay = Math.max(reconnectIn * 1000, INITIAL_RETRY_DELAY);
            this.scheduleReconnect();
            // Resolve instead of reject - this allows the app to continue with auto-retry
            resolve();
            return;
        }
        // Authentication errors (invalid key, key not found, etc.) are fatal - stop retrying
        console.error('✗ Authentication failed:', errorMessage);
        console.error(' Fatal error: Stopping retry attempts. Please check your GPU key and try again.');
        this.shouldReconnect = false;
        socket.close();
        reject(new Error(errorMessage));
    }

    /** Forward config_ack to the registered callback, or log and stop on failure. */
    #handleConfigAck(message, socket) {
        if (this.onConfigAckCallback) {
            this.onConfigAckCallback(message.success, message.error);
            return;
        }
        // Fallback: log and handle errors
        if (message.success) {
            console.log('✓ Capabilities validated by server');
        }
        else {
            console.error(`✗ Config validation failed: ${message.error}`);
            this.shouldReconnect = false;
            socket.close();
        }
    }

    /** Forward a cancel request to the registered callback. */
    #handleCancel(message) {
        console.log(`Received cancel request for job ${message.job_id}`);
        if (this.onCancelCallback) {
            this.onCancelCallback(message);
        }
        else {
            console.warn('No cancel callback registered!');
        }
    }

    /**
     * Start the registered job callback and report any failure to the server.
     * The callback is wrapped in a promise so a synchronous throw or a
     * non-promise return value is handled the same way as an async rejection.
     */
    #handleJob(message) {
        console.log(`Received job ${message.job_id}`);
        if (!this.onJobCallback) {
            console.warn('No job callback registered!');
            return;
        }
        new Promise((done) => done(this.onJobCallback(message))).catch((err) => {
            console.error('Error processing job:', err);
            this.sendError(message.job_id, err instanceof Error ? err.message : String(err));
        });
    }

    /**
     * Schedule reconnection with exponential backoff
     *
     * No-op when a reconnect is already pending or reconnecting is disabled.
     * The delay grows at scheduling time (not when an attempt rejects) so it also
     * backs off for attempts that fail without settling their promise; it is
     * reset to INITIAL_RETRY_DELAY after a successful handshake.
     */
    scheduleReconnect() {
        if (this.reconnectTimer || !this.shouldReconnect) {
            return;
        }
        const delay = this.retryDelay;
        this.retryDelay = Math.min(delay * RETRY_MULTIPLIER, MAX_RETRY_DELAY);
        this.reconnectTimer = setTimeout(async () => {
            this.reconnectTimer = null;
            try {
                await this.attemptConnection();
            }
            catch {
                // Intentionally silent: fatal errors are logged where they occur and
                // retryable failures have already scheduled the next attempt.
            }
        }, delay);
    }

    /**
     * Start sending heartbeat messages
     */
    startHeartbeat() {
        this.stopHeartbeat(); // Clear any existing timer
        this.heartbeatTimer = setInterval(() => {
            if (this.isConnected) {
                // Server identifies worker by WebSocket connection, not GPU ID
                this.send({ type: 'heartbeat' });
            }
        }, HEARTBEAT_INTERVAL);
    }

    /**
     * Stop heartbeat timer
     */
    stopHeartbeat() {
        if (this.heartbeatTimer) {
            clearInterval(this.heartbeatTimer);
            this.heartbeatTimer = null;
        }
    }

    /**
     * Send message to server
     *
     * The message is dropped silently when there is no socket or it is not open.
     *
     * @param {object} message - JSON-serializable message with a `type` field.
     */
    send(message) {
        if (this.ws && this.ws.readyState === WebSocket.OPEN) {
            const jsonString = JSON.stringify(message);
            // Debug log (skip heartbeats to reduce noise)
            if (message.type !== 'heartbeat') {
                logClientToServer(message);
            }
            this.ws.send(jsonString);
        }
    }

    /**
     * Send token to server
     */
    sendToken(jobId, token) {
        this.send({
            type: 'token',
            job_id: jobId,
            token,
        });
    }

    /**
     * Send completion to server with metadata
     */
    sendComplete(jobId, fullResponse, durationMs, inputTokens, outputTokens, toolCalls) {
        this.send({
            type: 'complete',
            job_id: jobId,
            full_response: fullResponse,
            duration_ms: durationMs,
            input_tokens: inputTokens,
            output_tokens: outputTokens,
            tool_calls: toolCalls,
        });
    }

    /**
     * Send error to server
     */
    sendError(jobId, error) {
        this.send({
            type: 'error',
            job_id: jobId,
            error,
        });
    }

    /**
     * Register callback for job messages
     */
    onJob(callback) {
        this.onJobCallback = callback;
    }

    /**
     * Register callback for cancel messages
     */
    onCancel(callback) {
        this.onCancelCallback = callback;
    }

    /**
     * Register callback for policy (called when policy is received in hello_ack)
     * Used to spawn llama.cpp process before validation
     */
    onPolicy(callback) {
        this.onPolicyCallback = callback;
    }

    /**
     * Register callback for config_ack (called when server validates our config)
     */
    onConfigAck(callback) {
        this.onConfigAckCallback = callback;
    }

    /**
     * Update capabilities (called after spawning llama.cpp)
     */
    updateCapabilities(capabilities) {
        this.capabilities = capabilities;
    }

    /**
     * Send config message to server (called after discovering capabilities)
     */
    sendConfig(capabilities) {
        this.send({
            type: 'config',
            capabilities,
        });
    }

    /**
     * Check if connected
     */
    get connected() {
        return this.isConnected;
    }

    /**
     * Get policy received from server
     */
    getPolicy() {
        return this.policy;
    }

    /**
     * Get current capabilities
     */
    getCapabilities() {
        return this.capabilities;
    }

    /**
     * Close connection
     *
     * Disables reconnecting, cancels pending timers, closes the socket and
     * clears the stored policy.
     */
    close() {
        this.shouldReconnect = false;
        this.stopHeartbeat();
        if (this.reconnectTimer) {
            clearTimeout(this.reconnectTimer);
            this.reconnectTimer = null;
        }
        if (this.ws) {
            this.ws.close();
            this.ws = null;
        }
        this.isConnected = false;
        this.policy = null; // Clear policy on disconnect
    }
}
|