@auxot/worker-cli 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,242 @@
1
+ /**
2
+ * Policy Validation
3
+ *
4
+ * Validates worker capabilities against GPU key policy.
5
+ */
6
/**
 * Normalize a model name for comparison.
 *
 * Reduces a model identifier (file name, repository path or policy name) to
 * the canonical form "<Base> <version>-<Variant>". The GGUF suffix/extension,
 * directory components, multi-file shard markers, quantization tags,
 * parameter counts and expert counts are all dropped.
 *
 * @param {string} name - Model name or path. A nullish value is treated as ''.
 * @returns {string} Canonical name, e.g. "Llama 3.3-Instruct".
 */
function normalizeModelName(name) {
    // Tolerate a missing name (e.g. a policy without model_name) instead of throwing.
    let normalized = String(name ?? '')
        .replace(/-GGUF$/i, '')
        .replace(/\.gguf$/i, '');
    // Keep only the last path component (the file name).
    normalized = normalized.split('/').pop();
    // Remove multi-GGUF shard markers (e.g., "-00001-of-00003").
    normalized = normalized.replace(/-\d{5}-of-\d{5}$/i, '');
    // Extract the version that directly follows a known family name
    // (e.g., "3", "2.5", "3.3") and strip it from the working string.
    let version = '';
    const versionMatch = normalized.match(/^(Qwen|Llama|Ministral|Devstral|Gemma|DeepSeek|Granite|GPT-OSS)[-_]?(\d+(?:\.\d+)?)/i);
    if (versionMatch) {
        version = versionMatch[2];
        // The match is anchored at the start, so slicing it off is equivalent to
        // a second regex replace and avoids building a RegExp from unescaped text.
        normalized = versionMatch[1] + normalized.slice(versionMatch[0].length);
    }
    // Remove quantization tags (Q4_K_M, Q5_K_S, ...) BEFORE looking for the
    // variant: the variant patterns are anchored at the end of the string, so a
    // trailing "-Q4_K_M" would otherwise hide "-Instruct" and make a quantized
    // file normalize differently from the un-quantized policy name.
    normalized = normalized.replace(/[-_]Q\d+[_\w]*/i, '');
    // Variant suffixes, most specific (composite) first; the first hit wins.
    const variantPatterns = [
        /-VL-Instruct$/i,
        /-VL-Thinking$/i,
        /-VL-Chat$/i,
        /-VL-Coder$/i,
        /-VL-Code$/i,
        /-Instruct$/i,
        /-Thinking$/i,
        /-Chat$/i,
        /-Coder$/i,
        /-Code$/i,
        /-VL$/i,
        /-Vision$/i,
        /-Maverick$/i,
        /-Scout$/i,
        /-Reasoning$/i,
    ];
    let variant = '';
    for (const pattern of variantPatterns) {
        const match = normalized.match(pattern);
        if (match) {
            variant = match[0].slice(1); // drop the leading "-"
            normalized = normalized.slice(0, match.index);
            break;
        }
    }
    // Remove parameter counts (7B, 13B, 30B, etc.)
    normalized = normalized.replace(/[-_](\d+(?:\.\d+)?[BMK])(?![0-9])/i, '');
    // Remove expert counts (A22B, E2B, etc.)
    normalized = normalized.replace(/[-_]([AE]\d+[BMK])/i, '');
    normalized = normalized.replace(/[-_](\d+[AE])(?![0-9])/i, '');
    // Remove trailing 4+ digit numbers that are not part of the model name.
    normalized = normalized.replace(/[-_](\d{4,})$/i, '');
    // Map the remaining text onto a canonical family name where one is known.
    const familyPrefixes = [
        ['Qwen', ['qwen']],
        ['Llama', ['llama', 'meta-llama']],
        ['Ministral', ['ministral', 'devstral']],
        ['Gemma', ['gemma']],
        ['DeepSeek', ['deepseek']],
        ['Granite', ['granite']],
        ['GPT-OSS', ['gpt-oss', 'gptoss']],
    ];
    const nameLower = normalized.toLowerCase();
    const known = familyPrefixes.find(([, prefixes]) => prefixes.some((prefix) => nameLower.startsWith(prefix)));
    const base = known ? known[0] : normalized.trim();
    // Combine base + version + variant.
    let result = base;
    if (version) {
        result += ` ${version}`;
    }
    if (variant) {
        result += `-${variant}`;
    }
    return result.trim();
}
114
/**
 * Extract the quantization tag from the discovered model name.
 *
 * @param {{model?: string}} capabilities - Discovered capabilities; only the
 *   `model` field is read.
 * @returns {string|null} The first known quantization tag contained in the
 *   model name (case-sensitive substring match), or null when none is found.
 */
function extractQuantization(capabilities) {
    const model = capabilities?.model || '';
    // Order matters because this is a substring search: 'BF16' must be tested
    // before 'F16', otherwise every BF16 model is reported as 'F16'.
    const quantPatterns = [
        'Q3_K_S', 'Q4_K_S', 'Q5_K_S', 'Q6_K', 'Q8_0', 'Q8_K',
        'BF16', 'F16', 'F32',
    ];
    return quantPatterns.find((pattern) => model.includes(pattern)) ?? null;
}
132
/**
 * Guess a model's capabilities from keywords in its name.
 *
 * The lookup is a case-insensitive substring search. A model whose name
 * matches none of the keywords is reported as a plain 'chat' model.
 *
 * @param {string} modelName - Model name to inspect.
 * @returns {string[]} Unique capabilities, in the order vision, code, embedding
 *   (or ['chat'] when nothing matched).
 */
function inferCapabilitiesFromModel(modelName) {
    const lowered = modelName.toLowerCase();
    // capability -> substrings that indicate it
    const keywordTable = [
        ['vision', ['vision', 'multimodal', 'vl-']],
        ['code', ['code', 'coder', 'starcoder']],
        ['embedding', ['embed', 'embedding']],
    ];
    const detected = keywordTable
        .filter(([, keywords]) => keywords.some((keyword) => lowered.includes(keyword)))
        .map(([capability]) => capability);
    const result = detected.length > 0 ? detected : ['chat'];
    return Array.from(new Set(result));
}
152
+ /**
153
+ * Validate worker capabilities against policy
154
+ */
155
/**
 * Convert a parameter-count label into a number.
 *
 * Accepts a decimal number followed by a single unit letter (case-insensitive):
 * "7B" -> 7e9, "350M" -> 3.5e8, "8K" -> 8000.
 *
 * @param {string} parameters - Label such as "7B" or "1.5M".
 * @returns {number} The parameter count, or 0 when the label does not match.
 */
function parseParameters(parameters) {
    const unitScale = { B: 1e9, M: 1e6, K: 1e3 };
    const parsed = parameters.match(/^(\d+(?:\.\d+)?)(B|M|K)$/i);
    if (parsed === null) {
        return 0;
    }
    const [, digits, suffix] = parsed;
    return parseFloat(digits) * unitScale[suffix.toUpperCase()];
}
172
/**
 * Validate discovered worker capabilities against the GPU key policy.
 *
 * Checks, in order: normalized model name, context size, quantization,
 * required capabilities, parameter count and model family (MoE vs Dense).
 * Quantization, parameters and family are only compared when the policy
 * specifies them.
 *
 * @param {object} discoveredCapabilities - Reads `model`, `ctx_size` and
 *   `parameters`.
 * @param {object} policy - Reads `model_name`, `context_size`, `quantization`,
 *   `capabilities`, `parameters` and `family`.
 * @returns {{valid: boolean, errors: string[], warnings: (string[]|undefined)}}
 *   `valid` is true when no check failed; `warnings` is undefined when empty.
 */
export function validatePolicy(discoveredCapabilities, policy) {
    const errors = [];
    const warnings = []; // Reserved for future validation warnings; nothing is pushed yet.
    // 1. Model name match (both sides normalized to base + version + variant)
    const discoveredNormalized = normalizeModelName(discoveredCapabilities.model || '');
    const policyNormalized = normalizeModelName(policy.model_name);
    if (discoveredNormalized !== policyNormalized) {
        errors.push(`Model name mismatch: discovered "${discoveredCapabilities.model}" (normalized: "${discoveredNormalized}") ` +
            `does not match policy "${policy.model_name}" (normalized: "${policyNormalized}")`);
    }
    // 2. Context size >= policy.context_size
    const discoveredCtxSize = discoveredCapabilities.ctx_size || 0;
    if (discoveredCtxSize < policy.context_size) {
        errors.push(`Context size insufficient: discovered ${discoveredCtxSize} < required ${policy.context_size}`);
    }
    // 3. Quantization match. Only enforced when the policy specifies one AND a
    //    quantization could be detected, mirroring the parameters/family checks.
    if (policy.quantization) {
        const discoveredQuant = extractQuantization(discoveredCapabilities);
        if (discoveredQuant && discoveredQuant !== policy.quantization) {
            errors.push(`Quantization mismatch: discovered "${discoveredQuant}" does not match policy "${policy.quantization}"`);
        }
    }
    // 4. Capabilities match (worker must have all required capabilities).
    //    A policy without a capabilities list requires nothing.
    const requiredCaps = policy.capabilities ?? [];
    const discoveredCaps = inferCapabilitiesFromModel(discoveredCapabilities.model || '');
    const missingCaps = requiredCaps.filter((requiredCap) => !discoveredCaps.includes(requiredCap));
    if (missingCaps.length > 0) {
        errors.push(`Missing required capabilities: ${missingCaps.join(', ')}. ` +
            `Discovered: ${discoveredCaps.join(', ')}. ` +
            `Required: ${requiredCaps.join(', ')}`);
    }
    // 5. Parameters match (if specified in policy and reported by the worker)
    if (policy.parameters) {
        const discoveredParams = discoveredCapabilities.parameters;
        if (discoveredParams && discoveredParams !== policy.parameters) {
            errors.push(`Parameters mismatch: discovered "${discoveredParams}" does not match policy "${policy.parameters}"`);
        }
    }
    // 6. Family match (if specified in policy). The family is inferred from
    //    the model name only; a name counts as MoE when it has:
    //      - "moe" or "mixture-of-experts" in it,
    //      - expert notation like "A22B" / "E2B",
    //      - a total-expert pair like "235B-A22B", or
    //      - two or more size tokens, at least one of them 2+ digits.
    if (policy.family) {
        const modelName = (discoveredCapabilities.model || '').toLowerCase();
        const hasMoEKeyword = modelName.includes('moe') || modelName.includes('mixture-of-experts');
        const hasExpertNotation = /[ae]\d+[bmk]/i.test(modelName);
        const hasTotalExpertPattern = /\d+[bmk]-[ae]\d+[bmk]/i.test(modelName);
        const paramPatterns = modelName.match(/\d+[bmk]/gi) || [];
        const hasMultipleLargeParams = paramPatterns.length >= 2 &&
            paramPatterns.some((p) => /^(\d{2,}|[0-9]+0)[bmk]$/i.test(p)); // 2+ digits or ends in 0
        const isMoE = hasMoEKeyword || hasExpertNotation || hasTotalExpertPattern || hasMultipleLargeParams;
        const discoveredFamily = isMoE ? 'MoE' : 'Dense';
        if (discoveredFamily !== policy.family) {
            errors.push(`Family mismatch: discovered "${discoveredFamily}" does not match policy "${policy.family}". ` +
                `Model name: "${discoveredCapabilities.model}"`);
        }
    }
    return {
        valid: errors.length === 0,
        errors,
        warnings: warnings.length > 0 ? warnings : undefined,
    };
}
package/dist/types.js ADDED
@@ -0,0 +1,4 @@
1
+ /**
2
+ * Type definitions for worker CLI. This module exports no runtime values: the empty export below only marks the file as an ES module (presumably compiled output of a types-only source file - TODO confirm).
3
+ */
4
+ export {};
@@ -0,0 +1,433 @@
1
+ /**
2
+ * WebSocket Connection Manager
3
+ *
4
+ * Handles connection to Auxot WebSocket server with:
5
+ * - Hello handshake
6
+ * - Heartbeat keepalive
7
+ * - Job message handling
8
+ * - Automatic reconnection with exponential backoff
9
+ */
10
+ import WebSocket from 'ws';
11
+ import { logServerToClient, logClientToServer } from './debug.js';
12
+ import { validatePolicy } from './policy-validator.js';
13
+ const HEARTBEAT_INTERVAL = 5000; // Milliseconds between heartbeat messages sent by startHeartbeat() (5 seconds)
14
+ const INITIAL_RETRY_DELAY = 1000; // Starting reconnect delay in milliseconds (1 second); retryDelay is reset to this after a successful hello_ack
15
+ const MAX_RETRY_DELAY = 60000; // Upper bound for the reconnect delay in milliseconds (60 seconds)
16
+ const RETRY_MULTIPLIER = 2; // Factor by which scheduleReconnect() grows retryDelay (exponential backoff)
17
/**
 * Manages the worker's WebSocket connection to the Auxot server.
 *
 * Responsibilities visible in this class:
 * - sends a `hello` message on open and processes the `hello_ack` reply
 *   (policy storage, optional policy callback, legacy local validation);
 * - sends periodic `heartbeat` messages while connected;
 * - dispatches `job`, `cancel` and `config_ack` messages to registered callbacks;
 * - reconnects automatically with exponential backoff until `close()` is
 *   called or a fatal error (authentication/policy failure) occurs.
 */
export class WebSocketConnection {
    ws = null;
    heartbeatTimer = null;
    reconnectTimer = null;
    gpuKey;
    capabilities;
    onJobCallback = null;
    onCancelCallback = null;
    onPolicyCallback = null;
    onConfigAckCallback = null;
    wsUrl = '';
    retryDelay = INITIAL_RETRY_DELAY;
    isConnected = false;
    shouldReconnect = true;
    isReconnecting = false;
    policy = null;
    /**
     * @param {string} gpuKey - Key sent to the server in the hello message.
     * @param {object} capabilities - Worker capabilities sent in the hello message.
     */
    constructor(gpuKey, capabilities) {
        this.gpuKey = gpuKey;
        this.capabilities = capabilities;
    }
    /**
     * Connect to the WebSocket server and perform the hello handshake.
     *
     * @param {string} wsUrl - Server URL; remembered for later reconnects.
     * @returns {Promise<void>} Settles as described on attemptConnection().
     */
    async connect(wsUrl) {
        this.wsUrl = wsUrl;
        this.shouldReconnect = true;
        return this.attemptConnection();
    }
    /**
     * Attempt to establish the WebSocket connection.
     *
     * Resolves once the handshake succeeded, or when the server reported a
     * duplicate connection and a delayed retry was scheduled. Rejects on a
     * missing URL, a constructor failure, the 10 s handshake timeout, a missing
     * policy, a policy-callback error, a failed validation or an
     * authentication failure.
     *
     * NOTE(review): when the socket errors/closes before `hello_ack` arrives,
     * the returned promise is left pending (as before); the 'close' handler
     * schedules the retry. Callers awaiting connect() should be aware of this.
     *
     * @returns {Promise<void>}
     */
    async attemptConnection() {
        return new Promise((resolve, reject) => {
            if (!this.wsUrl) {
                reject(new Error('No WebSocket URL configured'));
                return;
            }
            // Only log on the first attempt, not on every silent retry.
            if (!this.isReconnecting) {
                console.log(`Connecting to ${this.wsUrl}...`);
            }
            try {
                this.ws = new WebSocket(this.wsUrl);
            }
            catch (error) {
                this.scheduleReconnect();
                reject(error);
                return;
            }
            // Give up on this attempt if the handshake has not completed in time.
            const connectionTimeout = setTimeout(() => {
                if (!this.isConnected) {
                    this.ws?.close();
                    this.scheduleReconnect();
                    reject(new Error('Connection timeout'));
                }
            }, 10000);
            this.ws.on('open', () => {
                // Send hello message (server will assign GPU ID)
                this.send({
                    type: 'hello',
                    gpu_key: this.gpuKey,
                    capabilities: this.capabilities,
                });
            });
            this.ws.on('message', async (data) => {
                try {
                    const message = JSON.parse(data.toString());
                    // Debug log (skip heartbeat_ack to reduce noise)
                    if (message.type !== 'heartbeat_ack') {
                        logServerToClient(message);
                    }
                    switch (message.type) {
                        case 'hello_ack':
                            clearTimeout(connectionTimeout);
                            await this.#handleHelloAck(message, resolve, reject);
                            break;
                        case 'config_ack':
                            this.#handleConfigAck(message);
                            break;
                        case 'heartbeat_ack':
                            // Heartbeat acknowledged (silent)
                            break;
                        case 'cancel':
                            console.log(`Received cancel request for job ${message.job_id}`);
                            if (this.onCancelCallback) {
                                this.onCancelCallback(message);
                            }
                            else {
                                console.warn('No cancel callback registered!');
                            }
                            break;
                        case 'job':
                            this.#handleJob(message);
                            break;
                        default:
                            // Unknown message types are ignored.
                            break;
                    }
                }
                catch (error) {
                    console.error('Error parsing message:', error);
                }
            });
            this.ws.on('error', () => {
                // Errors are handled silently here; reconnection is scheduled by the
                // 'close' handler (presumably 'close' follows 'error' - see ws docs).
                clearTimeout(connectionTimeout);
            });
            this.ws.on('close', () => {
                clearTimeout(connectionTimeout);
                this.isConnected = false;
                this.stopHeartbeat();
                if (this.shouldReconnect) {
                    if (!this.isReconnecting) {
                        console.log('WebSocket disconnected, will continue to retry...');
                        this.isReconnecting = true;
                    }
                    this.scheduleReconnect();
                }
                else {
                    console.log('WebSocket disconnected');
                }
            });
        });
    }
    /**
     * Process a `hello_ack` message and settle the pending connection promise.
     */
    async #handleHelloAck(message, resolve, reject) {
        if (!message.success) {
            this.#handleHelloRejected(message, resolve, reject);
            return;
        }
        if (!message.policy) {
            const errorMsg = 'Server did not send policy in hello_ack';
            console.error(`✗ ${errorMsg}`);
            this.shouldReconnect = false;
            this.ws?.close();
            reject(new Error(errorMsg));
            return;
        }
        this.policy = message.policy;
        if (this.onPolicyCallback) {
            // Validation is deferred: the callback spawns llama.cpp and the
            // capabilities are validated later via the config message.
            try {
                await this.onPolicyCallback(message.policy);
            }
            catch (error) {
                console.error('[Policy Callback] Error:', error);
                this.shouldReconnect = false;
                this.ws?.close();
                reject(error);
                return;
            }
            console.log('✓ Successfully authenticated with server');
            console.log(` Policy: ${message.policy.model_name} (${message.policy.quantization})`);
            console.log(' Spawning llama.cpp process...');
            console.log(' (Capabilities validation will happen via config message)');
        }
        else {
            // No policy callback - validate immediately (legacy flow).
            // validatePolicy is synchronous, so no await is needed.
            const validation = validatePolicy(this.capabilities, message.policy);
            if (validation.warnings && validation.warnings.length > 0) {
                console.warn('⚠ Policy validation warnings:');
                validation.warnings.forEach((warning) => {
                    console.warn(` - ${warning}`);
                });
            }
            if (!validation.valid) {
                this.#logPolicyMismatch(message.policy, validation);
                this.shouldReconnect = false;
                this.ws?.close();
                reject(new Error(`Policy validation failed: ${validation.errors.join('; ')}`));
                return;
            }
            console.log('✓ Successfully authenticated with server');
            console.log(` Policy: ${message.policy.model_name} (${message.policy.quantization})`);
            console.log('✓ Capabilities validated against policy');
        }
        this.isConnected = true;
        this.isReconnecting = false;
        this.retryDelay = INITIAL_RETRY_DELAY; // Reset retry delay on success
        this.startHeartbeat();
        resolve();
    }
    /**
     * Handle an unsuccessful `hello_ack`: either a duplicate connection (retry
     * after the server-provided delay) or a fatal authentication error.
     */
    #handleHelloRejected(message, resolve, reject) {
        const errorMessage = message.error || 'Authentication failed';
        const reconnectIn = message.reconnect_in_seconds || message.reconnectInSeconds;
        if (reconnectIn) {
            console.error(`✗ Worker UUID already connected! Are you trying to connect the same worker ID twice?`);
            console.error(` The existing connection's presence key expires in ${reconnectIn} seconds.`);
            console.error(` Waiting ${reconnectIn} seconds and retrying automatically...`);
            // Close the connection and retry after the time the server asked for.
            this.ws?.close();
            this.retryDelay = Math.max(reconnectIn * 1000, INITIAL_RETRY_DELAY);
            this.scheduleReconnect();
            // Resolve instead of reject - this allows the app to continue with auto-retry
            resolve();
            return;
        }
        // Authentication errors (invalid key, key not found, etc.) are fatal - stop retrying
        console.error('✗ Authentication failed:', errorMessage);
        console.error(' Fatal error: Stopping retry attempts. Please check your GPU key and try again.');
        this.shouldReconnect = false;
        this.ws?.close();
        reject(new Error(errorMessage));
    }
    /**
     * Print the expected policy, the discovered capabilities and the
     * validation errors after a failed local policy validation.
     */
    #logPolicyMismatch(policy, validation) {
        console.error('✗ Policy validation failed:');
        console.error(' Expected:');
        console.error(` Model: ${policy.model_name}`);
        console.error(` Quantization: ${policy.quantization}`);
        console.error(` Context Size: ${policy.context_size}`);
        console.error(` Capabilities: ${policy.capabilities.join(', ')}`);
        if (policy.parameters) {
            console.error(` Parameters: ${policy.parameters}`);
        }
        if (policy.family) {
            console.error(` Family: ${policy.family}`);
        }
        console.error(' Discovered:');
        console.error(` Model: ${this.capabilities.model}`);
        console.error(` Context Size: ${this.capabilities.ctx_size}`);
        if (this.capabilities.parameters) {
            console.error(` Parameters: ${this.capabilities.parameters}`);
        }
        console.error(' Errors:');
        validation.errors.forEach((error) => {
            console.error(` - ${error}`);
        });
        console.error('\nPlease configure your llama.cpp server to match the policy requirements.');
    }
    /**
     * Handle `config_ack`: forward to the registered callback, or log the
     * outcome and stop on failure when no callback is registered.
     */
    #handleConfigAck(message) {
        if (this.onConfigAckCallback) {
            this.onConfigAckCallback(message.success, message.error);
            return;
        }
        if (message.success) {
            console.log('✓ Capabilities validated by server');
        }
        else {
            console.error(`✗ Config validation failed: ${message.error}`);
            this.shouldReconnect = false;
            this.ws?.close();
        }
    }
    /**
     * Handle a `job` message by invoking the job callback and reporting any
     * failure back to the server.
     */
    #handleJob(message) {
        console.log(`Received job ${message.job_id}`);
        if (!this.onJobCallback) {
            console.warn('No job callback registered!');
            return;
        }
        // The executor runs synchronously, so the callback is still invoked
        // immediately; wrapping it turns a synchronous throw or a non-promise
        // return value into the same error path as an async rejection.
        new Promise((done) => done(this.onJobCallback(message))).catch((err) => {
            console.error('Error processing job:', err);
            this.sendError(message.job_id, err instanceof Error ? err.message : String(err));
        });
    }
    /**
     * Schedule a reconnection attempt with exponential backoff.
     *
     * The current retryDelay is used for this attempt and the delay for the
     * next one is grown immediately, so backoff applies to every failure mode
     * (including socket errors, where attemptConnection() never rejects).
     * A successful handshake resets retryDelay to INITIAL_RETRY_DELAY.
     * Does nothing when a retry is already pending or reconnecting is disabled.
     */
    scheduleReconnect() {
        if (this.reconnectTimer || !this.shouldReconnect) {
            return;
        }
        const delay = this.retryDelay;
        this.retryDelay = Math.min(delay * RETRY_MULTIPLIER, MAX_RETRY_DELAY);
        this.reconnectTimer = setTimeout(async () => {
            this.reconnectTimer = null;
            try {
                await this.attemptConnection();
            }
            catch (error) {
                // Deliberately silent: fatal failures are logged by attemptConnection()
                // and retryable ones have already scheduled the next attempt.
            }
        }, delay);
    }
    /**
     * Start sending heartbeat messages every HEARTBEAT_INTERVAL milliseconds.
     */
    startHeartbeat() {
        this.stopHeartbeat(); // Clear any existing timer
        this.heartbeatTimer = setInterval(() => {
            if (this.isConnected) {
                // Server identifies worker by WebSocket connection, not GPU ID
                this.send({ type: 'heartbeat' });
            }
        }, HEARTBEAT_INTERVAL);
    }
    /**
     * Stop the heartbeat timer, if one is running.
     */
    stopHeartbeat() {
        if (this.heartbeatTimer) {
            clearInterval(this.heartbeatTimer);
            this.heartbeatTimer = null;
        }
    }
    /**
     * Send a message to the server as JSON.
     * The message is dropped without error when the socket is not open.
     *
     * @param {object} message - Must carry a `type` field.
     */
    send(message) {
        if (this.ws && this.ws.readyState === WebSocket.OPEN) {
            const jsonString = JSON.stringify(message);
            // Debug log (skip heartbeats to reduce noise)
            if (message.type !== 'heartbeat') {
                logClientToServer(message);
            }
            this.ws.send(jsonString);
        }
    }
    /**
     * Send a generated token for a job.
     */
    sendToken(jobId, token) {
        this.send({
            type: 'token',
            job_id: jobId,
            token,
        });
    }
    /**
     * Send job completion to the server together with its metadata.
     */
    sendComplete(jobId, fullResponse, durationMs, inputTokens, outputTokens, toolCalls) {
        this.send({
            type: 'complete',
            job_id: jobId,
            full_response: fullResponse,
            duration_ms: durationMs,
            input_tokens: inputTokens,
            output_tokens: outputTokens,
            tool_calls: toolCalls,
        });
    }
    /**
     * Report a job error to the server.
     */
    sendError(jobId, error) {
        this.send({
            type: 'error',
            job_id: jobId,
            error,
        });
    }
    /**
     * Register the callback for job messages. It receives the job message;
     * a rejected or thrown error is reported to the server via sendError().
     */
    onJob(callback) {
        this.onJobCallback = callback;
    }
    /**
     * Register the callback for cancel messages.
     */
    onCancel(callback) {
        this.onCancelCallback = callback;
    }
    /**
     * Register the callback invoked with the policy received in hello_ack.
     * Used to spawn the llama.cpp process before validation.
     */
    onPolicy(callback) {
        this.onPolicyCallback = callback;
    }
    /**
     * Register the callback for config_ack; it receives (success, error).
     */
    onConfigAck(callback) {
        this.onConfigAckCallback = callback;
    }
    /**
     * Replace the stored capabilities (called after spawning llama.cpp).
     */
    updateCapabilities(capabilities) {
        this.capabilities = capabilities;
    }
    /**
     * Send the discovered capabilities to the server in a config message.
     */
    sendConfig(capabilities) {
        this.send({
            type: 'config',
            capabilities,
        });
    }
    /**
     * Whether the handshake completed and the socket has not closed since.
     */
    get connected() {
        return this.isConnected;
    }
    /**
     * Get the policy received from the server (null before hello_ack or after close()).
     */
    getPolicy() {
        return this.policy;
    }
    /**
     * Get the current capabilities.
     */
    getCapabilities() {
        return this.capabilities;
    }
    /**
     * Close the connection and disable automatic reconnection.
     */
    close() {
        this.shouldReconnect = false;
        this.stopHeartbeat();
        if (this.reconnectTimer) {
            clearTimeout(this.reconnectTimer);
            this.reconnectTimer = null;
        }
        if (this.ws) {
            this.ws.close();
            this.ws = null;
        }
        this.isConnected = false;
        this.policy = null; // Clear policy on disconnect
    }
}