@hardlydifficult/worker-server 1.0.6 → 1.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +263 -270
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# @hardlydifficult/worker-server
|
|
2
2
|
|
|
3
|
-
WebSocket-based worker server with health monitoring, request routing, and load balancing.
|
|
3
|
+
A WebSocket-based worker server for managing remote worker connections with health monitoring, request routing, and load balancing.
|
|
4
4
|
|
|
5
5
|
## Installation
|
|
6
6
|
|
|
@@ -13,204 +13,218 @@ npm install @hardlydifficult/worker-server
|
|
|
13
13
|
```typescript
|
|
14
14
|
import { WorkerServer } from "@hardlydifficult/worker-server";
|
|
15
15
|
|
|
16
|
-
const server = new WorkerServer({
|
|
17
|
-
port: 8080,
|
|
18
|
-
authToken: "my-secret-token", // optional
|
|
19
|
-
logger: console, // optional, defaults to no-op
|
|
20
|
-
});
|
|
16
|
+
const server = new WorkerServer({ port: 3000 });
|
|
21
17
|
|
|
22
|
-
// Handle worker registrations and messages
|
|
23
18
|
server.onWorkerConnected((worker) => {
|
|
24
|
-
console.log(
|
|
25
|
-
});
|
|
26
|
-
|
|
27
|
-
server.onWorkerDisconnected((worker, pendingRequestIds) => {
|
|
28
|
-
console.log(`Worker ${worker.id} disconnected with ${pendingRequestIds.size} pending requests`);
|
|
19
|
+
console.log("Worker connected:", worker.id);
|
|
29
20
|
});
|
|
30
21
|
|
|
31
|
-
server.onWorkerMessage("
|
|
32
|
-
console.log(
|
|
22
|
+
server.onWorkerMessage("work_request", async (worker, message) => {
|
|
23
|
+
console.log("Received request from", worker.id, message);
|
|
24
|
+
server.send(worker.id, { type: "work_complete", requestId: message.requestId });
|
|
33
25
|
});
|
|
34
26
|
|
|
35
|
-
// Start the server
|
|
36
27
|
await server.start();
|
|
37
|
-
|
|
38
|
-
// Get an available worker supporting a model
|
|
39
|
-
const worker = server.getAvailableWorker("gpt-4");
|
|
40
|
-
if (worker) {
|
|
41
|
-
server.send(worker.id, { type: "work_request", requestId: "req-1" });
|
|
42
|
-
}
|
|
43
|
-
|
|
44
|
-
// Stop when done
|
|
45
|
-
await server.stop();
|
|
28
|
+
console.log("Worker server running on port 3000");
|
|
46
29
|
```
|
|
47
30
|
|
|
48
31
|
## Core Concepts
|
|
49
32
|
|
|
50
|
-
### Worker
|
|
33
|
+
### Worker Registration
|
|
51
34
|
|
|
52
|
-
The server
|
|
53
|
-
|
|
54
|
-
#### Registration and Authentication
|
|
55
|
-
|
|
56
|
-
Workers connect via WebSocket and send a `worker_registration` message with an optional `authToken`. If the server is configured with `authToken`, the worker must provide a matching token.
|
|
35
|
+
Workers connect via WebSocket and register with their identity and capabilities. The server supports optional authentication and tracks worker health via a heartbeat protocol.
|
|
57
36
|
|
|
58
37
|
```typescript
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
38
|
+
server.onWorkerConnected((worker) => {
|
|
39
|
+
// Worker capabilities include supported models and concurrency limits
|
|
40
|
+
console.log(
|
|
41
|
+
`Worker ${worker.name} supports: ${worker.capabilities.models.map(m => m.modelId).join(", ")}`
|
|
42
|
+
);
|
|
64
43
|
});
|
|
44
|
+
```
|
|
65
45
|
|
|
66
|
-
|
|
67
|
-
|
|
46
|
+
### Message Routing
|
|
47
|
+
|
|
48
|
+
Messages are dispatched by their `type` field to registered handlers.
|
|
49
|
+
|
|
50
|
+
```typescript
|
|
51
|
+
server.onWorkerMessage("work_complete", (worker, message) => {
|
|
52
|
+
console.log("Work completed for", message.requestId);
|
|
68
53
|
});
|
|
69
54
|
|
|
70
|
-
|
|
55
|
+
server.send(workerId, { type: "work_request", requestId: "req-1" });
|
|
56
|
+
server.broadcast({ type: "shutdown" });
|
|
71
57
|
```
|
|
72
58
|
|
|
73
|
-
|
|
59
|
+
### Request Tracking & Load Balancing
|
|
74
60
|
|
|
75
|
-
|
|
61
|
+
Track active requests and select the least-loaded available worker.
|
|
76
62
|
|
|
77
63
|
```typescript
|
|
78
|
-
const
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
64
|
+
const worker = server.getAvailableWorker("sonnet", "inference");
|
|
65
|
+
if (worker) {
|
|
66
|
+
server.trackRequest(worker.id, requestId, "inference");
|
|
67
|
+
server.send(worker.id, { type: "start", requestId });
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
// Later, release the request
|
|
71
|
+
server.releaseRequest(requestId, { incrementCompleted: true });
|
|
84
72
|
```
|
|
85
73
|
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
74
|
+
## Core Components
|
|
75
|
+
|
|
76
|
+
### WorkerServer
|
|
77
|
+
|
|
78
|
+
Main server class managing WebSocket connections, HTTP endpoints, and worker pool.
|
|
91
79
|
|
|
92
|
-
####
|
|
80
|
+
#### Constructor
|
|
93
81
|
|
|
94
|
-
|
|
82
|
+
| Parameter | Type | Default | Description |
|
|
83
|
+
|--|------|---------|-----|
|
|
84
|
+
| `port` | `number` | — | HTTP + WebSocket server port |
|
|
85
|
+
| `authToken` | `string` (optional) | — | Token required for worker registration |
|
|
86
|
+
| `heartbeatTimeoutMs` | `number` | 60000 | Timeout before marking worker unhealthy |
|
|
87
|
+
| `healthCheckIntervalMs` | `number` | 10000 | Interval for health checks |
|
|
88
|
+
| `heartbeatIntervalMs` | `number` | 15000 | Heartbeat interval communicated to workers |
|
|
89
|
+
| `logger` | `WorkerServerLogger` (optional) | No-op | Logger instance |
|
|
90
|
+
|
|
91
|
+
#### Lifecycle Management
|
|
95
92
|
|
|
96
93
|
```typescript
|
|
97
|
-
server
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
94
|
+
const server = new WorkerServer({ port: 8080, authToken: "secret" });
|
|
95
|
+
|
|
96
|
+
// Start the server
|
|
97
|
+
await server.start();
|
|
98
|
+
|
|
99
|
+
// Stop the server gracefully
|
|
100
|
+
await server.stop();
|
|
101
101
|
```
|
|
102
102
|
|
|
103
|
-
|
|
103
|
+
#### Registration Handlers
|
|
104
104
|
|
|
105
|
-
|
|
105
|
+
```typescript
|
|
106
|
+
// Called when a worker successfully registers
|
|
107
|
+
const unsubscribeConnected = server.onWorkerConnected((worker) => {
|
|
108
|
+
console.log(`Worker connected: ${worker.name}`);
|
|
109
|
+
});
|
|
106
110
|
|
|
107
|
-
|
|
111
|
+
// Called when a worker disconnects
|
|
112
|
+
const unsubscribeDisconnected = server.onWorkerDisconnected((worker, pending) => {
|
|
113
|
+
console.log(`Worker disconnected with ${pending.size} pending requests`);
|
|
114
|
+
});
|
|
115
|
+
```
|
|
108
116
|
|
|
109
|
-
|
|
117
|
+
#### Message Handling
|
|
110
118
|
|
|
111
119
|
```typescript
|
|
112
|
-
//
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
server.send(worker.id, { type: "work_request", requestId: "req-1" });
|
|
117
|
-
}
|
|
120
|
+
// Register handlers for domain-specific messages by type
|
|
121
|
+
server.onWorkerMessage("work_request", (worker, message) => {
|
|
122
|
+
// Process work request from worker
|
|
123
|
+
});
|
|
118
124
|
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
server.releaseRequest(message.requestId, { incrementCompleted: true });
|
|
125
|
+
server.onWorkerMessage("status_update", (worker, message) => {
|
|
126
|
+
// Handle status updates from worker
|
|
122
127
|
});
|
|
123
128
|
```
|
|
124
129
|
|
|
125
|
-
####
|
|
130
|
+
#### Sending Messages
|
|
131
|
+
|
|
132
|
+
```typescript
|
|
133
|
+
// Send to a specific worker
|
|
134
|
+
const success = server.send("worker-1", { type: "stop", reason: "shutdown" });
|
|
135
|
+
|
|
136
|
+
// Broadcast to all connected workers
|
|
137
|
+
server.broadcast({ type: "maintenance_start" });
|
|
138
|
+
```
|
|
126
139
|
|
|
127
|
-
|
|
140
|
+
#### Pool Queries
|
|
128
141
|
|
|
129
142
|
```typescript
|
|
130
|
-
// Get least-loaded worker supporting a model
|
|
131
|
-
const worker = server.getAvailableWorker("
|
|
132
|
-
// → least-loaded worker that supports "gpt-4"
|
|
143
|
+
// Get least-loaded worker supporting a specific model
|
|
144
|
+
const worker = server.getAvailableWorker("sonnet-3.5");
|
|
133
145
|
|
|
134
146
|
// Get any available worker (model-agnostic)
|
|
135
147
|
const anyWorker = server.getAnyAvailableWorker();
|
|
136
|
-
// → any worker (Available or Busy status)
|
|
137
|
-
```
|
|
138
148
|
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
149
|
+
// Get all worker info
|
|
150
|
+
const workers = server.getWorkerInfo(); // Returns WorkerInfo[]
|
|
151
|
+
```
|
|
142
152
|
|
|
143
|
-
|
|
153
|
+
#### Request Tracking
|
|
144
154
|
|
|
145
155
|
```typescript
|
|
146
|
-
//
|
|
147
|
-
|
|
148
|
-
models: [{ modelId: "gpt-4", ... }],
|
|
149
|
-
maxConcurrentRequests: 5,
|
|
150
|
-
concurrencyLimits: {
|
|
151
|
-
inference: 2,
|
|
152
|
-
embedding: 4,
|
|
153
|
-
}
|
|
154
|
-
}
|
|
156
|
+
// Track a request assigned to a worker
|
|
157
|
+
server.trackRequest("worker-1", "req-123", "inference");
|
|
155
158
|
|
|
156
|
-
//
|
|
157
|
-
server.
|
|
159
|
+
// Release a completed request
|
|
160
|
+
server.releaseRequest("req-123", { incrementCompleted: true });
|
|
158
161
|
```
|
|
159
162
|
|
|
160
|
-
|
|
163
|
+
#### Extensibility
|
|
164
|
+
|
|
165
|
+
| Method | Description |
|
|
166
|
+
|--|---|
|
|
167
|
+
| `addHttpHandler(handler)` | Add an HTTP handler (called in order until one returns `true`) |
|
|
168
|
+
| `addWebSocketEndpoint(path, handler)` | Add a WebSocket endpoint at a custom path |
|
|
161
169
|
|
|
162
|
-
|
|
170
|
+
#### Event Handlers
|
|
163
171
|
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
172
|
+
| Method | Return | Description |
|
|
173
|
+
|--|--|---|
|
|
174
|
+
| `onWorkerConnected(handler)` | `() => void` | Called when worker registers |
|
|
175
|
+
| `onWorkerDisconnected(handler)` | `() => void` | Called when worker disconnects (includes pending requests) |
|
|
176
|
+
| `onWorkerMessage(type, handler)` | `() => void` | Register handler for a message type |
|
|
168
177
|
|
|
169
|
-
|
|
170
|
-
console.log(`Worker ${worker.id} metrics:`, message.metrics);
|
|
171
|
-
});
|
|
172
|
-
```
|
|
178
|
+
### WorkerPool
|
|
173
179
|
|
|
174
|
-
|
|
180
|
+
Internal class managing worker state and selection. Exposed via `WorkerServer`.
|
|
175
181
|
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
182
|
+
| Method | Description |
|
|
183
|
+
|--|---|
|
|
184
|
+
| `getAvailableWorker(model, category?)` | Get least-loaded available worker supporting model |
|
|
185
|
+
| `getAnyAvailableWorker()` | Get any available/busy worker |
|
|
186
|
+
| `trackRequest(workerId, requestId, category?)` | Mark request as in-progress |
|
|
187
|
+
| `releaseRequest(requestId, { incrementCompleted? })` | Release tracked request |
|
|
188
|
+
| `getWorkerInfoList()` | Get public info for all workers |
|
|
189
|
+
| `checkHealth(timeoutMs)` | Return IDs of dead workers (last heartbeat older than 3× `timeoutMs`) |
|
|
190
|
+
| `send(workerId, message)` | Send message to specific worker |
|
|
191
|
+
| `broadcast(message)` | Broadcast to all workers |
|
|
192
|
+
| `closeAll()` | Close all worker connections |
|
|
181
193
|
|
|
182
|
-
###
|
|
194
|
+
### ConnectionHandler
|
|
183
195
|
|
|
184
|
-
|
|
196
|
+
Handles WebSocket connection lifecycle and protocol message routing.
|
|
197
|
+
|
|
198
|
+
#### Message Routing
|
|
185
199
|
|
|
186
200
|
```typescript
|
|
187
|
-
|
|
188
|
-
// Returns false if worker not found or WebSocket not open
|
|
189
|
-
```
|
|
201
|
+
import { ConnectionHandler } from "@hardlydifficult/worker-server";
|
|
190
202
|
|
|
191
|
-
|
|
203
|
+
const handler = new ConnectionHandler(pool, config, logger);
|
|
192
204
|
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
205
|
+
// Register handlers for custom message types
|
|
206
|
+
const unregister = handler.onMessage("custom_type", (worker, message) => {
|
|
207
|
+
console.log(`Received from ${worker.id}:`, message);
|
|
208
|
+
});
|
|
196
209
|
```
|
|
197
210
|
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
#### Additional WebSocket Endpoints
|
|
211
|
+
#### Event Handlers
|
|
201
212
|
|
|
202
213
|
```typescript
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
ws.on("message", (data) => {
|
|
206
|
-
console.log("Metrics client message:", data.toString());
|
|
207
|
-
});
|
|
214
|
+
handler.onWorkerConnected((worker) => {
|
|
215
|
+
console.log("Worker connected:", worker.id);
|
|
208
216
|
});
|
|
209
217
|
|
|
210
|
-
|
|
218
|
+
handler.onWorkerDisconnected((worker, pending) => {
|
|
219
|
+
console.log("Worker disconnected with pending:", pending.size);
|
|
220
|
+
});
|
|
211
221
|
```
|
|
212
222
|
|
|
213
|
-
|
|
223
|
+
## Advanced Features
|
|
224
|
+
|
|
225
|
+
### HTTP Endpoints
|
|
226
|
+
|
|
227
|
+
Custom HTTP handlers can be added:
|
|
214
228
|
|
|
215
229
|
```typescript
|
|
216
230
|
server.addHttpHandler(async (req, res) => {
|
|
@@ -219,192 +233,171 @@ server.addHttpHandler(async (req, res) => {
|
|
|
219
233
|
res.end(JSON.stringify({ status: "ok" }));
|
|
220
234
|
return true;
|
|
221
235
|
}
|
|
222
|
-
return false; // continue to next handler
|
|
236
|
+
return false; // continue to next handler
|
|
223
237
|
});
|
|
224
|
-
|
|
225
|
-
// Custom HTTP responses take precedence over 404
|
|
226
238
|
```
|
|
227
239
|
|
|
228
|
-
|
|
240
|
+
### Custom WebSocket Endpoints
|
|
229
241
|
|
|
230
|
-
|
|
242
|
+
Additional WebSocket paths can be handled:
|
|
231
243
|
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
| `send(workerId, message)` | Send a JSON message to a specific worker |
|
|
240
|
-
| `broadcast(message)` | Broadcast a JSON message to all workers |
|
|
241
|
-
| `getAvailableWorker(model, category?)` | Get least-loaded worker supporting model |
|
|
242
|
-
| `getAnyAvailableWorker()` | Get any available/Busy worker |
|
|
243
|
-
| `getWorkerCount()` | Total connected worker count |
|
|
244
|
-
| `getAvailableWorkerCount()` | Available worker count |
|
|
245
|
-
| `getWorkerInfo()` | Get public info about all workers |
|
|
246
|
-
| `trackRequest(workerId, requestId, category?)` | Track request as in-progress |
|
|
247
|
-
| `releaseRequest(requestId, options?)` | Release tracked request |
|
|
248
|
-
| `addHttpHandler(handler)` | Add HTTP request handler |
|
|
249
|
-
| `addWebSocketEndpoint(path, handler)` | Add custom WebSocket endpoint |
|
|
250
|
-
| `start()` | Start HTTP + WebSocket server |
|
|
251
|
-
| `stop()` | Stop server and close all connections |
|
|
252
|
-
|
|
253
|
-
### `WorkerPool`
|
|
254
|
-
|
|
255
|
-
Internal pool manager with public helpers.
|
|
256
|
-
|
|
257
|
-
| Method | Description |
|
|
258
|
-
|--------|-------------|
|
|
259
|
-
| `add(worker)` | Add a connected worker to the pool |
|
|
260
|
-
| `remove(id)` | Remove worker by ID |
|
|
261
|
-
| `get(id)` | Get worker by ID |
|
|
262
|
-
| `has(id)` | Check if worker is in pool |
|
|
263
|
-
| `getAvailableWorker(model, category?)` | Get available worker by model |
|
|
264
|
-
| `getAnyAvailableWorker()` | Get any available/Busy worker |
|
|
265
|
-
| `getCount()` | Total worker count |
|
|
266
|
-
| `getAvailableCount()` | Available worker count |
|
|
267
|
-
| `getWorkerInfoList()` | Get public info for all workers |
|
|
268
|
-
| `checkHealth(timeoutMs)` | Check worker health and return dead IDs |
|
|
269
|
-
| `send(workerId, message)` | Send message to worker |
|
|
270
|
-
| `broadcast(message)` | Broadcast to all workers |
|
|
271
|
-
| `closeAll()` | Close all worker connections |
|
|
244
|
+
```typescript
|
|
245
|
+
server.addWebSocketEndpoint("/ws/admin", (ws) => {
|
|
246
|
+
ws.on("message", (data) => {
|
|
247
|
+
// Handle admin WebSocket messages
|
|
248
|
+
});
|
|
249
|
+
});
|
|
250
|
+
```
|
|
272
251
|
|
|
273
|
-
###
|
|
252
|
+
### Authentication
|
|
274
253
|
|
|
275
|
-
|
|
254
|
+
Optionally require authentication tokens from workers:
|
|
276
255
|
|
|
277
256
|
```typescript
|
|
278
|
-
|
|
257
|
+
const server = new WorkerServer({
|
|
258
|
+
port: 8080,
|
|
259
|
+
authToken: "your-secret-token"
|
|
260
|
+
});
|
|
279
261
|
|
|
280
|
-
|
|
281
|
-
const publicInfo = toWorkerInfo(internal);
|
|
282
|
-
// No websocket reference in publicInfo
|
|
262
|
+
// Workers must send a registration message with a matching `authToken`
|
|
283
263
|
```
|
|
284
264
|
|
|
285
|
-
###
|
|
265
|
+
### Load Balancing with Category Limits
|
|
286
266
|
|
|
287
|
-
|
|
288
|
-
|------|-------------|
|
|
289
|
-
| `WorkerStatus` | `available`, `busy`, `draining`, `unhealthy` |
|
|
290
|
-
| `ModelInfo` | Model capabilities and metadata |
|
|
291
|
-
| `WorkerCapabilities` | Worker capacity, models, and concurrency limits |
|
|
292
|
-
| `WorkerInfo` | Public worker state |
|
|
293
|
-
| `ConnectedWorker` | Internal state (includes WebSocket) |
|
|
294
|
-
| `WorkerServerOptions` | Configuration for `WorkerServer` |
|
|
295
|
-
| `WorkerServerLogger` | Logger interface |
|
|
296
|
-
| `HttpRequestHandler` | HTTP request handler type |
|
|
297
|
-
| `WorkerMessageHandler<T>` | Typed message handler |
|
|
298
|
-
| `WorkerConnectedHandler` | Worker connected event handler |
|
|
299
|
-
| `WorkerDisconnectedHandler` | Worker disconnected event handler |
|
|
300
|
-
| `WebSocketConnectionHandler` | Custom WebSocket endpoint handler |
|
|
267
|
+
Workers can declare per-category concurrency limits:
|
|
301
268
|
|
|
302
|
-
|
|
269
|
+
```typescript
|
|
270
|
+
const capabilities = {
|
|
271
|
+
models: [{ modelId: "sonnet", ... }],
|
|
272
|
+
maxConcurrentRequests: 10,
|
|
273
|
+
concurrencyLimits: {
|
|
274
|
+
inference: 5, // max 5 concurrent inference requests
|
|
275
|
+
embeddings: 2 // max 2 concurrent embedding requests
|
|
276
|
+
}
|
|
277
|
+
};
|
|
278
|
+
```
|
|
303
279
|
|
|
304
|
-
|
|
280
|
+
Requests are then tracked by category:
|
|
305
281
|
|
|
306
282
|
```typescript
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
healthCheckIntervalMs: 10_000,
|
|
310
|
-
heartbeatIntervalMs: 15_000,
|
|
311
|
-
}
|
|
283
|
+
server.trackRequest("worker-1", "req-1", "inference");
|
|
284
|
+
server.releaseRequest("req-1"); // category looked up automatically
|
|
312
285
|
```
|
|
313
286
|
|
|
314
|
-
|
|
287
|
+
## Types and Interfaces
|
|
288
|
+
|
|
289
|
+
### WorkerInfo
|
|
290
|
+
|
|
291
|
+
| Field | Type | Description |
|
|
292
|
+
|--|------|---|
|
|
293
|
+
| `id` | `string` | Unique worker identifier |
|
|
294
|
+
| `name` | `string` | Worker-assigned name |
|
|
295
|
+
| `status` | `WorkerStatus` | Current status (`available`, `busy`, `draining`, `unhealthy`) |
|
|
296
|
+
| `capabilities` | `WorkerCapabilities` | Supported models and limits |
|
|
297
|
+
| `sessionId` | `string` | Unique session identifier |
|
|
298
|
+
| `connectedAt` | `Date` | Connection timestamp |
|
|
299
|
+
| `lastHeartbeat` | `Date` | Last heartbeat timestamp |
|
|
300
|
+
| `activeRequests` | `number` | Currently active requests |
|
|
301
|
+
| `completedRequests` | `number` | Completed request count |
|
|
302
|
+
| `pendingRequestIds` | `ReadonlySet<string>` | Pending request IDs |
|
|
303
|
+
| `categoryActiveRequests` | `ReadonlyMap<string, number>` | Active requests per category |
|
|
304
|
+
|
|
305
|
+
### WorkerCapabilities
|
|
306
|
+
|
|
307
|
+
| Field | Type | Description |
|
|
308
|
+
|--|------|---|
|
|
309
|
+
| `models` | `ModelInfo[]` | Supported models |
|
|
310
|
+
| `maxConcurrentRequests` | `number` | Overall concurrency limit |
|
|
311
|
+
| `metadata?` | `Record<string, unknown>` | Optional metadata |
|
|
312
|
+
| `concurrencyLimits?` | `Record<string, number>` | Per-category concurrency limits |
|
|
313
|
+
|
|
314
|
+
### ModelInfo
|
|
315
|
+
|
|
316
|
+
| Field | Type | Description |
|
|
317
|
+
|--|------|---|
|
|
318
|
+
| `modelId` | `string` | Model identifier |
|
|
319
|
+
| `displayName` | `string` | Human-readable name |
|
|
320
|
+
| `maxContextTokens` | `number` | Maximum context window |
|
|
321
|
+
| `maxOutputTokens` | `number` | Maximum output length |
|
|
322
|
+
| `supportsStreaming` | `boolean` | Streaming support |
|
|
323
|
+
| `supportsVision?` | `boolean` | Vision support (optional) |
|
|
324
|
+
| `supportsTools?` | `boolean` | Tool use support (optional) |
|
|
325
|
+
|
|
326
|
+
### WorkerStatus
|
|
327
|
+
|
|
328
|
+
- `"available"` — Worker can accept new requests
|
|
329
|
+
- `"busy"` — Worker at max concurrency
|
|
330
|
+
- `"draining"` — Worker shutting down, no new requests
|
|
331
|
+
- `"unhealthy"` — Missed heartbeats
|
|
332
|
+
|
|
333
|
+
### Configuration Options
|
|
315
334
|
|
|
316
|
-
|
|
335
|
+
```typescript
|
|
336
|
+
interface WorkerServerOptions {
|
|
337
|
+
port: number;
|
|
338
|
+
authToken?: string;
|
|
339
|
+
heartbeatTimeoutMs?: number; // default: 60000
|
|
340
|
+
healthCheckIntervalMs?: number; // default: 10000
|
|
341
|
+
heartbeatIntervalMs?: number; // default: 15000
|
|
342
|
+
logger?: WorkerServerLogger;
|
|
343
|
+
}
|
|
344
|
+
```
|
|
317
345
|
|
|
318
|
-
|
|
346
|
+
### Logger Interface
|
|
319
347
|
|
|
320
348
|
```typescript
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
349
|
+
interface WorkerServerLogger {
|
|
350
|
+
debug(message: string, context?: Record<string, unknown>): void;
|
|
351
|
+
info(message: string, context?: Record<string, unknown>): void;
|
|
352
|
+
warn(message: string, context?: Record<string, unknown>): void;
|
|
353
|
+
error(message: string, context?: Record<string, unknown>): void;
|
|
354
|
+
}
|
|
324
355
|
```
|
|
325
356
|
|
|
326
357
|
## Appendix
|
|
327
358
|
|
|
328
|
-
###
|
|
359
|
+
### Protocol Messages
|
|
329
360
|
|
|
330
|
-
|
|
361
|
+
**Worker Registration (worker → server)**
|
|
331
362
|
|
|
332
|
-
```
|
|
363
|
+
```typescript
|
|
333
364
|
{
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
"modelId": "gpt-4",
|
|
340
|
-
"displayName": "GPT-4",
|
|
341
|
-
"maxContextTokens": 8192,
|
|
342
|
-
"maxOutputTokens": 4096,
|
|
343
|
-
"supportsStreaming": true
|
|
344
|
-
}],
|
|
345
|
-
"maxConcurrentRequests": 5,
|
|
346
|
-
"concurrencyLimits": {
|
|
347
|
-
"inference": 2,
|
|
348
|
-
"embedding": 4
|
|
349
|
-
}
|
|
350
|
-
},
|
|
351
|
-
"authToken": "optional"
|
|
365
|
+
type: "worker_registration",
|
|
366
|
+
workerId: string,
|
|
367
|
+
workerName: string,
|
|
368
|
+
capabilities: WorkerCapabilities,
|
|
369
|
+
authToken?: string
|
|
352
370
|
}
|
|
353
371
|
```
|
|
354
372
|
|
|
355
|
-
|
|
373
|
+
**Registration Acknowledgment (server → worker)**
|
|
356
374
|
|
|
357
|
-
```
|
|
375
|
+
```typescript
|
|
358
376
|
{
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
377
|
+
type: "worker_registration_ack",
|
|
378
|
+
success: boolean,
|
|
379
|
+
error?: string,
|
|
380
|
+
sessionId?: string,
|
|
381
|
+
heartbeatIntervalMs?: number
|
|
363
382
|
}
|
|
364
383
|
```
|
|
365
384
|
|
|
366
|
-
|
|
385
|
+
**Heartbeat (worker → server)**
|
|
367
386
|
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
```json
|
|
387
|
+
```typescript
|
|
371
388
|
{
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
389
|
+
type: "heartbeat",
|
|
390
|
+
workerId: string,
|
|
391
|
+
timestamp: string
|
|
375
392
|
}
|
|
376
393
|
```
|
|
377
394
|
|
|
378
|
-
|
|
395
|
+
**Heartbeat Acknowledgment (server → worker)**
|
|
379
396
|
|
|
380
|
-
```
|
|
397
|
+
```typescript
|
|
381
398
|
{
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
399
|
+
type: "heartbeat_ack",
|
|
400
|
+
timestamp: string,
|
|
401
|
+
nextHeartbeatDeadline: string
|
|
385
402
|
}
|
|
386
|
-
```
|
|
387
|
-
|
|
388
|
-
### Status Transitions
|
|
389
|
-
|
|
390
|
-
- `Available` → `Busy` when `activeRequests >= maxConcurrentRequests`
|
|
391
|
-
- `Busy` → `Available` when `activeRequests < maxConcurrentRequests`
|
|
392
|
-
- Any → `Unhealthy` on heartbeat timeout
|
|
393
|
-
- `Unhealthy` → `Available/Busy` on heartbeat recovery
|
|
394
|
-
|
|
395
|
-
### Concurrent Request Tracking
|
|
396
|
-
|
|
397
|
-
The pool tracks requests per-worker and per-category (if provided). It automatically decrements the category count when releasing a tracked request.
|
|
398
|
-
|
|
399
|
-
### Worker Protocol Summary
|
|
400
|
-
|
|
401
|
-
Workers communicate using JSON messages with a `type` field:
|
|
402
|
-
|
|
403
|
-
| Message | Direction | Description |
|
|
404
|
-
|---------|-----------|-------------|
|
|
405
|
-
| `worker_registration` | Worker → Server | Register with capabilities |
|
|
406
|
-
| `worker_registration_ack` | Server → Worker | Acknowledgment with session ID |
|
|
407
|
-
| `heartbeat` | Worker → Server | Periodic health check |
|
|
408
|
-
| `heartbeat_ack` | Server → Worker | Acknowledgment with next deadline |
|
|
409
|
-
|
|
410
|
-
All other message types are routed to registered handlers via `onWorkerMessage()`.
|
|
403
|
+
```
|