@ruvector/edge-net 0.1.7 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ledger.js +663 -0
- package/monitor.js +675 -0
- package/onnx-worker.js +482 -0
- package/package.json +40 -5
- package/qdag.js +582 -0
- package/scheduler.js +764 -0
- package/signaling.js +732 -0
package/scheduler.js
ADDED
|
@@ -0,0 +1,764 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @ruvector/edge-net Task Scheduler with Load Balancing
|
|
3
|
+
*
|
|
4
|
+
* Distributed task scheduling with:
|
|
5
|
+
* - Priority queuing
|
|
6
|
+
* - Load balancing across workers
|
|
7
|
+
* - Task affinity and locality
|
|
8
|
+
* - Retry and failure handling
|
|
9
|
+
* - Resource allocation
|
|
10
|
+
*
|
|
11
|
+
* @module @ruvector/edge-net/scheduler
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
import { EventEmitter } from 'events';
|
|
15
|
+
import { randomBytes } from 'crypto';
|
|
16
|
+
|
|
17
|
+
// ============================================
|
|
18
|
+
// TASK PRIORITY
|
|
19
|
+
// ============================================
|
|
20
|
+
|
|
21
|
+
/**
 * Scheduling priority levels. Lower numbers are more urgent: the pending
 * queue in this module drains CRITICAL first and LOW last.
 *
 * Frozen so that a consumer cannot accidentally re-map a level at runtime;
 * the queue buckets are keyed on these exact values.
 */
export const TaskPriority = Object.freeze({
  CRITICAL: 0,
  HIGH: 1,
  MEDIUM: 2,
  LOW: 3,
});
|
|
27
|
+
|
|
28
|
+
/**
 * Lifecycle states a task can be in. The string values are what ends up in
 * `task.status` and in serialized task snapshots.
 *
 * Frozen so the shared constant cannot be mutated by a consumer.
 */
export const TaskStatus = Object.freeze({
  PENDING: 'pending',
  QUEUED: 'queued',
  ASSIGNED: 'assigned',
  RUNNING: 'running',
  COMPLETED: 'completed',
  FAILED: 'failed',
  CANCELLED: 'cancelled',
  RETRYING: 'retrying',
});
|
|
38
|
+
|
|
39
|
+
// ============================================
|
|
40
|
+
// TASK
|
|
41
|
+
// ============================================
|
|
42
|
+
|
|
43
|
+
/**
 * A unit of work with its scheduling hints, retry configuration, resource
 * requirements and lifecycle state.
 *
 * `resolve` and `reject` start out as `null`; when a caller assigns settle
 * callbacks to them, `setResult` / `setError` invoke them.
 */
export class Task {
  /**
   * @param {object} [options]
   * @param {string} [options.id] - Defaults to `task-` plus 16 random hex characters.
   * @param {string} [options.type='compute']
   * @param {*} [options.data] - Opaque payload.
   * @param {object} [options.options] - Opaque per-task options.
   * @param {number} [options.priority] - A `TaskPriority` value; defaults to MEDIUM.
   * @param {number|null} [options.deadline] - Epoch milliseconds after which the task counts as expired.
   * @param {number} [options.timeout=60000] - Milliseconds; a falsy value falls back to the default.
   * @param {number} [options.maxRetries=3]
   * @param {number} [options.retryDelay=1000] - Milliseconds; `0` is honoured.
   * @param {string|null} [options.preferredWorker] - Worker id used for affinity scoring.
   * @param {string[]} [options.requiredCapabilities]
   * @param {{cpu?: number, memory?: number}} [options.resources] - Memory is in MB.
   */
  constructor(options = {}) {
    this.id = options.id || `task-${randomBytes(8).toString('hex')}`;
    this.type = options.type || 'compute';
    this.data = options.data;
    this.options = options.options || {};

    // Priority and scheduling
    this.priority = options.priority ?? TaskPriority.MEDIUM;
    this.deadline = options.deadline || null;
    this.timeout = options.timeout || 60000; // 1 minute default

    // Retry configuration
    this.maxRetries = options.maxRetries ?? 3;
    this.retryCount = 0;
    // `??` rather than `||`: an explicit 0 ("retry immediately") must not be
    // replaced by the default, matching how maxRetries is handled above.
    this.retryDelay = options.retryDelay ?? 1000;

    // Affinity
    this.preferredWorker = options.preferredWorker || null;
    this.requiredCapabilities = options.requiredCapabilities || [];

    // Resource requirements
    this.resources = options.resources || {
      cpu: 1,
      memory: 256, // MB
    };

    // Status tracking
    this.status = TaskStatus.PENDING;
    this.assignedTo = null;
    this.result = null;
    this.error = null;

    // Timestamps (epoch milliseconds)
    this.createdAt = Date.now();
    this.queuedAt = null;
    this.startedAt = null;
    this.completedAt = null;

    // Settle callbacks, assigned by the caller
    this.resolve = null;
    this.reject = null;
  }

  /**
   * Mark the task as completed and hand the result to `resolve`, if set.
   * @param {*} result
   */
  setResult(result) {
    this.result = result;
    this.status = TaskStatus.COMPLETED;
    this.completedAt = Date.now();
    if (this.resolve) this.resolve(result);
  }

  /**
   * Mark the task as failed and hand the error to `reject`, if set.
   * @param {*} error
   */
  setError(error) {
    this.error = error;
    this.status = TaskStatus.FAILED;
    this.completedAt = Date.now();
    if (this.reject) this.reject(error);
  }

  /**
   * @returns {boolean} True while fewer than `maxRetries` retries have been used.
   */
  canRetry() {
    return this.retryCount < this.maxRetries;
  }

  /**
   * @returns {boolean} True when a deadline is set and it lies in the past.
   */
  isExpired() {
    if (!this.deadline) return false;
    return Date.now() > this.deadline;
  }

  /**
   * @returns {number} Milliseconds since the task object was created.
   */
  age() {
    return Date.now() - this.createdAt;
  }

  /**
   * @returns {number} Milliseconds between `startedAt` and `completedAt`
   *   (or now, when not completed); 0 if the task never started.
   */
  executionTime() {
    if (!this.startedAt) return 0;
    const end = this.completedAt || Date.now();
    return end - this.startedAt;
  }

  /**
   * Serializable snapshot. An error is reduced to its `message` when it has
   * a non-empty one; otherwise the raw error value is emitted.
   * @returns {object}
   */
  toJSON() {
    return {
      id: this.id,
      type: this.type,
      data: this.data,
      options: this.options,
      priority: this.priority,
      status: this.status,
      assignedTo: this.assignedTo,
      result: this.result,
      error: this.error?.message || this.error,
      retryCount: this.retryCount,
      createdAt: this.createdAt,
      completedAt: this.completedAt,
    };
  }
}
|
|
158
|
+
|
|
159
|
+
// ============================================
|
|
160
|
+
// WORKER INFO
|
|
161
|
+
// ============================================
|
|
162
|
+
|
|
163
|
+
/**
 * Bookkeeping record for one worker: its declared capabilities, the tasks
 * currently allocated to it, cpu/memory accounting, and running performance
 * metrics used for scoring.
 */
export class WorkerInfo {
  /**
   * @param {object} [options]
   * @param {string} [options.id]
   * @param {string[]} [options.capabilities]
   * @param {number} [options.maxConcurrent=4] - Maximum simultaneously active tasks.
   * @param {number} [options.cpu=4] - Total cpu units.
   * @param {number} [options.memory=4096] - Total memory (task requirements are expressed in the same unit).
   */
  constructor(options = {}) {
    this.id = options.id;
    this.capabilities = options.capabilities || [];
    this.maxConcurrent = options.maxConcurrent || 4;

    // Current state
    this.activeTasks = new Set(); // ids of tasks currently allocated here
    this.status = 'idle'; // idle, busy, offline

    // Resources
    this.resources = {
      cpu: options.cpu || 4,
      memory: options.memory || 4096,
      cpuUsed: 0,
      memoryUsed: 0,
    };

    // Performance metrics
    this.metrics = {
      tasksCompleted: 0,
      tasksFailed: 0,
      avgExecutionTime: 0,
      lastTaskTime: 0,
      successRate: 1.0,
    };

    // Timestamps (epoch milliseconds)
    this.connectedAt = Date.now();
    this.lastSeen = Date.now();
  }

  /**
   * Record that the worker is alive: refreshes `lastSeen` and, if the worker
   * had been marked 'offline', restores 'idle' or 'busy' from its current load.
   */
  heartbeat() {
    this.lastSeen = Date.now();
    if (this.status === 'offline') {
      this.status = this.hasCapacity() ? 'idle' : 'busy';
    }
  }

  /**
   * @returns {boolean} True while fewer than `maxConcurrent` tasks are active.
   */
  hasCapacity() {
    return this.activeTasks.size < this.maxConcurrent;
  }

  /**
   * @param {string[]} [required]
   * @returns {boolean} True when every required capability is declared by
   *   this worker (vacuously true for an empty or missing list).
   */
  hasCapabilities(required) {
    if (!required || required.length === 0) return true;
    return required.every((cap) => this.capabilities.includes(cap));
  }

  /**
   * @param {{cpu?: number, memory?: number}} [required] - Missing or falsy
   *   fields count as 1 cpu / 256 memory.
   * @returns {boolean} True when the unallocated cpu and memory cover the request.
   */
  hasResources(required = {}) {
    const cpuAvailable = this.resources.cpu - this.resources.cpuUsed;
    const memAvailable = this.resources.memory - this.resources.memoryUsed;

    return cpuAvailable >= (required.cpu || 1) &&
      memAvailable >= (required.memory || 256);
  }

  /**
   * Reserve a slot and resources for `task`.
   * Allocating a task that is already active is a no-op, so its resources
   * are never counted twice.
   * @param {{id: string, resources: {cpu?: number, memory?: number}}} task
   */
  allocate(task) {
    if (this.activeTasks.has(task.id)) return;

    this.activeTasks.add(task.id);
    this.resources.cpuUsed += task.resources.cpu || 1;
    this.resources.memoryUsed += task.resources.memory || 256;
    this.status = this.activeTasks.size >= this.maxConcurrent ? 'busy' : 'idle';
  }

  /**
   * Give back the slot and resources held for `task`.
   * Releasing a task that is not active here is a no-op; without that guard
   * a duplicate release would subtract resources still held by other tasks.
   * @param {{id: string, resources: {cpu?: number, memory?: number}}} task
   */
  release(task) {
    if (!this.activeTasks.delete(task.id)) return;

    this.resources.cpuUsed = Math.max(0, this.resources.cpuUsed - (task.resources.cpu || 1));
    this.resources.memoryUsed = Math.max(0, this.resources.memoryUsed - (task.resources.memory || 256));
    this.status = this.activeTasks.size >= this.maxConcurrent ? 'busy' : 'idle';
  }

  /**
   * Fold one finished task into the success rate and the running mean of
   * execution time, and stamp `lastTaskTime`.
   * @param {{executionTime: function(): number}} task
   * @param {boolean} success
   */
  updateMetrics(task, success) {
    if (success) {
      this.metrics.tasksCompleted++;
    } else {
      this.metrics.tasksFailed++;
    }

    const total = this.metrics.tasksCompleted + this.metrics.tasksFailed;
    this.metrics.successRate = this.metrics.tasksCompleted / total;

    // Incremental mean over all finished tasks (successful or not).
    const execTime = task.executionTime();
    this.metrics.avgExecutionTime =
      (this.metrics.avgExecutionTime * (total - 1) + execTime) / total;

    this.metrics.lastTaskTime = Date.now();
  }

  /**
   * Rank this worker for `task`; higher is better.
   *
   * Starts at 100, adds up to 20 for success rate, subtracts up to 30 for
   * current load and up to 20 for slow average execution (1 point per
   * second), and adds 50 when the task names this worker as preferred.
   *
   * @param {{preferredWorker: (string|null)}} task
   * @returns {number} The score, or -1 when the worker has no free slot.
   */
  score(task) {
    if (!this.hasCapacity()) return -1;

    let score = 100;

    // Prefer workers with better success rate
    score += this.metrics.successRate * 20;

    // Prefer workers with lower load
    const loadRatio = this.activeTasks.size / this.maxConcurrent;
    score -= loadRatio * 30;

    // Prefer workers with faster execution
    if (this.metrics.avgExecutionTime > 0) {
      score -= Math.min(this.metrics.avgExecutionTime / 1000, 20);
    }

    // Affinity bonus
    if (task.preferredWorker === this.id) {
      score += 50;
    }

    return score;
  }
}
|
|
294
|
+
|
|
295
|
+
// ============================================
|
|
296
|
+
// PRIORITY QUEUE
|
|
297
|
+
// ============================================
|
|
298
|
+
|
|
299
|
+
/**
 * Multi-level FIFO queue: one bucket per TaskPriority level, visited from
 * CRITICAL down to LOW. Within a bucket, tasks keep insertion order.
 */
class PriorityQueue {
  constructor() {
    const levels = [
      TaskPriority.CRITICAL,
      TaskPriority.HIGH,
      TaskPriority.MEDIUM,
      TaskPriority.LOW,
    ];
    // Map insertion order doubles as the service order.
    this.queues = new Map(levels.map((level) => [level, []]));
    this.size = 0;
  }

  /**
   * Append a task to the bucket for its priority; a priority with no bucket
   * lands in MEDIUM.
   */
  enqueue(task) {
    const bucket = this.queues.has(task.priority)
      ? this.queues.get(task.priority)
      : this.queues.get(TaskPriority.MEDIUM);
    bucket.push(task);
    this.size += 1;
  }

  /**
   * Remove and return the first task of the most urgent non-empty bucket,
   * or null when everything is empty.
   */
  dequeue() {
    for (const bucket of this.queues.values()) {
      if (bucket.length === 0) continue;
      this.size -= 1;
      return bucket.shift();
    }
    return null;
  }

  /**
   * Like dequeue(), but leaves the task in place.
   */
  peek() {
    for (const bucket of this.queues.values()) {
      if (bucket.length !== 0) return bucket[0];
    }
    return null;
  }

  /**
   * Drop the first task whose id matches.
   * @returns {boolean} Whether a task was removed.
   */
  remove(taskId) {
    for (const bucket of this.queues.values()) {
      const position = bucket.findIndex((entry) => entry.id === taskId);
      if (position < 0) continue;
      bucket.splice(position, 1);
      this.size -= 1;
      return true;
    }
    return false;
  }

  /**
   * Snapshot of every queued task in service order.
   */
  getAll() {
    return Array.from(this.queues.values()).flat();
  }
}
|
|
358
|
+
|
|
359
|
+
// ============================================
|
|
360
|
+
// TASK SCHEDULER
|
|
361
|
+
// ============================================
|
|
362
|
+
|
|
363
|
+
/**
 * Distributed Task Scheduler
 *
 * Keeps a priority queue of submitted tasks, a registry of workers, and
 * assigns queued tasks to the best-scoring worker that has the required
 * capabilities, resources and a free slot.
 *
 * Events emitted: 'started', 'stopped', 'worker-registered',
 * 'worker-unregistered', 'task-submitted', 'task-assigned',
 * 'task-completed', 'task-retrying', 'task-failed', 'task-expired',
 * 'task-cancelled', 'cleanup'.
 */
export class TaskScheduler extends EventEmitter {
  /**
   * @param {object} [options]
   * @param {string} [options.id] - Defaults to `scheduler-` plus 16 random hex characters.
   * @param {number} [options.maxCompleted=1000] - Maximum completed tasks retained for lookup.
   * @param {number} [options.schedulingInterval=100] - Milliseconds between scheduling passes.
   * @param {number} [options.cleanupInterval=60000] - Milliseconds between cleanup passes.
   */
  constructor(options = {}) {
    super();
    this.id = options.id || `scheduler-${randomBytes(8).toString('hex')}`;

    // Task queues
    this.pending = new PriorityQueue();
    this.running = new Map(); // taskId -> Task
    this.completed = new Map(); // taskId -> Task (limited)

    // Workers
    this.workers = new Map(); // workerId -> WorkerInfo

    // Configuration
    this.maxCompleted = options.maxCompleted || 1000;
    this.schedulingInterval = options.schedulingInterval || 100;
    this.cleanupInterval = options.cleanupInterval || 60000;

    // Stats
    this.stats = {
      submitted: 0,
      assigned: 0, // number of assignments (a retried task counts once per attempt)
      completed: 0,
      failed: 0,
      retried: 0,
      avgWaitTime: 0,
      avgExecutionTime: 0,
    };

    // Internal
    this.schedulerTimer = null;
    this.cleanupTimer = null;
    this.timeoutTimers = new Map(); // taskId -> timeout handle of the current attempt
    this.retryTimers = new Map(); // taskId -> { timer, task } while waiting to be re-queued
    this.started = false;
  }

  /**
   * Start the periodic scheduling and cleanup passes. No-op when already started.
   */
  start() {
    if (this.started) return;

    this.schedulerTimer = setInterval(() => {
      this.schedule();
    }, this.schedulingInterval);

    this.cleanupTimer = setInterval(() => {
      this.cleanup();
    }, this.cleanupInterval);

    this.started = true;
    this.emit('started');
  }

  /**
   * Stop the periodic passes. Per-task timeout and retry timers are left
   * alone, so tasks already in flight still time out or get re-queued.
   */
  stop() {
    if (this.schedulerTimer) {
      clearInterval(this.schedulerTimer);
      this.schedulerTimer = null;
    }
    if (this.cleanupTimer) {
      clearInterval(this.cleanupTimer);
      this.cleanupTimer = null;
    }
    this.started = false;
    this.emit('stopped');
  }

  /**
   * Register a worker and immediately run a scheduling pass.
   * @param {WorkerInfo|object} worker - A WorkerInfo or options to build one.
   * @returns {WorkerInfo}
   */
  registerWorker(worker) {
    const workerInfo = worker instanceof WorkerInfo
      ? worker
      : new WorkerInfo(worker);

    this.workers.set(workerInfo.id, workerInfo);
    this.emit('worker-registered', { workerId: workerInfo.id });

    // Trigger scheduling
    this.schedule();

    return workerInfo;
  }

  /**
   * Remove a worker and put its in-flight tasks back into the pending queue.
   * @param {string} workerId
   */
  unregisterWorker(workerId) {
    const worker = this.workers.get(workerId);
    if (!worker) return;

    // Requeue active tasks
    for (const taskId of worker.activeTasks) {
      const task = this.running.get(taskId);
      if (!task) continue;

      this._clearTaskTimeout(taskId);
      // QUEUED (not PENDING) to match what submit() and the retry path set
      // for tasks sitting in the pending queue.
      task.status = TaskStatus.QUEUED;
      task.assignedTo = null;
      this.running.delete(taskId);
      this.pending.enqueue(task);
    }

    this.workers.delete(workerId);
    this.emit('worker-unregistered', { workerId });
  }

  /**
   * Record that a worker is alive. Refreshes `lastSeen` (which cleanup()
   * compares against its timeout) and brings a worker marked 'offline' back
   * to 'idle' or 'busy' according to its current load.
   * @param {string} workerId
   * @returns {boolean} False when the worker is not registered.
   */
  heartbeat(workerId) {
    const worker = this.workers.get(workerId);
    if (!worker) return false;

    worker.lastSeen = Date.now();
    if (worker.status === 'offline') {
      worker.status = worker.hasCapacity() ? 'idle' : 'busy';
    }
    return true;
  }

  /**
   * Queue a task.
   * @param {Task|object} taskOptions - A Task or options to build one.
   * @returns {Promise<*>} Resolves with the task result; rejects when the
   *   task fails for good, expires, or is cancelled.
   */
  submit(taskOptions) {
    const task = taskOptions instanceof Task
      ? taskOptions
      : new Task(taskOptions);

    // Wire the settle callbacks before the task becomes visible to
    // listeners, so a pass triggered from 'task-submitted' can settle it.
    const completion = new Promise((resolve, reject) => {
      task.resolve = resolve;
      task.reject = reject;
    });

    task.status = TaskStatus.QUEUED;
    task.queuedAt = Date.now();
    this.stats.submitted++;

    this.pending.enqueue(task);
    this.emit('task-submitted', { taskId: task.id });

    return completion;
  }

  /**
   * Submit several tasks.
   * @param {Array<Task|object>} tasks
   * @returns {Promise<Array<*>>} Rejects as soon as any task rejects.
   */
  submitBatch(tasks) {
    return Promise.all(tasks.map((t) => this.submit(t)));
  }

  /**
   * Cancel a task that is queued, waiting for a retry, or running.
   * The task's promise is rejected with `Error('Task cancelled')` in all
   * three cases.
   * @param {string} taskId
   * @returns {boolean} True when a task was found and cancelled.
   */
  cancel(taskId) {
    // Queued: look the task up first so it can be settled after removal.
    const queued = this.pending.getAll().find((t) => t.id === taskId);
    if (queued && this.pending.remove(taskId)) {
      this._settleCancelled(queued);
      return true;
    }

    // Waiting for its retry delay to elapse.
    const retry = this.retryTimers.get(taskId);
    if (retry) {
      clearTimeout(retry.timer);
      this.retryTimers.delete(taskId);
      this._settleCancelled(retry.task);
      return true;
    }

    // Running: also release the worker's slot and resources.
    const task = this.running.get(taskId);
    if (task) {
      this._clearTaskTimeout(taskId);
      this.running.delete(taskId);
      if (task.assignedTo) {
        const worker = this.workers.get(task.assignedTo);
        if (worker) worker.release(task);
      }
      this._settleCancelled(task);
      return true;
    }

    return false;
  }

  /**
   * One scheduling pass over the pending queue, in priority order.
   *
   * Expired tasks are failed. A task that cannot be placed right now is
   * skipped instead of ending the pass, so one unplaceable task (for
   * example one requiring a capability no worker offers) does not block
   * everything queued behind it.
   */
  schedule() {
    let freeWorker = this._hasFreeWorker();

    // Iterate a snapshot: listeners may submit or cancel during the pass.
    for (const task of this.pending.getAll()) {
      if (task.isExpired()) {
        // remove() is false when a listener already took the task out.
        if (this.pending.remove(task.id)) {
          task.setError(new Error('Task deadline exceeded'));
          this.stats.failed++;
          this.emit('task-expired', { taskId: task.id });
        }
        continue;
      }

      // With every worker full, only expiry processing remains useful.
      if (!freeWorker) continue;

      const worker = this.selectWorker(task);
      if (!worker) continue;
      if (!this.pending.remove(task.id)) continue;

      this.assignTask(task, worker);
      freeWorker = this._hasFreeWorker();
    }
  }

  /**
   * Pick the highest-scoring eligible worker for a task.
   * @param {Task} task
   * @returns {WorkerInfo|null} Null when no worker is eligible.
   */
  selectWorker(task) {
    let bestWorker = null;
    let bestScore = -Infinity;

    for (const worker of this.workers.values()) {
      // Skip offline workers
      if (worker.status === 'offline') continue;

      // Skip full workers explicitly: score() reports them as -1, which
      // would otherwise still beat the initial -Infinity.
      if (!worker.hasCapacity()) continue;

      // Check capabilities
      if (!worker.hasCapabilities(task.requiredCapabilities)) continue;

      // Check resources
      if (!worker.hasResources(task.resources)) continue;

      const score = worker.score(task);
      if (score > bestScore) {
        bestScore = score;
        bestWorker = worker;
      }
    }

    return bestWorker;
  }

  /**
   * Hand a task to a worker: reserve the worker's resources, track the task
   * as running, update the wait-time average and arm the timeout.
   * @param {Task} task
   * @param {WorkerInfo} worker
   */
  assignTask(task, worker) {
    task.status = TaskStatus.ASSIGNED;
    task.assignedTo = worker.id;
    task.startedAt = Date.now();

    worker.allocate(task);
    this.running.set(task.id, task);

    // Incremental mean over the number of assignments made so far.
    const waitTime = task.startedAt - task.queuedAt;
    this.stats.assigned++;
    this.stats.avgWaitTime +=
      (waitTime - this.stats.avgWaitTime) / this.stats.assigned;

    // Arm before emitting, so a listener that settles the task
    // synchronously also clears this timer.
    this._armTaskTimeout(task.id, task.timeout);

    this.emit('task-assigned', {
      taskId: task.id,
      workerId: worker.id,
      waitTime,
    });
  }

  /**
   * Fail a running task whose execution time has reached its timeout.
   * When called before the timeout has elapsed (for example by a timer that
   * fired slightly early), makes sure a timer covers the remaining time.
   * @param {string} taskId
   */
  checkTaskTimeout(taskId) {
    const task = this.running.get(taskId);
    if (!task || task.status === TaskStatus.COMPLETED) return;

    const remaining = task.timeout - task.executionTime();
    if (remaining > 0) {
      if (!this.timeoutTimers.has(taskId)) {
        this._armTaskTimeout(taskId, remaining);
      }
      return;
    }

    this.handleTaskFailure(task, new Error('Task timeout'));
  }

  /**
   * Report successful completion of a running task.
   * @param {string} taskId
   * @param {*} result
   */
  completeTask(taskId, result) {
    const task = this.running.get(taskId);
    if (!task) return;

    this._clearTaskTimeout(taskId);
    task.setResult(result);
    this.running.delete(taskId);

    // Update worker
    const worker = this.workers.get(task.assignedTo);
    if (worker) {
      worker.release(task);
      worker.updateMetrics(task, true);
      worker.lastSeen = Date.now(); // a reported result shows the worker is alive
    }

    // Update stats
    this.stats.completed++;
    this.stats.avgExecutionTime =
      (this.stats.avgExecutionTime * (this.stats.completed - 1) + task.executionTime()) /
      this.stats.completed;

    // Store in completed (limited); Map iteration order makes the first key the oldest.
    this.completed.set(taskId, task);
    if (this.completed.size > this.maxCompleted) {
      const oldest = this.completed.keys().next().value;
      this.completed.delete(oldest);
    }

    this.emit('task-completed', { taskId, result, executionTime: task.executionTime() });
  }

  /**
   * Report failure of a running task; it is retried or failed for good.
   * @param {string} taskId
   * @param {*} error
   */
  failTask(taskId, error) {
    const task = this.running.get(taskId);
    if (!task) return;

    this.heartbeat(task.assignedTo); // a reported failure shows the worker is alive
    this.handleTaskFailure(task, error);
  }

  /**
   * Release the worker, then either schedule a retry (delay grows linearly:
   * `retryDelay * retryCount`) or fail the task for good.
   * @param {Task} task
   * @param {*} error
   */
  handleTaskFailure(task, error) {
    this._clearTaskTimeout(task.id);
    this.running.delete(task.id);

    // Update worker
    const worker = this.workers.get(task.assignedTo);
    if (worker) {
      worker.release(task);
      worker.updateMetrics(task, false);
    }

    if (task.canRetry()) {
      task.retryCount++;
      task.status = TaskStatus.RETRYING;
      task.assignedTo = null;
      this.stats.retried++;

      // Never leave two retry timers for one task.
      const previous = this.retryTimers.get(task.id);
      if (previous) clearTimeout(previous.timer);

      // Re-queue with delay; the handle is kept so cancel() can abort it.
      const timer = setTimeout(() => {
        this.retryTimers.delete(task.id);
        task.status = TaskStatus.QUEUED;
        this.pending.enqueue(task);
        this.emit('task-retrying', { taskId: task.id, retryCount: task.retryCount });
      }, task.retryDelay * task.retryCount);
      this.retryTimers.set(task.id, { timer, task });
    } else {
      task.setError(error);
      this.stats.failed++;
      // Errors may arrive as strings or be missing entirely.
      const message = error?.message ?? (error == null ? 'Task failed' : String(error));
      this.emit('task-failed', { taskId: task.id, error: message });
    }
  }

  /**
   * Mark workers not seen for more than one minute as 'offline', then emit
   * 'cleanup'. Completed tasks are not pruned here; completeTask() bounds
   * that map on insertion.
   */
  cleanup() {
    const now = Date.now();
    const workerTimeout = 60000; // 1 minute

    for (const worker of this.workers.values()) {
      if (now - worker.lastSeen > workerTimeout) {
        worker.status = 'offline';
      }
    }

    this.emit('cleanup');
  }

  /**
   * @returns {object} Queue sizes, worker counts by status, and the live
   *   `stats` object (not a copy).
   */
  getStatus() {
    let idle = 0;
    let busy = 0;
    let offline = 0;
    for (const worker of this.workers.values()) {
      if (worker.status === 'idle') idle++;
      else if (worker.status === 'busy') busy++;
      else if (worker.status === 'offline') offline++;
    }

    return {
      id: this.id,
      started: this.started,
      pending: this.pending.size,
      running: this.running.size,
      completed: this.completed.size,
      workers: {
        total: this.workers.size,
        idle,
        busy,
        offline,
      },
      stats: this.stats,
    };
  }

  /**
   * Look a task up among running, retained completed, queued, and
   * waiting-for-retry tasks.
   * @param {string} taskId
   * @returns {Task|undefined}
   */
  getTask(taskId) {
    return this.running.get(taskId) ||
      this.completed.get(taskId) ||
      this.pending.getAll().find((t) => t.id === taskId) ||
      this.retryTimers.get(taskId)?.task;
  }

  /**
   * @returns {WorkerInfo[]} All registered workers.
   */
  getWorkers() {
    return Array.from(this.workers.values());
  }

  /** Internal: true when some non-offline worker has a free slot. */
  _hasFreeWorker() {
    for (const worker of this.workers.values()) {
      if (worker.status !== 'offline' && worker.hasCapacity()) return true;
    }
    return false;
  }

  /** Internal: (re)arm the timeout timer for a task's current attempt. */
  _armTaskTimeout(taskId, delay) {
    this._clearTaskTimeout(taskId);
    const timer = setTimeout(() => {
      if (this.timeoutTimers.get(taskId) === timer) {
        this.timeoutTimers.delete(taskId);
      }
      this.checkTaskTimeout(taskId);
    }, delay);
    this.timeoutTimers.set(taskId, timer);
  }

  /** Internal: cancel the timeout timer of a task, if one is armed. */
  _clearTaskTimeout(taskId) {
    const timer = this.timeoutTimers.get(taskId);
    if (timer === undefined) return;
    clearTimeout(timer);
    this.timeoutTimers.delete(taskId);
  }

  /** Internal: mark a task cancelled, reject its promise and announce it. */
  _settleCancelled(task) {
    task.status = TaskStatus.CANCELLED;
    task.completedAt = Date.now();
    if (task.reject) {
      task.reject(new Error('Task cancelled'));
    }
    this.emit('task-cancelled', { taskId: task.id });
  }
}
|
|
759
|
+
|
|
760
|
+
// ============================================
|
|
761
|
+
// EXPORTS
|
|
762
|
+
// ============================================
|
|
763
|
+
|
|
764
|
+
// Default export: the TaskScheduler class, which is also available as a named export.
export default TaskScheduler;
|