@gravito/zenith 1.1.2 → 1.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +15 -0
- package/README.md +77 -22
- package/README.zh-TW.md +88 -0
- package/dist/bin.js +64681 -15842
- package/dist/client/assets/index-C80c1frR.css +1 -0
- package/dist/client/assets/index-CrWem9u3.js +434 -0
- package/dist/server/index.js +64681 -15842
- package/package.json +9 -7
- package/postcss.config.js +4 -4
- package/src/client/Layout.tsx +36 -39
- package/src/client/Sidebar.tsx +7 -7
- package/src/client/ThroughputChart.tsx +31 -17
- package/src/client/WorkerStatus.tsx +56 -80
- package/src/client/components/ConfirmDialog.tsx +22 -14
- package/src/client/components/JobInspector.tsx +95 -162
- package/src/client/index.css +29 -31
- package/src/client/pages/LoginPage.tsx +33 -31
- package/src/client/pages/MetricsPage.tsx +65 -37
- package/src/client/pages/OverviewPage.tsx +30 -28
- package/src/client/pages/PulsePage.tsx +111 -190
- package/src/client/pages/QueuesPage.tsx +82 -83
- package/src/client/pages/SchedulesPage.tsx +56 -61
- package/src/client/pages/SettingsPage.tsx +118 -137
- package/src/client/pages/WorkersPage.tsx +101 -115
- package/src/server/services/CommandService.ts +8 -9
- package/src/server/services/PulseService.ts +61 -4
- package/src/server/services/QueueService.ts +293 -0
- package/src/shared/types.ts +38 -13
- package/tailwind.config.js +75 -68
- package/tsconfig.json +28 -37
- package/tsconfig.node.json +9 -11
- package/dist/client/assets/index-BSMp8oq_.js +0 -436
- package/dist/client/assets/index-BwxlHx-_.css +0 -1
- package/dist/client/index.html +0 -13
- package/src/client/index.html +0 -12
- /package/{ECOSYSTEM_EXPANSION_RFC.md → doc/ECOSYSTEM_EXPANSION_RFC.md} +0 -0
|
@@ -6,15 +6,33 @@ import { LogStreamProcessor } from './LogStreamProcessor'
|
|
|
6
6
|
import { MaintenanceScheduler } from './MaintenanceScheduler'
|
|
7
7
|
import { QueueMetricsCollector } from './QueueMetricsCollector'
|
|
8
8
|
|
|
9
|
+
/**
 * Point-in-time counters describing a single queue.
 *
 * @public
 * @since 3.0.0
 */
export interface QueueStats {
  /** Queue name. */
  name: string
  /** How many jobs are waiting to be picked up. */
  waiting: number
  /** How many jobs are currently delayed. */
  delayed: number
  /** How many jobs have failed. */
  failed: number
  /** How many jobs are being processed right now. */
  active: number
  /** `true` while the queue is paused. */
  paused: boolean
}
|
|
17
29
|
|
|
30
|
+
/**
|
|
31
|
+
* Health report from a worker instance.
|
|
32
|
+
*
|
|
33
|
+
* @public
|
|
34
|
+
* @since 3.0.0
|
|
35
|
+
*/
|
|
18
36
|
export interface WorkerReport {
|
|
19
37
|
id: string
|
|
20
38
|
hostname: string
|
|
@@ -31,6 +49,12 @@ export interface WorkerReport {
|
|
|
31
49
|
loadAvg: number[]
|
|
32
50
|
}
|
|
33
51
|
|
|
52
|
+
/**
|
|
53
|
+
* A standard system log message.
|
|
54
|
+
*
|
|
55
|
+
* @public
|
|
56
|
+
* @since 3.0.0
|
|
57
|
+
*/
|
|
34
58
|
export interface SystemLog {
|
|
35
59
|
level: 'info' | 'warn' | 'error' | 'success'
|
|
36
60
|
message: string
|
|
@@ -39,12 +63,34 @@ export interface SystemLog {
|
|
|
39
63
|
timestamp: string
|
|
40
64
|
}
|
|
41
65
|
|
|
66
|
+
/**
 * System-wide statistics bundle: per-queue counters, throughput samples and
 * worker reports gathered into one object.
 *
 * @public
 * @since 3.0.0
 */
export interface GlobalStats {
  /** One statistics entry per queue. */
  queues: Array<QueueStats>
  /**
   * Throughput samples, each pairing a timestamp with a job count.
   * NOTE(review): presumably one sample per minute — confirm against the producer.
   */
  throughput: Array<{ timestamp: string; count: number }>
  /** Health reports from worker instances. */
  workers: Array<WorkerReport>
}
|
|
47
77
|
|
|
78
|
+
/**
|
|
79
|
+
* QueueService acts as the central orchestrator for all queue-related operations.
|
|
80
|
+
*
|
|
81
|
+
* It bridges the gap between the raw Redis data, the persistent SQL storage,
|
|
82
|
+
* and the real-time dashboard. It handles:
|
|
83
|
+
* - Direct queue manipulation (pause, resume, purge).
|
|
84
|
+
* - Job lifecycle management (retry, delete).
|
|
85
|
+
* - System-wide metric aggregation and alerting.
|
|
86
|
+
* - Log stream processing and archiving.
|
|
87
|
+
*
|
|
88
|
+
* This service is designed to be the single source of truth for the
|
|
89
|
+
* Zenith Console.
|
|
90
|
+
*
|
|
91
|
+
* @public
|
|
92
|
+
* @since 3.0.0
|
|
93
|
+
*/
|
|
48
94
|
export class QueueService {
|
|
49
95
|
private redis: Redis
|
|
50
96
|
private subRedis: Redis
|
|
@@ -56,6 +102,13 @@ export class QueueService {
|
|
|
56
102
|
private metricsCollector: QueueMetricsCollector
|
|
57
103
|
private maintenanceScheduler: MaintenanceScheduler
|
|
58
104
|
|
|
105
|
+
/**
|
|
106
|
+
* Initializes the QueueService.
|
|
107
|
+
*
|
|
108
|
+
* @param redisUrl - The Redis connection string (e.g., redis://localhost:6379).
|
|
109
|
+
* @param prefix - Key prefix for all Redis keys used by the queues.
|
|
110
|
+
* @param persistence - Optional configuration for MySQL persistence.
|
|
111
|
+
*/
|
|
59
112
|
constructor(
|
|
60
113
|
redisUrl: string,
|
|
61
114
|
prefix = 'queue:',
|
|
@@ -95,6 +148,15 @@ export class QueueService {
|
|
|
95
148
|
this.alerts = new AlertService(redisUrl)
|
|
96
149
|
}
|
|
97
150
|
|
|
151
|
+
/**
|
|
152
|
+
* Connects to all required backing services.
|
|
153
|
+
*
|
|
154
|
+
* Establishes connections to Redis, the AlertService, and the LogStreamProcessor.
|
|
155
|
+
* Also starts the maintenance scheduler.
|
|
156
|
+
*
|
|
157
|
+
* @returns Promise resolving when all connections are ready.
|
|
158
|
+
* @throws {Error} If Redis or AlertService fails to connect.
|
|
159
|
+
*/
|
|
98
160
|
async connect() {
|
|
99
161
|
await Promise.all([
|
|
100
162
|
this.redis.connect(),
|
|
@@ -106,6 +168,21 @@ export class QueueService {
|
|
|
106
168
|
this.maintenanceScheduler.start(30000)
|
|
107
169
|
}
|
|
108
170
|
|
|
171
|
+
/**
|
|
172
|
+
* Subscribes to real-time system logs.
|
|
173
|
+
*
|
|
174
|
+
* @param callback - Function to be called when a new log arrives.
|
|
175
|
+
* @returns Unsubscribe function.
|
|
176
|
+
*
|
|
177
|
+
* @example
|
|
178
|
+
* ```typescript
|
|
179
|
+
* const unsub = queueService.onLog((log) => {
|
|
180
|
+
* console.log('New log:', log.message);
|
|
181
|
+
* });
|
|
182
|
+
* // Later...
|
|
183
|
+
* unsub();
|
|
184
|
+
* ```
|
|
185
|
+
*/
|
|
109
186
|
onLog(callback: (msg: SystemLog) => void): () => void {
|
|
110
187
|
const unsub = this.logProcessor.onLog(callback)
|
|
111
188
|
const emitterUnsub = () => {
|
|
@@ -117,25 +194,58 @@ export class QueueService {
|
|
|
117
194
|
}
|
|
118
195
|
}
|
|
119
196
|
|
|
197
|
+
/**
 * Fetches the current statistics of the queues tracked by the metrics collector.
 *
 * Pure delegation: whatever `metricsCollector.listQueues()` yields is returned as-is.
 *
 * @returns One statistics entry per queue.
 */
async listQueues(): Promise<QueueStats[]> {
  const queueStats = await this.metricsCollector.listQueues()
  return queueStats
}
|
|
123
205
|
|
|
206
|
+
/**
 * Marks a queue as paused by setting its `<prefix><queueName>:paused` flag to `'1'` in Redis.
 *
 * This method only writes the flag; consumers are expected to check it before
 * picking up jobs (not visible here — confirm against the worker implementation).
 *
 * @param queueName - Name of the queue to pause.
 * @returns Always `true` once the flag has been written.
 * @throws {Error} Propagates any failure of the Redis `SET` call.
 */
async pauseQueue(queueName: string): Promise<boolean> {
  const pausedFlagKey = `${this.prefix}${queueName}:paused`
  await this.redis.set(pausedFlagKey, '1')
  return true
}
|
|
128
217
|
|
|
218
|
+
/**
 * Resumes a queue by deleting its `<prefix><queueName>:paused` flag from Redis.
 *
 * Deleting a flag that does not exist is not treated as an error here, so the
 * call also succeeds for queues that were never paused.
 *
 * @param queueName - Name of the queue to resume.
 * @returns Always `true` once the delete command has completed.
 * @throws {Error} Propagates any failure of the Redis `DEL` call.
 */
async resumeQueue(queueName: string): Promise<boolean> {
  const pausedFlagKey = `${this.prefix}${queueName}:paused`
  await this.redis.del(pausedFlagKey)
  return true
}
|
|
133
229
|
|
|
230
|
+
/**
 * Reports whether a queue's pause flag is set.
 *
 * Reads `<prefix><queueName>:paused` from Redis; only the exact value `'1'`
 * counts as paused. A missing key or any other value yields `false`.
 *
 * @param queueName - Name of the queue to inspect.
 * @returns `true` when the flag equals `'1'`, otherwise `false`.
 */
async isQueuePaused(queueName: string): Promise<boolean> {
  const flagValue = await this.redis.get(`${this.prefix}${queueName}:paused`)
  if (flagValue === '1') {
    return true
  }
  return false
}
|
|
138
240
|
|
|
241
|
+
/**
|
|
242
|
+
* Moves all delayed jobs in a queue back to the waiting list immediately.
|
|
243
|
+
*
|
|
244
|
+
* Useful for manually forcing retries or clearing backlogs.
|
|
245
|
+
*
|
|
246
|
+
* @param queueName - The name of the queue.
|
|
247
|
+
* @returns The number of jobs moved.
|
|
248
|
+
*/
|
|
139
249
|
async retryDelayedJob(queueName: string): Promise<number> {
|
|
140
250
|
const key = `${this.prefix}${queueName}`
|
|
141
251
|
const delayKey = `${key}:delayed`
|
|
@@ -157,6 +267,15 @@ export class QueueService {
|
|
|
157
267
|
return movedCount
|
|
158
268
|
}
|
|
159
269
|
|
|
270
|
+
/**
|
|
271
|
+
* Retrieves a paginated list of jobs from a specific queue and state.
|
|
272
|
+
*
|
|
273
|
+
* @param queueName - The queue to query.
|
|
274
|
+
* @param type - The state to filter by (waiting, delayed, failed).
|
|
275
|
+
* @param start - Start index (0-based).
|
|
276
|
+
* @param stop - Stop index (inclusive).
|
|
277
|
+
* @returns List of job objects.
|
|
278
|
+
*/
|
|
160
279
|
async getJobs(
|
|
161
280
|
queueName: string,
|
|
162
281
|
type: 'waiting' | 'delayed' | 'failed' = 'waiting',
|
|
@@ -210,6 +329,14 @@ export class QueueService {
|
|
|
210
329
|
}
|
|
211
330
|
}
|
|
212
331
|
|
|
332
|
+
/**
|
|
333
|
+
* Records a snapshot of system metrics and triggers alerts if needed.
|
|
334
|
+
*
|
|
335
|
+
* Called periodically by the metrics collector.
|
|
336
|
+
*
|
|
337
|
+
* @param nodes - Current state of nodes (from PulseService).
|
|
338
|
+
* @param injectedWorkers - Optional worker data (for testing).
|
|
339
|
+
*/
|
|
213
340
|
async recordStatusMetrics(
|
|
214
341
|
nodes: Record<string, any> = {},
|
|
215
342
|
injectedWorkers?: any[]
|
|
@@ -253,6 +380,12 @@ export class QueueService {
|
|
|
253
380
|
.catch((err) => console.error('[AlertService] Rule Evaluation Error:', err))
|
|
254
381
|
}
|
|
255
382
|
|
|
383
|
+
/**
|
|
384
|
+
* Subscribes to global stats updates.
|
|
385
|
+
*
|
|
386
|
+
* @param callback - Function called with new stats.
|
|
387
|
+
* @returns Unsubscribe function.
|
|
388
|
+
*/
|
|
256
389
|
onStats(callback: (stats: GlobalStats) => void): () => void {
|
|
257
390
|
this.logEmitter.on('stats', callback)
|
|
258
391
|
return () => {
|
|
@@ -260,6 +393,13 @@ export class QueueService {
|
|
|
260
393
|
}
|
|
261
394
|
}
|
|
262
395
|
|
|
396
|
+
/**
|
|
397
|
+
* Retrieves historical data for a specific metric.
|
|
398
|
+
*
|
|
399
|
+
* @param metric - The metric name (waiting, delayed, failed, workers).
|
|
400
|
+
* @param limit - Number of data points to return (minutes).
|
|
401
|
+
* @returns Array of values.
|
|
402
|
+
*/
|
|
263
403
|
async getMetricHistory(metric: string, limit = 15): Promise<number[]> {
|
|
264
404
|
const now = Math.floor(Date.now() / 60000)
|
|
265
405
|
const keys = []
|
|
@@ -271,6 +411,11 @@ export class QueueService {
|
|
|
271
411
|
return values.map((v) => parseInt(v || '0', 10))
|
|
272
412
|
}
|
|
273
413
|
|
|
414
|
+
/**
|
|
415
|
+
* Calculates system throughput (jobs per minute).
|
|
416
|
+
*
|
|
417
|
+
* @returns Array of { timestamp, count } objects for the last 15 minutes.
|
|
418
|
+
*/
|
|
274
419
|
async getThroughputData(): Promise<{ timestamp: string; count: number }[]> {
|
|
275
420
|
const now = Math.floor(Date.now() / 60000)
|
|
276
421
|
const results = []
|
|
@@ -288,10 +433,23 @@ export class QueueService {
|
|
|
288
433
|
return results
|
|
289
434
|
}
|
|
290
435
|
|
|
436
|
+
/**
 * Fetches the worker reports known to the metrics collector.
 *
 * Pure delegation: whatever `metricsCollector.listWorkers()` yields is returned as-is.
 *
 * @returns One report per worker.
 */
async listWorkers(): Promise<WorkerReport[]> {
  const workerReports = await this.metricsCollector.listWorkers()
  return workerReports
}
|
|
294
444
|
|
|
445
|
+
/**
|
|
446
|
+
* Deletes a specific job from a queue.
|
|
447
|
+
*
|
|
448
|
+
* @param queueName - The queue name.
|
|
449
|
+
* @param type - The list to remove from (waiting, delayed, failed).
|
|
450
|
+
* @param jobRaw - The raw JSON string of the job to remove.
|
|
451
|
+
* @returns True if removed, false otherwise.
|
|
452
|
+
*/
|
|
295
453
|
async deleteJob(
|
|
296
454
|
queueName: string,
|
|
297
455
|
type: 'waiting' | 'delayed' | 'failed',
|
|
@@ -310,6 +468,13 @@ export class QueueService {
|
|
|
310
468
|
return result > 0
|
|
311
469
|
}
|
|
312
470
|
|
|
471
|
+
/**
|
|
472
|
+
* Retries a specific failed or delayed job immediately.
|
|
473
|
+
*
|
|
474
|
+
* @param queueName - The queue name.
|
|
475
|
+
* @param jobRaw - The raw JSON string of the job.
|
|
476
|
+
* @returns True if successfully moved to waiting list.
|
|
477
|
+
*/
|
|
313
478
|
async retryJob(queueName: string, jobRaw: string): Promise<boolean> {
|
|
314
479
|
const key = `${this.prefix}${queueName}`
|
|
315
480
|
const delayKey = `${key}:delayed`
|
|
@@ -330,6 +495,13 @@ export class QueueService {
|
|
|
330
495
|
return result === 1
|
|
331
496
|
}
|
|
332
497
|
|
|
498
|
+
/**
|
|
499
|
+
* Purges all jobs from a queue (waiting, delayed, failed, active).
|
|
500
|
+
*
|
|
501
|
+
* ⚠️ Destructive operation. Irreversible.
|
|
502
|
+
*
|
|
503
|
+
* @param queueName - The queue to purge.
|
|
504
|
+
*/
|
|
333
505
|
async purgeQueue(queueName: string): Promise<void> {
|
|
334
506
|
const pipe = this.redis.pipeline()
|
|
335
507
|
pipe.del(`${this.prefix}${queueName}`)
|
|
@@ -339,14 +511,32 @@ export class QueueService {
|
|
|
339
511
|
await pipe.exec()
|
|
340
512
|
}
|
|
341
513
|
|
|
514
|
+
/**
 * Asks the queue manager to retry the failed jobs of a queue.
 *
 * Delegates to `manager.retryFailed(queueName, 10000)`.
 * NOTE(review): the second argument is presumably an upper bound on the number
 * of jobs retried in one call — confirm against the manager's API.
 *
 * @param queueName - Name of the queue whose failed jobs should be retried.
 * @returns The number reported by the manager (jobs retried).
 */
async retryAllFailedJobs(queueName: string): Promise<number> {
  const retriedCount = await this.manager.retryFailed(queueName, 10000)
  return retriedCount
}
|
|
345
523
|
|
|
524
|
+
/**
 * Asks the queue manager to discard the failed jobs of a queue.
 *
 * Delegates to `manager.clearFailed(queueName)` and resolves once it completes.
 *
 * @param queueName - Name of the queue whose failed jobs should be cleared.
 */
async clearFailedJobs(queueName: string): Promise<void> {
  const clearing = this.manager.clearFailed(queueName)
  await clearing
}
|
|
349
532
|
|
|
533
|
+
/**
|
|
534
|
+
* Gets the count of jobs in a specific state.
|
|
535
|
+
*
|
|
536
|
+
* @param queueName - Queue name.
|
|
537
|
+
* @param type - Job state.
|
|
538
|
+
* @returns Count of jobs.
|
|
539
|
+
*/
|
|
350
540
|
async getJobCount(queueName: string, type: 'waiting' | 'delayed' | 'failed'): Promise<number> {
|
|
351
541
|
const key =
|
|
352
542
|
type === 'delayed'
|
|
@@ -358,6 +548,13 @@ export class QueueService {
|
|
|
358
548
|
return type === 'delayed' ? await this.redis.zcard(key) : await this.redis.llen(key)
|
|
359
549
|
}
|
|
360
550
|
|
|
551
|
+
/**
|
|
552
|
+
* Deletes all jobs in a specific state from a queue.
|
|
553
|
+
*
|
|
554
|
+
* @param queueName - Queue name.
|
|
555
|
+
* @param type - Job state to clear.
|
|
556
|
+
* @returns Number of jobs deleted.
|
|
557
|
+
*/
|
|
361
558
|
async deleteAllJobs(queueName: string, type: 'waiting' | 'delayed' | 'failed'): Promise<number> {
|
|
362
559
|
const key =
|
|
363
560
|
type === 'delayed'
|
|
@@ -371,6 +568,13 @@ export class QueueService {
|
|
|
371
568
|
return count
|
|
372
569
|
}
|
|
373
570
|
|
|
571
|
+
/**
|
|
572
|
+
* Retries all jobs in a specific state (delayed or failed).
|
|
573
|
+
*
|
|
574
|
+
* @param queueName - Queue name.
|
|
575
|
+
* @param type - Job state.
|
|
576
|
+
* @returns Number of jobs retried.
|
|
577
|
+
*/
|
|
374
578
|
async retryAllJobs(queueName: string, type: 'delayed' | 'failed'): Promise<number> {
|
|
375
579
|
if (type === 'delayed') {
|
|
376
580
|
return await this.retryDelayedJob(queueName)
|
|
@@ -379,6 +583,14 @@ export class QueueService {
|
|
|
379
583
|
}
|
|
380
584
|
}
|
|
381
585
|
|
|
586
|
+
/**
|
|
587
|
+
* Deletes a specific set of jobs.
|
|
588
|
+
*
|
|
589
|
+
* @param queueName - Queue name.
|
|
590
|
+
* @param type - Job state.
|
|
591
|
+
* @param jobRaws - Array of raw job strings.
|
|
592
|
+
* @returns Number of jobs deleted.
|
|
593
|
+
*/
|
|
382
594
|
async deleteJobs(
|
|
383
595
|
queueName: string,
|
|
384
596
|
type: 'waiting' | 'delayed' | 'failed',
|
|
@@ -403,6 +615,14 @@ export class QueueService {
|
|
|
403
615
|
return results?.reduce((acc, [_, res]) => acc + ((res as number) || 0), 0) || 0
|
|
404
616
|
}
|
|
405
617
|
|
|
618
|
+
/**
|
|
619
|
+
* Retries a specific set of jobs.
|
|
620
|
+
*
|
|
621
|
+
* @param queueName - Queue name.
|
|
622
|
+
* @param type - Job state.
|
|
623
|
+
* @param jobRaws - Array of raw job strings.
|
|
624
|
+
* @returns Number of jobs retried.
|
|
625
|
+
*/
|
|
406
626
|
async retryJobs(
|
|
407
627
|
queueName: string,
|
|
408
628
|
type: 'delayed' | 'failed',
|
|
@@ -434,6 +654,11 @@ export class QueueService {
|
|
|
434
654
|
return count
|
|
435
655
|
}
|
|
436
656
|
|
|
657
|
+
/**
|
|
658
|
+
* Publishes a log message to the stream and archives it.
|
|
659
|
+
*
|
|
660
|
+
* @param log - Log entry details.
|
|
661
|
+
*/
|
|
437
662
|
async publishLog(log: { level: string; message: string; workerId: string; queue?: string }) {
|
|
438
663
|
const payload = {
|
|
439
664
|
...log,
|
|
@@ -462,11 +687,25 @@ export class QueueService {
|
|
|
462
687
|
}
|
|
463
688
|
}
|
|
464
689
|
|
|
690
|
+
/**
 * Retrieves the log history stored in the Redis list `flux_console:logs:history`.
 *
 * The whole list is read (`LRANGE 0 -1`), each entry is JSON-decoded, and the
 * result is returned in the reverse of the stored order. Entries that are not
 * valid JSON are skipped and reported on stderr, so a single corrupt entry no
 * longer makes the entire history unreadable.
 *
 * NOTE(review): no size limit is applied on read; any cap on the history length
 * (e.g. 100 entries) has to be enforced by the code that writes to the list —
 * confirm against the writer.
 *
 * @returns Decoded log entries, reversed relative to their order in Redis.
 * @throws {Error} Propagates any failure of the Redis `LRANGE` call.
 */
async getLogHistory(): Promise<any[]> {
  const rawEntries = await this.redis.lrange('flux_console:logs:history', 0, -1)
  const decoded: any[] = []
  for (const raw of rawEntries) {
    try {
      decoded.push(JSON.parse(raw))
    } catch (err) {
      // Tolerate corrupt entries instead of rejecting the whole history.
      console.error('[QueueService] Skipping malformed log history entry:', err)
    }
  }
  return decoded.reverse()
}
|
|
469
699
|
|
|
700
|
+
/**
|
|
701
|
+
* Searches for jobs across all queues and states.
|
|
702
|
+
*
|
|
703
|
+
* Scans Redis structures in real-time. Note: This can be expensive on large queues.
|
|
704
|
+
*
|
|
705
|
+
* @param query - Search term (ID, name, or data).
|
|
706
|
+
* @param options - Search options (limit, type).
|
|
707
|
+
* @returns List of matching jobs.
|
|
708
|
+
*/
|
|
470
709
|
async searchJobs(
|
|
471
710
|
query: string,
|
|
472
711
|
options: { limit?: number; type?: 'all' | 'waiting' | 'delayed' | 'failed' } = {}
|
|
@@ -520,6 +759,16 @@ export class QueueService {
|
|
|
520
759
|
return results
|
|
521
760
|
}
|
|
522
761
|
|
|
762
|
+
/**
|
|
763
|
+
* Retrieves archived jobs from persistent storage (MySQL).
|
|
764
|
+
*
|
|
765
|
+
* @param queue - Queue name.
|
|
766
|
+
* @param page - Page number.
|
|
767
|
+
* @param limit - Page size.
|
|
768
|
+
* @param status - Filter by status.
|
|
769
|
+
* @param filter - Additional filters (jobId, time range).
|
|
770
|
+
* @returns Paginated list of jobs.
|
|
771
|
+
*/
|
|
523
772
|
async getArchiveJobs(
|
|
524
773
|
queue: string,
|
|
525
774
|
page = 1,
|
|
@@ -544,6 +793,13 @@ export class QueueService {
|
|
|
544
793
|
}
|
|
545
794
|
}
|
|
546
795
|
|
|
796
|
+
/**
|
|
797
|
+
* Searches archived jobs in persistent storage.
|
|
798
|
+
*
|
|
799
|
+
* @param query - Search term.
|
|
800
|
+
* @param options - Pagination options.
|
|
801
|
+
* @returns Matching jobs.
|
|
802
|
+
*/
|
|
547
803
|
async searchArchive(
|
|
548
804
|
query: string,
|
|
549
805
|
options: { limit?: number; page?: number; queue?: string } = {}
|
|
@@ -563,6 +819,12 @@ export class QueueService {
|
|
|
563
819
|
}
|
|
564
820
|
}
|
|
565
821
|
|
|
822
|
+
/**
|
|
823
|
+
* Retrieves archived logs from persistent storage.
|
|
824
|
+
*
|
|
825
|
+
* @param options - Filters and pagination.
|
|
826
|
+
* @returns Paginated logs.
|
|
827
|
+
*/
|
|
566
828
|
async getArchivedLogs(
|
|
567
829
|
options: {
|
|
568
830
|
page?: number
|
|
@@ -591,6 +853,12 @@ export class QueueService {
|
|
|
591
853
|
return { logs, total }
|
|
592
854
|
}
|
|
593
855
|
|
|
856
|
+
/**
|
|
857
|
+
* Cleans up old archived data based on retention policy.
|
|
858
|
+
*
|
|
859
|
+
* @param days - Retention period in days.
|
|
860
|
+
* @returns Number of records deleted.
|
|
861
|
+
*/
|
|
594
862
|
async cleanupArchive(days: number): Promise<number> {
|
|
595
863
|
const persistence = this.manager.getPersistence()
|
|
596
864
|
if (!persistence) {
|
|
@@ -599,11 +867,21 @@ export class QueueService {
|
|
|
599
867
|
return await persistence.cleanup(days)
|
|
600
868
|
}
|
|
601
869
|
|
|
870
|
+
/**
 * Lists the schedules registered with the queue manager's scheduler.
 *
 * Obtains the scheduler via `manager.getScheduler()` and returns the result of
 * its `list()` call unchanged.
 *
 * @returns The schedules reported by the scheduler.
 */
async listSchedules(): Promise<any[]> {
  return await this.manager.getScheduler().list()
}
|
|
606
879
|
|
|
880
|
+
/**
|
|
881
|
+
* Registers a new Cron schedule.
|
|
882
|
+
*
|
|
883
|
+
* @param config - Schedule configuration.
|
|
884
|
+
*/
|
|
607
885
|
async registerSchedule(config: {
|
|
608
886
|
id: string
|
|
609
887
|
cron: string
|
|
@@ -614,16 +892,31 @@ export class QueueService {
|
|
|
614
892
|
await scheduler.register(config)
|
|
615
893
|
}
|
|
616
894
|
|
|
895
|
+
/**
 * Removes a schedule from the queue manager's scheduler.
 *
 * Obtains the scheduler via `manager.getScheduler()` and awaits its `remove(id)` call.
 *
 * @param id - Identifier of the schedule to remove.
 */
async removeSchedule(id: string): Promise<void> {
  await this.manager.getScheduler().remove(id)
}
|
|
621
904
|
|
|
905
|
+
/**
 * Triggers a registered schedule immediately instead of waiting for its next due time.
 *
 * Obtains the scheduler via `manager.getScheduler()` and awaits its `runNow(id)` call.
 *
 * @param id - Identifier of the schedule to run.
 */
async runScheduleNow(id: string): Promise<void> {
  await this.manager.getScheduler().runNow(id)
}
|
|
626
914
|
|
|
915
|
+
/**
|
|
916
|
+
* Processes schedule ticks.
|
|
917
|
+
*
|
|
918
|
+
* Should be called periodically to check for due schedules.
|
|
919
|
+
*/
|
|
627
920
|
async tickScheduler(): Promise<void> {
|
|
628
921
|
const scheduler = this.manager.getScheduler()
|
|
629
922
|
await scheduler.tick()
|
package/src/shared/types.ts
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Metrics representing CPU usage for a specific node/process.
|
|
3
3
|
*
|
|
4
|
+
* Used to visualize load distribution and identify CPU-bound workers.
|
|
5
|
+
*
|
|
4
6
|
* @public
|
|
5
7
|
* @since 3.0.0
|
|
6
8
|
*/
|
|
@@ -9,13 +11,15 @@ export interface PulseCpu {
|
|
|
9
11
|
system: number
|
|
10
12
|
/** Process-specific CPU usage percentage (0-100). */
|
|
11
13
|
process: number
|
|
12
|
-
/** Number of CPU cores available. */
|
|
14
|
+
/** Number of CPU cores available on the host. */
|
|
13
15
|
cores: number
|
|
14
16
|
}
|
|
15
17
|
|
|
16
18
|
/**
|
|
17
19
|
* Metrics representing memory usage for a specific node/process.
|
|
18
20
|
*
|
|
21
|
+
* Critical for detecting memory leaks and capacity planning.
|
|
22
|
+
*
|
|
19
23
|
* @public
|
|
20
24
|
* @since 3.0.0
|
|
21
25
|
*/
|
|
@@ -43,6 +47,9 @@ export interface PulseMemory {
|
|
|
43
47
|
/**
|
|
44
48
|
* Runtime metadata for a monitored process.
|
|
45
49
|
*
|
|
50
|
+
* Provides context about the environment (Node.js version, platform)
|
|
51
|
+
* and current health status.
|
|
52
|
+
*
|
|
46
53
|
* @public
|
|
47
54
|
* @since 3.0.0
|
|
48
55
|
*/
|
|
@@ -53,13 +60,16 @@ export interface PulseRuntime {
|
|
|
53
60
|
framework: string
|
|
54
61
|
/** Current process status (e.g., 'online', 'maintenance'). */
|
|
55
62
|
status?: string
|
|
56
|
-
/** Last few error messages from the process. */
|
|
63
|
+
/** Last few error messages captured from the process stderr/logs. */
|
|
57
64
|
errors?: string[]
|
|
58
65
|
}
|
|
59
66
|
|
|
60
67
|
/**
|
|
61
68
|
* Statistics snapshot for a specific queue.
|
|
62
69
|
*
|
|
70
|
+
* Represents the state of a queue at a specific point in time, including
|
|
71
|
+
* job counts and throughput metrics. Used for dashboard graphs.
|
|
72
|
+
*
|
|
63
73
|
* @public
|
|
64
74
|
* @since 3.0.0
|
|
65
75
|
*/
|
|
@@ -75,7 +85,7 @@ export interface QueueSnapshot {
|
|
|
75
85
|
failed: number
|
|
76
86
|
delayed: number
|
|
77
87
|
}
|
|
78
|
-
/** Historical throughput data. */
|
|
88
|
+
/** Historical throughput data (jobs processed per minute). */
|
|
79
89
|
throughput?: {
|
|
80
90
|
in: number
|
|
81
91
|
out: number
|
|
@@ -85,23 +95,26 @@ export interface QueueSnapshot {
|
|
|
85
95
|
/**
|
|
86
96
|
* Represents a single application instance (node) monitored by Zenith.
|
|
87
97
|
*
|
|
98
|
+
* A PulseNode corresponds to a running process (e.g., a worker, API server)
|
|
99
|
+
* that emits heartbeats. These nodes form the cluster topology.
|
|
100
|
+
*
|
|
88
101
|
* @public
|
|
89
102
|
* @since 3.0.0
|
|
90
103
|
*/
|
|
91
104
|
export interface PulseNode {
|
|
92
|
-
/** Unique execution ID for the node. */
|
|
105
|
+
/** Unique execution ID for the node (usually UUID). */
|
|
93
106
|
id: string
|
|
94
|
-
/** Service group name. */
|
|
107
|
+
/** Service group name (e.g., "payment-worker", "api-gateway"). */
|
|
95
108
|
service: string
|
|
96
109
|
/** Programming language or runtime type. */
|
|
97
110
|
language: 'node' | 'bun' | 'deno' | 'php' | 'go' | 'python' | 'other'
|
|
98
|
-
/** Application version. */
|
|
111
|
+
/** Application version (from package.json). */
|
|
99
112
|
version: string
|
|
100
|
-
/** Process identifier. */
|
|
113
|
+
/** Process identifier (PID). */
|
|
101
114
|
pid: number
|
|
102
115
|
/** Hostname of the machine. */
|
|
103
116
|
hostname: string
|
|
104
|
-
/** Operating system platform. */
|
|
117
|
+
/** Operating system platform (darwin, linux, win32). */
|
|
105
118
|
platform: string
|
|
106
119
|
/** CPU metrics. */
|
|
107
120
|
cpu: PulseCpu
|
|
@@ -113,13 +126,16 @@ export interface PulseNode {
|
|
|
113
126
|
runtime: PulseRuntime
|
|
114
127
|
/** Unstructured metadata (e.g., framework-specific details). */
|
|
115
128
|
meta?: any
|
|
116
|
-
/** Epoch timestamp of the last heartbeat. */
|
|
129
|
+
/** Epoch timestamp of the last heartbeat received. */
|
|
117
130
|
timestamp: number
|
|
118
131
|
}
|
|
119
132
|
|
|
120
133
|
/**
|
|
121
134
|
* Definition of an alert rule for monitoring health.
|
|
122
135
|
*
|
|
136
|
+
* Alert rules define conditions that trigger notifications, such as
|
|
137
|
+
* high queue backlogs or worker failures.
|
|
138
|
+
*
|
|
123
139
|
* @public
|
|
124
140
|
* @since 3.0.0
|
|
125
141
|
*/
|
|
@@ -130,17 +146,19 @@ export interface AlertRule {
|
|
|
130
146
|
name: string
|
|
131
147
|
/** The metric type to monitor. */
|
|
132
148
|
type: 'backlog' | 'failure' | 'worker_lost' | 'node_cpu' | 'node_ram'
|
|
133
|
-
/** The value that triggers the alert. */
|
|
149
|
+
/** The value that triggers the alert (e.g., > 100 jobs). */
|
|
134
150
|
threshold: number
|
|
135
|
-
/** Optional queue name
|
|
151
|
+
/** Optional queue name to scope the rule to. */
|
|
136
152
|
queue?: string
|
|
137
|
-
/** Minutes to wait before re-triggering the alert. */
|
|
153
|
+
/** Minutes to wait before re-triggering the alert (debounce). */
|
|
138
154
|
cooldownMinutes: number
|
|
139
155
|
}
|
|
140
156
|
|
|
141
157
|
/**
|
|
142
158
|
* Configuration for alert notification channels.
|
|
143
159
|
*
|
|
160
|
+
* Defines where alerts should be sent when triggered.
|
|
161
|
+
*
|
|
144
162
|
* @public
|
|
145
163
|
* @since 3.0.0
|
|
146
164
|
*/
|
|
@@ -170,13 +188,15 @@ export interface AlertConfig {
|
|
|
170
188
|
/**
|
|
171
189
|
* Configuration for automated system maintenance.
|
|
172
190
|
*
|
|
191
|
+
* Controls data retention policies and auto-cleanup tasks.
|
|
192
|
+
*
|
|
173
193
|
* @public
|
|
174
194
|
* @since 3.0.0
|
|
175
195
|
*/
|
|
176
196
|
export interface MaintenanceConfig {
|
|
177
197
|
/** Whether to automatically delete old data. */
|
|
178
198
|
autoCleanup: boolean
|
|
179
|
-
/** Number of days to retain records. */
|
|
199
|
+
/** Number of days to retain records (logs, metrics). */
|
|
180
200
|
retentionDays: number
|
|
181
201
|
/** Timestamp of the last maintenance run. */
|
|
182
202
|
lastRun?: number
|
|
@@ -184,6 +204,11 @@ export interface MaintenanceConfig {
|
|
|
184
204
|
|
|
185
205
|
/**
|
|
186
206
|
* Represents a historical alert event.
|
|
207
|
+
*
|
|
208
|
+
* Stored in the database/log to track system health history.
|
|
209
|
+
*
|
|
210
|
+
* @public
|
|
211
|
+
* @since 3.0.0
|
|
187
212
|
*/
|
|
188
213
|
export interface AlertEvent {
|
|
189
214
|
id?: string
|