@gravito/zenith 1.1.2 → 1.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. package/CHANGELOG.md +15 -0
  2. package/README.md +77 -22
  3. package/README.zh-TW.md +88 -0
  4. package/dist/bin.js +64681 -15842
  5. package/dist/client/assets/index-C80c1frR.css +1 -0
  6. package/dist/client/assets/index-CrWem9u3.js +434 -0
  7. package/dist/server/index.js +64681 -15842
  8. package/package.json +9 -7
  9. package/postcss.config.js +4 -4
  10. package/src/client/Layout.tsx +36 -39
  11. package/src/client/Sidebar.tsx +7 -7
  12. package/src/client/ThroughputChart.tsx +31 -17
  13. package/src/client/WorkerStatus.tsx +56 -80
  14. package/src/client/components/ConfirmDialog.tsx +22 -14
  15. package/src/client/components/JobInspector.tsx +95 -162
  16. package/src/client/index.css +29 -31
  17. package/src/client/pages/LoginPage.tsx +33 -31
  18. package/src/client/pages/MetricsPage.tsx +65 -37
  19. package/src/client/pages/OverviewPage.tsx +30 -28
  20. package/src/client/pages/PulsePage.tsx +111 -190
  21. package/src/client/pages/QueuesPage.tsx +82 -83
  22. package/src/client/pages/SchedulesPage.tsx +56 -61
  23. package/src/client/pages/SettingsPage.tsx +118 -137
  24. package/src/client/pages/WorkersPage.tsx +101 -115
  25. package/src/server/services/CommandService.ts +8 -9
  26. package/src/server/services/PulseService.ts +61 -4
  27. package/src/server/services/QueueService.ts +293 -0
  28. package/src/shared/types.ts +38 -13
  29. package/tailwind.config.js +75 -68
  30. package/tsconfig.json +28 -37
  31. package/tsconfig.node.json +9 -11
  32. package/dist/client/assets/index-BSMp8oq_.js +0 -436
  33. package/dist/client/assets/index-BwxlHx-_.css +0 -1
  34. package/dist/client/index.html +0 -13
  35. package/src/client/index.html +0 -12
  36. package/{ECOSYSTEM_EXPANSION_RFC.md → doc/ECOSYSTEM_EXPANSION_RFC.md} +0 -0
@@ -6,15 +6,33 @@ import { LogStreamProcessor } from './LogStreamProcessor'
6
6
  import { MaintenanceScheduler } from './MaintenanceScheduler'
7
7
  import { QueueMetricsCollector } from './QueueMetricsCollector'
8
8
 
9
+ /**
10
+ * Snapshot of queue statistics.
11
+ *
12
+ * @public
13
+ * @since 3.0.0
14
+ */
9
15
  export interface QueueStats {
16
+ /** Name of the queue. */
10
17
  name: string
18
+ /** Number of jobs waiting in the queue. */
11
19
  waiting: number
20
+ /** Number of jobs delayed. */
12
21
  delayed: number
22
+ /** Number of jobs that failed. */
13
23
  failed: number
24
+ /** Number of jobs currently being processed. */
14
25
  active: number
26
+ /** Whether the queue is currently paused. */
15
27
  paused: boolean
16
28
  }
17
29
 
30
+ /**
31
+ * Health report from a worker instance.
32
+ *
33
+ * @public
34
+ * @since 3.0.0
35
+ */
18
36
  export interface WorkerReport {
19
37
  id: string
20
38
  hostname: string
@@ -31,6 +49,12 @@ export interface WorkerReport {
31
49
  loadAvg: number[]
32
50
  }
33
51
 
52
+ /**
53
+ * A standard system log message.
54
+ *
55
+ * @public
56
+ * @since 3.0.0
57
+ */
34
58
  export interface SystemLog {
35
59
  level: 'info' | 'warn' | 'error' | 'success'
36
60
  message: string
@@ -39,12 +63,34 @@ export interface SystemLog {
39
63
  timestamp: string
40
64
  }
41
65
 
66
+ /**
67
+ * Aggregated global statistics.
68
+ *
69
+ * @public
70
+ * @since 3.0.0
71
+ */
42
72
  export interface GlobalStats {
43
73
  queues: QueueStats[]
44
74
  throughput: { timestamp: string; count: number }[]
45
75
  workers: WorkerReport[]
46
76
  }
47
77
 
78
+ /**
79
+ * QueueService acts as the central orchestrator for all queue-related operations.
80
+ *
81
+ * It bridges the gap between the raw Redis data, the persistent SQL storage,
82
+ * and the real-time dashboard. It handles:
83
+ * - Direct queue manipulation (pause, resume, purge).
84
+ * - Job lifecycle management (retry, delete).
85
+ * - System-wide metric aggregation and alerting.
86
+ * - Log stream processing and archiving.
87
+ *
88
+ * This service is designed to be the single source of truth for the
89
+ * Zenith Console.
90
+ *
91
+ * @public
92
+ * @since 3.0.0
93
+ */
48
94
  export class QueueService {
49
95
  private redis: Redis
50
96
  private subRedis: Redis
@@ -56,6 +102,13 @@ export class QueueService {
56
102
  private metricsCollector: QueueMetricsCollector
57
103
  private maintenanceScheduler: MaintenanceScheduler
58
104
 
105
+ /**
106
+ * Initializes the QueueService.
107
+ *
108
+ * @param redisUrl - The Redis connection string (e.g., redis://localhost:6379).
109
+ * @param prefix - Key prefix for all Redis keys used by the queues.
110
+ * @param persistence - Optional configuration for MySQL persistence.
111
+ */
59
112
  constructor(
60
113
  redisUrl: string,
61
114
  prefix = 'queue:',
@@ -95,6 +148,15 @@ export class QueueService {
95
148
  this.alerts = new AlertService(redisUrl)
96
149
  }
97
150
 
151
+ /**
152
+ * Connects to all required backing services.
153
+ *
154
+ * Establishes connections to Redis, the AlertService, and the LogStreamProcessor.
155
+ * Also starts the maintenance scheduler.
156
+ *
157
+ * @returns Promise resolving when all connections are ready.
158
+ * @throws {Error} If Redis or AlertService fails to connect.
159
+ */
98
160
  async connect() {
99
161
  await Promise.all([
100
162
  this.redis.connect(),
@@ -106,6 +168,21 @@ export class QueueService {
106
168
  this.maintenanceScheduler.start(30000)
107
169
  }
108
170
 
171
+ /**
172
+ * Subscribes to real-time system logs.
173
+ *
174
+ * @param callback - Function to be called when a new log arrives.
175
+ * @returns Unsubscribe function.
176
+ *
177
+ * @example
178
+ * ```typescript
179
+ * const unsub = queueService.onLog((log) => {
180
+ * console.log('New log:', log.message);
181
+ * });
182
+ * // Later...
183
+ * unsub();
184
+ * ```
185
+ */
109
186
  onLog(callback: (msg: SystemLog) => void): () => void {
110
187
  const unsub = this.logProcessor.onLog(callback)
111
188
  const emitterUnsub = () => {
@@ -117,25 +194,58 @@ export class QueueService {
117
194
  }
118
195
  }
119
196
 
197
+ /**
198
+ * Retrieves current statistics for all known queues.
199
+ *
200
+ * @returns List of queue statistics.
201
+ */
120
202
  async listQueues(): Promise<QueueStats[]> {
121
203
  return this.metricsCollector.listQueues()
122
204
  }
123
205
 
206
+ /**
207
+ * Pauses a specific queue, preventing it from processing jobs.
208
+ *
209
+ * @param queueName - The name of the queue to pause.
210
+ * @returns True if successful.
211
+ * @throws {Error} If Redis operation fails.
212
+ */
124
213
  async pauseQueue(queueName: string): Promise<boolean> {
125
214
  await this.redis.set(`${this.prefix}${queueName}:paused`, '1')
126
215
  return true
127
216
  }
128
217
 
218
+ /**
219
+ * Resumes a paused queue.
220
+ *
221
+ * @param queueName - The name of the queue to resume.
222
+ * @returns True if successful.
223
+ * @throws {Error} If Redis operation fails.
224
+ */
129
225
  async resumeQueue(queueName: string): Promise<boolean> {
130
226
  await this.redis.del(`${this.prefix}${queueName}:paused`)
131
227
  return true
132
228
  }
133
229
 
230
+ /**
231
+ * Checks if a queue is currently paused.
232
+ *
233
+ * @param queueName - The name of the queue.
234
+ * @returns True if paused, false otherwise.
235
+ */
134
236
  async isQueuePaused(queueName: string): Promise<boolean> {
135
237
  const paused = await this.redis.get(`${this.prefix}${queueName}:paused`)
136
238
  return paused === '1'
137
239
  }
138
240
 
241
+ /**
242
+ * Moves all delayed jobs in a queue back to the waiting list immediately.
243
+ *
244
+ * Useful for manually forcing retries or clearing backlogs.
245
+ *
246
+ * @param queueName - The name of the queue.
247
+ * @returns The number of jobs moved.
248
+ */
139
249
  async retryDelayedJob(queueName: string): Promise<number> {
140
250
  const key = `${this.prefix}${queueName}`
141
251
  const delayKey = `${key}:delayed`
@@ -157,6 +267,15 @@ export class QueueService {
157
267
  return movedCount
158
268
  }
159
269
 
270
+ /**
271
+ * Retrieves a paginated list of jobs from a specific queue and state.
272
+ *
273
+ * @param queueName - The queue to query.
274
+ * @param type - The state to filter by (waiting, delayed, failed).
275
+ * @param start - Start index (0-based).
276
+ * @param stop - Stop index (inclusive).
277
+ * @returns List of job objects.
278
+ */
160
279
  async getJobs(
161
280
  queueName: string,
162
281
  type: 'waiting' | 'delayed' | 'failed' = 'waiting',
@@ -210,6 +329,14 @@ export class QueueService {
210
329
  }
211
330
  }
212
331
 
332
+ /**
333
+ * Records a snapshot of system metrics and triggers alerts if needed.
334
+ *
335
+ * Called periodically by the metrics collector.
336
+ *
337
+ * @param nodes - Current state of nodes (from PulseService).
338
+ * @param injectedWorkers - Optional worker data (for testing).
339
+ */
213
340
  async recordStatusMetrics(
214
341
  nodes: Record<string, any> = {},
215
342
  injectedWorkers?: any[]
@@ -253,6 +380,12 @@ export class QueueService {
253
380
  .catch((err) => console.error('[AlertService] Rule Evaluation Error:', err))
254
381
  }
255
382
 
383
+ /**
384
+ * Subscribes to global stats updates.
385
+ *
386
+ * @param callback - Function called with new stats.
387
+ * @returns Unsubscribe function.
388
+ */
256
389
  onStats(callback: (stats: GlobalStats) => void): () => void {
257
390
  this.logEmitter.on('stats', callback)
258
391
  return () => {
@@ -260,6 +393,13 @@ export class QueueService {
260
393
  }
261
394
  }
262
395
 
396
+ /**
397
+ * Retrieves historical data for a specific metric.
398
+ *
399
+ * @param metric - The metric name (waiting, delayed, failed, workers).
400
+ * @param limit - Number of data points to return (one per minute).
401
+ * @returns Array of values.
402
+ */
263
403
  async getMetricHistory(metric: string, limit = 15): Promise<number[]> {
264
404
  const now = Math.floor(Date.now() / 60000)
265
405
  const keys = []
@@ -271,6 +411,11 @@ export class QueueService {
271
411
  return values.map((v) => parseInt(v || '0', 10))
272
412
  }
273
413
 
414
+ /**
415
+ * Calculates system throughput (jobs per minute).
416
+ *
417
+ * @returns Array of { timestamp, count } objects for the last 15 minutes.
418
+ */
274
419
  async getThroughputData(): Promise<{ timestamp: string; count: number }[]> {
275
420
  const now = Math.floor(Date.now() / 60000)
276
421
  const results = []
@@ -288,10 +433,23 @@ export class QueueService {
288
433
  return results
289
434
  }
290
435
 
436
+ /**
437
+ * Lists all active workers.
438
+ *
439
+ * @returns Array of worker reports.
440
+ */
291
441
  async listWorkers(): Promise<WorkerReport[]> {
292
442
  return this.metricsCollector.listWorkers()
293
443
  }
294
444
 
445
+ /**
446
+ * Deletes a specific job from a queue.
447
+ *
448
+ * @param queueName - The queue name.
449
+ * @param type - The list to remove from (waiting, delayed, failed).
450
+ * @param jobRaw - The raw JSON string of the job to remove.
451
+ * @returns True if removed, false otherwise.
452
+ */
295
453
  async deleteJob(
296
454
  queueName: string,
297
455
  type: 'waiting' | 'delayed' | 'failed',
@@ -310,6 +468,13 @@ export class QueueService {
310
468
  return result > 0
311
469
  }
312
470
 
471
+ /**
472
+ * Retries a specific failed or delayed job immediately.
473
+ *
474
+ * @param queueName - The queue name.
475
+ * @param jobRaw - The raw JSON string of the job.
476
+ * @returns True if successfully moved to waiting list.
477
+ */
313
478
  async retryJob(queueName: string, jobRaw: string): Promise<boolean> {
314
479
  const key = `${this.prefix}${queueName}`
315
480
  const delayKey = `${key}:delayed`
@@ -330,6 +495,13 @@ export class QueueService {
330
495
  return result === 1
331
496
  }
332
497
 
498
+ /**
499
+ * Purges all jobs from a queue (waiting, delayed, failed, active).
500
+ *
501
+ * ⚠️ Destructive operation. Irreversible.
502
+ *
503
+ * @param queueName - The queue to purge.
504
+ */
333
505
  async purgeQueue(queueName: string): Promise<void> {
334
506
  const pipe = this.redis.pipeline()
335
507
  pipe.del(`${this.prefix}${queueName}`)
@@ -339,14 +511,32 @@ export class QueueService {
339
511
  await pipe.exec()
340
512
  }
341
513
 
514
+ /**
515
+ * Retries all failed jobs in a queue.
516
+ *
517
+ * @param queueName - The queue name.
518
+ * @returns Number of jobs retried.
519
+ */
342
520
  async retryAllFailedJobs(queueName: string): Promise<number> {
343
521
  return await this.manager.retryFailed(queueName, 10000)
344
522
  }
345
523
 
524
+ /**
525
+ * Clears all failed jobs from a queue.
526
+ *
527
+ * @param queueName - The queue name.
528
+ */
346
529
  async clearFailedJobs(queueName: string): Promise<void> {
347
530
  await this.manager.clearFailed(queueName)
348
531
  }
349
532
 
533
+ /**
534
+ * Gets the count of jobs in a specific state.
535
+ *
536
+ * @param queueName - Queue name.
537
+ * @param type - Job state.
538
+ * @returns Count of jobs.
539
+ */
350
540
  async getJobCount(queueName: string, type: 'waiting' | 'delayed' | 'failed'): Promise<number> {
351
541
  const key =
352
542
  type === 'delayed'
@@ -358,6 +548,13 @@ export class QueueService {
358
548
  return type === 'delayed' ? await this.redis.zcard(key) : await this.redis.llen(key)
359
549
  }
360
550
 
551
+ /**
552
+ * Deletes all jobs in a specific state from a queue.
553
+ *
554
+ * @param queueName - Queue name.
555
+ * @param type - Job state to clear.
556
+ * @returns Number of jobs deleted.
557
+ */
361
558
  async deleteAllJobs(queueName: string, type: 'waiting' | 'delayed' | 'failed'): Promise<number> {
362
559
  const key =
363
560
  type === 'delayed'
@@ -371,6 +568,13 @@ export class QueueService {
371
568
  return count
372
569
  }
373
570
 
571
+ /**
572
+ * Retries all jobs in a specific state (delayed or failed).
573
+ *
574
+ * @param queueName - Queue name.
575
+ * @param type - Job state.
576
+ * @returns Number of jobs retried.
577
+ */
374
578
  async retryAllJobs(queueName: string, type: 'delayed' | 'failed'): Promise<number> {
375
579
  if (type === 'delayed') {
376
580
  return await this.retryDelayedJob(queueName)
@@ -379,6 +583,14 @@ export class QueueService {
379
583
  }
380
584
  }
381
585
 
586
+ /**
587
+ * Deletes a specific set of jobs.
588
+ *
589
+ * @param queueName - Queue name.
590
+ * @param type - Job state.
591
+ * @param jobRaws - Array of raw job strings.
592
+ * @returns Number of jobs deleted.
593
+ */
382
594
  async deleteJobs(
383
595
  queueName: string,
384
596
  type: 'waiting' | 'delayed' | 'failed',
@@ -403,6 +615,14 @@ export class QueueService {
403
615
  return results?.reduce((acc, [_, res]) => acc + ((res as number) || 0), 0) || 0
404
616
  }
405
617
 
618
+ /**
619
+ * Retries a specific set of jobs.
620
+ *
621
+ * @param queueName - Queue name.
622
+ * @param type - Job state.
623
+ * @param jobRaws - Array of raw job strings.
624
+ * @returns Number of jobs retried.
625
+ */
406
626
  async retryJobs(
407
627
  queueName: string,
408
628
  type: 'delayed' | 'failed',
@@ -434,6 +654,11 @@ export class QueueService {
434
654
  return count
435
655
  }
436
656
 
657
+ /**
658
+ * Publishes a log message to the stream and archives it.
659
+ *
660
+ * @param log - Log entry details.
661
+ */
437
662
  async publishLog(log: { level: string; message: string; workerId: string; queue?: string }) {
438
663
  const payload = {
439
664
  ...log,
@@ -462,11 +687,25 @@ export class QueueService {
462
687
  }
463
688
  }
464
689
 
690
+ /**
691
+ * Retrieves recent log history from Redis.
692
+ *
693
+ * @returns List of recent logs (max 100).
694
+ */
465
695
  async getLogHistory(): Promise<any[]> {
466
696
  const logs = await this.redis.lrange('flux_console:logs:history', 0, -1)
467
697
  return logs.map((l) => JSON.parse(l)).reverse()
468
698
  }
469
699
 
700
+ /**
701
+ * Searches for jobs across all queues and states.
702
+ *
703
+ * Scans Redis structures in real-time. Note: This can be expensive on large queues.
704
+ *
705
+ * @param query - Search term (ID, name, or data).
706
+ * @param options - Search options (limit, type).
707
+ * @returns List of matching jobs.
708
+ */
470
709
  async searchJobs(
471
710
  query: string,
472
711
  options: { limit?: number; type?: 'all' | 'waiting' | 'delayed' | 'failed' } = {}
@@ -520,6 +759,16 @@ export class QueueService {
520
759
  return results
521
760
  }
522
761
 
762
+ /**
763
+ * Retrieves archived jobs from persistent storage (MySQL).
764
+ *
765
+ * @param queue - Queue name.
766
+ * @param page - Page number.
767
+ * @param limit - Page size.
768
+ * @param status - Filter by status.
769
+ * @param filter - Additional filters (jobId, time range).
770
+ * @returns Paginated list of jobs.
771
+ */
523
772
  async getArchiveJobs(
524
773
  queue: string,
525
774
  page = 1,
@@ -544,6 +793,13 @@ export class QueueService {
544
793
  }
545
794
  }
546
795
 
796
+ /**
797
+ * Searches archived jobs in persistent storage.
798
+ *
799
+ * @param query - Search term.
800
+ * @param options - Pagination options.
801
+ * @returns Matching jobs.
802
+ */
547
803
  async searchArchive(
548
804
  query: string,
549
805
  options: { limit?: number; page?: number; queue?: string } = {}
@@ -563,6 +819,12 @@ export class QueueService {
563
819
  }
564
820
  }
565
821
 
822
+ /**
823
+ * Retrieves archived logs from persistent storage.
824
+ *
825
+ * @param options - Filters and pagination.
826
+ * @returns Paginated logs.
827
+ */
566
828
  async getArchivedLogs(
567
829
  options: {
568
830
  page?: number
@@ -591,6 +853,12 @@ export class QueueService {
591
853
  return { logs, total }
592
854
  }
593
855
 
856
+ /**
857
+ * Cleans up old archived data based on retention policy.
858
+ *
859
+ * @param days - Retention period in days.
860
+ * @returns Number of records deleted.
861
+ */
594
862
  async cleanupArchive(days: number): Promise<number> {
595
863
  const persistence = this.manager.getPersistence()
596
864
  if (!persistence) {
@@ -599,11 +867,21 @@ export class QueueService {
599
867
  return await persistence.cleanup(days)
600
868
  }
601
869
 
870
+ /**
871
+ * Lists all registered Cron schedules.
872
+ *
873
+ * @returns List of schedules.
874
+ */
602
875
  async listSchedules(): Promise<any[]> {
603
876
  const scheduler = this.manager.getScheduler()
604
877
  return await scheduler.list()
605
878
  }
606
879
 
880
+ /**
881
+ * Registers a new Cron schedule.
882
+ *
883
+ * @param config - Schedule configuration.
884
+ */
607
885
  async registerSchedule(config: {
608
886
  id: string
609
887
  cron: string
@@ -614,16 +892,31 @@ export class QueueService {
614
892
  await scheduler.register(config)
615
893
  }
616
894
 
895
+ /**
896
+ * Removes a Cron schedule.
897
+ *
898
+ * @param id - Schedule ID.
899
+ */
617
900
  async removeSchedule(id: string): Promise<void> {
618
901
  const scheduler = this.manager.getScheduler()
619
902
  await scheduler.remove(id)
620
903
  }
621
904
 
905
+ /**
906
+ * Manually triggers a scheduled job immediately.
907
+ *
908
+ * @param id - Schedule ID.
909
+ */
622
910
  async runScheduleNow(id: string): Promise<void> {
623
911
  const scheduler = this.manager.getScheduler()
624
912
  await scheduler.runNow(id)
625
913
  }
626
914
 
915
+ /**
916
+ * Processes schedule ticks.
917
+ *
918
+ * Should be called periodically to check for due schedules.
919
+ */
627
920
  async tickScheduler(): Promise<void> {
628
921
  const scheduler = this.manager.getScheduler()
629
922
  await scheduler.tick()
@@ -1,6 +1,8 @@
1
1
  /**
2
2
  * Metrics representing CPU usage for a specific node/process.
3
3
  *
4
+ * Used to visualize load distribution and identify CPU-bound workers.
5
+ *
4
6
  * @public
5
7
  * @since 3.0.0
6
8
  */
@@ -9,13 +11,15 @@ export interface PulseCpu {
9
11
  system: number
10
12
  /** Process-specific CPU usage percentage (0-100). */
11
13
  process: number
12
- /** Number of CPU cores available. */
14
+ /** Number of CPU cores available on the host. */
13
15
  cores: number
14
16
  }
15
17
 
16
18
  /**
17
19
  * Metrics representing memory usage for a specific node/process.
18
20
  *
21
+ * Critical for detecting memory leaks and capacity planning.
22
+ *
19
23
  * @public
20
24
  * @since 3.0.0
21
25
  */
@@ -43,6 +47,9 @@ export interface PulseMemory {
43
47
  /**
44
48
  * Runtime metadata for a monitored process.
45
49
  *
50
+ * Provides context about the environment (Node.js version, platform)
51
+ * and current health status.
52
+ *
46
53
  * @public
47
54
  * @since 3.0.0
48
55
  */
@@ -53,13 +60,16 @@ export interface PulseRuntime {
53
60
  framework: string
54
61
  /** Current process status (e.g., 'online', 'maintenance'). */
55
62
  status?: string
56
- /** Last few error messages from the process. */
63
+ /** Last few error messages captured from the process stderr/logs. */
57
64
  errors?: string[]
58
65
  }
59
66
 
60
67
  /**
61
68
  * Statistics snapshot for a specific queue.
62
69
  *
70
+ * Represents the state of a queue at a specific point in time, including
71
+ * job counts and throughput metrics. Used for dashboard graphs.
72
+ *
63
73
  * @public
64
74
  * @since 3.0.0
65
75
  */
@@ -75,7 +85,7 @@ export interface QueueSnapshot {
75
85
  failed: number
76
86
  delayed: number
77
87
  }
78
- /** Historical throughput data. */
88
+ /** Historical throughput data (jobs processed per minute). */
79
89
  throughput?: {
80
90
  in: number
81
91
  out: number
@@ -85,23 +95,26 @@ export interface QueueSnapshot {
85
95
  /**
86
96
  * Represents a single application instance (node) monitored by Zenith.
87
97
  *
98
+ * A PulseNode corresponds to a running process (e.g., a worker, API server)
99
+ * that emits heartbeats. These nodes form the cluster topology.
100
+ *
88
101
  * @public
89
102
  * @since 3.0.0
90
103
  */
91
104
  export interface PulseNode {
92
- /** Unique execution ID for the node. */
105
+ /** Unique execution ID for the node (usually a UUID). */
93
106
  id: string
94
- /** Service group name. */
107
+ /** Service group name (e.g., "payment-worker", "api-gateway"). */
95
108
  service: string
96
109
  /** Programming language or runtime type. */
97
110
  language: 'node' | 'bun' | 'deno' | 'php' | 'go' | 'python' | 'other'
98
- /** Application version. */
111
+ /** Application version (from package.json). */
99
112
  version: string
100
- /** Process identifier. */
113
+ /** Process identifier (PID). */
101
114
  pid: number
102
115
  /** Hostname of the machine. */
103
116
  hostname: string
104
- /** Operating system platform. */
117
+ /** Operating system platform (e.g., darwin, linux, win32). */
105
118
  platform: string
106
119
  /** CPU metrics. */
107
120
  cpu: PulseCpu
@@ -113,13 +126,16 @@ export interface PulseNode {
113
126
  runtime: PulseRuntime
114
127
  /** Unstructured metadata (e.g., framework-specific details). */
115
128
  meta?: any
116
- /** Epoch timestamp of the last heartbeat. */
129
+ /** Epoch timestamp of the last heartbeat received. */
117
130
  timestamp: number
118
131
  }
119
132
 
120
133
  /**
121
134
  * Definition of an alert rule for monitoring health.
122
135
  *
136
+ * Alert rules define conditions that trigger notifications, such as
137
+ * high queue backlogs or worker failures.
138
+ *
123
139
  * @public
124
140
  * @since 3.0.0
125
141
  */
@@ -130,17 +146,19 @@ export interface AlertRule {
130
146
  name: string
131
147
  /** The metric type to monitor. */
132
148
  type: 'backlog' | 'failure' | 'worker_lost' | 'node_cpu' | 'node_ram'
133
- /** The value that triggers the alert. */
149
+ /** The value that triggers the alert (e.g., > 100 jobs). */
134
150
  threshold: number
135
- /** Optional queue name (if applicable). */
151
+ /** Optional queue name to scope the rule to. */
136
152
  queue?: string
137
- /** Minutes to wait before re-triggering the alert. */
153
+ /** Minutes to wait before re-triggering the alert (debounce). */
138
154
  cooldownMinutes: number
139
155
  }
140
156
 
141
157
  /**
142
158
  * Configuration for alert notification channels.
143
159
  *
160
+ * Defines where alerts should be sent when triggered.
161
+ *
144
162
  * @public
145
163
  * @since 3.0.0
146
164
  */
@@ -170,13 +188,15 @@ export interface AlertConfig {
170
188
  /**
171
189
  * Configuration for automated system maintenance.
172
190
  *
191
+ * Controls data retention policies and auto-cleanup tasks.
192
+ *
173
193
  * @public
174
194
  * @since 3.0.0
175
195
  */
176
196
  export interface MaintenanceConfig {
177
197
  /** Whether to automatically delete old data. */
178
198
  autoCleanup: boolean
179
- /** Number of days to retain records. */
199
+ /** Number of days to retain records (logs, metrics). */
180
200
  retentionDays: number
181
201
  /** Timestamp of the last maintenance run. */
182
202
  lastRun?: number
@@ -184,6 +204,11 @@ export interface MaintenanceConfig {
184
204
 
185
205
  /**
186
206
  * Represents a historical alert event.
207
+ *
208
+ * Stored in the database/log to track system health history.
209
+ *
210
+ * @public
211
+ * @since 3.0.0
187
212
  */
188
213
  export interface AlertEvent {
189
214
  id?: string