@gravito/zenith 0.1.0-beta.1 → 1.0.0

This diff compares publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (45)
  1. package/CHANGELOG.md +9 -0
  2. package/dist/bin.js +38846 -27303
  3. package/dist/client/assets/index-C332gZ-J.css +1 -0
  4. package/dist/client/assets/index-D4HibwTK.js +436 -0
  5. package/dist/client/index.html +2 -2
  6. package/dist/server/index.js +38846 -27303
  7. package/docs/ALERTING_GUIDE.md +71 -0
  8. package/docs/LARAVEL_ZENITH_ROADMAP.md +109 -0
  9. package/docs/QUASAR_MASTER_PLAN.md +140 -0
  10. package/package.json +52 -48
  11. package/scripts/debug_redis_keys.ts +24 -0
  12. package/specs/PULSE_SPEC.md +86 -0
  13. package/src/client/App.tsx +2 -0
  14. package/src/client/Layout.tsx +18 -0
  15. package/src/client/Sidebar.tsx +2 -1
  16. package/src/client/WorkerStatus.tsx +121 -76
  17. package/src/client/components/BrandIcons.tsx +138 -0
  18. package/src/client/components/ConfirmDialog.tsx +0 -1
  19. package/src/client/components/JobInspector.tsx +18 -6
  20. package/src/client/components/PageHeader.tsx +38 -0
  21. package/src/client/pages/OverviewPage.tsx +17 -20
  22. package/src/client/pages/PulsePage.tsx +478 -0
  23. package/src/client/pages/QueuesPage.tsx +1 -3
  24. package/src/client/pages/SettingsPage.tsx +640 -78
  25. package/src/client/pages/WorkersPage.tsx +71 -3
  26. package/src/client/pages/index.ts +1 -0
  27. package/src/server/index.ts +311 -11
  28. package/src/server/services/AlertService.ts +189 -41
  29. package/src/server/services/CommandService.ts +137 -0
  30. package/src/server/services/PulseService.ts +80 -0
  31. package/src/server/services/QueueService.ts +63 -6
  32. package/src/shared/types.ts +99 -0
  33. package/tsconfig.json +2 -2
  34. package/ARCHITECTURE.md +0 -88
  35. package/BATCH_OPERATIONS_IMPLEMENTATION.md +0 -159
  36. package/EVOLUTION_BLUEPRINT.md +0 -112
  37. package/JOBINSPECTOR_SCROLL_FIX.md +0 -152
  38. package/PULSE_IMPLEMENTATION_PLAN.md +0 -111
  39. package/TESTING_BATCH_OPERATIONS.md +0 -252
  40. package/dist/client/assets/index-DGYEwTDL.css +0 -1
  41. package/dist/client/assets/index-oyTdySX0.js +0 -421
  42. /package/{DEPLOYMENT.md → docs/DEPLOYMENT.md} +0 -0
  43. /package/{DOCS_INTERNAL.md → docs/DOCS_INTERNAL.md} +0 -0
  44. /package/{QUICK_TEST_GUIDE.md → docs/QUICK_TEST_GUIDE.md} +0 -0
  45. /package/{ROADMAP.md → docs/ROADMAP.md} +0 -0
@@ -1,7 +1,7 @@
1
1
  import { useQuery, useQueryClient } from '@tanstack/react-query'
2
2
  import { motion } from 'framer-motion'
3
3
  import { AlertCircle, Clock, Cpu, Gauge, MemoryStick, RefreshCcw, Server, Zap } from 'lucide-react'
4
- import React, { useEffect } from 'react'
4
+ import { useEffect } from 'react'
5
5
  import { cn } from '../utils'
6
6
 
7
7
  interface Worker {
@@ -18,7 +18,21 @@ interface Worker {
18
18
  total?: number
19
19
  }
20
20
  }
21
- queues?: string[]
21
+ queues?: {
22
+ name: string
23
+ size: {
24
+ waiting: number
25
+ active: number
26
+ failed: number
27
+ delayed: number
28
+ }
29
+ }[]
30
+ meta?: {
31
+ laravel?: {
32
+ workerCount: number
33
+ roots: string[]
34
+ }
35
+ }
22
36
  }
23
37
 
24
38
  export function WorkersPage() {
@@ -288,8 +302,62 @@ export function WorkersPage() {
288
302
  </div>
289
303
  )}
290
304
 
305
+ {/* Laravel & Queue Info (New) */}
306
+ <div className="mt-6 space-y-3">
307
+ {/* Monitored Queues */}
308
+ {worker.queues && worker.queues.length > 0 && (
309
+ <div className="bg-muted/10 p-3 rounded-xl border border-border/50">
310
+ <div className="flex items-center gap-2 mb-2">
311
+ <div className="w-1.5 h-1.5 bg-orange-500 rounded-full" />
312
+ <span className="text-[9px] font-black uppercase tracking-widest text-muted-foreground">
313
+ Monitored Queues
314
+ </span>
315
+ </div>
316
+ <div className="flex flex-wrap gap-2">
317
+ {worker.queues.map((q, i) => (
318
+ <div
319
+ key={i}
320
+ className="flex items-center gap-1.5 text-xs font-bold text-foreground/80 bg-background/80 px-2 py-1 rounded-md shadow-sm border border-border/50"
321
+ >
322
+ <span className="opacity-70">{q.name}</span>
323
+ {(q.size.waiting > 0 || q.size.failed > 0) && (
324
+ <span
325
+ className={cn(
326
+ 'px-1 rounded bg-muted text-[9px]',
327
+ q.size.failed > 0
328
+ ? 'text-red-500 bg-red-500/10'
329
+ : 'text-amber-500 bg-amber-500/10'
330
+ )}
331
+ >
332
+ {q.size.failed > 0
333
+ ? `${q.size.failed} failed`
334
+ : `${q.size.waiting} wait`}
335
+ </span>
336
+ )}
337
+ </div>
338
+ ))}
339
+ </div>
340
+ </div>
341
+ )}
342
+
343
+ {/* Laravel Workers Info */}
344
+ {worker.meta?.laravel && (
345
+ <div className="flex items-center justify-between p-3 bg-red-500/5 border border-red-500/10 rounded-xl">
346
+ <div className="flex items-center gap-2">
347
+ <span className="w-1.5 h-1.5 bg-red-500 rounded-full animate-pulse" />
348
+ <span className="text-[10px] font-black uppercase tracking-widest text-red-500/80">
349
+ Laravel Workers
350
+ </span>
351
+ </div>
352
+ <span className="font-mono text-sm font-black text-red-500">
353
+ {worker.meta.laravel.workerCount || 0}
354
+ </span>
355
+ </div>
356
+ )}
357
+ </div>
358
+
291
359
  {/* Uptime */}
292
- <div className="mt-6 pt-4 border-t border-border/30 flex items-center justify-between">
360
+ <div className="mt-4 pt-4 border-t border-border/30 flex items-center justify-between">
293
361
  <div className="flex items-center gap-2 text-muted-foreground">
294
362
  <Clock size={14} />
295
363
  <span className="text-[10px] font-bold uppercase tracking-widest">Uptime</span>
@@ -1,6 +1,7 @@
1
1
  export { LoginPage } from './LoginPage'
2
2
  export { MetricsPage } from './MetricsPage'
3
3
  export { OverviewPage } from './OverviewPage'
4
+ export { PulsePage } from './PulsePage'
4
5
  export { QueuesPage } from './QueuesPage'
5
6
  export { SchedulesPage } from './SchedulesPage'
6
7
  export { SettingsPage } from './SettingsPage'
@@ -1,9 +1,14 @@
1
1
  import { DB } from '@gravito/atlas'
2
2
  import { Photon } from '@gravito/photon'
3
+ import { QuasarAgent } from '@gravito/quasar'
3
4
  import { MySQLPersistence, SQLitePersistence } from '@gravito/stream'
5
+ import fs from 'fs'
4
6
  import { serveStatic } from 'hono/bun'
5
7
  import { getCookie } from 'hono/cookie'
6
8
  import { streamSSE } from 'hono/streaming'
9
+ import os from 'os'
10
+ import path from 'path'
11
+ import { fileURLToPath } from 'url'
7
12
  import {
8
13
  authMiddleware,
9
14
  createSession,
@@ -11,6 +16,8 @@ import {
11
16
  isAuthEnabled,
12
17
  verifyPassword,
13
18
  } from './middleware/auth'
19
+ import { CommandService } from './services/CommandService'
20
+ import { PulseService } from './services/PulseService'
14
21
  import { QueueService } from './services/QueueService'
15
22
 
16
23
  const app = new Photon()
@@ -58,15 +65,105 @@ if (dbDriver === 'sqlite' || process.env.DB_HOST) {
58
65
 
59
66
  // Service Initialization
60
67
  const queueService = new QueueService(REDIS_URL, QUEUE_PREFIX, persistence)
68
+ const pulseService = new PulseService(REDIS_URL)
69
+ const commandService = new CommandService(REDIS_URL)
61
70
 
62
71
  queueService
63
72
  .connect()
73
+ .then(() => pulseService.connect())
74
+ .then(() => commandService.connect())
64
75
  .then(() => {
76
+ // Start Self-Monitoring (Quasar)
77
+ const agent = new QuasarAgent({
78
+ service: 'flux-console',
79
+ redisUrl: REDIS_URL,
80
+ })
81
+ agent.start().catch((err) => console.error('[FluxConsole] Quasar Agent Error:', err))
82
+
65
83
  console.log(`[FluxConsole] Connected to Redis at ${REDIS_URL}`)
66
84
  // Start background metrics recording (Reduced from 5s to 2s for better real-time feel)
67
- setInterval(() => {
68
- queueService.recordStatusMetrics().catch(console.error)
69
- }, 2000)
85
+ const updateMetrics = async () => {
86
+ try {
87
+ const [pulseNodes, legacyWorkers] = await Promise.all([
88
+ pulseService.getNodes(),
89
+ queueService.listWorkers(),
90
+ ])
91
+
92
+ const pulseWorkers = Object.values(pulseNodes)
93
+ .flat()
94
+ .flatMap((node) => {
95
+ const mainNode = {
96
+ id: node.id,
97
+ service: node.service,
98
+ status: node.runtime.status || 'online',
99
+ pid: node.pid,
100
+ uptime: node.runtime.uptime,
101
+ metrics: {
102
+ cpu: node.cpu.process,
103
+ cores: node.cpu.cores,
104
+ ram: {
105
+ rss: node.memory.process.rss,
106
+ heapUsed: node.memory.process.heapUsed,
107
+ total: node.memory.system.total,
108
+ },
109
+ },
110
+ queues: node.queues,
111
+ meta: node.meta,
112
+ }
113
+
114
+ const subWorkers: any[] = []
115
+ if (node.meta?.laravel?.workers && Array.isArray(node.meta.laravel.workers)) {
116
+ node.meta.laravel.workers.forEach((w: any) => {
117
+ subWorkers.push({
118
+ id: `${node.id}-php-${w.pid}`,
119
+ service: `${node.service} / LARAVEL`,
120
+ status: w.status === 'running' || w.status === 'sleep' ? 'online' : 'idle',
121
+ pid: w.pid,
122
+ uptime: node.runtime.uptime,
123
+ metrics: {
124
+ cpu: w.cpu,
125
+ cores: 1,
126
+ ram: {
127
+ rss: w.memory,
128
+ heapUsed: w.memory,
129
+ total: node.memory.system.total,
130
+ },
131
+ },
132
+ meta: { isVirtual: true, cmdline: w.cmdline },
133
+ })
134
+ })
135
+ }
136
+ return [mainNode, ...subWorkers]
137
+ })
138
+
139
+ const formattedLegacy = legacyWorkers.map((w) => ({
140
+ id: w.id,
141
+ status: 'online',
142
+ pid: w.pid,
143
+ uptime: w.uptime,
144
+ metrics: {
145
+ cpu: (w.loadAvg[0] || 0) * 100,
146
+ cores: 0,
147
+ ram: {
148
+ rss: parseInt(w.memory.rss || '0', 10),
149
+ heapUsed: parseInt(w.memory.heapUsed || '0', 10),
150
+ total: 0,
151
+ },
152
+ },
153
+ queues: w.queues.map((q) => ({
154
+ name: q,
155
+ size: { waiting: 0, active: 0, failed: 0, delayed: 0 },
156
+ })),
157
+ meta: {},
158
+ }))
159
+
160
+ await queueService.recordStatusMetrics(pulseNodes, [...pulseWorkers, ...formattedLegacy])
161
+ } catch (err) {
162
+ console.error('[FluxConsole] Metrics Update Error:', err)
163
+ }
164
+ }
165
+
166
+ setInterval(updateMetrics, 2000)
70
167
 
71
168
  // Start Scheduler Tick (Reduced from 10s to 5s)
72
169
  setInterval(() => {
@@ -74,7 +171,7 @@ queueService
74
171
  }, 5000)
75
172
 
76
173
  // Record initial snapshot
77
- queueService.recordStatusMetrics().catch(console.error)
174
+ updateMetrics()
78
175
  })
79
176
  .catch((err) => {
80
177
  console.error('[FluxConsole] Failed to connect to Redis', err)
@@ -302,9 +399,91 @@ api.get('/throughput', async (c) => {
302
399
 
303
400
  api.get('/workers', async (c) => {
304
401
  try {
305
- const workers = await queueService.listWorkers()
306
- return c.json({ workers })
402
+ const [legacyWorkers, pulseNodes] = await Promise.all([
403
+ queueService.listWorkers(),
404
+ pulseService.getNodes(),
405
+ ])
406
+
407
+ // Transform PulseNodes to match the frontend Worker interface
408
+ const pulseWorkers = Object.values(pulseNodes)
409
+ .flat()
410
+ .flatMap((node) => {
411
+ // 1. The Main Agent Node
412
+ const mainNode = {
413
+ id: node.id,
414
+ service: node.service,
415
+ status: node.runtime.status || 'online',
416
+ pid: node.pid,
417
+ uptime: node.runtime.uptime,
418
+ metrics: {
419
+ cpu: node.cpu.process,
420
+ cores: node.cpu.cores,
421
+ ram: {
422
+ rss: node.memory.process.rss,
423
+ heapUsed: node.memory.process.heapUsed,
424
+ total: node.memory.system.total,
425
+ },
426
+ },
427
+ queues: node.queues,
428
+ meta: node.meta,
429
+ }
430
+
431
+ // 2. Virtual Child Workers (e.g. Laravel)
432
+ const subWorkers: any[] = []
433
+ if (node.meta?.laravel?.workers && Array.isArray(node.meta.laravel.workers)) {
434
+ node.meta.laravel.workers.forEach((w: any) => {
435
+ subWorkers.push({
436
+ id: `${node.id}-php-${w.pid}`,
437
+ service: `${node.service} / LARAVEL`, // Distinct service name
438
+ status: w.status === 'running' || w.status === 'sleep' ? 'online' : 'idle',
439
+ pid: w.pid,
440
+ uptime: node.runtime.uptime, // Inherit uptime for now, or 0
441
+ metrics: {
442
+ cpu: w.cpu, // Per-process CPU
443
+ cores: 1, // Single threaded PHP
444
+ ram: {
445
+ rss: w.memory,
446
+ heapUsed: w.memory,
447
+ total: node.memory.system.total,
448
+ },
449
+ },
450
+ meta: {
451
+ // Tag it so UI can maybe style it differently?
452
+ isVirtual: true,
453
+ cmdline: w.cmdline,
454
+ },
455
+ })
456
+ })
457
+ }
458
+
459
+ return [mainNode, ...subWorkers]
460
+ })
461
+
462
+ // Transform Legacy Workers to match interface (best effort)
463
+ const formattedLegacy = legacyWorkers.map((w) => ({
464
+ id: w.id,
465
+ status: 'online',
466
+ pid: w.pid,
467
+ uptime: w.uptime,
468
+ metrics: {
469
+ cpu: (w.loadAvg[0] || 0) * 100, // Rough estimate
470
+ cores: 0,
471
+ ram: {
472
+ rss: parseInt(w.memory.rss || '0', 10),
473
+ heapUsed: parseInt(w.memory.heapUsed || '0', 10),
474
+ total: 0,
475
+ },
476
+ },
477
+ queues: w.queues.map((q) => ({
478
+ name: q,
479
+ size: { waiting: 0, active: 0, failed: 0, delayed: 0 },
480
+ })),
481
+ meta: {},
482
+ }))
483
+
484
+ return c.json({ workers: [...pulseWorkers, ...formattedLegacy] })
307
485
  } catch (_err) {
486
+ console.error(_err)
308
487
  return c.json({ error: 'Failed to fetch workers' }, 500)
309
488
  }
310
489
  })
@@ -328,19 +507,83 @@ api.get('/metrics/history', async (c) => {
328
507
 
329
508
  api.get('/system/status', (c) => {
330
509
  const mem = process.memoryUsage()
510
+ const totalMem = os.totalmem()
511
+
512
+ // Find package.json (relative to this file in src/server/index.ts)
513
+ const __dirname = path.dirname(fileURLToPath(import.meta.url))
514
+ const pkgPath = path.resolve(__dirname, '../../package.json')
515
+ let pkg = { version: '0.1.0-unknown', name: '@gravito/zenith' }
516
+ try {
517
+ pkg = JSON.parse(fs.readFileSync(pkgPath, 'utf8'))
518
+ } catch (_e) {
519
+ // fallback
520
+ }
521
+
331
522
  return c.json({
332
523
  node: process.version,
333
524
  memory: {
334
525
  rss: `${(mem.rss / 1024 / 1024).toFixed(2)} MB`,
335
526
  heapUsed: `${(mem.heapUsed / 1024 / 1024).toFixed(2)} MB`,
336
- total: '4.00 GB', // Hardcoded limit for demo aesthetic
527
+ total: `${(totalMem / 1024 / 1024 / 1024).toFixed(2)} GB`,
337
528
  },
338
- engine: 'v0.1.0-beta.1',
529
+ version: pkg.version,
530
+ package: pkg.name,
531
+ engine: `Zenith ${pkg.version}`,
339
532
  uptime: process.uptime(),
340
- env: process.env.NODE_ENV || 'production-east-1',
533
+ env:
534
+ process.env.NODE_ENV === 'production'
535
+ ? `production (${os.hostname()})`
536
+ : `development (${os.hostname()})`,
537
+ redisUrl: process.env.REDIS_URL || 'redis://localhost:6379',
341
538
  })
342
539
  })
343
540
 
541
+ // --- Pulse Monitoring ---
542
+ api.get('/pulse/nodes', async (c) => {
543
+ try {
544
+ const nodes = await pulseService.getNodes()
545
+ return c.json({ nodes })
546
+ } catch (_err) {
547
+ return c.json({ error: 'Failed to fetch pulse nodes' }, 500)
548
+ }
549
+ })
550
+
551
+ // --- Pulse Remote Control (Phase 3) ---
552
+ api.post('/pulse/command', async (c) => {
553
+ try {
554
+ const { service, nodeId, type, queue, jobKey, driver, action } = await c.req.json()
555
+
556
+ // Validate required fields
557
+ if (!service || !nodeId || !type || !queue || !jobKey) {
558
+ return c.json({ error: 'Missing required fields: service, nodeId, type, queue, jobKey' }, 400)
559
+ }
560
+
561
+ // Validate command type
562
+ if (type !== 'RETRY_JOB' && type !== 'DELETE_JOB' && type !== 'LARAVEL_ACTION') {
563
+ return c.json(
564
+ { error: 'Invalid command type. Allowed: RETRY_JOB, DELETE_JOB, LARAVEL_ACTION' },
565
+ 400
566
+ )
567
+ }
568
+
569
+ const commandId = await commandService.sendCommand(service, nodeId, type, {
570
+ queue,
571
+ jobKey,
572
+ driver: driver || 'redis',
573
+ action,
574
+ })
575
+
576
+ return c.json({
577
+ success: true,
578
+ commandId,
579
+ message: `Command ${type} sent to ${nodeId}. Observe job state for result.`,
580
+ })
581
+ } catch (err) {
582
+ console.error('[CommandService] Error:', err)
583
+ return c.json({ error: 'Failed to send command' }, 500)
584
+ }
585
+ })
586
+
344
587
  api.post('/queues/:name/jobs/delete', async (c) => {
345
588
  const queueName = c.req.param('name')
346
589
  const { type, raw } = await c.req.json()
@@ -454,9 +697,23 @@ api.get('/logs/stream', async (c) => {
454
697
  })
455
698
  })
456
699
 
700
+ // 4. Poll Pulse Nodes per client (simple polling for now)
701
+ const pulseInterval = setInterval(async () => {
702
+ try {
703
+ const nodes = await pulseService.getNodes()
704
+ await stream.writeSSE({
705
+ data: JSON.stringify({ nodes }),
706
+ event: 'pulse',
707
+ })
708
+ } catch (err) {
709
+ // ignore errors
710
+ }
711
+ }, 2000)
712
+
457
713
  stream.onAbort(() => {
458
714
  unsubscribeLogs()
459
715
  unsubscribeStats()
716
+ clearInterval(pulseInterval)
460
717
  })
461
718
 
462
719
  // Keep alive
@@ -508,17 +765,60 @@ api.delete('/schedules/:id', async (c) => {
508
765
  })
509
766
 
510
767
  // --- Alerting ---
511
- api.get('/alerts/config', (c) => {
768
+ api.get('/alerts/config', async (c) => {
512
769
  return c.json({
513
770
  rules: queueService.alerts.getRules(),
514
- webhookEnabled: !!process.env.SLACK_WEBHOOK_URL,
771
+ config: queueService.alerts.getConfig(),
772
+ maintenance: await queueService.getMaintenanceConfig(),
515
773
  })
516
774
  })
517
775
 
776
+ api.post('/maintenance/config', async (c) => {
777
+ const config = await c.req.json()
778
+ try {
779
+ await queueService.saveMaintenanceConfig(config)
780
+ return c.json({ success: true })
781
+ } catch (err) {
782
+ return c.json({ error: 'Failed to save maintenance config' }, 500)
783
+ }
784
+ })
785
+
786
+ api.post('/alerts/config', async (c) => {
787
+ const config = await c.req.json()
788
+ try {
789
+ await queueService.alerts.saveConfig(config)
790
+ return c.json({ success: true })
791
+ } catch (err) {
792
+ return c.json({ error: 'Failed to save alert config' }, 500)
793
+ }
794
+ })
795
+
796
+ api.post('/alerts/rules', async (c) => {
797
+ const rule = await c.req.json()
798
+ try {
799
+ await queueService.alerts.addRule(rule)
800
+ return c.json({ success: true })
801
+ } catch (err) {
802
+ return c.json({ error: 'Failed to add rule' }, 500)
803
+ }
804
+ })
805
+
806
+ api.delete('/alerts/rules/:id', async (c) => {
807
+ const id = c.req.param('id')
808
+ try {
809
+ await queueService.alerts.deleteRule(id)
810
+ return c.json({ success: true })
811
+ } catch (err) {
812
+ return c.json({ error: 'Failed to delete rule' }, 500)
813
+ }
814
+ })
815
+
518
816
  api.post('/alerts/test', async (c) => {
519
817
  try {
818
+ const nodes = await pulseService.getNodes()
520
819
  queueService.alerts.check({
521
820
  queues: [],
821
+ nodes,
522
822
  workers: [
523
823
  {
524
824
  id: 'test-node',