@gravito/zenith 0.1.0-beta.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ARCHITECTURE.md +88 -0
- package/BATCH_OPERATIONS_IMPLEMENTATION.md +159 -0
- package/DEMO.md +156 -0
- package/DEPLOYMENT.md +157 -0
- package/DOCS_INTERNAL.md +73 -0
- package/Dockerfile +46 -0
- package/Dockerfile.demo-worker +29 -0
- package/EVOLUTION_BLUEPRINT.md +112 -0
- package/JOBINSPECTOR_SCROLL_FIX.md +152 -0
- package/PULSE_IMPLEMENTATION_PLAN.md +111 -0
- package/QUICK_TEST_GUIDE.md +72 -0
- package/README.md +33 -0
- package/ROADMAP.md +85 -0
- package/TESTING_BATCH_OPERATIONS.md +252 -0
- package/bin/flux-console.ts +2 -0
- package/dist/bin.js +108196 -0
- package/dist/client/assets/index-DGYEwTDL.css +1 -0
- package/dist/client/assets/index-oyTdySX0.js +421 -0
- package/dist/client/index.html +13 -0
- package/dist/server/index.js +108191 -0
- package/docker-compose.yml +40 -0
- package/docs/integrations/LARAVEL.md +207 -0
- package/package.json +50 -0
- package/postcss.config.js +6 -0
- package/scripts/flood-logs.ts +21 -0
- package/scripts/seed.ts +213 -0
- package/scripts/verify-throttle.ts +45 -0
- package/scripts/worker.ts +123 -0
- package/src/bin.ts +6 -0
- package/src/client/App.tsx +70 -0
- package/src/client/Layout.tsx +644 -0
- package/src/client/Sidebar.tsx +102 -0
- package/src/client/ThroughputChart.tsx +135 -0
- package/src/client/WorkerStatus.tsx +170 -0
- package/src/client/components/ConfirmDialog.tsx +103 -0
- package/src/client/components/JobInspector.tsx +524 -0
- package/src/client/components/LogArchiveModal.tsx +383 -0
- package/src/client/components/NotificationBell.tsx +203 -0
- package/src/client/components/Toaster.tsx +80 -0
- package/src/client/components/UserProfileDropdown.tsx +177 -0
- package/src/client/contexts/AuthContext.tsx +93 -0
- package/src/client/contexts/NotificationContext.tsx +103 -0
- package/src/client/index.css +174 -0
- package/src/client/index.html +12 -0
- package/src/client/main.tsx +15 -0
- package/src/client/pages/LoginPage.tsx +153 -0
- package/src/client/pages/MetricsPage.tsx +408 -0
- package/src/client/pages/OverviewPage.tsx +511 -0
- package/src/client/pages/QueuesPage.tsx +372 -0
- package/src/client/pages/SchedulesPage.tsx +531 -0
- package/src/client/pages/SettingsPage.tsx +449 -0
- package/src/client/pages/WorkersPage.tsx +316 -0
- package/src/client/pages/index.ts +7 -0
- package/src/client/utils.ts +6 -0
- package/src/server/index.ts +556 -0
- package/src/server/middleware/auth.ts +127 -0
- package/src/server/services/AlertService.ts +160 -0
- package/src/server/services/QueueService.ts +828 -0
- package/tailwind.config.js +73 -0
- package/tests/placeholder.test.ts +7 -0
- package/tsconfig.json +38 -0
- package/tsconfig.node.json +12 -0
- package/vite.config.ts +27 -0
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
import { EventEmitter } from 'events'
|
|
2
|
+
import type { WorkerReport } from './QueueService'
|
|
3
|
+
|
|
4
|
+
/** Kinds of condition an alert rule can watch for. */
export type AlertRuleType = 'backlog' | 'failure' | 'worker_lost'

/**
 * Declarative description of one alerting condition.
 */
export interface AlertRule {
  /** Identifier of the rule; also used as the key for cool-down tracking. */
  id: string
  /** Human-readable label for the rule. */
  name: string
  /** Which metric the rule is evaluated against. */
  type: AlertRuleType
  /**
   * Numeric limit the metric is compared with. `backlog` and `failure` rules
   * fire when the metric is at or above this value; `worker_lost` rules fire
   * when the worker count is below it.
   */
  threshold: number
  /** Optional: a specific queue to scope the rule to; omitted means all queues. */
  queue?: string
  /** Minimum number of minutes between two firings of this rule. */
  cooldownMinutes: number
}
|
|
12
|
+
|
|
13
|
+
/** How urgent a fired alert is. */
export type AlertSeverity = 'warning' | 'critical'

/**
 * Record describing a single alert that has fired.
 */
export interface AlertEvent {
  /** `id` of the rule that produced this event. */
  ruleId: string
  /** Epoch-millisecond timestamp at which the alert fired. */
  timestamp: number
  /** Human-readable description of what was detected. */
  message: string
  /** Urgency level attached to the alert. */
  severity: AlertSeverity
}
|
|
19
|
+
|
|
20
|
+
/**
 * Evaluates alert rules against queue/worker metrics and dispatches alerts.
 *
 * A fired alert is delivered to in-process listeners registered through
 * `onAlert` and, when a webhook URL is configured, POSTed as a Slack-formatted
 * JSON payload. Every rule is rate-limited by its own cool-down window.
 */
export class AlertService {
  private rules: AlertRule[] = []
  /** Rule id -> epoch-ms timestamp of the last time that rule fired. */
  private cooldowns: Map<string, number> = new Map()
  /** Webhook target; seeded from SLACK_WEBHOOK_URL, `null` disables delivery. */
  private webhookUrl: string | null = process.env.SLACK_WEBHOOK_URL || null
  private emitter = new EventEmitter()

  constructor() {
    // Default Rules
    this.rules = [
      {
        id: 'global_failure_spike',
        name: 'High Failure Rate',
        type: 'failure',
        threshold: 50, // More than 50 failed jobs
        cooldownMinutes: 30,
      },
      {
        id: 'global_backlog_critical',
        name: 'Queue Backlog Warning',
        type: 'backlog',
        threshold: 1000, // More than 1000 waiting jobs
        cooldownMinutes: 60,
      },
      {
        id: 'no_workers_online',
        name: 'All Workers Offline',
        type: 'worker_lost',
        threshold: 1, // < 1 worker
        cooldownMinutes: 15,
      },
    ]
  }

  /**
   * Replaces the webhook URL used for external notifications.
   *
   * @param url - Target URL, or `null` to disable webhook delivery.
   */
  setWebhook(url: string | null) {
    this.webhookUrl = url
  }

  /**
   * Subscribes to fired alerts.
   *
   * The callback is wrapped so that an exception thrown by one listener is
   * logged instead of propagating into `check()`, where it would abort the
   * evaluation of the remaining rules and skip the webhook notification.
   *
   * @param callback - Invoked synchronously with every fired alert.
   * @returns A function that removes this subscription.
   */
  onAlert(callback: (event: AlertEvent) => void) {
    const guarded = (event: AlertEvent) => {
      try {
        callback(event)
      } catch (err) {
        console.error('[AlertService] Alert listener threw:', AlertService.describeError(err))
      }
    }
    this.emitter.on('alert', guarded)
    return () => this.emitter.off('alert', guarded)
  }

  /**
   * Evaluates rules against provided data.
   * Extremely lightweight: only uses existing metrics data.
   *
   * Rules still inside their cool-down window are skipped. For each rule that
   * fires, the cool-down is recorded, listeners are notified, and the webhook
   * (if configured) is triggered without being awaited.
   *
   * @param data - Current metrics: `workers` and `totals` are read;
   *   `queues` is accepted but not inspected by the current rules.
   */
  async check(data: {
    queues: unknown[]
    workers: WorkerReport[]
    totals: { waiting: number; delayed: number; failed: number }
  }) {
    const now = Date.now()

    for (const rule of this.rules) {
      // 1. Check Cool-down (a rule that has never fired has no entry)
      const lastFire = this.cooldowns.get(rule.id)
      if (lastFire !== undefined && now - lastFire < rule.cooldownMinutes * 60 * 1000) {
        continue
      }

      // 2. Evaluate Rule
      const outcome = this.evaluate(rule, data)
      if (outcome === null) {
        continue
      }

      // 3. Dispatch
      this.cooldowns.set(rule.id, now)
      const event: AlertEvent = {
        ruleId: rule.id,
        timestamp: now,
        message: outcome.message,
        severity: outcome.severity,
      }

      this.emitter.emit('alert', event)
      this.notify(event)
    }
  }

  /**
   * Decides whether a single rule fires for the given metrics.
   *
   * NOTE(review): `rule.queue` is not consulted here; every rule is evaluated
   * against the global totals regardless of that field.
   *
   * @returns The message and severity to report, or `null` when the rule does
   *   not fire.
   */
  private evaluate(
    rule: AlertRule,
    data: {
      workers: WorkerReport[]
      totals: { waiting: number; delayed: number; failed: number }
    }
  ): { message: string; severity: 'warning' | 'critical' } | null {
    switch (rule.type) {
      case 'backlog':
        if (data.totals.waiting < rule.threshold) {
          return null
        }
        return {
          severity: 'critical',
          message: `Queue backlog detected: ${data.totals.waiting} jobs waiting across all queues.`,
        }

      case 'failure':
        if (data.totals.failed < rule.threshold) {
          return null
        }
        return {
          severity: 'warning',
          message: `High failure count: ${data.totals.failed} jobs are currently in failed state.`,
        }

      case 'worker_lost': {
        const online = data.workers.length
        if (online >= rule.threshold) {
          return null
        }
        return {
          severity: 'critical',
          // Only claim "zero workers" when that is actually the case; a
          // threshold above 1 can fire while some workers are still online.
          message:
            online === 0
              ? `System Incident: Zero worker nodes detected! Jobs will not be processed.`
              : `System Incident: Only ${online} worker node(s) detected (minimum ${rule.threshold}).`,
        }
      }

      default:
        // Unknown rule type: never fires.
        return null
    }
  }

  /**
   * Send notification to external channels.
   * Fire-and-forget to ensure zero impact on main loop latency.
   *
   * Failures are logged and never thrown: this covers a rejected request, a
   * non-2xx HTTP response, and a synchronous error while starting the request.
   */
  private notify(event: AlertEvent) {
    const url = this.webhookUrl
    if (!url) return

    // Simple Slack formatting
    const payload = {
      text: `*Flux Console Alert [${event.severity.toUpperCase()}]*\n${event.message}\n_Time: ${new Date(event.timestamp).toISOString()}_`,
      attachments: [
        {
          color: event.severity === 'critical' ? '#ef4444' : '#f59e0b',
          fields: [
            { title: 'Rule', value: event.ruleId, short: true },
            { title: 'Severity', value: event.severity, short: true },
          ],
        },
      ],
    }

    try {
      void fetch(url, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify(payload),
      })
        .then((res) => {
          // fetch resolves on HTTP error statuses, so check explicitly.
          if (!res.ok) {
            console.error(`[AlertService] Webhook responded with HTTP ${res.status}`)
          }
        })
        .catch((err) => {
          console.error(
            '[AlertService] Failed to send notification:',
            AlertService.describeError(err)
          )
        })
    } catch (err) {
      // Synchronous failure while starting the request (e.g. no global fetch).
      console.error('[AlertService] Failed to send notification:', AlertService.describeError(err))
    }
  }

  /**
   * Returns the configured rules.
   *
   * @returns The service's own rule array (not a copy).
   */
  getRules() {
    return this.rules
  }

  /** Extracts a printable message from an arbitrary thrown value. */
  private static describeError(err: unknown): string {
    return err instanceof Error ? err.message : String(err)
  }
}
|