@gravito/zenith 0.1.0-beta.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ARCHITECTURE.md +88 -0
- package/BATCH_OPERATIONS_IMPLEMENTATION.md +159 -0
- package/DEMO.md +156 -0
- package/DEPLOYMENT.md +157 -0
- package/DOCS_INTERNAL.md +73 -0
- package/Dockerfile +46 -0
- package/Dockerfile.demo-worker +29 -0
- package/EVOLUTION_BLUEPRINT.md +112 -0
- package/JOBINSPECTOR_SCROLL_FIX.md +152 -0
- package/PULSE_IMPLEMENTATION_PLAN.md +111 -0
- package/QUICK_TEST_GUIDE.md +72 -0
- package/README.md +33 -0
- package/ROADMAP.md +85 -0
- package/TESTING_BATCH_OPERATIONS.md +252 -0
- package/bin/flux-console.ts +2 -0
- package/dist/bin.js +108196 -0
- package/dist/client/assets/index-DGYEwTDL.css +1 -0
- package/dist/client/assets/index-oyTdySX0.js +421 -0
- package/dist/client/index.html +13 -0
- package/dist/server/index.js +108191 -0
- package/docker-compose.yml +40 -0
- package/docs/integrations/LARAVEL.md +207 -0
- package/package.json +50 -0
- package/postcss.config.js +6 -0
- package/scripts/flood-logs.ts +21 -0
- package/scripts/seed.ts +213 -0
- package/scripts/verify-throttle.ts +45 -0
- package/scripts/worker.ts +123 -0
- package/src/bin.ts +6 -0
- package/src/client/App.tsx +70 -0
- package/src/client/Layout.tsx +644 -0
- package/src/client/Sidebar.tsx +102 -0
- package/src/client/ThroughputChart.tsx +135 -0
- package/src/client/WorkerStatus.tsx +170 -0
- package/src/client/components/ConfirmDialog.tsx +103 -0
- package/src/client/components/JobInspector.tsx +524 -0
- package/src/client/components/LogArchiveModal.tsx +383 -0
- package/src/client/components/NotificationBell.tsx +203 -0
- package/src/client/components/Toaster.tsx +80 -0
- package/src/client/components/UserProfileDropdown.tsx +177 -0
- package/src/client/contexts/AuthContext.tsx +93 -0
- package/src/client/contexts/NotificationContext.tsx +103 -0
- package/src/client/index.css +174 -0
- package/src/client/index.html +12 -0
- package/src/client/main.tsx +15 -0
- package/src/client/pages/LoginPage.tsx +153 -0
- package/src/client/pages/MetricsPage.tsx +408 -0
- package/src/client/pages/OverviewPage.tsx +511 -0
- package/src/client/pages/QueuesPage.tsx +372 -0
- package/src/client/pages/SchedulesPage.tsx +531 -0
- package/src/client/pages/SettingsPage.tsx +449 -0
- package/src/client/pages/WorkersPage.tsx +316 -0
- package/src/client/pages/index.ts +7 -0
- package/src/client/utils.ts +6 -0
- package/src/server/index.ts +556 -0
- package/src/server/middleware/auth.ts +127 -0
- package/src/server/services/AlertService.ts +160 -0
- package/src/server/services/QueueService.ts +828 -0
- package/tailwind.config.js +73 -0
- package/tests/placeholder.test.ts +7 -0
- package/tsconfig.json +38 -0
- package/tsconfig.node.json +12 -0
- package/vite.config.ts +27 -0
|
@@ -0,0 +1,828 @@
|
|
|
1
|
+
import { EventEmitter } from 'node:events'
|
|
2
|
+
import { type MySQLPersistence, QueueManager } from '@gravito/stream'
|
|
3
|
+
import { Redis } from 'ioredis'
|
|
4
|
+
import { AlertService } from './AlertService'
|
|
5
|
+
|
|
6
|
+
/** Point-in-time counters for a single queue, as read from Redis keys. */
export interface QueueStats {
  name: string
  /** Jobs in the main list, ready to be processed. */
  waiting: number
  /** Jobs in the `:delayed` ZSET, scheduled for later. */
  delayed: number
  /** Jobs in the `:failed` (dead-letter) list. */
  failed: number
  /** Jobs currently held in the `:active` set by workers. */
  active: number
  /** True when the `:paused` flag key is set to '1'. */
  paused: boolean
}
|
|
14
|
+
|
|
15
|
+
/**
 * Heartbeat payload published by a worker process.
 * Read back from `flux_console:worker:*` keys by listWorkers().
 */
export interface WorkerReport {
  id: string
  hostname: string
  pid: number
  /** Process uptime — units set by the publishing worker; TODO confirm seconds. */
  uptime: number
  /** Human-readable memory figures (pre-formatted strings, not bytes). */
  memory: {
    rss: string
    heapTotal: string
    heapUsed: string
  }
  /** Queue names this worker consumes. */
  queues: string[]
  concurrency: number
  /** ISO timestamp of the heartbeat. */
  timestamp: string
  /** OS load averages as reported by the worker. */
  loadAvg: number[]
}
|
|
30
|
+
|
|
31
|
+
/** A single log line flowing over the `flux_console:logs` pub/sub channel. */
export interface SystemLog {
  /** 'success'/'error' are treated as job-final statuses by the throughput counter. */
  level: 'info' | 'warn' | 'error' | 'success'
  message: string
  workerId: string
  /** Queue the log relates to, when applicable. */
  queue?: string
  /** ISO timestamp set at publish time. */
  timestamp: string
}
|
|
38
|
+
|
|
39
|
+
/** Snapshot broadcast to subscribers via onStats() on each metrics tick. */
export interface GlobalStats {
  queues: QueueStats[]
  /** Per-minute processed-job counts, formatted HH:MM. */
  throughput: { timestamp: string; count: number }[]
  workers: WorkerReport[]
}
|
|
44
|
+
|
|
45
|
+
/**
 * Console-side facade over the queue system: reads queue/worker state from
 * Redis, relays the live log stream, and delegates archive/scheduler work
 * to a QueueManager.
 */
export class QueueService {
  // Primary connection: commands, pipelines, Lua scripts.
  private redis: Redis
  // Dedicated connection for pub/sub — a subscribed ioredis client cannot
  // issue regular commands, so a second connection is required.
  private subRedis: Redis
  // Key prefix shared with workers (e.g. 'queue:').
  private prefix: string
  // Fan-out hub for 'log' and 'stats' events consumed by transport handlers.
  private logEmitter = new EventEmitter()
  // Rolling one-second window used to cap emitted log events.
  private logThrottleCount = 0
  private logThrottleReset = Date.now()
  private readonly MAX_LOGS_PER_SEC = 50
  private manager: QueueManager
  // Alert rule evaluation, fed from recordStatusMetrics().
  public alerts = new AlertService()

  /**
   * Builds the service with two lazy Redis connections and a QueueManager.
   * Nothing touches the network until connect() is called (lazyConnect).
   *
   * @param redisUrl    Redis connection string.
   * @param prefix      Key prefix for all queue keys (default 'queue:').
   * @param persistence Optional SQL archive adapter plus archive flags,
   *                    passed through to the QueueManager.
   */
  constructor(
    redisUrl: string,
    prefix = 'queue:',
    persistence?: {
      adapter: MySQLPersistence
      archiveCompleted?: boolean
      archiveFailed?: boolean
      archiveEnqueued?: boolean
    }
  ) {
    this.redis = new Redis(redisUrl, {
      lazyConnect: true,
    })
    this.subRedis = new Redis(redisUrl, {
      lazyConnect: true,
    })
    this.prefix = prefix
    // Many SSE/WebSocket clients may subscribe concurrently.
    this.logEmitter.setMaxListeners(1000)

    // Manager is used for persistence, scheduler, and failed-job operations.
    this.manager = new QueueManager({
      default: 'redis',
      connections: {
        redis: {
          driver: 'redis',
          client: this.redis as any,
          prefix,
        },
      },
      persistence,
    })
  }
|
|
88
|
+
|
|
89
|
+
/**
 * Opens both Redis connections and wires the log-stream subscription.
 * Incoming logs are rate-limited to MAX_LOGS_PER_SEC per wall-clock second;
 * messages over the limit are silently dropped (not queued).
 */
async connect() {
  await Promise.all([this.redis.connect(), this.subRedis.connect()])

  // Setup single Redis subscription
  await this.subRedis.subscribe('flux_console:logs')
  this.subRedis.on('message', (channel, message) => {
    if (channel === 'flux_console:logs') {
      try {
        // Throttling: Reset counter every second
        const now = Date.now()
        if (now - this.logThrottleReset > 1000) {
          this.logThrottleReset = now
          this.logThrottleCount = 0
        }

        // Emit only if under limit
        if (this.logThrottleCount < this.MAX_LOGS_PER_SEC) {
          this.logThrottleCount++
          const log = JSON.parse(message)
          this.logEmitter.emit('log', log)

          // Increment throughput counter if it's a job final status.
          // NOTE(review): publishLog() also increments the same counter for
          // every log it sends — if both paths run in one deployment the
          // minute counters may double-count; verify against the workers.
          if (log.level === 'success' || log.level === 'error') {
            const minute = Math.floor(Date.now() / 60000)
            // Fire-and-forget: counter loss is acceptable, errors swallowed.
            this.redis
              .incr(`flux_console:throughput:${minute}`)
              .then(() => {
                this.redis.expire(`flux_console:throughput:${minute}`, 3600)
              })
              .catch(() => {})
          }
        }
      } catch (_e) {
        // Ignore malformed JSON payloads — bad producers must not kill the stream.
      }
    }
  })
}
|
|
127
|
+
|
|
128
|
+
/**
|
|
129
|
+
* Subscribes to the live log stream.
|
|
130
|
+
* Returns a cleanup function.
|
|
131
|
+
*/
|
|
132
|
+
onLog(callback: (msg: SystemLog) => void): () => void {
|
|
133
|
+
this.logEmitter.on('log', callback)
|
|
134
|
+
return () => {
|
|
135
|
+
this.logEmitter.off('log', callback)
|
|
136
|
+
}
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
/**
|
|
140
|
+
* Discovers queues using SCAN to avoid blocking Redis.
|
|
141
|
+
*/
|
|
142
|
+
async listQueues(): Promise<QueueStats[]> {
|
|
143
|
+
const queues = new Set<string>()
|
|
144
|
+
let cursor = '0'
|
|
145
|
+
let limit = 1000
|
|
146
|
+
|
|
147
|
+
do {
|
|
148
|
+
const result = await this.redis.scan(cursor, 'MATCH', `${this.prefix}*`, 'COUNT', 100)
|
|
149
|
+
cursor = result[0]
|
|
150
|
+
const keys = result[1]
|
|
151
|
+
|
|
152
|
+
for (const key of keys) {
|
|
153
|
+
const relative = key.slice(this.prefix.length)
|
|
154
|
+
const parts = relative.split(':')
|
|
155
|
+
const candidateName = parts[0]
|
|
156
|
+
if (
|
|
157
|
+
candidateName &&
|
|
158
|
+
candidateName !== 'active' &&
|
|
159
|
+
candidateName !== 'schedules' &&
|
|
160
|
+
candidateName !== 'schedule' &&
|
|
161
|
+
candidateName !== 'lock'
|
|
162
|
+
) {
|
|
163
|
+
queues.add(candidateName)
|
|
164
|
+
}
|
|
165
|
+
}
|
|
166
|
+
limit--
|
|
167
|
+
} while (cursor !== '0' && limit > 0)
|
|
168
|
+
|
|
169
|
+
const stats: QueueStats[] = []
|
|
170
|
+
const queueNames = Array.from(queues).sort()
|
|
171
|
+
|
|
172
|
+
const BATCH_SIZE = 10
|
|
173
|
+
|
|
174
|
+
for (let i = 0; i < queueNames.length; i += BATCH_SIZE) {
|
|
175
|
+
const batch = queueNames.slice(i, i + BATCH_SIZE)
|
|
176
|
+
const batchResults = await Promise.all(
|
|
177
|
+
batch.map(async (name) => {
|
|
178
|
+
const waiting = await this.redis.llen(`${this.prefix}${name}`)
|
|
179
|
+
const delayed = await this.redis.zcard(`${this.prefix}${name}:delayed`)
|
|
180
|
+
const failed = await this.redis.llen(`${this.prefix}${name}:failed`)
|
|
181
|
+
const active = await this.redis.scard(`${this.prefix}${name}:active`)
|
|
182
|
+
const paused = await this.redis.get(`${this.prefix}${name}:paused`)
|
|
183
|
+
return { name, waiting, delayed, failed, active, paused: paused === '1' }
|
|
184
|
+
})
|
|
185
|
+
)
|
|
186
|
+
stats.push(...batchResults)
|
|
187
|
+
}
|
|
188
|
+
|
|
189
|
+
return stats
|
|
190
|
+
}
|
|
191
|
+
|
|
192
|
+
/**
|
|
193
|
+
* Pause a queue (workers will stop processing new jobs)
|
|
194
|
+
*/
|
|
195
|
+
async pauseQueue(queueName: string): Promise<boolean> {
|
|
196
|
+
await this.redis.set(`${this.prefix}${queueName}:paused`, '1')
|
|
197
|
+
return true
|
|
198
|
+
}
|
|
199
|
+
|
|
200
|
+
/**
|
|
201
|
+
* Resume a paused queue
|
|
202
|
+
*/
|
|
203
|
+
async resumeQueue(queueName: string): Promise<boolean> {
|
|
204
|
+
await this.redis.del(`${this.prefix}${queueName}:paused`)
|
|
205
|
+
return true
|
|
206
|
+
}
|
|
207
|
+
|
|
208
|
+
/**
|
|
209
|
+
* Check if a queue is paused
|
|
210
|
+
*/
|
|
211
|
+
async isQueuePaused(queueName: string): Promise<boolean> {
|
|
212
|
+
const paused = await this.redis.get(`${this.prefix}${queueName}:paused`)
|
|
213
|
+
return paused === '1'
|
|
214
|
+
}
|
|
215
|
+
|
|
216
|
+
async retryDelayedJob(queueName: string): Promise<number> {
|
|
217
|
+
const key = `${this.prefix}${queueName}`
|
|
218
|
+
const delayKey = `${key}:delayed`
|
|
219
|
+
|
|
220
|
+
const script = `
|
|
221
|
+
local delayKey = KEYS[1]
|
|
222
|
+
local queueKey = KEYS[2]
|
|
223
|
+
|
|
224
|
+
local jobs = redis.call('ZRANGE', delayKey, 0, -1)
|
|
225
|
+
|
|
226
|
+
if #jobs > 0 then
|
|
227
|
+
redis.call('LPUSH', queueKey, unpack(jobs))
|
|
228
|
+
redis.call('DEL', delayKey)
|
|
229
|
+
end
|
|
230
|
+
return #jobs
|
|
231
|
+
`
|
|
232
|
+
|
|
233
|
+
const movedCount = (await this.redis.eval(script, 2, delayKey, key)) as number
|
|
234
|
+
return movedCount
|
|
235
|
+
}
|
|
236
|
+
|
|
237
|
+
/**
 * Lists jobs of a queue by state, parsing each raw payload.
 *
 * Every returned item carries `_raw` (the exact Redis payload, needed by
 * deleteJob/retryJob which match by value). Unparseable payloads are kept
 * with an `_error` marker rather than dropped. Delayed jobs additionally
 * get `scheduledAt` derived from their ZSET score (epoch millis).
 * For 'failed', a short Redis page is topped up from the SQL archive
 * (items marked `_archived`).
 *
 * @param queueName Queue to read.
 * @param type      Which state list to read (default 'waiting').
 * @param start     Inclusive start index.
 * @param stop      Inclusive stop index (Redis range semantics).
 */
async getJobs(
  queueName: string,
  type: 'waiting' | 'delayed' | 'failed' = 'waiting',
  start = 0,
  stop = 49
): Promise<any[]> {
  const key = `${this.prefix}${queueName}`
  let rawJobs: string[] = []

  if (type === 'delayed') {
    // WITHSCORES returns a flat [member, score, member, score, ...] array.
    const results = await this.redis.zrange(`${key}:delayed`, start, stop, 'WITHSCORES')
    const formatted = []
    for (let i = 0; i < results.length; i += 2) {
      const jobStr = results[i]!
      const score = results[i + 1]!
      try {
        const parsed = JSON.parse(jobStr)
        formatted.push({
          ...parsed,
          _raw: jobStr,
          // Score is the scheduled time in epoch milliseconds.
          scheduledAt: new Date(parseInt(score, 10)).toISOString(),
        })
      } catch (_e) {
        formatted.push({ _raw: jobStr, _error: 'Failed to parse JSON' })
      }
    }
    return formatted
  } else {
    const listKey = type === 'failed' ? `${key}:failed` : key
    rawJobs = await this.redis.lrange(listKey, start, stop)

    const jobs = rawJobs.map((jobStr) => {
      try {
        const parsed = JSON.parse(jobStr)
        return { ...parsed, _raw: jobStr }
      } catch (_e) {
        return { _raw: jobStr, _error: 'Failed to parse JSON' }
      }
    })

    // If we got few results and have persistence, merge with archive
    const persistence = this.manager.getPersistence()
    if (jobs.length < stop - start + 1 && persistence && type === 'failed') {
      const archived = await persistence.list(queueName, {
        limit: stop - start + 1 - jobs.length,
        status: type as 'failed',
      })
      return [...jobs, ...archived.map((a: any) => ({ ...a, _archived: true }))]
    }

    return jobs
  }
}
|
|
290
|
+
|
|
291
|
+
/**
 * Records a snapshot of current global statistics for sparklines.
 *
 * Side effects, in order:
 *  1. Writes per-minute waiting/delayed/failed/workers gauges (1h TTL).
 *  2. Emits a 'stats' event (consumed via onStats()) with fresh throughput.
 *  3. Kicks off alert-rule evaluation fire-and-forget.
 */
async recordStatusMetrics(): Promise<void> {
  const stats = await this.listQueues()
  // Sum the three backlog counters across all queues.
  const totals = stats.reduce(
    (acc, q) => {
      acc.waiting += q.waiting
      acc.delayed += q.delayed
      acc.failed += q.failed
      return acc
    },
    { waiting: 0, delayed: 0, failed: 0 }
  )

  // Minute-granularity bucket key.
  const now = Math.floor(Date.now() / 60000)
  const pipe = this.redis.pipeline()

  // Store snapshots for last 60 minutes
  pipe.set(`flux_console:metrics:waiting:${now}`, totals.waiting, 'EX', 3600)
  pipe.set(`flux_console:metrics:delayed:${now}`, totals.delayed, 'EX', 3600)
  pipe.set(`flux_console:metrics:failed:${now}`, totals.failed, 'EX', 3600)

  // Also record worker count
  const workers = await this.listWorkers()
  pipe.set(`flux_console:metrics:workers:${now}`, workers.length, 'EX', 3600)

  await pipe.exec()

  // Real-time Broadcast
  this.logEmitter.emit('stats', {
    queues: stats,
    throughput: await this.getThroughputData(),
    workers,
  })

  // Evaluate Alert Rules (Near Zero Overhead) — not awaited so a slow or
  // failing rule evaluation cannot delay the metrics tick.
  this.alerts
    .check({
      queues: stats,
      workers,
      totals,
    })
    .catch((err) => console.error('[AlertService] Rule Evaluation Error:', err))
}
|
|
336
|
+
|
|
337
|
+
/**
|
|
338
|
+
* Subscribes to real-time stats updates.
|
|
339
|
+
*/
|
|
340
|
+
onStats(callback: (stats: GlobalStats) => void): () => void {
|
|
341
|
+
this.logEmitter.on('stats', callback)
|
|
342
|
+
return () => {
|
|
343
|
+
this.logEmitter.off('stats', callback)
|
|
344
|
+
}
|
|
345
|
+
}
|
|
346
|
+
|
|
347
|
+
/**
|
|
348
|
+
* Gets historical data for a specific metric.
|
|
349
|
+
*/
|
|
350
|
+
async getMetricHistory(metric: string, limit = 15): Promise<number[]> {
|
|
351
|
+
const now = Math.floor(Date.now() / 60000)
|
|
352
|
+
const keys = []
|
|
353
|
+
for (let i = limit - 1; i >= 0; i--) {
|
|
354
|
+
keys.push(`flux_console:metrics:${metric}:${now - i}`)
|
|
355
|
+
}
|
|
356
|
+
|
|
357
|
+
const values = await this.redis.mget(...keys)
|
|
358
|
+
return values.map((v) => parseInt(v || '0', 10))
|
|
359
|
+
}
|
|
360
|
+
|
|
361
|
+
/**
|
|
362
|
+
* Retrieves throughput data for the last 15 minutes.
|
|
363
|
+
*/
|
|
364
|
+
async getThroughputData(): Promise<{ timestamp: string; count: number }[]> {
|
|
365
|
+
const now = Math.floor(Date.now() / 60000)
|
|
366
|
+
const results = []
|
|
367
|
+
|
|
368
|
+
for (let i = 14; i >= 0; i--) {
|
|
369
|
+
const t = now - i
|
|
370
|
+
const count = await this.redis.get(`flux_console:throughput:${t}`)
|
|
371
|
+
const date = new Date(t * 60000)
|
|
372
|
+
results.push({
|
|
373
|
+
timestamp: `${date.getHours().toString().padStart(2, '0')}:${date.getMinutes().toString().padStart(2, '0')}`,
|
|
374
|
+
count: parseInt(count || '0', 10),
|
|
375
|
+
})
|
|
376
|
+
}
|
|
377
|
+
|
|
378
|
+
return results
|
|
379
|
+
}
|
|
380
|
+
|
|
381
|
+
/**
|
|
382
|
+
* Lists all active workers by scanning heartbeat keys.
|
|
383
|
+
*/
|
|
384
|
+
async listWorkers(): Promise<WorkerReport[]> {
|
|
385
|
+
const workers: WorkerReport[] = []
|
|
386
|
+
let cursor = '0'
|
|
387
|
+
|
|
388
|
+
do {
|
|
389
|
+
const [nextCursor, keys] = await this.redis.scan(cursor, 'MATCH', 'flux_console:worker:*')
|
|
390
|
+
cursor = nextCursor
|
|
391
|
+
|
|
392
|
+
if (keys.length > 0) {
|
|
393
|
+
const values = await this.redis.mget(...keys)
|
|
394
|
+
values.forEach((v) => {
|
|
395
|
+
if (v) {
|
|
396
|
+
try {
|
|
397
|
+
workers.push(JSON.parse(v))
|
|
398
|
+
} catch (_e) {
|
|
399
|
+
// Ignore malformed
|
|
400
|
+
}
|
|
401
|
+
}
|
|
402
|
+
})
|
|
403
|
+
}
|
|
404
|
+
} while (cursor !== '0')
|
|
405
|
+
|
|
406
|
+
return workers
|
|
407
|
+
}
|
|
408
|
+
|
|
409
|
+
/**
|
|
410
|
+
* Deletes a specific job from a queue or delayed pool.
|
|
411
|
+
*/
|
|
412
|
+
async deleteJob(
|
|
413
|
+
queueName: string,
|
|
414
|
+
type: 'waiting' | 'delayed' | 'failed',
|
|
415
|
+
jobRaw: string
|
|
416
|
+
): Promise<boolean> {
|
|
417
|
+
const key =
|
|
418
|
+
type === 'delayed'
|
|
419
|
+
? `${this.prefix}${queueName}:delayed`
|
|
420
|
+
: type === 'failed'
|
|
421
|
+
? `${this.prefix}${queueName}:failed`
|
|
422
|
+
: `${this.prefix}${queueName}`
|
|
423
|
+
const result =
|
|
424
|
+
type === 'delayed'
|
|
425
|
+
? await this.redis.zrem(key, jobRaw)
|
|
426
|
+
: await this.redis.lrem(key, 0, jobRaw)
|
|
427
|
+
return result > 0
|
|
428
|
+
}
|
|
429
|
+
|
|
430
|
+
/**
 * Retries a specific delayed job by moving it back to the waiting queue.
 * Uses a Lua script so remove+push is one atomic step — the job can never
 * be observed in both (or neither) structure by other clients.
 *
 * @param jobRaw Exact raw payload string identifying the ZSET member.
 * @returns true when the job was found in the delayed set and moved.
 */
async retryJob(queueName: string, jobRaw: string): Promise<boolean> {
  const key = `${this.prefix}${queueName}`
  const delayKey = `${key}:delayed`

  // Atomically move from ZSET to LIST
  const script = `
    local delayKey = KEYS[1]
    local queueKey = KEYS[2]
    local jobRaw = ARGV[1]

    local removed = redis.call('ZREM', delayKey, jobRaw)
    if removed > 0 then
      redis.call('LPUSH', queueKey, jobRaw)
      return 1
    end
    return 0
  `
  const result = await this.redis.eval(script, 2, delayKey, key, jobRaw)
  return result === 1
}
|
|
453
|
+
|
|
454
|
+
/**
|
|
455
|
+
* Purges all jobs from a queue.
|
|
456
|
+
*/
|
|
457
|
+
async purgeQueue(queueName: string): Promise<void> {
|
|
458
|
+
const pipe = this.redis.pipeline()
|
|
459
|
+
pipe.del(`${this.prefix}${queueName}`)
|
|
460
|
+
pipe.del(`${this.prefix}${queueName}:delayed`)
|
|
461
|
+
pipe.del(`${this.prefix}${queueName}:failed`)
|
|
462
|
+
pipe.del(`${this.prefix}${queueName}:active`)
|
|
463
|
+
await pipe.exec()
|
|
464
|
+
}
|
|
465
|
+
|
|
466
|
+
/**
|
|
467
|
+
* Retries all failed jobs in a queue.
|
|
468
|
+
*/
|
|
469
|
+
async retryAllFailedJobs(queueName: string): Promise<number> {
|
|
470
|
+
// Navigate via QueueManager -> Driver to use safe RPOPLPUSH (avoids Lua stack overflow)
|
|
471
|
+
// We pass a large number to retry "all" (effectively batch processing)
|
|
472
|
+
return await this.manager.retryFailed(queueName, 10000)
|
|
473
|
+
}
|
|
474
|
+
|
|
475
|
+
/**
|
|
476
|
+
* Clears all failed jobs (DLQ).
|
|
477
|
+
*/
|
|
478
|
+
async clearFailedJobs(queueName: string): Promise<void> {
|
|
479
|
+
await this.manager.clearFailed(queueName)
|
|
480
|
+
}
|
|
481
|
+
|
|
482
|
+
/**
|
|
483
|
+
* Get total count of jobs in a queue by type.
|
|
484
|
+
*/
|
|
485
|
+
async getJobCount(queueName: string, type: 'waiting' | 'delayed' | 'failed'): Promise<number> {
|
|
486
|
+
const key =
|
|
487
|
+
type === 'delayed'
|
|
488
|
+
? `${this.prefix}${queueName}:delayed`
|
|
489
|
+
: type === 'failed'
|
|
490
|
+
? `${this.prefix}${queueName}:failed`
|
|
491
|
+
: `${this.prefix}${queueName}`
|
|
492
|
+
|
|
493
|
+
return type === 'delayed' ? await this.redis.zcard(key) : await this.redis.llen(key)
|
|
494
|
+
}
|
|
495
|
+
|
|
496
|
+
/**
|
|
497
|
+
* Delete ALL jobs of a specific type from a queue.
|
|
498
|
+
*/
|
|
499
|
+
async deleteAllJobs(queueName: string, type: 'waiting' | 'delayed' | 'failed'): Promise<number> {
|
|
500
|
+
const key =
|
|
501
|
+
type === 'delayed'
|
|
502
|
+
? `${this.prefix}${queueName}:delayed`
|
|
503
|
+
: type === 'failed'
|
|
504
|
+
? `${this.prefix}${queueName}:failed`
|
|
505
|
+
: `${this.prefix}${queueName}`
|
|
506
|
+
|
|
507
|
+
const count = await this.getJobCount(queueName, type)
|
|
508
|
+
await this.redis.del(key)
|
|
509
|
+
return count
|
|
510
|
+
}
|
|
511
|
+
|
|
512
|
+
/**
|
|
513
|
+
* Retry ALL jobs of a specific type (delayed or failed).
|
|
514
|
+
*/
|
|
515
|
+
async retryAllJobs(queueName: string, type: 'delayed' | 'failed'): Promise<number> {
|
|
516
|
+
if (type === 'delayed') {
|
|
517
|
+
return await this.retryDelayedJob(queueName)
|
|
518
|
+
} else {
|
|
519
|
+
return await this.retryAllFailedJobs(queueName)
|
|
520
|
+
}
|
|
521
|
+
}
|
|
522
|
+
|
|
523
|
+
/**
 * Bulk deletes jobs (works for waiting, delayed, failed).
 * Jobs are matched by exact raw payload.
 *
 * NOTE(review): uses LREM count 1 (first occurrence only) whereas
 * deleteJob() uses count 0 (all occurrences) — confirm whether duplicate
 * payloads should be fully removed here too.
 *
 * @returns Total number of entries removed across all payloads.
 */
async deleteJobs(
  queueName: string,
  type: 'waiting' | 'delayed' | 'failed',
  jobRaws: string[]
): Promise<number> {
  const key =
    type === 'delayed'
      ? `${this.prefix}${queueName}:delayed`
      : type === 'failed'
        ? `${this.prefix}${queueName}:failed`
        : `${this.prefix}${queueName}`

  const pipe = this.redis.pipeline()
  for (const raw of jobRaws) {
    if (type === 'delayed') {
      pipe.zrem(key, raw)
    } else {
      pipe.lrem(key, 1, raw)
    }
  }
  // Sum the removal counts from each pipelined command ([err, res] pairs).
  const results = await pipe.exec()
  return results?.reduce((acc, [_, res]) => acc + ((res as number) || 0), 0) || 0
}
|
|
549
|
+
|
|
550
|
+
/**
 * Bulk retries jobs (moves from failed/delayed to waiting).
 *
 * Each job contributes exactly two pipelined commands (remove + push), so
 * the result array is walked in steps of two and only the remove result
 * (even index) decides whether the retry counted. Note the LPUSH runs even
 * when the remove found nothing, so an absent payload is still pushed —
 * callers should pass payloads read via getJobs()'s `_raw`.
 *
 * @returns Number of jobs whose removal from the source succeeded.
 */
async retryJobs(
  queueName: string,
  type: 'delayed' | 'failed',
  jobRaws: string[]
): Promise<number> {
  const key = `${this.prefix}${queueName}`
  const sourceKey = type === 'delayed' ? `${key}:delayed` : `${key}:failed`

  const pipe = this.redis.pipeline()
  for (const raw of jobRaws) {
    if (type === 'delayed') {
      pipe.zrem(sourceKey, raw)
      pipe.lpush(key, raw)
    } else {
      pipe.lrem(sourceKey, 1, raw)
      pipe.lpush(key, raw)
    }
  }
  const results = await pipe.exec()
  // Each successful retry is 2 operations in pipeline (remove + push),
  // but we count the successfully removed jobs.
  let count = 0
  if (results) {
    for (let i = 0; i < results.length; i += 2) {
      const result = results[i]
      if (result && !result[0] && (result[1] as number) > 0) {
        count++
      }
    }
  }
  return count
}
|
|
585
|
+
|
|
586
|
+
/**
 * Publishes a log message (used by workers).
 *
 * Fan-out + bookkeeping in one call: publishes on the pub/sub channel,
 * appends to a 100-entry capped history list, bumps this minute's
 * throughput counter, and (if persistence is configured) archives the log
 * fire-and-forget.
 *
 * NOTE(review): the throughput counter is incremented here for EVERY log
 * level, while connect()'s subscriber also increments it for
 * success/error logs — verify the two paths don't double-count in a
 * deployment that runs both.
 */
async publishLog(log: { level: string; message: string; workerId: string; queue?: string }) {
  const payload = {
    ...log,
    timestamp: new Date().toISOString(),
  }
  await this.redis.publish('flux_console:logs', JSON.stringify(payload))

  // Also store in a capped list for history (last 100 logs)
  const pipe = this.redis.pipeline()
  pipe.lpush('flux_console:logs:history', JSON.stringify(payload))
  pipe.ltrim('flux_console:logs:history', 0, 99)

  // Increment throughput counter for this minute
  const now = Math.floor(Date.now() / 60000)
  pipe.incr(`flux_console:throughput:${now}`)
  pipe.expire(`flux_console:throughput:${now}`, 3600) // Keep for 1 hour

  await pipe.exec()

  // Archive to persistence if enabled — not awaited; failures are logged
  // but never block or fail the publish path.
  const persistence = this.manager.getPersistence()
  if (persistence) {
    persistence
      .archiveLog({
        ...log,
        timestamp: new Date(),
      })
      .catch((err: any) => console.error('[QueueService] Log Archive Error:', err))
  }
}
|
|
619
|
+
|
|
620
|
+
/**
|
|
621
|
+
* Gets recent log history.
|
|
622
|
+
*/
|
|
623
|
+
async getLogHistory(): Promise<any[]> {
|
|
624
|
+
const logs = await this.redis.lrange('flux_console:logs:history', 0, -1)
|
|
625
|
+
return logs.map((l) => JSON.parse(l)).reverse()
|
|
626
|
+
}
|
|
627
|
+
|
|
628
|
+
/**
 * Search jobs across all queues by ID or data content.
 *
 * Substring, case-insensitive match against the job id, name, and the
 * JSON-stringified payload, in that priority order (reflected in
 * `_matchType`). Scans at most the first 100 jobs per queue/type — this is
 * a best-effort console search, not an index. Queues are walked
 * sequentially and the walk stops as soon as `limit` hits are collected.
 */
async searchJobs(
  query: string,
  options: { limit?: number; type?: 'all' | 'waiting' | 'delayed' | 'failed' } = {}
): Promise<any[]> {
  const { limit = 20, type = 'all' } = options
  const results: any[] = []
  const queryLower = query.toLowerCase()

  // Get all queues
  const queues = await this.listQueues()

  for (const queue of queues) {
    if (results.length >= limit) {
      break
    }

    const types = type === 'all' ? ['waiting', 'delayed', 'failed'] : [type]

    for (const jobType of types) {
      if (results.length >= limit) {
        break
      }

      // Only the first 100 jobs of each queue/type are considered.
      const jobs = await this.getJobs(queue.name, jobType as any, 0, 99)

      for (const job of jobs) {
        if (results.length >= limit) {
          break
        }

        // Search in job ID
        const idMatch = job.id && String(job.id).toLowerCase().includes(queryLower)

        // Search in job name
        const nameMatch = job.name && String(job.name).toLowerCase().includes(queryLower)

        // Search in job data (stringify and search)
        let dataMatch = false
        try {
          const dataStr = JSON.stringify(job.data || job).toLowerCase()
          dataMatch = dataStr.includes(queryLower)
        } catch (_e) {
          // Ignore stringify errors
        }

        if (idMatch || nameMatch || dataMatch) {
          results.push({
            ...job,
            _queue: queue.name,
            _type: jobType,
            _matchType: idMatch ? 'id' : nameMatch ? 'name' : 'data',
          })
        }
      }
    }
  }

  return results
}
|
|
690
|
+
|
|
691
|
+
/**
|
|
692
|
+
* List jobs from the SQL archive.
|
|
693
|
+
*/
|
|
694
|
+
async getArchiveJobs(
|
|
695
|
+
queue: string,
|
|
696
|
+
page = 1,
|
|
697
|
+
limit = 50,
|
|
698
|
+
status?: 'completed' | 'failed',
|
|
699
|
+
filter: { jobId?: string; startTime?: Date; endTime?: Date } = {}
|
|
700
|
+
): Promise<{ jobs: any[]; total: number }> {
|
|
701
|
+
const persistence = this.manager.getPersistence()
|
|
702
|
+
if (!persistence) {
|
|
703
|
+
return { jobs: [], total: 0 }
|
|
704
|
+
}
|
|
705
|
+
|
|
706
|
+
const offset = (page - 1) * limit
|
|
707
|
+
const [jobs, total] = await Promise.all([
|
|
708
|
+
persistence.list(queue, { limit, offset, status, ...filter }),
|
|
709
|
+
persistence.count(queue, { status, ...filter }),
|
|
710
|
+
])
|
|
711
|
+
|
|
712
|
+
return {
|
|
713
|
+
jobs: jobs.map((j: any) => ({ ...j, _archived: true })),
|
|
714
|
+
total,
|
|
715
|
+
}
|
|
716
|
+
}
|
|
717
|
+
|
|
718
|
+
/**
 * Search jobs from the SQL archive.
 *
 * Returns empty results when the adapter is missing or does not implement
 * search(). The `total` is an estimate: when a full page comes back it is
 * inflated by one (limit * page + 1) so pagination UIs show a "next" page;
 * otherwise it is the exact count of rows seen so far.
 */
async searchArchive(
  query: string,
  options: { limit?: number; page?: number; queue?: string } = {}
): Promise<{ jobs: any[]; total: number }> {
  // search() is an optional adapter capability — feature-detect it.
  const persistence = this.manager.getPersistence() as any
  if (!persistence || typeof persistence.search !== 'function') {
    return { jobs: [], total: 0 }
  }

  const { limit = 50, page = 1, queue } = options
  const offset = (page - 1) * limit

  const jobs = await persistence.search(query, { limit, offset, queue })
  // For search, precise total count is harder without a dedicated search count method,
  // so we'll return the results length or a hypothetical high number if results match the limit.
  return {
    jobs: jobs.map((j: any) => ({ ...j, _archived: true })),
    total: jobs.length === limit ? limit * page + 1 : (page - 1) * limit + jobs.length,
  }
}
|
|
741
|
+
|
|
742
|
+
/**
|
|
743
|
+
* List logs from the SQL archive.
|
|
744
|
+
*/
|
|
745
|
+
async getArchivedLogs(
|
|
746
|
+
options: {
|
|
747
|
+
page?: number
|
|
748
|
+
limit?: number
|
|
749
|
+
level?: string
|
|
750
|
+
workerId?: string
|
|
751
|
+
queue?: string
|
|
752
|
+
search?: string
|
|
753
|
+
startTime?: Date
|
|
754
|
+
endTime?: Date
|
|
755
|
+
} = {}
|
|
756
|
+
): Promise<{ logs: any[]; total: number }> {
|
|
757
|
+
const persistence = this.manager.getPersistence()
|
|
758
|
+
if (!persistence) {
|
|
759
|
+
return { logs: [], total: 0 }
|
|
760
|
+
}
|
|
761
|
+
|
|
762
|
+
const { page = 1, limit = 50, ...filters } = options
|
|
763
|
+
const offset = (page - 1) * limit
|
|
764
|
+
|
|
765
|
+
const [logs, total] = await Promise.all([
|
|
766
|
+
persistence.listLogs({ limit, offset, ...filters }),
|
|
767
|
+
persistence.countLogs(filters),
|
|
768
|
+
])
|
|
769
|
+
|
|
770
|
+
return { logs, total }
|
|
771
|
+
}
|
|
772
|
+
|
|
773
|
+
/**
|
|
774
|
+
* Cleans up old archived jobs from SQL.
|
|
775
|
+
*/
|
|
776
|
+
async cleanupArchive(days: number): Promise<number> {
|
|
777
|
+
const persistence = this.manager.getPersistence()
|
|
778
|
+
if (!persistence) {
|
|
779
|
+
return 0
|
|
780
|
+
}
|
|
781
|
+
return await persistence.cleanup(days)
|
|
782
|
+
}
|
|
783
|
+
|
|
784
|
+
/**
|
|
785
|
+
* List all recurring schedules.
|
|
786
|
+
*/
|
|
787
|
+
async listSchedules(): Promise<any[]> {
|
|
788
|
+
const scheduler = this.manager.getScheduler()
|
|
789
|
+
return await scheduler.list()
|
|
790
|
+
}
|
|
791
|
+
|
|
792
|
+
/**
|
|
793
|
+
* Register a new recurring schedule.
|
|
794
|
+
*/
|
|
795
|
+
async registerSchedule(config: {
|
|
796
|
+
id: string
|
|
797
|
+
cron: string
|
|
798
|
+
queue: string
|
|
799
|
+
job: any
|
|
800
|
+
}): Promise<void> {
|
|
801
|
+
const scheduler = this.manager.getScheduler()
|
|
802
|
+
await scheduler.register(config)
|
|
803
|
+
}
|
|
804
|
+
|
|
805
|
+
/**
|
|
806
|
+
* Remove a recurring schedule.
|
|
807
|
+
*/
|
|
808
|
+
async removeSchedule(id: string): Promise<void> {
|
|
809
|
+
const scheduler = this.manager.getScheduler()
|
|
810
|
+
await scheduler.remove(id)
|
|
811
|
+
}
|
|
812
|
+
|
|
813
|
+
/**
|
|
814
|
+
* Run a scheduled job immediately.
|
|
815
|
+
*/
|
|
816
|
+
async runScheduleNow(id: string): Promise<void> {
|
|
817
|
+
const scheduler = this.manager.getScheduler()
|
|
818
|
+
await scheduler.runNow(id)
|
|
819
|
+
}
|
|
820
|
+
|
|
821
|
+
/**
|
|
822
|
+
* Tick the scheduler to process due jobs.
|
|
823
|
+
*/
|
|
824
|
+
async tickScheduler(): Promise<void> {
|
|
825
|
+
const scheduler = this.manager.getScheduler()
|
|
826
|
+
await scheduler.tick()
|
|
827
|
+
}
|
|
828
|
+
}
|