rrq 0.5.0__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,776 @@
+"""Real-time monitoring dashboard for RRQ"""
+
+import asyncio
+from collections import defaultdict, deque
+from datetime import datetime
+from typing import Dict, List
+
+import click
+from rich.align import Align
+from rich.layout import Layout
+from rich.live import Live
+from rich.panel import Panel
+from rich.table import Table
+from rich.text import Text
+
+from rrq.constants import (
+    HEALTH_KEY_PREFIX,
+    JOB_KEY_PREFIX,
+    QUEUE_KEY_PREFIX,
+    DLQ_KEY_PREFIX,
+)
+from rrq.cli_commands.base import AsyncCommand, load_app_settings, get_job_store
+from ..utils import (
+    console,
+    format_duration,
+    format_queue_name,
+    format_status,
+    format_timestamp,
+)
+
+# Error truncation length, kept consistent with the DLQ commands
+ERROR_DISPLAY_LENGTH = 50
+
+
+class MonitorCommands(AsyncCommand):
+    """Real-time monitoring commands"""
+
+    def register(self, cli_group: click.Group) -> None:
+        """Register monitor commands"""
+
+        @cli_group.command("monitor")
+        @click.option(
+            "--settings",
+            "settings_object_path",
+            type=str,
+            help="Python settings path (e.g., myapp.settings.rrq_settings)",
+        )
+        @click.option(
+            "--refresh",
+            type=float,
+            default=1.0,
+            help="Refresh interval in seconds",
+        )
+        @click.option(
+            "--queues",
+            multiple=True,
+            help="Specific queues to monitor (default: all)",
+        )
+        def monitor(settings_object_path: str, refresh: float, queues: tuple):
+            """Launch real-time monitoring dashboard"""
+            self.make_async(self._monitor)(settings_object_path, refresh, queues)
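+
+        # Illustrative invocation (assumes this group is mounted as the
+        # top-level `rrq` CLI; the flags are the ones declared above,
+        # queue names are made up):
+        #
+        #   rrq monitor --settings myapp.settings.rrq_settings \
+        #       --refresh 0.5 --queues default --queues emails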
+
+    async def _monitor(
+        self, settings_object_path: str, refresh: float, queues: tuple
+    ) -> None:
+        """Run the monitoring dashboard"""
+        settings = load_app_settings(settings_object_path)
+        dashboard = Dashboard(settings, refresh, queues)
+
+        try:
+            await dashboard.run()
+        except KeyboardInterrupt:
+            pass
+
+
+class Dashboard:
+    """Real-time monitoring dashboard"""
+
+    def __init__(self, settings, refresh_interval: float, queue_filter: tuple):
+        self.settings = settings
+        self.refresh_interval = refresh_interval
+        self.queue_filter = list(queue_filter) if queue_filter else None
+        self.job_store = None
+        # Populated by update_metrics(); initialized here so the first render
+        # and early stream events never touch missing attributes.
+        self.workers: List[Dict] = []
+        self.recent_jobs: List[Dict] = []
+
+        # Metrics storage
+        self.queue_sizes = defaultdict(lambda: deque(maxlen=60))  # 60 data points
+        self.processing_rates = defaultdict(lambda: deque(maxlen=60))
+        self.error_counts = defaultdict(int)
+        self.dlq_stats = {"total_jobs": 0, "newest_error": None, "top_errors": {}}
+        self.last_update = datetime.now()
+
+        # Event streaming for real-time updates
+        self._last_event_id = "0"
+        self._event_buffer = deque(maxlen=100)
+
+    async def run(self):
+        """Run the dashboard"""
+        self.job_store = await get_job_store(self.settings)
+
+        try:
+            layout = self.create_layout()
+
+            with Live(
+                layout, refresh_per_second=1 / self.refresh_interval, console=console
+            ) as _:
+                while True:
+                    await self.update_metrics()
+                    self.update_layout(layout)
+                    await asyncio.sleep(self.refresh_interval)
+        finally:
+            await self.job_store.aclose()
+
+    def create_layout(self) -> Layout:
+        """Create the dashboard layout"""
+        layout = Layout(name="root")
+
+        layout.split_column(
+            Layout(name="header", size=3),
+            Layout(name="main"),
+            Layout(name="footer", size=3),
+        )
+
+        layout["main"].split_row(
+            Layout(name="queues", ratio=1),
+            Layout(name="workers", ratio=1),
+            Layout(name="dlq", ratio=1),
+        )
+
+        layout["queues"].split_column(
+            Layout(name="queue_stats", ratio=2),
+            Layout(name="queue_chart", ratio=1),
+        )
+
+        layout["workers"].split_column(
+            Layout(name="worker_list", ratio=2),
+            Layout(name="recent_jobs", ratio=1),
+        )
+
+        layout["dlq"].split_column(
+            Layout(name="dlq_stats", ratio=2),
+            Layout(name="dlq_errors", ratio=1),
+        )
+
+        return layout
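+
+    # Grid produced by the splits above:
+    #
+    #   header
+    #   queue_stats | worker_list | dlq_stats
+    #   queue_chart | recent_jobs | dlq_errors
+    #   footer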
+
+    async def update_metrics(self):
+        """Update all metrics using the hybrid monitoring approach"""
+        try:
+            # Check for real-time events first
+            await self._process_monitoring_events()
+
+            # Use hybrid approach for queue/worker metrics
+            queue_data = await self._update_queue_metrics_optimized()
+            await self._update_worker_metrics_optimized()
+
+            # Update queue size tracking
+            for queue_name, size in queue_data.items():
+                self.queue_sizes[queue_name].append(size)
+
+            # Get recent job information (keep optimized scan)
+            self.recent_jobs = await self._get_recent_jobs_optimized()
+
+            # Get DLQ information
+            await self._update_dlq_stats()
+
+            self.last_update = datetime.now()
+        except Exception as e:
+            console.print(f"[red]Error updating metrics: {e}[/red]")
+            # Continue with cached data if available
+
+    async def _process_monitoring_events(self):
+        """Process real-time monitoring events from Redis streams"""
+        try:
+            events = await self.job_store.consume_monitor_events(
+                last_id=self._last_event_id,
+                count=50,
+                block=10,  # brief blocking read so the refresh loop never stalls
+            )
+
+            for stream_name, event_list in events:
+                for event_id, event_data in event_list:
+                    # Update last processed event ID
+                    self._last_event_id = (
+                        event_id.decode() if isinstance(event_id, bytes) else event_id
+                    )
+
+                    # Process event based on type
+                    event_type = event_data.get(b"event_type", b"").decode()
+                    if event_type == "queue_activity":
+                        queue_name = event_data.get(b"queue_name", b"").decode()
+                        if queue_name:
+                            # Trigger immediate refresh for this queue
+                            await self._refresh_queue_size(queue_name)
+                    elif event_type == "worker_heartbeat":
+                        worker_id = event_data.get(b"worker_id", b"").decode()
+                        if worker_id:
+                            # Trigger immediate worker refresh
+                            await self._refresh_worker_status(worker_id)
+
+        except Exception:
+            # Events are optional; continue without them if there's an issue
+            pass
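+
+    # Event payload shape implied by the reads above (values illustrative):
+    #   {b"event_type": b"queue_activity", b"queue_name": b"default"}
+    #   {b"event_type": b"worker_heartbeat", b"worker_id": b"worker-abc123"}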
+
+    async def _refresh_queue_size(self, queue_name: str):
+        """Immediately refresh size for a specific queue"""
+        try:
+            queue_key = f"{QUEUE_KEY_PREFIX}{queue_name}"
+            size = await self.job_store.redis.zcard(queue_key)
+            self.queue_sizes[queue_name].append(size)
+        except Exception:
+            pass
+
+    async def _refresh_worker_status(self, worker_id: str):
+        """Immediately refresh status for a specific worker"""
+        try:
+            health_data, ttl = await self.job_store.get_worker_health(worker_id)
+            if health_data:
+                # Update worker in current list
+                for i, worker in enumerate(self.workers):
+                    if worker["id"] == worker_id:
+                        self.workers[i].update(
+                            {
+                                "status": health_data.get("status", "unknown"),
+                                "active_jobs": health_data.get("active_jobs", 0),
+                                "queues": health_data.get("queues", []),
+                                "last_heartbeat": health_data.get("timestamp"),
+                                "ttl": ttl,
+                            }
+                        )
+                        break
+        except Exception:
+            pass
+
+    async def _get_recent_jobs(self, limit: int = 10) -> List[Dict]:
+        """Get recently processed jobs"""
+        jobs = []
+        job_pattern = f"{JOB_KEY_PREFIX}*"
+
+        # Sample recent jobs
+        count = 0
+        async for key in self.job_store.redis.scan_iter(match=job_pattern):
+            if count >= limit * 2:  # Sample more to find recent ones
+                break
+
+            job_id = key.decode().replace(JOB_KEY_PREFIX, "")
+            job_dict = await self.job_store.get_job_data_dict(job_id)
+            if job_dict:
+                # Only include recently updated jobs
+                if "completed_at" in job_dict or "started_at" in job_dict:
+                    jobs.append(
+                        {
+                            "id": job_id,
+                            "function": job_dict.get("function_name", "unknown"),
+                            "status": job_dict.get("status", "unknown"),
+                            "started_at": float(job_dict.get("started_at", 0)),
+                            "completed_at": float(job_dict.get("completed_at", 0)),
+                        }
+                    )
+
+            count += 1
+
+        # Sort by most recent activity
+        jobs.sort(
+            key=lambda x: x.get("completed_at") or x.get("started_at") or 0,
+            reverse=True,
+        )
+
+        return jobs[:limit]
+
+    async def _update_queue_metrics_optimized(self) -> Dict[str, int]:
+        """Hybrid queue metrics collection using active registries and efficient batch operations"""
+        # Use the hybrid monitoring approach: get active queues from registry
+        try:
+            # Get recently active queues from the registry (O(log N) operation)
+            active_queue_names = await self.job_store.get_active_queues(
+                max_age_seconds=300
+            )
+
+            # Apply filtering if specified
+            if self.queue_filter:
+                active_queue_names = [
+                    q for q in active_queue_names if q in self.queue_filter
+                ]
+
+            # Use batch operation to get queue sizes efficiently
+            if active_queue_names:
+                queue_data = await self.job_store.batch_get_queue_sizes(
+                    active_queue_names
+                )
+            else:
+                queue_data = {}
+
+            # Fallback to legacy scan for first run or if no active queues found
+            if not queue_data:
+                queue_data = await self._legacy_scan_queue_metrics()
+
+        except Exception:
+            # Fallback to legacy scan on any error
+            queue_data = await self._legacy_scan_queue_metrics()
+
+        return queue_data
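+
+    # batch_get_queue_sizes is relied on above to return a {queue_name: size}
+    # mapping, e.g. {"default": 12, "emails": 3} (example values only).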
+
+    async def _legacy_scan_queue_metrics(self) -> Dict[str, int]:
+        """Legacy scan-based queue metrics as a fallback"""
+        queue_keys = []
+        queue_pattern = f"{QUEUE_KEY_PREFIX}*"
+
+        # Perform a bounded scan (at most 100 keys in total)
+        scan_count = 0
+        try:
+            async for key in self.job_store.redis.scan_iter(
+                match=queue_pattern, count=50
+            ):
+                queue_keys.append(key)
+                scan_count += 1
+                if scan_count >= 100:  # Limit scan operations
+                    break
+        except TypeError:
+            # Handle mocks that don't support the count parameter
+            async for key in self.job_store.redis.scan_iter(match=queue_pattern):
+                queue_keys.append(key)
+                scan_count += 1
+                if scan_count >= 100:  # Limit scan operations
+                    break
+
+        # Apply filtering early and get sizes individually (compatible with tests)
+        queue_data = {}
+        for key in queue_keys:
+            queue_name = key.decode().replace(QUEUE_KEY_PREFIX, "")
+            if not self.queue_filter or queue_name in self.queue_filter:
+                size = await self.job_store.redis.zcard(key)
+                queue_data[queue_name] = size
+
+        return queue_data
+
+    async def _update_worker_metrics_optimized(self):
+        """Hybrid worker metrics collection using active registries"""
+        try:
+            # Use the hybrid monitoring approach: get active workers from registry
+            active_worker_ids = await self.job_store.get_active_workers(
+                max_age_seconds=60
+            )
+
+            # Get worker health data efficiently
+            workers = []
+            for worker_id in active_worker_ids:
+                health_data, ttl = await self.job_store.get_worker_health(worker_id)
+
+                if health_data:
+                    workers.append(
+                        {
+                            "id": worker_id,
+                            "status": health_data.get("status", "unknown"),
+                            "active_jobs": health_data.get("active_jobs", 0),
+                            "queues": health_data.get("queues", []),
+                            "last_heartbeat": health_data.get("timestamp"),
+                            "ttl": ttl,
+                        }
+                    )
+
+            # Fallback to legacy scan if no active workers found
+            if not workers:
+                workers = await self._legacy_scan_worker_metrics()
+
+        except Exception:
+            # Fallback to legacy scan on any error
+            workers = await self._legacy_scan_worker_metrics()
+
+        self.workers = workers
+
+    async def _legacy_scan_worker_metrics(self) -> list:
+        """Legacy scan-based worker metrics as a fallback"""
+        worker_keys = []
+        health_pattern = f"{HEALTH_KEY_PREFIX}*"
+
+        scan_count = 0
+        try:
+            async for key in self.job_store.redis.scan_iter(
+                match=health_pattern, count=50
+            ):
+                worker_keys.append(key)
+                scan_count += 1
+                if scan_count >= 50:  # Limit worker scans
+                    break
+        except TypeError:
+            # Handle mocks that don't support the count parameter
+            async for key in self.job_store.redis.scan_iter(match=health_pattern):
+                worker_keys.append(key)
+                scan_count += 1
+                if scan_count >= 50:  # Limit worker scans
+                    break
+
+        # Get worker health individually (compatible with tests)
+        workers = []
+        for key in worker_keys:
+            worker_id = key.decode().replace(HEALTH_KEY_PREFIX, "")
+            health_data, ttl = await self.job_store.get_worker_health(worker_id)
+
+            if health_data:
+                workers.append(
+                    {
+                        "id": worker_id,
+                        "status": health_data.get("status", "unknown"),
+                        "active_jobs": health_data.get("active_jobs", 0),
+                        "queues": health_data.get("queues", []),
+                        "last_heartbeat": health_data.get("timestamp"),
+                        "ttl": ttl,
+                    }
+                )
+
+        return workers
+
+    async def _get_recent_jobs_optimized(self, limit: int = 10) -> List[Dict]:
+        """Optimized recent jobs collection with limited scanning"""
+        jobs = []
+        job_pattern = f"{JOB_KEY_PREFIX}*"
+
+        # Limit scan iterations more aggressively for recent jobs
+        job_keys = []
+        scan_count = 0
+        try:
+            async for key in self.job_store.redis.scan_iter(
+                match=job_pattern, count=20
+            ):
+                job_keys.append(key)
+                scan_count += 1
+                if scan_count >= limit * 3:  # Scan at most 3x the needed amount
+                    break
+        except TypeError:
+            # Handle mocks that don't support the count parameter
+            async for key in self.job_store.redis.scan_iter(match=job_pattern):
+                job_keys.append(key)
+                scan_count += 1
+                if scan_count >= limit * 3:  # Scan at most 3x the needed amount
+                    break
+
+        # Get job data individually (compatible with tests)
+        if job_keys:
+            recent_jobs = []
+            for key in job_keys:
+                job_id = key.decode().replace(JOB_KEY_PREFIX, "")
+                job_dict = await self.job_store.get_job_data_dict(job_id)
+                if job_dict:
+                    try:
+                        # Only include recently updated jobs
+                        if "completed_at" in job_dict or "started_at" in job_dict:
+                            recent_jobs.append(
+                                {
+                                    "id": job_id,
+                                    "function": job_dict.get(
+                                        "function_name", "unknown"
+                                    ),
+                                    "status": job_dict.get("status", "unknown"),
+                                    "started_at": float(job_dict.get("started_at", 0)),
+                                    "completed_at": float(
+                                        job_dict.get("completed_at", 0)
+                                    ),
+                                }
+                            )
+                    except (ValueError, UnicodeDecodeError):
+                        continue
+
+            # Sort by most recent activity
+            recent_jobs.sort(
+                key=lambda x: x.get("completed_at") or x.get("started_at") or 0,
+                reverse=True,
+            )
+            jobs = recent_jobs[:limit]
+
+        return jobs
+
+    def update_layout(self, layout: Layout):
+        """Update the layout with current data"""
+        # Header
+        layout["header"].update(self._create_header())
+
+        # Queue stats
+        layout["queue_stats"].update(self._create_queue_stats())
+
+        # Queue chart
+        layout["queue_chart"].update(self._create_queue_chart())
+
+        # Worker list
+        layout["worker_list"].update(self._create_worker_list())
+
+        # Recent jobs
+        layout["recent_jobs"].update(self._create_recent_jobs())
+
+        # DLQ stats
+        layout["dlq_stats"].update(self._create_dlq_stats())
+
+        # DLQ errors
+        layout["dlq_errors"].update(self._create_dlq_errors())
+
+        # Footer
+        layout["footer"].update(self._create_footer())
+
+    def _create_header(self) -> Panel:
+        """Create header panel"""
+        header_text = Text()
+        header_text.append("RRQ Monitor", style="bold cyan")
+        header_text.append(" | ", style="dim")
+        header_text.append(
+            f"Last Update: {self.last_update.strftime('%H:%M:%S')}", style="dim"
+        )
+
+        return Panel(
+            Align.center(header_text),
+            style="cyan",
+        )
+
+    def _create_queue_stats(self) -> Panel:
+        """Create queue statistics table"""
+        table = Table(show_header=True, header_style="bold magenta", expand=True)
+        table.add_column("Queue", style="cyan")
+        table.add_column("Size", justify="right")
+        table.add_column("Trend", justify="center")
+        table.add_column("Rate", justify="right")
+
+        total_size = 0
+        for queue_name in sorted(self.queue_sizes.keys()):
+            sizes = list(self.queue_sizes[queue_name])
+            current_size = sizes[-1] if sizes else 0
+            total_size += current_size
+
+            # Calculate trend
+            trend = "→"
+            trend_style = "dim"
+            if len(sizes) >= 2:
+                diff = sizes[-1] - sizes[-2]
+                if diff > 0:
+                    trend = "↑"
+                    trend_style = "red"
+                elif diff < 0:
+                    trend = "↓"
+                    trend_style = "green"
+
+            # Calculate processing rate (jobs/min)
+            rate = "N/A"
+            if len(sizes) >= 10:
+                # Average change over the last 10 samples
+                recent_changes = [sizes[i] - sizes[i - 1] for i in range(-9, 0)]
+                avg_change = sum(recent_changes) / len(recent_changes)
+                if avg_change < 0:  # Negative means jobs are being processed
+                    rate = f"{abs(avg_change * 60 / self.refresh_interval):.1f}/min"
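+                    # Worked example with made-up numbers: at the default
+                    # refresh_interval of 1.0s, an average drain of 0.5 jobs
+                    # per sample gives 0.5 * 60 / 1.0 = 30.0/min.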
+
+            table.add_row(
+                format_queue_name(queue_name),
+                str(current_size),
+                Text(trend, style=trend_style),
+                rate,
+            )
+
+        # Add total row
+        table.add_row(
+            Text("TOTAL", style="bold"),
+            Text(str(total_size), style="bold"),
+            "",
+            "",
+        )
+
+        return Panel(table, title="Queue Statistics", border_style="blue")
+
+    def _create_queue_chart(self) -> Panel:
+        """Create queue size sparkline chart"""
+        lines = []
+
+        for queue_name in sorted(self.queue_sizes.keys()):
+            sizes = list(self.queue_sizes[queue_name])
+            if not sizes:
+                continue
+
+            # Create sparkline
+            max_val = max(sizes) if sizes else 1
+            min_val = min(sizes) if sizes else 0
+
+            if max_val == min_val:
+                sparkline = "─" * 20
+            else:
+                sparkline = ""
+                for val in sizes[-20:]:  # Last 20 values
+                    normalized = (val - min_val) / (max_val - min_val)
+                    spark_chars = " ▁▂▃▄▅▆▇█"
+                    idx = int(normalized * (len(spark_chars) - 1))
+                    sparkline += spark_chars[idx]
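+                # Illustration with made-up sizes [0, 4, 8]: normalized values
+                # 0.0, 0.5, 1.0 map to indices 0, 4, 8 of spark_chars → " ▄█".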
+
+            line = f"{queue_name:>12}: {sparkline} [{sizes[-1] if sizes else 0}]"
+            lines.append(line)
+
+        content = "\n".join(lines) if lines else "No queue data"
+        return Panel(content, title="Queue Trends (60s)", border_style="green")
+
+    def _create_worker_list(self) -> Panel:
+        """Create worker status table"""
+        table = Table(show_header=True, header_style="bold magenta", expand=True)
+        table.add_column("Worker", style="cyan")
+        table.add_column("Status", justify="center")
+        table.add_column("Jobs", justify="right")
+        table.add_column("Heartbeat", style="dim")
+
+        if not self.workers:
+            table.add_row(
+                "[dim italic]No active workers[/dim italic]",
+                "",
+                "",
+                "",
+            )
+        else:
+            for worker in sorted(self.workers, key=lambda x: x["id"]):
+                # Status with color
+                status_colors = {
+                    "running": "green",
+                    "idle": "yellow",
+                    "stopped": "red",
+                    "initializing": "blue",
+                }
+                status_color = status_colors.get(worker["status"], "white")
+                status_text = Text(worker["status"].upper(), style=status_color)
+
+                # Worker ID (truncated)
+                worker_id = (
+                    worker["id"][:12] + "..."
+                    if len(worker["id"]) > 15
+                    else worker["id"]
+                )
+
+                table.add_row(
+                    worker_id,
+                    status_text,
+                    str(worker["active_jobs"]),
+                    format_timestamp(worker["last_heartbeat"]),
+                )
+
+        return Panel(table, title="Active Workers", border_style="blue")
+
+    def _create_recent_jobs(self) -> Panel:
+        """Create recent jobs table"""
+        table = Table(show_header=True, header_style="bold magenta", expand=True)
+        table.add_column("Job", style="cyan")
+        table.add_column("Function", style="yellow")
+        table.add_column("Status", justify="center")
+        table.add_column("Duration", justify="right")
+
+        if not self.recent_jobs:
+            table.add_row(
+                "[dim italic]No recent jobs[/dim italic]",
+                "",
+                "",
+                "",
+            )
+        else:
+            for job in self.recent_jobs[:5]:  # Show top 5
+                # Calculate duration
+                duration = None
+                if job.get("completed_at") and job.get("started_at"):
+                    duration = job["completed_at"] - job["started_at"]
+
+                # Truncate IDs
+                job_id = job["id"][:8] + "..."
+                function = (
+                    job["function"][:20] + "..."
+                    if len(job["function"]) > 20
+                    else job["function"]
+                )
+
+                table.add_row(
+                    job_id,
+                    function,
+                    format_status(job["status"]),
+                    format_duration(duration) if duration else "N/A",
+                )
+
+        return Panel(table, title="Recent Jobs", border_style="green")
+
+    def _create_footer(self) -> Panel:
+        """Create footer panel"""
+        footer_text = Text()
+        footer_text.append("Press ", style="dim")
+        footer_text.append("Ctrl+C", style="bold yellow")
+        footer_text.append(" to exit", style="dim")
+
+        return Panel(
+            Align.center(footer_text),
+            style="dim",
+        )
+
+    async def _update_dlq_stats(self):
+        """Update DLQ statistics"""
+        dlq_name = self.settings.default_dlq_name
+        dlq_key = f"{DLQ_KEY_PREFIX}{dlq_name}"
+
+        # Get total DLQ job count
+        self.dlq_stats["total_jobs"] = await self.job_store.redis.llen(dlq_key)
+
+        if self.dlq_stats["total_jobs"] > 0:
+            # Get some recent DLQ jobs for error analysis
+            job_ids = await self.job_store.redis.lrange(dlq_key, 0, 9)  # Get first 10
+            job_ids = [job_id.decode("utf-8") for job_id in job_ids]
+
+            errors = []
+            newest_time = 0
+
+            for job_id in job_ids:
+                job_data = await self.job_store.get_job(job_id)
+                if job_data:
+                    error = job_data.get("last_error", "Unknown error")
+                    completion_time = job_data.get("completion_time", 0)
+
+                    if isinstance(completion_time, str):
+                        # ISO-8601 string; normalize a trailing "Z" and convert
+                        # to a UNIX timestamp (datetime imported at module level)
+                        try:
+                            completion_time = datetime.fromisoformat(
+                                completion_time.replace("Z", "+00:00")
+                            ).timestamp()
+                        except (ValueError, TypeError):
+                            completion_time = 0
+
+                    if completion_time > newest_time:
+                        newest_time = completion_time
+                        self.dlq_stats["newest_error"] = (
+                            error[:ERROR_DISPLAY_LENGTH] + "..."
+                            if len(error) > ERROR_DISPLAY_LENGTH
+                            else error
+                        )
+
+                    errors.append(
+                        error[:ERROR_DISPLAY_LENGTH] + "..."
+                        if len(error) > ERROR_DISPLAY_LENGTH
+                        else error
+                    )
+
+            # Count error types
+            error_counts = {}
+            for error in errors:
+                error_counts[error] = error_counts.get(error, 0) + 1
+
+            self.dlq_stats["top_errors"] = dict(
+                sorted(error_counts.items(), key=lambda x: x[1], reverse=True)[:5]
+            )
+        else:
+            self.dlq_stats["newest_error"] = None
+            self.dlq_stats["top_errors"] = {}
+
+    def _create_dlq_stats(self) -> Panel:
+        """Create DLQ statistics panel"""
+        table = Table(show_header=True, header_style="bold red", expand=True)
+        table.add_column("Metric", style="cyan")
+        table.add_column("Value", justify="right")
+
+        table.add_row("Total Jobs", str(self.dlq_stats["total_jobs"]))
+
+        if self.dlq_stats["newest_error"]:
+            table.add_row("Latest Error", self.dlq_stats["newest_error"])
+        else:
+            table.add_row("Latest Error", "None")
+
+        return Panel(
+            table,
+            title=f"Dead Letter Queue ({self.settings.default_dlq_name})",
+            border_style="red",
+        )
+
+    def _create_dlq_errors(self) -> Panel:
+        """Create DLQ error patterns panel"""
+        table = Table(show_header=True, header_style="bold red", expand=True)
+        table.add_column("Error Pattern", style="red")
+        table.add_column("Count", justify="right")
+
+        if self.dlq_stats["top_errors"]:
+            for error, count in self.dlq_stats["top_errors"].items():
+                table.add_row(error, str(count))
+        else:
+            table.add_row("No errors", "0")
+
+        return Panel(table, title="Top Error Patterns", border_style="red")