mcp-ticketer 0.1.30-py3-none-any.whl → 1.2.11-py3-none-any.whl
This diff reflects the changes between package versions as published to their public registries and is provided for informational purposes only.
Potentially problematic release.
This version of mcp-ticketer might be problematic.
- mcp_ticketer/__init__.py +10 -10
- mcp_ticketer/__version__.py +3 -3
- mcp_ticketer/adapters/__init__.py +2 -0
- mcp_ticketer/adapters/aitrackdown.py +796 -46
- mcp_ticketer/adapters/asana/__init__.py +15 -0
- mcp_ticketer/adapters/asana/adapter.py +1416 -0
- mcp_ticketer/adapters/asana/client.py +292 -0
- mcp_ticketer/adapters/asana/mappers.py +348 -0
- mcp_ticketer/adapters/asana/types.py +146 -0
- mcp_ticketer/adapters/github.py +879 -129
- mcp_ticketer/adapters/hybrid.py +11 -11
- mcp_ticketer/adapters/jira.py +973 -73
- mcp_ticketer/adapters/linear/__init__.py +24 -0
- mcp_ticketer/adapters/linear/adapter.py +2732 -0
- mcp_ticketer/adapters/linear/client.py +344 -0
- mcp_ticketer/adapters/linear/mappers.py +420 -0
- mcp_ticketer/adapters/linear/queries.py +479 -0
- mcp_ticketer/adapters/linear/types.py +360 -0
- mcp_ticketer/adapters/linear.py +10 -2315
- mcp_ticketer/analysis/__init__.py +23 -0
- mcp_ticketer/analysis/orphaned.py +218 -0
- mcp_ticketer/analysis/similarity.py +224 -0
- mcp_ticketer/analysis/staleness.py +266 -0
- mcp_ticketer/cache/memory.py +9 -8
- mcp_ticketer/cli/adapter_diagnostics.py +421 -0
- mcp_ticketer/cli/auggie_configure.py +116 -15
- mcp_ticketer/cli/codex_configure.py +274 -82
- mcp_ticketer/cli/configure.py +888 -151
- mcp_ticketer/cli/diagnostics.py +400 -157
- mcp_ticketer/cli/discover.py +297 -26
- mcp_ticketer/cli/gemini_configure.py +119 -26
- mcp_ticketer/cli/init_command.py +880 -0
- mcp_ticketer/cli/instruction_commands.py +435 -0
- mcp_ticketer/cli/linear_commands.py +616 -0
- mcp_ticketer/cli/main.py +203 -1165
- mcp_ticketer/cli/mcp_configure.py +474 -90
- mcp_ticketer/cli/mcp_server_commands.py +415 -0
- mcp_ticketer/cli/migrate_config.py +12 -8
- mcp_ticketer/cli/platform_commands.py +123 -0
- mcp_ticketer/cli/platform_detection.py +418 -0
- mcp_ticketer/cli/platform_installer.py +513 -0
- mcp_ticketer/cli/python_detection.py +126 -0
- mcp_ticketer/cli/queue_commands.py +15 -15
- mcp_ticketer/cli/setup_command.py +639 -0
- mcp_ticketer/cli/simple_health.py +90 -65
- mcp_ticketer/cli/ticket_commands.py +1013 -0
- mcp_ticketer/cli/update_checker.py +313 -0
- mcp_ticketer/cli/utils.py +114 -66
- mcp_ticketer/core/__init__.py +24 -1
- mcp_ticketer/core/adapter.py +250 -16
- mcp_ticketer/core/config.py +145 -37
- mcp_ticketer/core/env_discovery.py +101 -22
- mcp_ticketer/core/env_loader.py +349 -0
- mcp_ticketer/core/exceptions.py +160 -0
- mcp_ticketer/core/http_client.py +26 -26
- mcp_ticketer/core/instructions.py +405 -0
- mcp_ticketer/core/label_manager.py +732 -0
- mcp_ticketer/core/mappers.py +42 -30
- mcp_ticketer/core/models.py +280 -28
- mcp_ticketer/core/onepassword_secrets.py +379 -0
- mcp_ticketer/core/project_config.py +183 -49
- mcp_ticketer/core/registry.py +3 -3
- mcp_ticketer/core/session_state.py +171 -0
- mcp_ticketer/core/state_matcher.py +592 -0
- mcp_ticketer/core/url_parser.py +425 -0
- mcp_ticketer/core/validators.py +69 -0
- mcp_ticketer/defaults/ticket_instructions.md +644 -0
- mcp_ticketer/mcp/__init__.py +29 -1
- mcp_ticketer/mcp/__main__.py +60 -0
- mcp_ticketer/mcp/server/__init__.py +25 -0
- mcp_ticketer/mcp/server/__main__.py +60 -0
- mcp_ticketer/mcp/server/constants.py +58 -0
- mcp_ticketer/mcp/server/diagnostic_helper.py +175 -0
- mcp_ticketer/mcp/server/dto.py +195 -0
- mcp_ticketer/mcp/server/main.py +1343 -0
- mcp_ticketer/mcp/server/response_builder.py +206 -0
- mcp_ticketer/mcp/server/routing.py +655 -0
- mcp_ticketer/mcp/server/server_sdk.py +151 -0
- mcp_ticketer/mcp/server/tools/__init__.py +56 -0
- mcp_ticketer/mcp/server/tools/analysis_tools.py +495 -0
- mcp_ticketer/mcp/server/tools/attachment_tools.py +226 -0
- mcp_ticketer/mcp/server/tools/bulk_tools.py +273 -0
- mcp_ticketer/mcp/server/tools/comment_tools.py +152 -0
- mcp_ticketer/mcp/server/tools/config_tools.py +1439 -0
- mcp_ticketer/mcp/server/tools/diagnostic_tools.py +211 -0
- mcp_ticketer/mcp/server/tools/hierarchy_tools.py +921 -0
- mcp_ticketer/mcp/server/tools/instruction_tools.py +300 -0
- mcp_ticketer/mcp/server/tools/label_tools.py +948 -0
- mcp_ticketer/mcp/server/tools/pr_tools.py +152 -0
- mcp_ticketer/mcp/server/tools/search_tools.py +215 -0
- mcp_ticketer/mcp/server/tools/session_tools.py +170 -0
- mcp_ticketer/mcp/server/tools/ticket_tools.py +1268 -0
- mcp_ticketer/mcp/server/tools/user_ticket_tools.py +547 -0
- mcp_ticketer/queue/__init__.py +1 -0
- mcp_ticketer/queue/health_monitor.py +168 -136
- mcp_ticketer/queue/manager.py +95 -25
- mcp_ticketer/queue/queue.py +40 -21
- mcp_ticketer/queue/run_worker.py +6 -1
- mcp_ticketer/queue/ticket_registry.py +213 -155
- mcp_ticketer/queue/worker.py +109 -49
- mcp_ticketer-1.2.11.dist-info/METADATA +792 -0
- mcp_ticketer-1.2.11.dist-info/RECORD +110 -0
- mcp_ticketer/mcp/server.py +0 -1895
- mcp_ticketer-0.1.30.dist-info/METADATA +0 -413
- mcp_ticketer-0.1.30.dist-info/RECORD +0 -49
- {mcp_ticketer-0.1.30.dist-info → mcp_ticketer-1.2.11.dist-info}/WHEEL +0 -0
- {mcp_ticketer-0.1.30.dist-info → mcp_ticketer-1.2.11.dist-info}/entry_points.txt +0 -0
- {mcp_ticketer-0.1.30.dist-info → mcp_ticketer-1.2.11.dist-info}/licenses/LICENSE +0 -0
- {mcp_ticketer-0.1.30.dist-info → mcp_ticketer-1.2.11.dist-info}/top_level.txt +0 -0
--- a/mcp_ticketer/queue/health_monitor.py
+++ b/mcp_ticketer/queue/health_monitor.py
@@ -1,12 +1,10 @@
 """Queue health monitoring and alerting system."""
 
-import asyncio
 import logging
 import time
 from datetime import datetime, timedelta
 from enum import Enum
-from
-from typing import Any, Dict, List, Optional
+from typing import Any
 
 import psutil
 
@@ -18,6 +16,7 @@ logger = logging.getLogger(__name__)
 
 class HealthStatus(str, Enum):
     """Health status levels."""
+
     HEALTHY = "healthy"
     WARNING = "warning"
     CRITICAL = "critical"
@@ -26,282 +25,313 @@ class HealthStatus(str, Enum):
 
 class HealthAlert:
     """Health alert with severity and details."""
-
+
     def __init__(
         self,
         level: HealthStatus,
         message: str,
-        details:
-        timestamp:
+        details: dict[str, Any] | None = None,
+        timestamp: datetime | None = None,
     ):
         self.level = level
         self.message = message
         self.details = details or {}
         self.timestamp = timestamp or datetime.now()
-
+
     def __str__(self) -> str:
+        """Return string representation of alert."""
         return f"[{self.level.upper()}] {self.message}"
 
 
 class QueueHealthMonitor:
     """Monitors queue health and provides immediate alerts."""
-
+
     # Health check thresholds
     WORKER_TIMEOUT_SECONDS = 30  # Worker should process items within 30s
-    STUCK_ITEM_THRESHOLD = 300
-    HIGH_FAILURE_RATE = 0.3
-    QUEUE_BACKLOG_WARNING = 10
+    STUCK_ITEM_THRESHOLD = 300  # 5 minutes for stuck items
+    HIGH_FAILURE_RATE = 0.3  # 30% failure rate is concerning
+    QUEUE_BACKLOG_WARNING = 10  # Warn if more than 10 pending items
     QUEUE_BACKLOG_CRITICAL = 50  # Critical if more than 50 pending items
-
-    def __init__(self, queue:
+
+    def __init__(self, queue: Queue | None = None):
         """Initialize health monitor.
-
+
         Args:
             queue: Queue instance to monitor. Creates new if None.
+
         """
         self.queue = queue or Queue()
         self.manager = WorkerManager()
         self.last_check = datetime.now()
-        self.alerts:
-
-    def check_health(self) ->
+        self.alerts: list[HealthAlert] = []
+
+    def check_health(self) -> dict[str, Any]:
         """Perform comprehensive health check.
-
+
         Returns:
             Health status with alerts and metrics
+
         """
         self.alerts.clear()
-
+
         # Check worker status
         worker_health = self._check_worker_health()
-
+
         # Check queue status
         queue_health = self._check_queue_health()
-
+
         # Check for stuck items
         stuck_health = self._check_stuck_items()
-
+
         # Check failure rates
         failure_health = self._check_failure_rates()
-
+
         # Determine overall health
         overall_status = self._determine_overall_status()
-
+
         health_report = {
             "status": overall_status,
             "timestamp": datetime.now().isoformat(),
-            "alerts": [
+            "alerts": [
+                {
+                    "level": alert.level,
+                    "message": alert.message,
+                    "details": alert.details,
+                }
+                for alert in self.alerts
+            ],
             "metrics": {
                 "worker": worker_health,
                 "queue": queue_health,
                 "stuck_items": stuck_health,
-                "failure_rate": failure_health
-            }
+                "failure_rate": failure_health,
+            },
         }
-
+
         self.last_check = datetime.now()
         return health_report
-
-    def _check_worker_health(self) ->
+
+    def _check_worker_health(self) -> dict[str, Any]:
         """Check worker process health."""
         worker_status = self.manager.get_status()
-
+
         metrics = {
             "running": worker_status["running"],
             "pid": worker_status.get("pid"),
             "cpu_percent": worker_status.get("cpu_percent", 0),
-            "memory_mb": worker_status.get("memory_mb", 0)
+            "memory_mb": worker_status.get("memory_mb", 0),
         }
-
+
         if not worker_status["running"]:
             # Check if we have pending items but no worker
             pending_count = self.queue.get_pending_count()
             if pending_count > 0:
-                self.alerts.append(
-
-
-
-
+                self.alerts.append(
+                    HealthAlert(
+                        HealthStatus.CRITICAL,
+                        f"Worker not running but {pending_count} items pending",
+                        {"pending_count": pending_count, "action": "start_worker"},
+                    )
+                )
             else:
-                self.alerts.append(
-
-
-
-
+                self.alerts.append(
+                    HealthAlert(
+                        HealthStatus.WARNING,
+                        "Worker not running (no pending items)",
+                        {"action": "worker_idle"},
+                    )
+                )
         else:
             # Worker is running, check if it's responsive
             pid = worker_status.get("pid")
             if pid:
                 try:
-
+                    psutil.Process(pid)
                     # Check if worker has been idle too long with pending items
                     pending_count = self.queue.get_pending_count()
                     if pending_count > 0:
                         # Check for items that have been pending too long
                         old_pending = self._get_old_pending_items()
                         if old_pending:
-                            self.alerts.append(
-
-
-
-
+                            self.alerts.append(
+                                HealthAlert(
+                                    HealthStatus.WARNING,
+                                    f"Worker running but {len(old_pending)} items pending for >30s",
+                                    {
+                                        "old_pending_count": len(old_pending),
+                                        "worker_pid": pid,
+                                    },
+                                )
+                            )
                 except (psutil.NoSuchProcess, psutil.AccessDenied):
-                    self.alerts.append(
-
-
-
-
-
+                    self.alerts.append(
+                        HealthAlert(
+                            HealthStatus.CRITICAL,
+                            "Worker PID exists but process not accessible",
+                            {"pid": pid, "action": "restart_worker"},
+                        )
+                    )
+
         return metrics
-
-    def _check_queue_health(self) ->
+
+    def _check_queue_health(self) -> dict[str, Any]:
         """Check queue status and backlog."""
         stats = self.queue.get_stats()
-
+
         pending = stats.get("pending", 0)
         processing = stats.get("processing", 0)
         failed = stats.get("failed", 0)
         completed = stats.get("completed", 0)
-
+
         metrics = {
             "pending": pending,
             "processing": processing,
             "failed": failed,
             "completed": completed,
-            "total": pending + processing + failed + completed
+            "total": pending + processing + failed + completed,
         }
-
+
         # Check backlog levels
         if pending >= self.QUEUE_BACKLOG_CRITICAL:
-            self.alerts.append(
-
-
-
-
+            self.alerts.append(
+                HealthAlert(
+                    HealthStatus.CRITICAL,
+                    f"Critical queue backlog: {pending} pending items",
+                    {"pending_count": pending, "action": "scale_workers"},
+                )
+            )
         elif pending >= self.QUEUE_BACKLOG_WARNING:
-            self.alerts.append(
-
-
-
-
-
+            self.alerts.append(
+                HealthAlert(
+                    HealthStatus.WARNING,
+                    f"High queue backlog: {pending} pending items",
+                    {"pending_count": pending},
+                )
+            )
+
         # Check for too many processing items (might indicate stuck workers)
         if processing > 5:  # Should rarely have more than a few processing
-            self.alerts.append(
-
-
-
-
-
+            self.alerts.append(
+                HealthAlert(
+                    HealthStatus.WARNING,
+                    f"Many items in processing state: {processing}",
+                    {"processing_count": processing, "action": "check_stuck_items"},
+                )
+            )
+
         return metrics
-
-    def _check_stuck_items(self) ->
+
+    def _check_stuck_items(self) -> dict[str, Any]:
         """Check for items stuck in processing state."""
         # Reset stuck items first
         self.queue.reset_stuck_items(timeout_minutes=5)  # 5 minute timeout
-
+
         # Get current stuck items
         stuck_items = self._get_stuck_processing_items()
-
+
         metrics = {
             "stuck_count": len(stuck_items),
-            "stuck_items": [item.id for item in stuck_items]
+            "stuck_items": [item.id for item in stuck_items],
         }
-
+
         if stuck_items:
-            self.alerts.append(
-
-
-
-
-
+            self.alerts.append(
+                HealthAlert(
+                    HealthStatus.WARNING,
+                    f"Found {len(stuck_items)} stuck items, auto-reset applied",
+                    {
+                        "stuck_items": [item.id for item in stuck_items],
+                        "action": "items_reset",
+                    },
+                )
+            )
+
         return metrics
-
-    def _check_failure_rates(self) ->
+
+    def _check_failure_rates(self) -> dict[str, Any]:
         """Check recent failure rates."""
         stats = self.queue.get_stats()
-
+
         total_items = sum(stats.values())
         failed_items = stats.get("failed", 0)
-
+
         failure_rate = failed_items / total_items if total_items > 0 else 0
-
+
         metrics = {
             "failure_rate": failure_rate,
             "failed_count": failed_items,
-            "total_count": total_items
+            "total_count": total_items,
         }
-
+
         if failure_rate >= self.HIGH_FAILURE_RATE and total_items >= 10:
-            self.alerts.append(
-
-
-
-
-
+            self.alerts.append(
+                HealthAlert(
+                    HealthStatus.CRITICAL,
+                    f"High failure rate: {failure_rate:.1%} ({failed_items}/{total_items})",
+                    {"failure_rate": failure_rate, "action": "investigate_failures"},
+                )
+            )
+
         return metrics
-
+
     def _determine_overall_status(self) -> HealthStatus:
         """Determine overall health status from alerts."""
         if not self.alerts:
             return HealthStatus.HEALTHY
-
+
         # Check for critical alerts
         if any(alert.level == HealthStatus.CRITICAL for alert in self.alerts):
            return HealthStatus.CRITICAL
-
+
         # Check for warnings
         if any(alert.level == HealthStatus.WARNING for alert in self.alerts):
             return HealthStatus.WARNING
-
+
         return HealthStatus.HEALTHY
-
-    def _get_old_pending_items(self) ->
+
+    def _get_old_pending_items(self) -> list:
         """Get items that have been pending for too long."""
         cutoff_time = datetime.now() - timedelta(seconds=self.WORKER_TIMEOUT_SECONDS)
-
+
         items = self.queue.list_items(status=QueueStatus.PENDING, limit=100)
-        return [
-
-
-        ]
-
-    def _get_stuck_processing_items(self) -> List:
+        return [item for item in items if item.created_at < cutoff_time]
+
+    def _get_stuck_processing_items(self) -> list:
         """Get items stuck in processing state."""
         cutoff_time = datetime.now() - timedelta(seconds=self.STUCK_ITEM_THRESHOLD)
-
+
         items = self.queue.list_items(status=QueueStatus.PROCESSING, limit=100)
-        return [
-
-
-        ]
-
-    def get_immediate_alerts(self) -> List[HealthAlert]:
+        return [item for item in items if item.created_at < cutoff_time]
+
+    def get_immediate_alerts(self) -> list[HealthAlert]:
         """Get alerts that require immediate attention."""
         return [
-            alert
+            alert
+            for alert in self.alerts
             if alert.level in [HealthStatus.CRITICAL, HealthStatus.FAILED]
         ]
-
-    def auto_repair(self) ->
+
+    def auto_repair(self) -> dict[str, Any]:
         """Attempt automatic repair of detected issues."""
         repair_actions = []
-
+
         # Check health first
-
-
+        self.check_health()
+
         for alert in self.alerts:
             action = alert.details.get("action")
-
+
             if action == "start_worker":
                 try:
                     if self.manager.start():
-                        repair_actions.append(
+                        repair_actions.append(
+                            f"Started worker for {alert.details.get('pending_count')} pending items"
+                        )
                     else:
                         repair_actions.append("Failed to start worker")
                 except Exception as e:
                     repair_actions.append(f"Error starting worker: {e}")
-
+
             elif action == "restart_worker":
                 try:
                     self.manager.stop()
@@ -312,11 +342,13 @@ class QueueHealthMonitor:
                         repair_actions.append("Failed to restart worker")
                 except Exception as e:
                     repair_actions.append(f"Error restarting worker: {e}")
-
+
             elif action == "items_reset":
-                repair_actions.append(
-
+                repair_actions.append(
+                    f"Reset {alert.details.get('stuck_items', [])} stuck items"
+                )
+
         return {
             "actions_taken": repair_actions,
-            "timestamp": datetime.now().isoformat()
+            "timestamp": datetime.now().isoformat(),
        }