mcp-ticketer 0.1.24__py3-none-any.whl → 0.1.27__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mcp-ticketer might be problematic.
- mcp_ticketer/__version__.py +1 -1
- mcp_ticketer/adapters/hybrid.py +8 -8
- mcp_ticketer/cli/main.py +146 -4
- mcp_ticketer/mcp/server.py +710 -3
- mcp_ticketer/queue/health_monitor.py +322 -0
- mcp_ticketer/queue/queue.py +147 -66
- mcp_ticketer/queue/ticket_registry.py +416 -0
- mcp_ticketer/queue/worker.py +102 -8
- {mcp_ticketer-0.1.24.dist-info → mcp_ticketer-0.1.27.dist-info}/METADATA +1 -1
- {mcp_ticketer-0.1.24.dist-info → mcp_ticketer-0.1.27.dist-info}/RECORD +14 -12
- {mcp_ticketer-0.1.24.dist-info → mcp_ticketer-0.1.27.dist-info}/WHEEL +0 -0
- {mcp_ticketer-0.1.24.dist-info → mcp_ticketer-0.1.27.dist-info}/entry_points.txt +0 -0
- {mcp_ticketer-0.1.24.dist-info → mcp_ticketer-0.1.27.dist-info}/licenses/LICENSE +0 -0
- {mcp_ticketer-0.1.24.dist-info → mcp_ticketer-0.1.27.dist-info}/top_level.txt +0 -0
mcp_ticketer/queue/health_monitor.py
ADDED
@@ -0,0 +1,322 @@
"""Queue health monitoring and alerting system."""

import asyncio
import logging
import time
from datetime import datetime, timedelta
from enum import Enum
from pathlib import Path
from typing import Any, Dict, List, Optional

import psutil

from .manager import WorkerManager
from .queue import Queue, QueueStatus

logger = logging.getLogger(__name__)


class HealthStatus(str, Enum):
    """Health status levels."""
    HEALTHY = "healthy"
    WARNING = "warning"
    CRITICAL = "critical"
    FAILED = "failed"


class HealthAlert:
    """Health alert with severity and details."""

    def __init__(
        self,
        level: HealthStatus,
        message: str,
        details: Optional[Dict[str, Any]] = None,
        timestamp: Optional[datetime] = None
    ):
        self.level = level
        self.message = message
        self.details = details or {}
        self.timestamp = timestamp or datetime.now()

    def __str__(self) -> str:
        return f"[{self.level.upper()}] {self.message}"


class QueueHealthMonitor:
    """Monitors queue health and provides immediate alerts."""

    # Health check thresholds
    WORKER_TIMEOUT_SECONDS = 30  # Worker should process items within 30s
    STUCK_ITEM_THRESHOLD = 300  # 5 minutes for stuck items
    HIGH_FAILURE_RATE = 0.3  # 30% failure rate is concerning
    QUEUE_BACKLOG_WARNING = 10  # Warn if more than 10 pending items
    QUEUE_BACKLOG_CRITICAL = 50  # Critical if more than 50 pending items

    def __init__(self, queue: Optional[Queue] = None):
        """Initialize health monitor.

        Args:
            queue: Queue instance to monitor. Creates new if None.
        """
        self.queue = queue or Queue()
        self.manager = WorkerManager()
        self.last_check = datetime.now()
        self.alerts: List[HealthAlert] = []

    def check_health(self) -> Dict[str, Any]:
        """Perform comprehensive health check.

        Returns:
            Health status with alerts and metrics
        """
        self.alerts.clear()

        # Check worker status
        worker_health = self._check_worker_health()

        # Check queue status
        queue_health = self._check_queue_health()

        # Check for stuck items
        stuck_health = self._check_stuck_items()

        # Check failure rates
        failure_health = self._check_failure_rates()

        # Determine overall health
        overall_status = self._determine_overall_status()

        health_report = {
            "status": overall_status,
            "timestamp": datetime.now().isoformat(),
            "alerts": [{"level": alert.level, "message": alert.message, "details": alert.details} for alert in self.alerts],
            "metrics": {
                "worker": worker_health,
                "queue": queue_health,
                "stuck_items": stuck_health,
                "failure_rate": failure_health
            }
        }

        self.last_check = datetime.now()
        return health_report

    def _check_worker_health(self) -> Dict[str, Any]:
        """Check worker process health."""
        worker_status = self.manager.get_status()

        metrics = {
            "running": worker_status["running"],
            "pid": worker_status.get("pid"),
            "cpu_percent": worker_status.get("cpu_percent", 0),
            "memory_mb": worker_status.get("memory_mb", 0)
        }

        if not worker_status["running"]:
            # Check if we have pending items but no worker
            pending_count = self.queue.get_pending_count()
            if pending_count > 0:
                self.alerts.append(HealthAlert(
                    HealthStatus.CRITICAL,
                    f"Worker not running but {pending_count} items pending",
                    {"pending_count": pending_count, "action": "start_worker"}
                ))
            else:
                self.alerts.append(HealthAlert(
                    HealthStatus.WARNING,
                    "Worker not running (no pending items)",
                    {"action": "worker_idle"}
                ))
        else:
            # Worker is running, check if it's responsive
            pid = worker_status.get("pid")
            if pid:
                try:
                    process = psutil.Process(pid)
                    # Check if worker has been idle too long with pending items
                    pending_count = self.queue.get_pending_count()
                    if pending_count > 0:
                        # Check for items that have been pending too long
                        old_pending = self._get_old_pending_items()
                        if old_pending:
                            self.alerts.append(HealthAlert(
                                HealthStatus.WARNING,
                                f"Worker running but {len(old_pending)} items pending for >30s",
                                {"old_pending_count": len(old_pending), "worker_pid": pid}
                            ))
                except (psutil.NoSuchProcess, psutil.AccessDenied):
                    self.alerts.append(HealthAlert(
                        HealthStatus.CRITICAL,
                        "Worker PID exists but process not accessible",
                        {"pid": pid, "action": "restart_worker"}
                    ))

        return metrics

    def _check_queue_health(self) -> Dict[str, Any]:
        """Check queue status and backlog."""
        stats = self.queue.get_stats()

        pending = stats.get("pending", 0)
        processing = stats.get("processing", 0)
        failed = stats.get("failed", 0)
        completed = stats.get("completed", 0)

        metrics = {
            "pending": pending,
            "processing": processing,
            "failed": failed,
            "completed": completed,
            "total": pending + processing + failed + completed
        }

        # Check backlog levels
        if pending >= self.QUEUE_BACKLOG_CRITICAL:
            self.alerts.append(HealthAlert(
                HealthStatus.CRITICAL,
                f"Critical queue backlog: {pending} pending items",
                {"pending_count": pending, "action": "scale_workers"}
            ))
        elif pending >= self.QUEUE_BACKLOG_WARNING:
            self.alerts.append(HealthAlert(
                HealthStatus.WARNING,
                f"High queue backlog: {pending} pending items",
                {"pending_count": pending}
            ))

        # Check for too many processing items (might indicate stuck workers)
        if processing > 5:  # Should rarely have more than a few processing
            self.alerts.append(HealthAlert(
                HealthStatus.WARNING,
                f"Many items in processing state: {processing}",
                {"processing_count": processing, "action": "check_stuck_items"}
            ))

        return metrics

    def _check_stuck_items(self) -> Dict[str, Any]:
        """Check for items stuck in processing state."""
        # Reset stuck items first
        self.queue.reset_stuck_items(timeout_minutes=5)  # 5 minute timeout

        # Get current stuck items
        stuck_items = self._get_stuck_processing_items()

        metrics = {
            "stuck_count": len(stuck_items),
            "stuck_items": [item.id for item in stuck_items]
        }

        if stuck_items:
            self.alerts.append(HealthAlert(
                HealthStatus.WARNING,
                f"Found {len(stuck_items)} stuck items, auto-reset applied",
                {"stuck_items": [item.id for item in stuck_items], "action": "items_reset"}
            ))

        return metrics

    def _check_failure_rates(self) -> Dict[str, Any]:
        """Check recent failure rates."""
        stats = self.queue.get_stats()

        total_items = sum(stats.values())
        failed_items = stats.get("failed", 0)

        failure_rate = failed_items / total_items if total_items > 0 else 0

        metrics = {
            "failure_rate": failure_rate,
            "failed_count": failed_items,
            "total_count": total_items
        }

        if failure_rate >= self.HIGH_FAILURE_RATE and total_items >= 10:
            self.alerts.append(HealthAlert(
                HealthStatus.CRITICAL,
                f"High failure rate: {failure_rate:.1%} ({failed_items}/{total_items})",
                {"failure_rate": failure_rate, "action": "investigate_failures"}
            ))

        return metrics

    def _determine_overall_status(self) -> HealthStatus:
        """Determine overall health status from alerts."""
        if not self.alerts:
            return HealthStatus.HEALTHY

        # Check for critical alerts
        if any(alert.level == HealthStatus.CRITICAL for alert in self.alerts):
            return HealthStatus.CRITICAL

        # Check for warnings
        if any(alert.level == HealthStatus.WARNING for alert in self.alerts):
            return HealthStatus.WARNING

        return HealthStatus.HEALTHY

    def _get_old_pending_items(self) -> List:
        """Get items that have been pending for too long."""
        cutoff_time = datetime.now() - timedelta(seconds=self.WORKER_TIMEOUT_SECONDS)

        items = self.queue.list_items(status=QueueStatus.PENDING, limit=100)
        return [
            item for item in items
            if item.created_at < cutoff_time
        ]

    def _get_stuck_processing_items(self) -> List:
        """Get items stuck in processing state."""
        cutoff_time = datetime.now() - timedelta(seconds=self.STUCK_ITEM_THRESHOLD)

        items = self.queue.list_items(status=QueueStatus.PROCESSING, limit=100)
        return [
            item for item in items
            if item.created_at < cutoff_time
        ]

    def get_immediate_alerts(self) -> List[HealthAlert]:
        """Get alerts that require immediate attention."""
        return [
            alert for alert in self.alerts
            if alert.level in [HealthStatus.CRITICAL, HealthStatus.FAILED]
        ]

    def auto_repair(self) -> Dict[str, Any]:
        """Attempt automatic repair of detected issues."""
        repair_actions = []

        # Check health first
        health = self.check_health()

        for alert in self.alerts:
            action = alert.details.get("action")

            if action == "start_worker":
                try:
                    if self.manager.start():
                        repair_actions.append(f"Started worker for {alert.details.get('pending_count')} pending items")
                    else:
                        repair_actions.append("Failed to start worker")
                except Exception as e:
                    repair_actions.append(f"Error starting worker: {e}")

            elif action == "restart_worker":
                try:
                    self.manager.stop()
                    time.sleep(2)
                    if self.manager.start():
                        repair_actions.append("Restarted unresponsive worker")
                    else:
                        repair_actions.append("Failed to restart worker")
                except Exception as e:
                    repair_actions.append(f"Error restarting worker: {e}")

            elif action == "items_reset":
                repair_actions.append(f"Reset {alert.details.get('stuck_items', [])} stuck items")

        return {
            "actions_taken": repair_actions,
            "timestamp": datetime.now().isoformat()
        }
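For orientation, here is a minimal sketch of how the new monitor might be driven from a periodic loop. It uses only the QueueHealthMonitor API shown above (check_health, get_immediate_alerts, auto_repair); the module path is inferred from the file location, and the polling interval and logging setup are illustrative assumptions, not part of the package.

# Illustrative only: poll the health monitor and attempt auto-repair when
# critical alerts appear. Interval and logging choices are assumptions.
import logging
import time

from mcp_ticketer.queue.health_monitor import QueueHealthMonitor

logging.basicConfig(level=logging.INFO)
monitor = QueueHealthMonitor()  # builds its own Queue() when none is passed

while True:
    report = monitor.check_health()
    logging.info("queue health: %s (%d alerts)", report["status"], len(report["alerts"]))

    if monitor.get_immediate_alerts():  # CRITICAL or FAILED alerts only
        repair = monitor.auto_repair()  # re-checks health, then acts on alert "action" hints
        logging.info("repair actions: %s", repair["actions_taken"])

    time.sleep(30)  # assumed polling interval
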
mcp_ticketer/queue/queue.py
CHANGED
@@ -180,7 +180,7 @@ class Queue:
         return queue_id

     def get_next_pending(self) -> Optional[QueueItem]:
-        """Get next pending item from queue.
+        """Get next pending item from queue atomically.

         Returns:
             Next pending QueueItem or None if queue is empty
@@ -188,34 +188,50 @@ class Queue:
         """
         with self._lock:
             with sqlite3.connect(self.db_path) as conn:
-                #
-
-
-
-
-                    ORDER BY created_at
-                    LIMIT 1
-                    """,
-                    (QueueStatus.PENDING.value,),
-                )
-
-                row = cursor.fetchone()
-                if row:
-                    # Mark as processing
-                    conn.execute(
+                # Use a transaction to atomically get and update the item
+                conn.execute("BEGIN IMMEDIATE")
+                try:
+                    # Get next pending item ordered by creation time
+                    cursor = conn.execute(
                         """
-
-
-
+                        SELECT * FROM queue
+                        WHERE status = ?
+                        ORDER BY created_at
+                        LIMIT 1
                         """,
-                        (QueueStatus.
+                        (QueueStatus.PENDING.value,),
                     )
-                conn.commit()

-
-
-
-
+                    row = cursor.fetchone()
+                    if row:
+                        # Atomically mark as processing
+                        update_cursor = conn.execute(
+                            """
+                            UPDATE queue
+                            SET status = ?, processed_at = ?
+                            WHERE id = ? AND status = ?
+                            """,
+                            (QueueStatus.PROCESSING.value, datetime.now().isoformat(), row[0], QueueStatus.PENDING.value),
+                        )
+
+                        # Check if update was successful (prevents race conditions)
+                        if update_cursor.rowcount == 1:
+                            conn.commit()
+                            # Create QueueItem from row and update status
+                            item = QueueItem.from_row(row)
+                            item.status = QueueStatus.PROCESSING
+                            return item
+                        else:
+                            # Item was already taken by another worker
+                            conn.rollback()
+                            return None
+                    else:
+                        conn.rollback()
+                        return None
+
+                except Exception:
+                    conn.rollback()
+                    raise

         return None

@@ -225,67 +241,132 @@ class Queue:
         status: QueueStatus,
         error_message: Optional[str] = None,
         result: Optional[dict[str, Any]] = None,
-
-
+        expected_status: Optional[QueueStatus] = None,
+    ) -> bool:
+        """Update queue item status atomically.

         Args:
             queue_id: Queue item ID
             status: New status
             error_message: Error message if failed
             result: Result data if completed
+            expected_status: Expected current status (for atomic updates)

+        Returns:
+            True if update was successful, False if item was in unexpected state
         """
         with self._lock:
             with sqlite3.connect(self.db_path) as conn:
-
-
-
-
-
-
-
-                    """
-                    UPDATE queue
-                    SET status = ?, processed_at = ?,
-                        error_message = ?, result = ?
-                    WHERE id = ?
-                    """,
-                    (
-                        status.value,
-                        processed_at,
-                        error_message,
-                        json.dumps(result) if result else None,
-                        queue_id,
-                    ),
-                )
-                conn.commit()
+                conn.execute("BEGIN IMMEDIATE")
+                try:
+                    processed_at = (
+                        datetime.now().isoformat()
+                        if status in [QueueStatus.COMPLETED, QueueStatus.FAILED]
+                        else None
+                    )

-
-
+                    if expected_status:
+                        # Atomic update with status check
+                        cursor = conn.execute(
+                            """
+                            UPDATE queue
+                            SET status = ?, processed_at = ?,
+                                error_message = ?, result = ?
+                            WHERE id = ? AND status = ?
+                            """,
+                            (
+                                status.value,
+                                processed_at,
+                                error_message,
+                                json.dumps(result) if result else None,
+                                queue_id,
+                                expected_status.value,
+                            ),
+                        )
+                        success = cursor.rowcount == 1
+                    else:
+                        # Regular update
+                        cursor = conn.execute(
+                            """
+                            UPDATE queue
+                            SET status = ?, processed_at = ?,
+                                error_message = ?, result = ?
+                            WHERE id = ?
+                            """,
+                            (
+                                status.value,
+                                processed_at,
+                                error_message,
+                                json.dumps(result) if result else None,
+                                queue_id,
+                            ),
+                        )
+                        success = cursor.rowcount == 1
+
+                    if success:
+                        conn.commit()
+                    else:
+                        conn.rollback()
+
+                    return success
+
+                except Exception:
+                    conn.rollback()
+                    raise
+
+    def increment_retry(self, queue_id: str, expected_status: Optional[QueueStatus] = None) -> int:
+        """Increment retry count and reset to pending atomically.

         Args:
             queue_id: Queue item ID
+            expected_status: Expected current status for atomic operation

         Returns:
-            New retry count
+            New retry count, or -1 if operation failed

         """
         with self._lock:
             with sqlite3.connect(self.db_path) as conn:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+                conn.execute("BEGIN IMMEDIATE")
+                try:
+                    if expected_status:
+                        # Atomic increment with status check
+                        cursor = conn.execute(
+                            """
+                            UPDATE queue
+                            SET retry_count = retry_count + 1,
+                                status = ?, processed_at = NULL,
+                                error_message = NULL
+                            WHERE id = ? AND status = ?
+                            RETURNING retry_count
+                            """,
+                            (QueueStatus.PENDING.value, queue_id, expected_status.value),
+                        )
+                    else:
+                        # Regular increment
+                        cursor = conn.execute(
+                            """
+                            UPDATE queue
+                            SET retry_count = retry_count + 1,
+                                status = ?, processed_at = NULL,
+                                error_message = NULL
+                            WHERE id = ?
+                            RETURNING retry_count
+                            """,
+                            (QueueStatus.PENDING.value, queue_id),
+                        )
+
+                    result = cursor.fetchone()
+                    if result:
+                        conn.commit()
+                        return result[0]
+                    else:
+                        conn.rollback()
+                        return -1
+
+                except Exception:
+                    conn.rollback()
+                    raise

     def get_item(self, queue_id: str) -> Optional[QueueItem]:
         """Get specific queue item by ID.
|