mcp-ticketer 0.3.1__py3-none-any.whl → 0.3.3__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registries.
Potentially problematic release: this version of mcp-ticketer might be problematic.
- mcp_ticketer/__version__.py +1 -1
- mcp_ticketer/adapters/aitrackdown.py +164 -36
- mcp_ticketer/adapters/github.py +11 -8
- mcp_ticketer/adapters/jira.py +29 -28
- mcp_ticketer/adapters/linear/__init__.py +1 -1
- mcp_ticketer/adapters/linear/adapter.py +105 -104
- mcp_ticketer/adapters/linear/client.py +78 -59
- mcp_ticketer/adapters/linear/mappers.py +93 -73
- mcp_ticketer/adapters/linear/queries.py +28 -7
- mcp_ticketer/adapters/linear/types.py +67 -60
- mcp_ticketer/adapters/linear.py +2 -2
- mcp_ticketer/cli/adapter_diagnostics.py +87 -52
- mcp_ticketer/cli/codex_configure.py +6 -6
- mcp_ticketer/cli/diagnostics.py +180 -88
- mcp_ticketer/cli/linear_commands.py +156 -113
- mcp_ticketer/cli/main.py +153 -82
- mcp_ticketer/cli/simple_health.py +74 -51
- mcp_ticketer/cli/utils.py +15 -10
- mcp_ticketer/core/config.py +23 -19
- mcp_ticketer/core/env_discovery.py +5 -4
- mcp_ticketer/core/env_loader.py +114 -91
- mcp_ticketer/core/exceptions.py +22 -20
- mcp_ticketer/core/models.py +9 -0
- mcp_ticketer/core/project_config.py +1 -1
- mcp_ticketer/mcp/constants.py +58 -0
- mcp_ticketer/mcp/dto.py +195 -0
- mcp_ticketer/mcp/response_builder.py +206 -0
- mcp_ticketer/mcp/server.py +361 -1182
- mcp_ticketer/queue/health_monitor.py +166 -135
- mcp_ticketer/queue/manager.py +70 -19
- mcp_ticketer/queue/queue.py +24 -5
- mcp_ticketer/queue/run_worker.py +1 -1
- mcp_ticketer/queue/ticket_registry.py +203 -145
- mcp_ticketer/queue/worker.py +79 -43
- {mcp_ticketer-0.3.1.dist-info → mcp_ticketer-0.3.3.dist-info}/METADATA +1 -1
- mcp_ticketer-0.3.3.dist-info/RECORD +62 -0
- mcp_ticketer-0.3.1.dist-info/RECORD +0 -59
- {mcp_ticketer-0.3.1.dist-info → mcp_ticketer-0.3.3.dist-info}/WHEEL +0 -0
- {mcp_ticketer-0.3.1.dist-info → mcp_ticketer-0.3.3.dist-info}/entry_points.txt +0 -0
- {mcp_ticketer-0.3.1.dist-info → mcp_ticketer-0.3.3.dist-info}/licenses/LICENSE +0 -0
- {mcp_ticketer-0.3.1.dist-info → mcp_ticketer-0.3.3.dist-info}/top_level.txt +0 -0
mcp_ticketer/queue/health_monitor.py
CHANGED

@@ -1,12 +1,10 @@
 """Queue health monitoring and alerting system."""
 
-import asyncio
 import logging
 import time
 from datetime import datetime, timedelta
 from enum import Enum
-from
-from typing import Any, Dict, List, Optional
+from typing import Any, Optional
 
 import psutil
 
@@ -18,6 +16,7 @@ logger = logging.getLogger(__name__)
 
 class HealthStatus(str, Enum):
     """Health status levels."""
+
     HEALTHY = "healthy"
     WARNING = "warning"
     CRITICAL = "critical"
@@ -26,282 +25,312 @@ class HealthStatus(str, Enum):
 
 class HealthAlert:
     """Health alert with severity and details."""
-
+
     def __init__(
         self,
         level: HealthStatus,
         message: str,
-        details: Optional[
-        timestamp: Optional[datetime] = None
+        details: Optional[dict[str, Any]] = None,
+        timestamp: Optional[datetime] = None,
     ):
         self.level = level
         self.message = message
         self.details = details or {}
         self.timestamp = timestamp or datetime.now()
-
+
     def __str__(self) -> str:
         return f"[{self.level.upper()}] {self.message}"
 
 
 class QueueHealthMonitor:
     """Monitors queue health and provides immediate alerts."""
-
+
     # Health check thresholds
     WORKER_TIMEOUT_SECONDS = 30  # Worker should process items within 30s
-    STUCK_ITEM_THRESHOLD = 300
-    HIGH_FAILURE_RATE = 0.3
-    QUEUE_BACKLOG_WARNING = 10
+    STUCK_ITEM_THRESHOLD = 300  # 5 minutes for stuck items
+    HIGH_FAILURE_RATE = 0.3  # 30% failure rate is concerning
+    QUEUE_BACKLOG_WARNING = 10  # Warn if more than 10 pending items
     QUEUE_BACKLOG_CRITICAL = 50  # Critical if more than 50 pending items
-
+
     def __init__(self, queue: Optional[Queue] = None):
         """Initialize health monitor.
-
+
         Args:
             queue: Queue instance to monitor. Creates new if None.
+
         """
         self.queue = queue or Queue()
         self.manager = WorkerManager()
         self.last_check = datetime.now()
-        self.alerts:
-
-    def check_health(self) ->
+        self.alerts: list[HealthAlert] = []
+
+    def check_health(self) -> dict[str, Any]:
         """Perform comprehensive health check.
-
+
         Returns:
             Health status with alerts and metrics
+
         """
         self.alerts.clear()
-
+
         # Check worker status
         worker_health = self._check_worker_health()
-
+
         # Check queue status
         queue_health = self._check_queue_health()
-
+
         # Check for stuck items
         stuck_health = self._check_stuck_items()
-
+
         # Check failure rates
         failure_health = self._check_failure_rates()
-
+
         # Determine overall health
         overall_status = self._determine_overall_status()
-
+
         health_report = {
             "status": overall_status,
             "timestamp": datetime.now().isoformat(),
-            "alerts": [
+            "alerts": [
+                {
+                    "level": alert.level,
+                    "message": alert.message,
+                    "details": alert.details,
+                }
+                for alert in self.alerts
+            ],
             "metrics": {
                 "worker": worker_health,
                 "queue": queue_health,
                 "stuck_items": stuck_health,
-                "failure_rate": failure_health
-            }
+                "failure_rate": failure_health,
+            },
         }
-
+
         self.last_check = datetime.now()
         return health_report
-
-    def _check_worker_health(self) ->
+
+    def _check_worker_health(self) -> dict[str, Any]:
         """Check worker process health."""
         worker_status = self.manager.get_status()
-
+
         metrics = {
             "running": worker_status["running"],
             "pid": worker_status.get("pid"),
             "cpu_percent": worker_status.get("cpu_percent", 0),
-            "memory_mb": worker_status.get("memory_mb", 0)
+            "memory_mb": worker_status.get("memory_mb", 0),
         }
-
+
         if not worker_status["running"]:
             # Check if we have pending items but no worker
             pending_count = self.queue.get_pending_count()
             if pending_count > 0:
-                self.alerts.append(
-
-
-
-
+                self.alerts.append(
+                    HealthAlert(
+                        HealthStatus.CRITICAL,
+                        f"Worker not running but {pending_count} items pending",
+                        {"pending_count": pending_count, "action": "start_worker"},
+                    )
+                )
             else:
-                self.alerts.append(
-
-
-
-
+                self.alerts.append(
+                    HealthAlert(
+                        HealthStatus.WARNING,
+                        "Worker not running (no pending items)",
+                        {"action": "worker_idle"},
+                    )
+                )
         else:
             # Worker is running, check if it's responsive
             pid = worker_status.get("pid")
             if pid:
                 try:
-
+                    psutil.Process(pid)
                     # Check if worker has been idle too long with pending items
                     pending_count = self.queue.get_pending_count()
                     if pending_count > 0:
                         # Check for items that have been pending too long
                         old_pending = self._get_old_pending_items()
                         if old_pending:
-                            self.alerts.append(
-
-
-
-
+                            self.alerts.append(
+                                HealthAlert(
+                                    HealthStatus.WARNING,
+                                    f"Worker running but {len(old_pending)} items pending for >30s",
+                                    {
+                                        "old_pending_count": len(old_pending),
+                                        "worker_pid": pid,
+                                    },
+                                )
+                            )
                 except (psutil.NoSuchProcess, psutil.AccessDenied):
-                    self.alerts.append(
-
-
-
-
-
+                    self.alerts.append(
+                        HealthAlert(
+                            HealthStatus.CRITICAL,
+                            "Worker PID exists but process not accessible",
+                            {"pid": pid, "action": "restart_worker"},
+                        )
+                    )
+
         return metrics
-
-    def _check_queue_health(self) ->
+
+    def _check_queue_health(self) -> dict[str, Any]:
         """Check queue status and backlog."""
         stats = self.queue.get_stats()
-
+
         pending = stats.get("pending", 0)
         processing = stats.get("processing", 0)
         failed = stats.get("failed", 0)
         completed = stats.get("completed", 0)
-
+
         metrics = {
             "pending": pending,
             "processing": processing,
             "failed": failed,
             "completed": completed,
-            "total": pending + processing + failed + completed
+            "total": pending + processing + failed + completed,
         }
-
+
         # Check backlog levels
         if pending >= self.QUEUE_BACKLOG_CRITICAL:
-            self.alerts.append(
-
-
-
-
+            self.alerts.append(
+                HealthAlert(
+                    HealthStatus.CRITICAL,
+                    f"Critical queue backlog: {pending} pending items",
+                    {"pending_count": pending, "action": "scale_workers"},
+                )
+            )
         elif pending >= self.QUEUE_BACKLOG_WARNING:
-            self.alerts.append(
-
-
-
-
-
+            self.alerts.append(
+                HealthAlert(
+                    HealthStatus.WARNING,
+                    f"High queue backlog: {pending} pending items",
+                    {"pending_count": pending},
+                )
+            )
+
         # Check for too many processing items (might indicate stuck workers)
         if processing > 5:  # Should rarely have more than a few processing
-            self.alerts.append(
-
-
-
-
-
+            self.alerts.append(
+                HealthAlert(
+                    HealthStatus.WARNING,
+                    f"Many items in processing state: {processing}",
+                    {"processing_count": processing, "action": "check_stuck_items"},
+                )
+            )
+
         return metrics
-
-    def _check_stuck_items(self) ->
+
+    def _check_stuck_items(self) -> dict[str, Any]:
         """Check for items stuck in processing state."""
         # Reset stuck items first
         self.queue.reset_stuck_items(timeout_minutes=5)  # 5 minute timeout
-
+
         # Get current stuck items
         stuck_items = self._get_stuck_processing_items()
-
+
         metrics = {
             "stuck_count": len(stuck_items),
-            "stuck_items": [item.id for item in stuck_items]
+            "stuck_items": [item.id for item in stuck_items],
         }
-
+
         if stuck_items:
-            self.alerts.append(
-
-
-
-
-
+            self.alerts.append(
+                HealthAlert(
+                    HealthStatus.WARNING,
+                    f"Found {len(stuck_items)} stuck items, auto-reset applied",
+                    {
+                        "stuck_items": [item.id for item in stuck_items],
+                        "action": "items_reset",
+                    },
+                )
+            )
+
         return metrics
-
-    def _check_failure_rates(self) ->
+
+    def _check_failure_rates(self) -> dict[str, Any]:
         """Check recent failure rates."""
         stats = self.queue.get_stats()
-
+
         total_items = sum(stats.values())
         failed_items = stats.get("failed", 0)
-
+
         failure_rate = failed_items / total_items if total_items > 0 else 0
-
+
         metrics = {
             "failure_rate": failure_rate,
             "failed_count": failed_items,
-            "total_count": total_items
+            "total_count": total_items,
         }
-
+
         if failure_rate >= self.HIGH_FAILURE_RATE and total_items >= 10:
-            self.alerts.append(
-
-
-
-
-
+            self.alerts.append(
+                HealthAlert(
+                    HealthStatus.CRITICAL,
+                    f"High failure rate: {failure_rate:.1%} ({failed_items}/{total_items})",
+                    {"failure_rate": failure_rate, "action": "investigate_failures"},
+                )
+            )
+
         return metrics
-
+
     def _determine_overall_status(self) -> HealthStatus:
         """Determine overall health status from alerts."""
         if not self.alerts:
             return HealthStatus.HEALTHY
-
+
         # Check for critical alerts
         if any(alert.level == HealthStatus.CRITICAL for alert in self.alerts):
             return HealthStatus.CRITICAL
-
+
         # Check for warnings
         if any(alert.level == HealthStatus.WARNING for alert in self.alerts):
             return HealthStatus.WARNING
-
+
         return HealthStatus.HEALTHY
-
-    def _get_old_pending_items(self) ->
+
+    def _get_old_pending_items(self) -> list:
         """Get items that have been pending for too long."""
         cutoff_time = datetime.now() - timedelta(seconds=self.WORKER_TIMEOUT_SECONDS)
-
+
         items = self.queue.list_items(status=QueueStatus.PENDING, limit=100)
-        return [
-
-
-        ]
-
-    def _get_stuck_processing_items(self) -> List:
+        return [item for item in items if item.created_at < cutoff_time]
+
+    def _get_stuck_processing_items(self) -> list:
         """Get items stuck in processing state."""
         cutoff_time = datetime.now() - timedelta(seconds=self.STUCK_ITEM_THRESHOLD)
-
+
         items = self.queue.list_items(status=QueueStatus.PROCESSING, limit=100)
-        return [
-
-
-        ]
-
-    def get_immediate_alerts(self) -> List[HealthAlert]:
+        return [item for item in items if item.created_at < cutoff_time]
+
+    def get_immediate_alerts(self) -> list[HealthAlert]:
         """Get alerts that require immediate attention."""
         return [
-            alert
+            alert
+            for alert in self.alerts
             if alert.level in [HealthStatus.CRITICAL, HealthStatus.FAILED]
         ]
-
-    def auto_repair(self) ->
+
+    def auto_repair(self) -> dict[str, Any]:
         """Attempt automatic repair of detected issues."""
         repair_actions = []
-
+
         # Check health first
-
-
+        self.check_health()
+
         for alert in self.alerts:
             action = alert.details.get("action")
-
+
             if action == "start_worker":
                 try:
                     if self.manager.start():
-                        repair_actions.append(
+                        repair_actions.append(
+                            f"Started worker for {alert.details.get('pending_count')} pending items"
+                        )
                     else:
                         repair_actions.append("Failed to start worker")
                 except Exception as e:
                     repair_actions.append(f"Error starting worker: {e}")
-
+
             elif action == "restart_worker":
                 try:
                     self.manager.stop()
@@ -312,11 +341,13 @@ class QueueHealthMonitor:
                         repair_actions.append("Failed to restart worker")
                 except Exception as e:
                     repair_actions.append(f"Error restarting worker: {e}")
-
+
             elif action == "items_reset":
-                repair_actions.append(
-
+                repair_actions.append(
+                    f"Reset {alert.details.get('stuck_items', [])} stuck items"
+                )
+
         return {
             "actions_taken": repair_actions,
-            "timestamp": datetime.now().isoformat()
+            "timestamp": datetime.now().isoformat(),
         }
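
The health_monitor.py changes above are largely mechanical: built-in dict/list generics replace the Dict/List imports, trailing commas and multi-line HealthAlert(...) calls satisfy the new formatting, and check_health() now serializes each alert's level, message, and details into the report. Below is a minimal usage sketch of the revised monitor; the import path and call pattern are inferred from this diff rather than from documented API, so treat them as assumptions.

# Hypothetical driver for the revised monitor; import path inferred from the
# wheel layout shown in this diff, not from official documentation.
from mcp_ticketer.queue.health_monitor import HealthStatus, QueueHealthMonitor

monitor = QueueHealthMonitor()  # wraps a default Queue() when none is supplied

report = monitor.check_health()  # keys: "status", "timestamp", "alerts", "metrics"
for alert in report["alerts"]:
    print(alert["level"], alert["message"], alert["details"])

if report["status"] != HealthStatus.HEALTHY:
    # auto_repair() re-runs check_health() and acts on each alert's "action" hint
    outcome = monitor.auto_repair()
    print(outcome["actions_taken"])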
mcp_ticketer/queue/manager.py
CHANGED

@@ -81,12 +81,22 @@ class WorkerManager:
         # Try to start worker
         return self.start()
 
-    def start(self) -> bool:
+    def start(self, timeout: float = 30.0) -> bool:
         """Start the worker process.
 
+        Args:
+            timeout: Maximum seconds to wait for worker to become fully operational (default: 30)
+
         Returns:
             True if started successfully, False otherwise
 
+        Raises:
+            TimeoutError: If worker doesn't start within timeout period
+
+        Note:
+            If timeout occurs, a worker may already be running in another process.
+            Check for stale lock files or processes using `get_status()`.
+
         """
         # Check if already running
         if self.is_running():
@@ -96,7 +106,19 @@ class WorkerManager:
         # Try to acquire lock
         if not self._acquire_lock():
             logger.warning("Could not acquire lock - another worker may be running")
-
+            # Wait briefly to see if worker becomes operational
+            start_time = time.time()
+            while time.time() - start_time < timeout:
+                if self.is_running():
+                    logger.info("Worker started by another process")
+                    return True
+                time.sleep(1)
+
+            raise TimeoutError(
+                f"Worker start timed out after {timeout}s. "
+                f"Lock is held but worker is not running. "
+                f"Check for stale lock files or zombie processes."
+            )
 
         try:
             # Start worker in subprocess using the same Python executable as the CLI
@@ -113,9 +135,12 @@ class WorkerManager:
             if env_file.exists():
                 logger.debug(f"Loading environment from {env_file} for subprocess")
                 from dotenv import dotenv_values
+
                 env_vars = dotenv_values(env_file)
                 subprocess_env.update(env_vars)
-                logger.debug(
+                logger.debug(
+                    f"Added {len(env_vars)} environment variables from .env.local"
+                )
 
             # Start as background process
             process = subprocess.Popen(
@@ -130,20 +155,42 @@ class WorkerManager:
             # Save PID
             self.pid_file.write_text(str(process.pid))
 
-            #
-
-
-
-
-
-
-
-
-
-
-
-
+            # Wait for worker to become fully operational with timeout
+            start_time = time.time()
+            while time.time() - start_time < timeout:
+                # Check if process exists
+                if not psutil.pid_exists(process.pid):
+                    logger.error("Worker process died during startup")
+                    self._cleanup()
+                    return False
+
+                # Check if worker is fully running (not just process exists)
+                if self.is_running():
+                    elapsed = time.time() - start_time
+                    logger.info(
+                        f"Worker started successfully in {elapsed:.1f}s with PID {process.pid}"
+                    )
+                    return True
+
+                # Log progress for long startups
+                elapsed = time.time() - start_time
+                if elapsed > 5 and int(elapsed) % 5 == 0:
+                    logger.debug(
+                        f"Waiting for worker to start, elapsed: {elapsed:.1f}s"
+                    )
+
+                time.sleep(0.5)
+
+            # Timeout reached
+            raise TimeoutError(
+                f"Worker start timed out after {timeout}s. "
+                f"Process spawned (PID {process.pid}) but worker did not become operational. "
+                f"Check worker logs for startup errors."
+            )
 
+        except TimeoutError:
+            # Re-raise timeout errors with context preserved
+            raise
         except Exception as e:
             logger.error(f"Failed to start worker: {e}")
             self._release_lock()
@@ -281,6 +328,7 @@ class WorkerManager:
 
         Returns:
             Path to Python executable
+
         """
         # First, try to detect if we're running in a pipx environment
         # by checking if the current executable is in a pipx venv
@@ -293,18 +341,21 @@ class WorkerManager:
 
         # Check if we can find the mcp-ticketer executable and extract its Python
         import shutil
+
         mcp_ticketer_path = shutil.which("mcp-ticketer")
         if mcp_ticketer_path:
             try:
                 # Read the shebang line to get the Python executable
-                with open(mcp_ticketer_path
+                with open(mcp_ticketer_path) as f:
                     first_line = f.readline().strip()
                     if first_line.startswith("#!") and "python" in first_line:
                         python_path = first_line[2:].strip()
                         if os.path.exists(python_path):
-                            logger.debug(
+                            logger.debug(
+                                f"Using Python from mcp-ticketer shebang: {python_path}"
+                            )
                             return python_path
-            except
+            except OSError:
                 pass
 
         # Fallback to sys.executable