crackerjack 0.31.10__py3-none-any.whl → 0.31.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of crackerjack might be problematic. Click here for more details.
- crackerjack/CLAUDE.md +288 -705
- crackerjack/__main__.py +22 -8
- crackerjack/agents/__init__.py +0 -3
- crackerjack/agents/architect_agent.py +0 -43
- crackerjack/agents/base.py +1 -9
- crackerjack/agents/coordinator.py +2 -148
- crackerjack/agents/documentation_agent.py +109 -81
- crackerjack/agents/dry_agent.py +122 -97
- crackerjack/agents/formatting_agent.py +3 -16
- crackerjack/agents/import_optimization_agent.py +1174 -130
- crackerjack/agents/performance_agent.py +956 -188
- crackerjack/agents/performance_helpers.py +229 -0
- crackerjack/agents/proactive_agent.py +1 -48
- crackerjack/agents/refactoring_agent.py +516 -246
- crackerjack/agents/refactoring_helpers.py +282 -0
- crackerjack/agents/security_agent.py +393 -90
- crackerjack/agents/test_creation_agent.py +1776 -120
- crackerjack/agents/test_specialist_agent.py +59 -15
- crackerjack/agents/tracker.py +0 -102
- crackerjack/api.py +145 -37
- crackerjack/cli/handlers.py +48 -30
- crackerjack/cli/interactive.py +11 -11
- crackerjack/cli/options.py +66 -4
- crackerjack/code_cleaner.py +808 -148
- crackerjack/config/global_lock_config.py +110 -0
- crackerjack/config/hooks.py +43 -64
- crackerjack/core/async_workflow_orchestrator.py +247 -97
- crackerjack/core/autofix_coordinator.py +192 -109
- crackerjack/core/enhanced_container.py +46 -63
- crackerjack/core/file_lifecycle.py +549 -0
- crackerjack/core/performance.py +9 -8
- crackerjack/core/performance_monitor.py +395 -0
- crackerjack/core/phase_coordinator.py +281 -94
- crackerjack/core/proactive_workflow.py +9 -58
- crackerjack/core/resource_manager.py +501 -0
- crackerjack/core/service_watchdog.py +490 -0
- crackerjack/core/session_coordinator.py +4 -8
- crackerjack/core/timeout_manager.py +504 -0
- crackerjack/core/websocket_lifecycle.py +475 -0
- crackerjack/core/workflow_orchestrator.py +343 -209
- crackerjack/dynamic_config.py +50 -9
- crackerjack/errors.py +3 -4
- crackerjack/executors/async_hook_executor.py +63 -13
- crackerjack/executors/cached_hook_executor.py +14 -14
- crackerjack/executors/hook_executor.py +100 -37
- crackerjack/executors/hook_lock_manager.py +856 -0
- crackerjack/executors/individual_hook_executor.py +120 -86
- crackerjack/intelligence/__init__.py +0 -7
- crackerjack/intelligence/adaptive_learning.py +13 -86
- crackerjack/intelligence/agent_orchestrator.py +15 -78
- crackerjack/intelligence/agent_registry.py +12 -59
- crackerjack/intelligence/agent_selector.py +31 -92
- crackerjack/intelligence/integration.py +1 -41
- crackerjack/interactive.py +9 -9
- crackerjack/managers/async_hook_manager.py +25 -8
- crackerjack/managers/hook_manager.py +9 -9
- crackerjack/managers/publish_manager.py +57 -59
- crackerjack/managers/test_command_builder.py +6 -36
- crackerjack/managers/test_executor.py +9 -61
- crackerjack/managers/test_manager.py +17 -63
- crackerjack/managers/test_manager_backup.py +77 -127
- crackerjack/managers/test_progress.py +4 -23
- crackerjack/mcp/cache.py +5 -12
- crackerjack/mcp/client_runner.py +10 -10
- crackerjack/mcp/context.py +64 -6
- crackerjack/mcp/dashboard.py +14 -11
- crackerjack/mcp/enhanced_progress_monitor.py +55 -55
- crackerjack/mcp/file_monitor.py +72 -42
- crackerjack/mcp/progress_components.py +103 -84
- crackerjack/mcp/progress_monitor.py +122 -49
- crackerjack/mcp/rate_limiter.py +12 -12
- crackerjack/mcp/server_core.py +16 -22
- crackerjack/mcp/service_watchdog.py +26 -26
- crackerjack/mcp/state.py +15 -0
- crackerjack/mcp/tools/core_tools.py +95 -39
- crackerjack/mcp/tools/error_analyzer.py +6 -32
- crackerjack/mcp/tools/execution_tools.py +1 -56
- crackerjack/mcp/tools/execution_tools_backup.py +35 -131
- crackerjack/mcp/tools/intelligence_tool_registry.py +0 -36
- crackerjack/mcp/tools/intelligence_tools.py +2 -55
- crackerjack/mcp/tools/monitoring_tools.py +308 -145
- crackerjack/mcp/tools/proactive_tools.py +12 -42
- crackerjack/mcp/tools/progress_tools.py +23 -15
- crackerjack/mcp/tools/utility_tools.py +3 -40
- crackerjack/mcp/tools/workflow_executor.py +40 -60
- crackerjack/mcp/websocket/app.py +0 -3
- crackerjack/mcp/websocket/endpoints.py +206 -268
- crackerjack/mcp/websocket/jobs.py +213 -66
- crackerjack/mcp/websocket/server.py +84 -6
- crackerjack/mcp/websocket/websocket_handler.py +137 -29
- crackerjack/models/config_adapter.py +3 -16
- crackerjack/models/protocols.py +162 -3
- crackerjack/models/resource_protocols.py +454 -0
- crackerjack/models/task.py +3 -3
- crackerjack/monitoring/__init__.py +0 -0
- crackerjack/monitoring/ai_agent_watchdog.py +25 -71
- crackerjack/monitoring/regression_prevention.py +28 -87
- crackerjack/orchestration/advanced_orchestrator.py +44 -78
- crackerjack/orchestration/coverage_improvement.py +10 -60
- crackerjack/orchestration/execution_strategies.py +16 -16
- crackerjack/orchestration/test_progress_streamer.py +61 -53
- crackerjack/plugins/base.py +1 -1
- crackerjack/plugins/managers.py +22 -20
- crackerjack/py313.py +65 -21
- crackerjack/services/backup_service.py +467 -0
- crackerjack/services/bounded_status_operations.py +627 -0
- crackerjack/services/cache.py +7 -9
- crackerjack/services/config.py +35 -52
- crackerjack/services/config_integrity.py +5 -16
- crackerjack/services/config_merge.py +542 -0
- crackerjack/services/contextual_ai_assistant.py +17 -19
- crackerjack/services/coverage_ratchet.py +44 -73
- crackerjack/services/debug.py +25 -39
- crackerjack/services/dependency_monitor.py +52 -50
- crackerjack/services/enhanced_filesystem.py +14 -11
- crackerjack/services/file_hasher.py +1 -1
- crackerjack/services/filesystem.py +1 -12
- crackerjack/services/git.py +71 -47
- crackerjack/services/health_metrics.py +31 -27
- crackerjack/services/initialization.py +276 -428
- crackerjack/services/input_validator.py +760 -0
- crackerjack/services/log_manager.py +16 -16
- crackerjack/services/logging.py +7 -6
- crackerjack/services/metrics.py +43 -43
- crackerjack/services/pattern_cache.py +2 -31
- crackerjack/services/pattern_detector.py +26 -63
- crackerjack/services/performance_benchmarks.py +20 -45
- crackerjack/services/regex_patterns.py +2887 -0
- crackerjack/services/regex_utils.py +537 -0
- crackerjack/services/secure_path_utils.py +683 -0
- crackerjack/services/secure_status_formatter.py +534 -0
- crackerjack/services/secure_subprocess.py +605 -0
- crackerjack/services/security.py +47 -10
- crackerjack/services/security_logger.py +492 -0
- crackerjack/services/server_manager.py +109 -50
- crackerjack/services/smart_scheduling.py +8 -25
- crackerjack/services/status_authentication.py +603 -0
- crackerjack/services/status_security_manager.py +442 -0
- crackerjack/services/thread_safe_status_collector.py +546 -0
- crackerjack/services/tool_version_service.py +1 -23
- crackerjack/services/unified_config.py +36 -58
- crackerjack/services/validation_rate_limiter.py +269 -0
- crackerjack/services/version_checker.py +9 -40
- crackerjack/services/websocket_resource_limiter.py +572 -0
- crackerjack/slash_commands/__init__.py +52 -2
- crackerjack/tools/__init__.py +0 -0
- crackerjack/tools/validate_input_validator_patterns.py +262 -0
- crackerjack/tools/validate_regex_patterns.py +198 -0
- {crackerjack-0.31.10.dist-info → crackerjack-0.31.13.dist-info}/METADATA +197 -12
- crackerjack-0.31.13.dist-info/RECORD +178 -0
- crackerjack/cli/facade.py +0 -104
- crackerjack-0.31.10.dist-info/RECORD +0 -149
- {crackerjack-0.31.10.dist-info → crackerjack-0.31.13.dist-info}/WHEEL +0 -0
- {crackerjack-0.31.10.dist-info → crackerjack-0.31.13.dist-info}/entry_points.txt +0 -0
- {crackerjack-0.31.10.dist-info → crackerjack-0.31.13.dist-info}/licenses/LICENSE +0 -0
|
@@ -8,12 +8,17 @@ from typing import Any
|
|
|
8
8
|
|
|
9
9
|
from rich.console import Console
|
|
10
10
|
|
|
11
|
+
from crackerjack.core.timeout_manager import TimeoutStrategy, get_timeout_manager
|
|
12
|
+
from crackerjack.services.input_validator import get_input_validator
|
|
13
|
+
from crackerjack.services.secure_path_utils import SecurePathValidator
|
|
14
|
+
|
|
11
15
|
console = Console()
|
|
12
16
|
|
|
13
17
|
|
|
14
18
|
class JobManager:
|
|
15
19
|
def __init__(self, progress_dir: Path) -> None:
|
|
16
|
-
|
|
20
|
+
# Validate and secure the progress directory path
|
|
21
|
+
self.progress_dir = SecurePathValidator.validate_safe_path(progress_dir)
|
|
17
22
|
self.active_connections: dict[str, set[Any]] = {}
|
|
18
23
|
self.known_jobs: set[str] = set()
|
|
19
24
|
self.is_running = True
|
|
@@ -21,17 +26,18 @@ class JobManager:
|
|
|
21
26
|
self.progress_dir.mkdir(exist_ok=True)
|
|
22
27
|
|
|
23
28
|
def validate_job_id(self, job_id: str) -> bool:
|
|
29
|
+
"""Validate job ID using secure input validator."""
|
|
24
30
|
if not job_id:
|
|
25
31
|
return False
|
|
26
32
|
|
|
33
|
+
# First check if it's a valid UUID
|
|
27
34
|
with suppress(ValueError):
|
|
28
35
|
uuid.UUID(job_id)
|
|
29
36
|
return True
|
|
30
37
|
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
return bool(re.match(r"^[a-zA-Z0-9_-]+$", job_id) and len(job_id) <= 50)
|
|
38
|
+
# Use secure input validator for additional validation
|
|
39
|
+
result = get_input_validator().validate_job_id(job_id)
|
|
40
|
+
return result.valid
|
|
35
41
|
|
|
36
42
|
def add_connection(self, job_id: str, websocket: Any) -> None:
|
|
37
43
|
if job_id not in self.active_connections:
|
|
@@ -48,18 +54,79 @@ class JobManager:
|
|
|
48
54
|
if job_id not in self.active_connections:
|
|
49
55
|
return
|
|
50
56
|
|
|
57
|
+
timeout_manager = get_timeout_manager()
|
|
51
58
|
connections = self.active_connections[job_id].copy()
|
|
59
|
+
|
|
60
|
+
# Create websocket send tasks
|
|
61
|
+
send_tasks = self._create_broadcast_tasks(connections, timeout_manager, data)
|
|
62
|
+
|
|
63
|
+
# Execute broadcast with timeout handling
|
|
64
|
+
if send_tasks:
|
|
65
|
+
await self._execute_broadcast_tasks(job_id, send_tasks)
|
|
66
|
+
|
|
67
|
+
def _create_broadcast_tasks(
|
|
68
|
+
self, connections: set, timeout_manager, data: dict
|
|
69
|
+
) -> list:
|
|
70
|
+
"""Create tasks for all websocket sends with timeout."""
|
|
71
|
+
send_tasks = []
|
|
52
72
|
for websocket in connections:
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
73
|
+
task = asyncio.create_task(
|
|
74
|
+
timeout_manager.with_timeout(
|
|
75
|
+
"websocket_broadcast",
|
|
76
|
+
websocket.send_json(data),
|
|
77
|
+
timeout=2.0, # Quick broadcast timeout
|
|
78
|
+
)
|
|
79
|
+
)
|
|
80
|
+
send_tasks.append((websocket, task))
|
|
81
|
+
return send_tasks
|
|
82
|
+
|
|
83
|
+
async def _execute_broadcast_tasks(self, job_id: str, send_tasks: list) -> None:
|
|
84
|
+
"""Execute broadcast tasks with timeout and error handling."""
|
|
85
|
+
try:
|
|
86
|
+
# Use asyncio.wait with timeout for batch sending
|
|
87
|
+
done, pending = await asyncio.wait(
|
|
88
|
+
[task for _, task in send_tasks],
|
|
89
|
+
timeout=5.0, # Overall timeout for all broadcasts
|
|
90
|
+
return_when=asyncio.ALL_COMPLETED,
|
|
91
|
+
)
|
|
92
|
+
|
|
93
|
+
# Handle completed and pending tasks
|
|
94
|
+
await self._handle_broadcast_results(job_id, send_tasks, done, pending)
|
|
95
|
+
|
|
96
|
+
except Exception as e:
|
|
97
|
+
console.print(f"[red]Broadcast error: {e}[/red]")
|
|
98
|
+
await self._cleanup_failed_broadcast(job_id, send_tasks)
|
|
99
|
+
|
|
100
|
+
async def _handle_broadcast_results(
|
|
101
|
+
self, job_id: str, send_tasks: list, done: set, pending: set
|
|
102
|
+
) -> None:
|
|
103
|
+
"""Handle results of broadcast tasks."""
|
|
104
|
+
# Cancel any pending tasks and remove failed connections
|
|
105
|
+
for websocket, task in send_tasks:
|
|
106
|
+
if task in pending:
|
|
107
|
+
task.cancel()
|
|
56
108
|
self.remove_connection(job_id, websocket)
|
|
109
|
+
elif task in done:
|
|
110
|
+
try:
|
|
111
|
+
await task
|
|
112
|
+
except Exception:
|
|
113
|
+
self.remove_connection(job_id, websocket)
|
|
114
|
+
|
|
115
|
+
# Wait for cancelled tasks to complete
|
|
116
|
+
if pending:
|
|
117
|
+
await asyncio.gather(*pending, return_exceptions=True)
|
|
118
|
+
|
|
119
|
+
async def _cleanup_failed_broadcast(self, job_id: str, send_tasks: list) -> None:
|
|
120
|
+
"""Clean up connections after broadcast failure."""
|
|
121
|
+
for websocket, task in send_tasks:
|
|
122
|
+
if not task.done():
|
|
123
|
+
task.cancel()
|
|
124
|
+
self.remove_connection(job_id, websocket)
|
|
57
125
|
|
|
58
126
|
def get_latest_job_id(self) -> str | None:
|
|
59
127
|
if not self.progress_dir.exists():
|
|
60
128
|
return None
|
|
61
129
|
|
|
62
|
-
# Performance: Use more specific glob pattern to reduce filesystem calls
|
|
63
130
|
progress_files = list(self.progress_dir.glob("job-*.json"))
|
|
64
131
|
if not progress_files:
|
|
65
132
|
return None
|
|
@@ -68,88 +135,158 @@ class JobManager:
|
|
|
68
135
|
return self.extract_job_id_from_file(latest_file)
|
|
69
136
|
|
|
70
137
|
def extract_job_id_from_file(self, progress_file: Path) -> str | None:
|
|
71
|
-
# Performance: Use slice instead of replace for fixed prefix removal
|
|
72
138
|
return (
|
|
73
|
-
progress_file.stem[4:]
|
|
74
|
-
if progress_file.stem.startswith("job-")
|
|
75
|
-
else None
|
|
139
|
+
progress_file.stem[4:] if progress_file.stem.startswith("job -") else None
|
|
76
140
|
)
|
|
77
141
|
|
|
78
142
|
def get_job_progress(self, job_id: str) -> dict | None:
|
|
79
143
|
if not self.validate_job_id(job_id):
|
|
80
144
|
return None
|
|
81
145
|
|
|
82
|
-
|
|
83
|
-
if not progress_file.exists():
|
|
84
|
-
return None
|
|
85
|
-
|
|
146
|
+
# Use secure path joining to create progress file path
|
|
86
147
|
try:
|
|
148
|
+
progress_file = SecurePathValidator.secure_path_join(
|
|
149
|
+
self.progress_dir, f"job-{job_id}.json"
|
|
150
|
+
)
|
|
151
|
+
if not progress_file.exists():
|
|
152
|
+
return None
|
|
153
|
+
|
|
154
|
+
# Validate file size before reading
|
|
155
|
+
SecurePathValidator.validate_file_size(progress_file)
|
|
156
|
+
|
|
87
157
|
return json.loads(progress_file.read_text())
|
|
88
158
|
except (json.JSONDecodeError, OSError):
|
|
89
159
|
return None
|
|
90
160
|
|
|
91
161
|
async def _process_progress_file(self, progress_file: Path) -> None:
|
|
92
|
-
|
|
93
|
-
|
|
162
|
+
# Validate the progress file path is within our allowed directory
|
|
163
|
+
try:
|
|
164
|
+
validated_file = SecurePathValidator.validate_safe_path(
|
|
165
|
+
progress_file, self.progress_dir
|
|
166
|
+
)
|
|
167
|
+
except Exception:
|
|
168
|
+
# If path validation fails, skip processing this file
|
|
169
|
+
return
|
|
170
|
+
|
|
171
|
+
job_id = self.extract_job_id_from_file(validated_file)
|
|
94
172
|
if not (job_id and self.validate_job_id(job_id)):
|
|
95
173
|
return
|
|
96
174
|
|
|
97
175
|
progress_data = self.get_job_progress(job_id)
|
|
98
176
|
if progress_data and job_id not in self.known_jobs:
|
|
99
177
|
self.known_jobs.add(job_id)
|
|
100
|
-
console.print(f"[green]New job detected: {job_id}[/green]")
|
|
178
|
+
console.print(f"[green]New job detected: {job_id}[/ green]")
|
|
101
179
|
await self.broadcast_to_job(job_id, progress_data)
|
|
102
180
|
|
|
103
181
|
async def _monitor_directory_changes(self) -> None:
|
|
104
|
-
|
|
182
|
+
timeout_manager = get_timeout_manager()
|
|
183
|
+
consecutive_errors = 0
|
|
184
|
+
max_consecutive_errors = 5
|
|
185
|
+
|
|
105
186
|
while self.is_running:
|
|
106
187
|
try:
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
188
|
+
# Monitor directory changes with timeout protection
|
|
189
|
+
async with timeout_manager.timeout_context(
|
|
190
|
+
"file_operations",
|
|
191
|
+
timeout=10.0, # Timeout for directory monitoring cycle
|
|
192
|
+
strategy=TimeoutStrategy.GRACEFUL_DEGRADATION,
|
|
193
|
+
):
|
|
194
|
+
if self.progress_dir.exists():
|
|
195
|
+
# Process files with individual timeouts
|
|
196
|
+
for progress_file in self.progress_dir.glob("job-*.json"):
|
|
197
|
+
try:
|
|
198
|
+
await timeout_manager.with_timeout(
|
|
199
|
+
"file_operations",
|
|
200
|
+
self._process_progress_file(progress_file),
|
|
201
|
+
timeout=5.0, # Per-file timeout
|
|
202
|
+
)
|
|
203
|
+
except Exception as e:
|
|
204
|
+
console.print(
|
|
205
|
+
f"[yellow]File processing error: {e}[/yellow]"
|
|
206
|
+
)
|
|
207
|
+
continue
|
|
208
|
+
|
|
209
|
+
# Reset error count on successful cycle
|
|
210
|
+
consecutive_errors = 0
|
|
211
|
+
await asyncio.sleep(1)
|
|
110
212
|
|
|
111
|
-
await asyncio.sleep(1) # Check every second
|
|
112
213
|
except Exception as e:
|
|
214
|
+
consecutive_errors += 1
|
|
113
215
|
console.print(f"[red]Progress monitoring error: {e}[/red]")
|
|
114
|
-
|
|
216
|
+
|
|
217
|
+
# Implement exponential backoff for repeated errors
|
|
218
|
+
if consecutive_errors >= max_consecutive_errors:
|
|
219
|
+
console.print(
|
|
220
|
+
f"[red]Too many consecutive errors ({consecutive_errors}), stopping monitor[/red]"
|
|
221
|
+
)
|
|
222
|
+
break
|
|
223
|
+
|
|
224
|
+
# Exponential backoff with max delay
|
|
225
|
+
delay = min(5 * (2 ** (consecutive_errors - 1)), 60)
|
|
226
|
+
await asyncio.sleep(delay)
|
|
115
227
|
|
|
116
228
|
async def monitor_progress_files(self) -> None:
|
|
117
229
|
from crackerjack.mcp.file_monitor import create_progress_monitor
|
|
118
230
|
|
|
119
231
|
console.print("[blue]Starting progress file monitoring...[/blue]")
|
|
232
|
+
timeout_manager = get_timeout_manager()
|
|
120
233
|
|
|
121
234
|
try:
|
|
122
|
-
monitor
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
235
|
+
# Start file monitor with timeout protection
|
|
236
|
+
async with timeout_manager.timeout_context(
|
|
237
|
+
"file_operations",
|
|
238
|
+
timeout=30.0, # Monitor startup timeout
|
|
239
|
+
strategy=TimeoutStrategy.GRACEFUL_DEGRADATION,
|
|
240
|
+
):
|
|
241
|
+
monitor = create_progress_monitor(self.progress_dir)
|
|
242
|
+
await monitor.start()
|
|
243
|
+
|
|
244
|
+
def on_progress_update(job_id: str, progress_data: dict) -> None:
|
|
245
|
+
if job_id and self.validate_job_id(job_id):
|
|
246
|
+
# Create broadcast task with timeout handling
|
|
247
|
+
async def safe_broadcast():
|
|
248
|
+
try:
|
|
249
|
+
await timeout_manager.with_timeout(
|
|
250
|
+
"websocket_broadcast",
|
|
251
|
+
self.broadcast_to_job(job_id, progress_data),
|
|
252
|
+
timeout=5.0,
|
|
253
|
+
)
|
|
254
|
+
except Exception as e:
|
|
255
|
+
console.print(
|
|
256
|
+
f"[yellow]Broadcast failed for job {job_id}: {e}[/yellow]"
|
|
257
|
+
)
|
|
258
|
+
|
|
259
|
+
asyncio.create_task(safe_broadcast())
|
|
260
|
+
|
|
261
|
+
if job_id not in self.known_jobs:
|
|
262
|
+
self.known_jobs.add(job_id)
|
|
263
|
+
console.print(f"[green]New job detected: {job_id}[/green]")
|
|
264
|
+
|
|
265
|
+
# Start directory monitoring with proper timeout handling
|
|
266
|
+
await self._monitor_directory_changes()
|
|
137
267
|
|
|
138
268
|
except Exception as e:
|
|
139
269
|
console.print(f"[red]Progress monitoring setup error: {e}[/red]")
|
|
140
270
|
|
|
141
271
|
async def cleanup_old_jobs(self) -> None:
|
|
142
|
-
|
|
272
|
+
timeout_manager = get_timeout_manager()
|
|
273
|
+
|
|
143
274
|
while self.is_running:
|
|
144
275
|
try:
|
|
145
|
-
|
|
146
|
-
await
|
|
276
|
+
# Cleanup cycle with timeout protection
|
|
277
|
+
await timeout_manager.with_timeout(
|
|
278
|
+
"file_operations",
|
|
279
|
+
self._perform_cleanup_cycle(),
|
|
280
|
+
timeout=30.0, # Cleanup timeout
|
|
281
|
+
strategy=TimeoutStrategy.GRACEFUL_DEGRADATION,
|
|
282
|
+
)
|
|
283
|
+
await asyncio.sleep(3600) # 1 hour between cleanups
|
|
147
284
|
except Exception as e:
|
|
148
285
|
console.print(f"[red]Cleanup error: {e}[/red]")
|
|
149
|
-
|
|
286
|
+
# Shorter sleep on error to retry sooner
|
|
287
|
+
await asyncio.sleep(1800) # 30 minutes on error
|
|
150
288
|
|
|
151
289
|
async def _perform_cleanup_cycle(self) -> None:
|
|
152
|
-
"""Perform a single cleanup cycle for old jobs."""
|
|
153
290
|
if not self.progress_dir.exists():
|
|
154
291
|
return
|
|
155
292
|
|
|
@@ -160,44 +297,48 @@ class JobManager:
|
|
|
160
297
|
self._cleanup_old_job_file(progress_file)
|
|
161
298
|
|
|
162
299
|
def _calculate_cleanup_cutoff_time(self) -> float:
|
|
163
|
-
"""Calculate cutoff time for job cleanup (24 hours ago)."""
|
|
164
300
|
return time.time() - (24 * 60 * 60)
|
|
165
301
|
|
|
166
302
|
def _find_old_job_files(self, cutoff_time: float) -> list[Path]:
|
|
167
|
-
"""Find job files older than the cutoff time."""
|
|
168
303
|
return [
|
|
169
304
|
progress_file
|
|
170
|
-
for progress_file in self.progress_dir.glob("job
|
|
305
|
+
for progress_file in self.progress_dir.glob("job - *.json")
|
|
171
306
|
if progress_file.stat().st_mtime < cutoff_time
|
|
172
307
|
]
|
|
173
308
|
|
|
174
309
|
def _cleanup_old_job_file(self, progress_file: Path) -> None:
|
|
175
|
-
"""Clean up a single old job file if it's safe to do so."""
|
|
176
310
|
job_id = self.extract_job_id_from_file(progress_file)
|
|
177
311
|
|
|
178
312
|
if job_id not in self.active_connections:
|
|
179
313
|
progress_file.unlink(missing_ok=True)
|
|
180
|
-
console.print(f"[yellow]Cleaned up old job: {job_id}[/yellow]")
|
|
314
|
+
console.print(f"[yellow]Cleaned up old job: {job_id}[/ yellow]")
|
|
181
315
|
|
|
182
316
|
async def timeout_stuck_jobs(self) -> None:
|
|
183
|
-
|
|
317
|
+
timeout_manager = get_timeout_manager()
|
|
318
|
+
|
|
184
319
|
while self.is_running:
|
|
185
320
|
try:
|
|
186
|
-
|
|
187
|
-
await
|
|
321
|
+
# Timeout check with its own timeout protection
|
|
322
|
+
await timeout_manager.with_timeout(
|
|
323
|
+
"file_operations",
|
|
324
|
+
self._check_and_timeout_stuck_jobs(),
|
|
325
|
+
timeout=60.0, # Timeout check timeout
|
|
326
|
+
strategy=TimeoutStrategy.GRACEFUL_DEGRADATION,
|
|
327
|
+
)
|
|
328
|
+
await asyncio.sleep(300) # 5 minutes between checks
|
|
188
329
|
except Exception as e:
|
|
189
330
|
console.print(f"[red]Timeout check error: {e}[/red]")
|
|
331
|
+
# Continue checking even on errors
|
|
190
332
|
await asyncio.sleep(300)
|
|
191
333
|
|
|
192
334
|
async def _check_and_timeout_stuck_jobs(self) -> None:
|
|
193
|
-
"""Check for stuck jobs and timeout those that are inactive."""
|
|
194
335
|
if not self.progress_dir.exists():
|
|
195
336
|
return
|
|
196
337
|
|
|
197
338
|
current_time = time.time()
|
|
198
339
|
timeout_seconds = 30 * 60
|
|
199
340
|
|
|
200
|
-
for progress_file in self.progress_dir.glob("job
|
|
341
|
+
for progress_file in self.progress_dir.glob("job-* .json"):
|
|
201
342
|
await self._process_job_timeout_check(
|
|
202
343
|
progress_file,
|
|
203
344
|
current_time,
|
|
@@ -210,20 +351,28 @@ class JobManager:
|
|
|
210
351
|
current_time: float,
|
|
211
352
|
timeout_seconds: int,
|
|
212
353
|
) -> None:
|
|
213
|
-
"""Process timeout check for a single job file."""
|
|
214
354
|
try:
|
|
215
|
-
|
|
355
|
+
# Validate the progress file path is secure
|
|
356
|
+
validated_file = SecurePathValidator.validate_safe_path(
|
|
357
|
+
progress_file, self.progress_dir
|
|
358
|
+
)
|
|
359
|
+
|
|
360
|
+
# Validate file size before reading
|
|
361
|
+
SecurePathValidator.validate_file_size(validated_file)
|
|
362
|
+
|
|
363
|
+
progress_data = json.loads(validated_file.read_text())
|
|
216
364
|
|
|
217
365
|
if self._should_timeout_job(
|
|
218
366
|
progress_data,
|
|
219
|
-
|
|
367
|
+
validated_file,
|
|
220
368
|
current_time,
|
|
221
369
|
timeout_seconds,
|
|
222
370
|
):
|
|
223
|
-
self._timeout_job(progress_data,
|
|
371
|
+
self._timeout_job(progress_data, validated_file)
|
|
224
372
|
|
|
225
|
-
except (json.JSONDecodeError, OSError):
|
|
226
|
-
|
|
373
|
+
except (json.JSONDecodeError, OSError, Exception):
|
|
374
|
+
# Catch validation errors as well as file errors
|
|
375
|
+
pass
|
|
227
376
|
|
|
228
377
|
def _should_timeout_job(
|
|
229
378
|
self,
|
|
@@ -232,22 +381,20 @@ class JobManager:
|
|
|
232
381
|
current_time: float,
|
|
233
382
|
timeout_seconds: int,
|
|
234
383
|
) -> bool:
|
|
235
|
-
"""Determine if a job should be timed out."""
|
|
236
384
|
return (
|
|
237
385
|
progress_data.get("status") == "running"
|
|
238
386
|
and current_time - progress_file.stat().st_mtime > timeout_seconds
|
|
239
387
|
)
|
|
240
388
|
|
|
241
389
|
def _timeout_job(self, progress_data: dict, progress_file: Path) -> None:
|
|
242
|
-
"""Mark a job as failed due to timeout."""
|
|
243
390
|
progress_data["status"] = "failed"
|
|
244
391
|
progress_data["message"] = "Job timed out (no updates for 30 minutes)"
|
|
245
392
|
|
|
246
393
|
progress_file.write_text(json.dumps(progress_data, indent=2))
|
|
247
394
|
|
|
248
395
|
job_id = progress_data.get("job_id", "unknown")
|
|
249
|
-
console.print(f"[red]Job {job_id} timed out and marked as failed[/red]")
|
|
396
|
+
console.print(f"[red]Job {job_id} timed out and marked as failed[/ red]")
|
|
250
397
|
|
|
251
398
|
def cleanup(self) -> None:
|
|
252
399
|
self.is_running = False
|
|
253
|
-
console.print("[blue]Job manager cleanup completed[/blue]")
|
|
400
|
+
console.print("[blue]Job manager cleanup completed[/ blue]")
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import contextlib
|
|
1
3
|
import signal
|
|
2
4
|
import subprocess
|
|
3
5
|
import tempfile
|
|
@@ -7,6 +9,8 @@ from pathlib import Path
|
|
|
7
9
|
import uvicorn
|
|
8
10
|
from rich.console import Console
|
|
9
11
|
|
|
12
|
+
from crackerjack.core.timeout_manager import get_timeout_manager
|
|
13
|
+
|
|
10
14
|
from .app import create_websocket_app
|
|
11
15
|
from .jobs import JobManager
|
|
12
16
|
|
|
@@ -20,6 +24,8 @@ class WebSocketServer:
|
|
|
20
24
|
self.is_running = True
|
|
21
25
|
self.job_manager: JobManager | None = None
|
|
22
26
|
self.app = None
|
|
27
|
+
self.timeout_manager = get_timeout_manager()
|
|
28
|
+
self.server_task: asyncio.Task | None = None
|
|
23
29
|
|
|
24
30
|
def setup(self) -> None:
|
|
25
31
|
self.progress_dir.mkdir(exist_ok=True)
|
|
@@ -35,6 +41,31 @@ class WebSocketServer:
|
|
|
35
41
|
console.print("\n[yellow]Shutting down WebSocket server...[/yellow]")
|
|
36
42
|
self.is_running = False
|
|
37
43
|
|
|
44
|
+
# Cancel server task if running
|
|
45
|
+
if self.server_task and not self.server_task.done():
|
|
46
|
+
self.server_task.cancel()
|
|
47
|
+
|
|
48
|
+
# Clean up job manager connections
|
|
49
|
+
if self.job_manager:
|
|
50
|
+
with contextlib.suppress(Exception):
|
|
51
|
+
# Give existing connections 5 seconds to close
|
|
52
|
+
asyncio.create_task(self._graceful_shutdown())
|
|
53
|
+
|
|
54
|
+
async def _graceful_shutdown(self) -> None:
|
|
55
|
+
"""Gracefully shutdown WebSocket connections."""
|
|
56
|
+
if self.job_manager:
|
|
57
|
+
try:
|
|
58
|
+
# Wait briefly for connections to close naturally
|
|
59
|
+
await asyncio.sleep(2.0)
|
|
60
|
+
|
|
61
|
+
# Force close any remaining connections
|
|
62
|
+
# Note: Implementation depends on JobManager API
|
|
63
|
+
console.print(
|
|
64
|
+
"[yellow]Forcing remaining WebSocket connections to close[/yellow]"
|
|
65
|
+
)
|
|
66
|
+
except Exception as e:
|
|
67
|
+
console.print(f"[red]Error during graceful shutdown: {e}[/red]")
|
|
68
|
+
|
|
38
69
|
def run(self) -> None:
|
|
39
70
|
try:
|
|
40
71
|
self.setup()
|
|
@@ -42,24 +73,73 @@ class WebSocketServer:
|
|
|
42
73
|
f"[green]Starting WebSocket server on port {self.port}[/green]",
|
|
43
74
|
)
|
|
44
75
|
console.print(f"Progress directory: {self.progress_dir}")
|
|
45
|
-
console.print("Press Ctrl
|
|
76
|
+
console.print("Press Ctrl+C to stop")
|
|
46
77
|
|
|
47
78
|
config = uvicorn.Config(
|
|
48
79
|
app=self.app,
|
|
49
80
|
port=self.port,
|
|
50
81
|
host="127.0.0.1",
|
|
51
82
|
log_level="info",
|
|
83
|
+
# Add timeout configurations
|
|
84
|
+
timeout_keep_alive=30, # Keep-alive timeout
|
|
85
|
+
timeout_graceful_shutdown=30, # Graceful shutdown timeout
|
|
52
86
|
)
|
|
53
87
|
|
|
54
88
|
server = uvicorn.Server(config)
|
|
55
|
-
|
|
89
|
+
|
|
90
|
+
# Use asyncio event loop for better control
|
|
91
|
+
try:
|
|
92
|
+
asyncio.run(self._run_with_timeout(server))
|
|
93
|
+
except KeyboardInterrupt:
|
|
94
|
+
console.print("\n[yellow]Server interrupted by user[/yellow]")
|
|
56
95
|
|
|
57
96
|
except KeyboardInterrupt:
|
|
58
97
|
console.print("\n[yellow]Server stopped by user[/yellow]")
|
|
59
98
|
except Exception as e:
|
|
60
99
|
console.print(f"[red]Server error: {e}[/red]")
|
|
61
100
|
finally:
|
|
62
|
-
|
|
101
|
+
console.print("[green]WebSocket server shutdown complete[/green]")
|
|
102
|
+
|
|
103
|
+
async def _run_with_timeout(self, server: uvicorn.Server) -> None:
|
|
104
|
+
"""Run the server with timeout protection."""
|
|
105
|
+
try:
|
|
106
|
+
# Start server as a background task
|
|
107
|
+
self.server_task = asyncio.create_task(server.serve())
|
|
108
|
+
|
|
109
|
+
# Monitor server health while running
|
|
110
|
+
while self.is_running and not self.server_task.done():
|
|
111
|
+
try:
|
|
112
|
+
# Check server health periodically
|
|
113
|
+
await asyncio.sleep(5.0)
|
|
114
|
+
|
|
115
|
+
# Optional: Add health checks here
|
|
116
|
+
# if not await self._server_health_check():
|
|
117
|
+
# console.print("[yellow]Server health check failed[/yellow]")
|
|
118
|
+
# break
|
|
119
|
+
|
|
120
|
+
except asyncio.CancelledError:
|
|
121
|
+
console.print("[yellow]Server monitoring cancelled[/yellow]")
|
|
122
|
+
break
|
|
123
|
+
except Exception as e:
|
|
124
|
+
console.print(f"[red]Server monitoring error: {e}[/red]")
|
|
125
|
+
break
|
|
126
|
+
|
|
127
|
+
# Wait for server task to complete
|
|
128
|
+
if self.server_task and not self.server_task.done():
|
|
129
|
+
try:
|
|
130
|
+
await asyncio.wait_for(self.server_task, timeout=30.0)
|
|
131
|
+
except TimeoutError:
|
|
132
|
+
console.print(
|
|
133
|
+
"[yellow]Server shutdown timeout, forcing termination[/yellow]"
|
|
134
|
+
)
|
|
135
|
+
self.server_task.cancel()
|
|
136
|
+
try:
|
|
137
|
+
await self.server_task
|
|
138
|
+
except asyncio.CancelledError:
|
|
139
|
+
pass
|
|
140
|
+
|
|
141
|
+
except Exception as e:
|
|
142
|
+
console.print(f"[red]Server runtime error: {e}[/red]")
|
|
63
143
|
|
|
64
144
|
|
|
65
145
|
def handle_websocket_server_command(
|
|
@@ -68,10 +148,9 @@ def handle_websocket_server_command(
|
|
|
68
148
|
restart: bool = False,
|
|
69
149
|
port: int = 8675,
|
|
70
150
|
) -> None:
|
|
71
|
-
"""Handle WebSocket server start/stop/restart commands."""
|
|
72
151
|
if stop or restart:
|
|
73
152
|
console.print("[yellow]Stopping WebSocket servers...[/yellow]")
|
|
74
|
-
|
|
153
|
+
|
|
75
154
|
try:
|
|
76
155
|
result = subprocess.run(
|
|
77
156
|
["pkill", "-f", f"uvicorn.*:{port}"],
|
|
@@ -92,7 +171,6 @@ def handle_websocket_server_command(
|
|
|
92
171
|
if stop:
|
|
93
172
|
return
|
|
94
173
|
|
|
95
|
-
# For restart, wait a moment before starting again
|
|
96
174
|
time.sleep(2)
|
|
97
175
|
|
|
98
176
|
if start or restart:
|