crackerjack 0.31.10__py3-none-any.whl → 0.31.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of crackerjack might be problematic. Click here for more details.

Files changed (155)
  1. crackerjack/CLAUDE.md +288 -705
  2. crackerjack/__main__.py +22 -8
  3. crackerjack/agents/__init__.py +0 -3
  4. crackerjack/agents/architect_agent.py +0 -43
  5. crackerjack/agents/base.py +1 -9
  6. crackerjack/agents/coordinator.py +2 -148
  7. crackerjack/agents/documentation_agent.py +109 -81
  8. crackerjack/agents/dry_agent.py +122 -97
  9. crackerjack/agents/formatting_agent.py +3 -16
  10. crackerjack/agents/import_optimization_agent.py +1174 -130
  11. crackerjack/agents/performance_agent.py +956 -188
  12. crackerjack/agents/performance_helpers.py +229 -0
  13. crackerjack/agents/proactive_agent.py +1 -48
  14. crackerjack/agents/refactoring_agent.py +516 -246
  15. crackerjack/agents/refactoring_helpers.py +282 -0
  16. crackerjack/agents/security_agent.py +393 -90
  17. crackerjack/agents/test_creation_agent.py +1776 -120
  18. crackerjack/agents/test_specialist_agent.py +59 -15
  19. crackerjack/agents/tracker.py +0 -102
  20. crackerjack/api.py +145 -37
  21. crackerjack/cli/handlers.py +48 -30
  22. crackerjack/cli/interactive.py +11 -11
  23. crackerjack/cli/options.py +66 -4
  24. crackerjack/code_cleaner.py +808 -148
  25. crackerjack/config/global_lock_config.py +110 -0
  26. crackerjack/config/hooks.py +43 -64
  27. crackerjack/core/async_workflow_orchestrator.py +247 -97
  28. crackerjack/core/autofix_coordinator.py +192 -109
  29. crackerjack/core/enhanced_container.py +46 -63
  30. crackerjack/core/file_lifecycle.py +549 -0
  31. crackerjack/core/performance.py +9 -8
  32. crackerjack/core/performance_monitor.py +395 -0
  33. crackerjack/core/phase_coordinator.py +281 -94
  34. crackerjack/core/proactive_workflow.py +9 -58
  35. crackerjack/core/resource_manager.py +501 -0
  36. crackerjack/core/service_watchdog.py +490 -0
  37. crackerjack/core/session_coordinator.py +4 -8
  38. crackerjack/core/timeout_manager.py +504 -0
  39. crackerjack/core/websocket_lifecycle.py +475 -0
  40. crackerjack/core/workflow_orchestrator.py +343 -209
  41. crackerjack/dynamic_config.py +47 -6
  42. crackerjack/errors.py +3 -4
  43. crackerjack/executors/async_hook_executor.py +63 -13
  44. crackerjack/executors/cached_hook_executor.py +14 -14
  45. crackerjack/executors/hook_executor.py +100 -37
  46. crackerjack/executors/hook_lock_manager.py +856 -0
  47. crackerjack/executors/individual_hook_executor.py +120 -86
  48. crackerjack/intelligence/__init__.py +0 -7
  49. crackerjack/intelligence/adaptive_learning.py +13 -86
  50. crackerjack/intelligence/agent_orchestrator.py +15 -78
  51. crackerjack/intelligence/agent_registry.py +12 -59
  52. crackerjack/intelligence/agent_selector.py +31 -92
  53. crackerjack/intelligence/integration.py +1 -41
  54. crackerjack/interactive.py +9 -9
  55. crackerjack/managers/async_hook_manager.py +25 -8
  56. crackerjack/managers/hook_manager.py +9 -9
  57. crackerjack/managers/publish_manager.py +57 -59
  58. crackerjack/managers/test_command_builder.py +6 -36
  59. crackerjack/managers/test_executor.py +9 -61
  60. crackerjack/managers/test_manager.py +17 -63
  61. crackerjack/managers/test_manager_backup.py +77 -127
  62. crackerjack/managers/test_progress.py +4 -23
  63. crackerjack/mcp/cache.py +5 -12
  64. crackerjack/mcp/client_runner.py +10 -10
  65. crackerjack/mcp/context.py +64 -6
  66. crackerjack/mcp/dashboard.py +14 -11
  67. crackerjack/mcp/enhanced_progress_monitor.py +55 -55
  68. crackerjack/mcp/file_monitor.py +72 -42
  69. crackerjack/mcp/progress_components.py +103 -84
  70. crackerjack/mcp/progress_monitor.py +122 -49
  71. crackerjack/mcp/rate_limiter.py +12 -12
  72. crackerjack/mcp/server_core.py +16 -22
  73. crackerjack/mcp/service_watchdog.py +26 -26
  74. crackerjack/mcp/state.py +15 -0
  75. crackerjack/mcp/tools/core_tools.py +95 -39
  76. crackerjack/mcp/tools/error_analyzer.py +6 -32
  77. crackerjack/mcp/tools/execution_tools.py +1 -56
  78. crackerjack/mcp/tools/execution_tools_backup.py +35 -131
  79. crackerjack/mcp/tools/intelligence_tool_registry.py +0 -36
  80. crackerjack/mcp/tools/intelligence_tools.py +2 -55
  81. crackerjack/mcp/tools/monitoring_tools.py +308 -145
  82. crackerjack/mcp/tools/proactive_tools.py +12 -42
  83. crackerjack/mcp/tools/progress_tools.py +23 -15
  84. crackerjack/mcp/tools/utility_tools.py +3 -40
  85. crackerjack/mcp/tools/workflow_executor.py +40 -60
  86. crackerjack/mcp/websocket/app.py +0 -3
  87. crackerjack/mcp/websocket/endpoints.py +206 -268
  88. crackerjack/mcp/websocket/jobs.py +213 -66
  89. crackerjack/mcp/websocket/server.py +84 -6
  90. crackerjack/mcp/websocket/websocket_handler.py +137 -29
  91. crackerjack/models/config_adapter.py +3 -16
  92. crackerjack/models/protocols.py +162 -3
  93. crackerjack/models/resource_protocols.py +454 -0
  94. crackerjack/models/task.py +3 -3
  95. crackerjack/monitoring/__init__.py +0 -0
  96. crackerjack/monitoring/ai_agent_watchdog.py +25 -71
  97. crackerjack/monitoring/regression_prevention.py +28 -87
  98. crackerjack/orchestration/advanced_orchestrator.py +44 -78
  99. crackerjack/orchestration/coverage_improvement.py +10 -60
  100. crackerjack/orchestration/execution_strategies.py +16 -16
  101. crackerjack/orchestration/test_progress_streamer.py +61 -53
  102. crackerjack/plugins/base.py +1 -1
  103. crackerjack/plugins/managers.py +22 -20
  104. crackerjack/py313.py +65 -21
  105. crackerjack/services/backup_service.py +467 -0
  106. crackerjack/services/bounded_status_operations.py +627 -0
  107. crackerjack/services/cache.py +7 -9
  108. crackerjack/services/config.py +35 -52
  109. crackerjack/services/config_integrity.py +5 -16
  110. crackerjack/services/config_merge.py +542 -0
  111. crackerjack/services/contextual_ai_assistant.py +17 -19
  112. crackerjack/services/coverage_ratchet.py +44 -73
  113. crackerjack/services/debug.py +25 -39
  114. crackerjack/services/dependency_monitor.py +52 -50
  115. crackerjack/services/enhanced_filesystem.py +14 -11
  116. crackerjack/services/file_hasher.py +1 -1
  117. crackerjack/services/filesystem.py +1 -12
  118. crackerjack/services/git.py +71 -47
  119. crackerjack/services/health_metrics.py +31 -27
  120. crackerjack/services/initialization.py +276 -428
  121. crackerjack/services/input_validator.py +760 -0
  122. crackerjack/services/log_manager.py +16 -16
  123. crackerjack/services/logging.py +7 -6
  124. crackerjack/services/metrics.py +43 -43
  125. crackerjack/services/pattern_cache.py +2 -31
  126. crackerjack/services/pattern_detector.py +26 -63
  127. crackerjack/services/performance_benchmarks.py +20 -45
  128. crackerjack/services/regex_patterns.py +2887 -0
  129. crackerjack/services/regex_utils.py +537 -0
  130. crackerjack/services/secure_path_utils.py +683 -0
  131. crackerjack/services/secure_status_formatter.py +534 -0
  132. crackerjack/services/secure_subprocess.py +605 -0
  133. crackerjack/services/security.py +47 -10
  134. crackerjack/services/security_logger.py +492 -0
  135. crackerjack/services/server_manager.py +109 -50
  136. crackerjack/services/smart_scheduling.py +8 -25
  137. crackerjack/services/status_authentication.py +603 -0
  138. crackerjack/services/status_security_manager.py +442 -0
  139. crackerjack/services/thread_safe_status_collector.py +546 -0
  140. crackerjack/services/tool_version_service.py +1 -23
  141. crackerjack/services/unified_config.py +36 -58
  142. crackerjack/services/validation_rate_limiter.py +269 -0
  143. crackerjack/services/version_checker.py +9 -40
  144. crackerjack/services/websocket_resource_limiter.py +572 -0
  145. crackerjack/slash_commands/__init__.py +52 -2
  146. crackerjack/tools/__init__.py +0 -0
  147. crackerjack/tools/validate_input_validator_patterns.py +262 -0
  148. crackerjack/tools/validate_regex_patterns.py +198 -0
  149. {crackerjack-0.31.10.dist-info → crackerjack-0.31.12.dist-info}/METADATA +197 -12
  150. crackerjack-0.31.12.dist-info/RECORD +178 -0
  151. crackerjack/cli/facade.py +0 -104
  152. crackerjack-0.31.10.dist-info/RECORD +0 -149
  153. {crackerjack-0.31.10.dist-info → crackerjack-0.31.12.dist-info}/WHEEL +0 -0
  154. {crackerjack-0.31.10.dist-info → crackerjack-0.31.12.dist-info}/entry_points.txt +0 -0
  155. {crackerjack-0.31.10.dist-info → crackerjack-0.31.12.dist-info}/licenses/LICENSE +0 -0
@@ -8,12 +8,17 @@ from typing import Any
8
8
 
9
9
  from rich.console import Console
10
10
 
11
+ from crackerjack.core.timeout_manager import TimeoutStrategy, get_timeout_manager
12
+ from crackerjack.services.input_validator import get_input_validator
13
+ from crackerjack.services.secure_path_utils import SecurePathValidator
14
+
11
15
  console = Console()
12
16
 
13
17
 
14
18
  class JobManager:
15
19
  def __init__(self, progress_dir: Path) -> None:
16
- self.progress_dir = progress_dir
20
+ # Validate and secure the progress directory path
21
+ self.progress_dir = SecurePathValidator.validate_safe_path(progress_dir)
17
22
  self.active_connections: dict[str, set[Any]] = {}
18
23
  self.known_jobs: set[str] = set()
19
24
  self.is_running = True
@@ -21,17 +26,18 @@ class JobManager:
21
26
  self.progress_dir.mkdir(exist_ok=True)
22
27
 
23
28
  def validate_job_id(self, job_id: str) -> bool:
29
+ """Validate job ID using secure input validator."""
24
30
  if not job_id:
25
31
  return False
26
32
 
33
+ # First check if it's a valid UUID
27
34
  with suppress(ValueError):
28
35
  uuid.UUID(job_id)
29
36
  return True
30
37
 
31
- import re
32
-
33
- # Performance: Use simpler regex pattern without whitespace
34
- return bool(re.match(r"^[a-zA-Z0-9_-]+$", job_id) and len(job_id) <= 50)
38
+ # Use secure input validator for additional validation
39
+ result = get_input_validator().validate_job_id(job_id)
40
+ return result.valid
35
41
 
36
42
  def add_connection(self, job_id: str, websocket: Any) -> None:
37
43
  if job_id not in self.active_connections:
@@ -48,18 +54,79 @@ class JobManager:
48
54
  if job_id not in self.active_connections:
49
55
  return
50
56
 
57
+ timeout_manager = get_timeout_manager()
51
58
  connections = self.active_connections[job_id].copy()
59
+
60
+ # Create websocket send tasks
61
+ send_tasks = self._create_broadcast_tasks(connections, timeout_manager, data)
62
+
63
+ # Execute broadcast with timeout handling
64
+ if send_tasks:
65
+ await self._execute_broadcast_tasks(job_id, send_tasks)
66
+
67
+ def _create_broadcast_tasks(
68
+ self, connections: set, timeout_manager, data: dict
69
+ ) -> list:
70
+ """Create tasks for all websocket sends with timeout."""
71
+ send_tasks = []
52
72
  for websocket in connections:
53
- try:
54
- await websocket.send_json(data)
55
- except Exception:
73
+ task = asyncio.create_task(
74
+ timeout_manager.with_timeout(
75
+ "websocket_broadcast",
76
+ websocket.send_json(data),
77
+ timeout=2.0, # Quick broadcast timeout
78
+ )
79
+ )
80
+ send_tasks.append((websocket, task))
81
+ return send_tasks
82
+
83
+ async def _execute_broadcast_tasks(self, job_id: str, send_tasks: list) -> None:
84
+ """Execute broadcast tasks with timeout and error handling."""
85
+ try:
86
+ # Use asyncio.wait with timeout for batch sending
87
+ done, pending = await asyncio.wait(
88
+ [task for _, task in send_tasks],
89
+ timeout=5.0, # Overall timeout for all broadcasts
90
+ return_when=asyncio.ALL_COMPLETED,
91
+ )
92
+
93
+ # Handle completed and pending tasks
94
+ await self._handle_broadcast_results(job_id, send_tasks, done, pending)
95
+
96
+ except Exception as e:
97
+ console.print(f"[red]Broadcast error: {e}[/red]")
98
+ await self._cleanup_failed_broadcast(job_id, send_tasks)
99
+
100
+ async def _handle_broadcast_results(
101
+ self, job_id: str, send_tasks: list, done: set, pending: set
102
+ ) -> None:
103
+ """Handle results of broadcast tasks."""
104
+ # Cancel any pending tasks and remove failed connections
105
+ for websocket, task in send_tasks:
106
+ if task in pending:
107
+ task.cancel()
56
108
  self.remove_connection(job_id, websocket)
109
+ elif task in done:
110
+ try:
111
+ await task
112
+ except Exception:
113
+ self.remove_connection(job_id, websocket)
114
+
115
+ # Wait for cancelled tasks to complete
116
+ if pending:
117
+ await asyncio.gather(*pending, return_exceptions=True)
118
+
119
+ async def _cleanup_failed_broadcast(self, job_id: str, send_tasks: list) -> None:
120
+ """Clean up connections after broadcast failure."""
121
+ for websocket, task in send_tasks:
122
+ if not task.done():
123
+ task.cancel()
124
+ self.remove_connection(job_id, websocket)
57
125
 
58
126
  def get_latest_job_id(self) -> str | None:
59
127
  if not self.progress_dir.exists():
60
128
  return None
61
129
 
62
- # Performance: Use more specific glob pattern to reduce filesystem calls
63
130
  progress_files = list(self.progress_dir.glob("job-*.json"))
64
131
  if not progress_files:
65
132
  return None
@@ -68,88 +135,158 @@ class JobManager:
68
135
  return self.extract_job_id_from_file(latest_file)
69
136
 
70
137
  def extract_job_id_from_file(self, progress_file: Path) -> str | None:
71
- # Performance: Use slice instead of replace for fixed prefix removal
72
138
  return (
73
- progress_file.stem[4:] # Remove "job-" prefix (4 chars)
74
- if progress_file.stem.startswith("job-")
75
- else None
139
+ progress_file.stem[4:] if progress_file.stem.startswith("job -") else None
76
140
  )
77
141
 
78
142
  def get_job_progress(self, job_id: str) -> dict | None:
79
143
  if not self.validate_job_id(job_id):
80
144
  return None
81
145
 
82
- progress_file = self.progress_dir / f"job-{job_id}.json"
83
- if not progress_file.exists():
84
- return None
85
-
146
+ # Use secure path joining to create progress file path
86
147
  try:
148
+ progress_file = SecurePathValidator.secure_path_join(
149
+ self.progress_dir, f"job-{job_id}.json"
150
+ )
151
+ if not progress_file.exists():
152
+ return None
153
+
154
+ # Validate file size before reading
155
+ SecurePathValidator.validate_file_size(progress_file)
156
+
87
157
  return json.loads(progress_file.read_text())
88
158
  except (json.JSONDecodeError, OSError):
89
159
  return None
90
160
 
91
161
  async def _process_progress_file(self, progress_file: Path) -> None:
92
- """Process a single progress file and handle new job detection."""
93
- job_id = self.extract_job_id_from_file(progress_file)
162
+ # Validate the progress file path is within our allowed directory
163
+ try:
164
+ validated_file = SecurePathValidator.validate_safe_path(
165
+ progress_file, self.progress_dir
166
+ )
167
+ except Exception:
168
+ # If path validation fails, skip processing this file
169
+ return
170
+
171
+ job_id = self.extract_job_id_from_file(validated_file)
94
172
  if not (job_id and self.validate_job_id(job_id)):
95
173
  return
96
174
 
97
175
  progress_data = self.get_job_progress(job_id)
98
176
  if progress_data and job_id not in self.known_jobs:
99
177
  self.known_jobs.add(job_id)
100
- console.print(f"[green]New job detected: {job_id}[/green]")
178
+ console.print(f"[green]New job detected: {job_id}[/ green]")
101
179
  await self.broadcast_to_job(job_id, progress_data)
102
180
 
103
181
  async def _monitor_directory_changes(self) -> None:
104
- """Monitor the progress directory for new job files."""
182
+ timeout_manager = get_timeout_manager()
183
+ consecutive_errors = 0
184
+ max_consecutive_errors = 5
185
+
105
186
  while self.is_running:
106
187
  try:
107
- if self.progress_dir.exists():
108
- for progress_file in self.progress_dir.glob("job-*.json"):
109
- await self._process_progress_file(progress_file)
188
+ # Monitor directory changes with timeout protection
189
+ async with timeout_manager.timeout_context(
190
+ "file_operations",
191
+ timeout=10.0, # Timeout for directory monitoring cycle
192
+ strategy=TimeoutStrategy.GRACEFUL_DEGRADATION,
193
+ ):
194
+ if self.progress_dir.exists():
195
+ # Process files with individual timeouts
196
+ for progress_file in self.progress_dir.glob("job-*.json"):
197
+ try:
198
+ await timeout_manager.with_timeout(
199
+ "file_operations",
200
+ self._process_progress_file(progress_file),
201
+ timeout=5.0, # Per-file timeout
202
+ )
203
+ except Exception as e:
204
+ console.print(
205
+ f"[yellow]File processing error: {e}[/yellow]"
206
+ )
207
+ continue
208
+
209
+ # Reset error count on successful cycle
210
+ consecutive_errors = 0
211
+ await asyncio.sleep(1)
110
212
 
111
- await asyncio.sleep(1) # Check every second
112
213
  except Exception as e:
214
+ consecutive_errors += 1
113
215
  console.print(f"[red]Progress monitoring error: {e}[/red]")
114
- await asyncio.sleep(5) # Wait longer on error
216
+
217
+ # Implement exponential backoff for repeated errors
218
+ if consecutive_errors >= max_consecutive_errors:
219
+ console.print(
220
+ f"[red]Too many consecutive errors ({consecutive_errors}), stopping monitor[/red]"
221
+ )
222
+ break
223
+
224
+ # Exponential backoff with max delay
225
+ delay = min(5 * (2 ** (consecutive_errors - 1)), 60)
226
+ await asyncio.sleep(delay)
115
227
 
116
228
  async def monitor_progress_files(self) -> None:
117
229
  from crackerjack.mcp.file_monitor import create_progress_monitor
118
230
 
119
231
  console.print("[blue]Starting progress file monitoring...[/blue]")
232
+ timeout_manager = get_timeout_manager()
120
233
 
121
234
  try:
122
- monitor = create_progress_monitor(self.progress_dir)
123
- await monitor.start()
124
-
125
- def on_progress_update(job_id: str, progress_data: dict) -> None:
126
- """Callback for when progress files are updated."""
127
- if job_id and self.validate_job_id(job_id):
128
- # Schedule the broadcast in the event loop
129
- asyncio.create_task(self.broadcast_to_job(job_id, progress_data))
130
-
131
- if job_id not in self.known_jobs:
132
- self.known_jobs.add(job_id)
133
- console.print(f"[green]New job detected: {job_id}[/green]")
134
-
135
- # Monitor for new job files by checking the directory periodically
136
- await self._monitor_directory_changes()
235
+ # Start file monitor with timeout protection
236
+ async with timeout_manager.timeout_context(
237
+ "file_operations",
238
+ timeout=30.0, # Monitor startup timeout
239
+ strategy=TimeoutStrategy.GRACEFUL_DEGRADATION,
240
+ ):
241
+ monitor = create_progress_monitor(self.progress_dir)
242
+ await monitor.start()
243
+
244
+ def on_progress_update(job_id: str, progress_data: dict) -> None:
245
+ if job_id and self.validate_job_id(job_id):
246
+ # Create broadcast task with timeout handling
247
+ async def safe_broadcast():
248
+ try:
249
+ await timeout_manager.with_timeout(
250
+ "websocket_broadcast",
251
+ self.broadcast_to_job(job_id, progress_data),
252
+ timeout=5.0,
253
+ )
254
+ except Exception as e:
255
+ console.print(
256
+ f"[yellow]Broadcast failed for job {job_id}: {e}[/yellow]"
257
+ )
258
+
259
+ asyncio.create_task(safe_broadcast())
260
+
261
+ if job_id not in self.known_jobs:
262
+ self.known_jobs.add(job_id)
263
+ console.print(f"[green]New job detected: {job_id}[/green]")
264
+
265
+ # Start directory monitoring with proper timeout handling
266
+ await self._monitor_directory_changes()
137
267
 
138
268
  except Exception as e:
139
269
  console.print(f"[red]Progress monitoring setup error: {e}[/red]")
140
270
 
141
271
  async def cleanup_old_jobs(self) -> None:
142
- """Periodically clean up old job files."""
272
+ timeout_manager = get_timeout_manager()
273
+
143
274
  while self.is_running:
144
275
  try:
145
- await self._perform_cleanup_cycle()
146
- await asyncio.sleep(3600)
276
+ # Cleanup cycle with timeout protection
277
+ await timeout_manager.with_timeout(
278
+ "file_operations",
279
+ self._perform_cleanup_cycle(),
280
+ timeout=30.0, # Cleanup timeout
281
+ strategy=TimeoutStrategy.GRACEFUL_DEGRADATION,
282
+ )
283
+ await asyncio.sleep(3600) # 1 hour between cleanups
147
284
  except Exception as e:
148
285
  console.print(f"[red]Cleanup error: {e}[/red]")
149
- await asyncio.sleep(3600)
286
+ # Shorter sleep on error to retry sooner
287
+ await asyncio.sleep(1800) # 30 minutes on error
150
288
 
151
289
  async def _perform_cleanup_cycle(self) -> None:
152
- """Perform a single cleanup cycle for old jobs."""
153
290
  if not self.progress_dir.exists():
154
291
  return
155
292
 
@@ -160,44 +297,48 @@ class JobManager:
160
297
  self._cleanup_old_job_file(progress_file)
161
298
 
162
299
  def _calculate_cleanup_cutoff_time(self) -> float:
163
- """Calculate cutoff time for job cleanup (24 hours ago)."""
164
300
  return time.time() - (24 * 60 * 60)
165
301
 
166
302
  def _find_old_job_files(self, cutoff_time: float) -> list[Path]:
167
- """Find job files older than the cutoff time."""
168
303
  return [
169
304
  progress_file
170
- for progress_file in self.progress_dir.glob("job-*.json")
305
+ for progress_file in self.progress_dir.glob("job - *.json")
171
306
  if progress_file.stat().st_mtime < cutoff_time
172
307
  ]
173
308
 
174
309
  def _cleanup_old_job_file(self, progress_file: Path) -> None:
175
- """Clean up a single old job file if it's safe to do so."""
176
310
  job_id = self.extract_job_id_from_file(progress_file)
177
311
 
178
312
  if job_id not in self.active_connections:
179
313
  progress_file.unlink(missing_ok=True)
180
- console.print(f"[yellow]Cleaned up old job: {job_id}[/yellow]")
314
+ console.print(f"[yellow]Cleaned up old job: {job_id}[/ yellow]")
181
315
 
182
316
  async def timeout_stuck_jobs(self) -> None:
183
- """Monitor and timeout stuck jobs that haven't been updated."""
317
+ timeout_manager = get_timeout_manager()
318
+
184
319
  while self.is_running:
185
320
  try:
186
- await self._check_and_timeout_stuck_jobs()
187
- await asyncio.sleep(300)
321
+ # Timeout check with its own timeout protection
322
+ await timeout_manager.with_timeout(
323
+ "file_operations",
324
+ self._check_and_timeout_stuck_jobs(),
325
+ timeout=60.0, # Timeout check timeout
326
+ strategy=TimeoutStrategy.GRACEFUL_DEGRADATION,
327
+ )
328
+ await asyncio.sleep(300) # 5 minutes between checks
188
329
  except Exception as e:
189
330
  console.print(f"[red]Timeout check error: {e}[/red]")
331
+ # Continue checking even on errors
190
332
  await asyncio.sleep(300)
191
333
 
192
334
  async def _check_and_timeout_stuck_jobs(self) -> None:
193
- """Check for stuck jobs and timeout those that are inactive."""
194
335
  if not self.progress_dir.exists():
195
336
  return
196
337
 
197
338
  current_time = time.time()
198
339
  timeout_seconds = 30 * 60
199
340
 
200
- for progress_file in self.progress_dir.glob("job -* .json"):
341
+ for progress_file in self.progress_dir.glob("job-* .json"):
201
342
  await self._process_job_timeout_check(
202
343
  progress_file,
203
344
  current_time,
@@ -210,20 +351,28 @@ class JobManager:
210
351
  current_time: float,
211
352
  timeout_seconds: int,
212
353
  ) -> None:
213
- """Process timeout check for a single job file."""
214
354
  try:
215
- progress_data = json.loads(progress_file.read_text())
355
+ # Validate the progress file path is secure
356
+ validated_file = SecurePathValidator.validate_safe_path(
357
+ progress_file, self.progress_dir
358
+ )
359
+
360
+ # Validate file size before reading
361
+ SecurePathValidator.validate_file_size(validated_file)
362
+
363
+ progress_data = json.loads(validated_file.read_text())
216
364
 
217
365
  if self._should_timeout_job(
218
366
  progress_data,
219
- progress_file,
367
+ validated_file,
220
368
  current_time,
221
369
  timeout_seconds,
222
370
  ):
223
- self._timeout_job(progress_data, progress_file)
371
+ self._timeout_job(progress_data, validated_file)
224
372
 
225
- except (json.JSONDecodeError, OSError):
226
- pass # Skip files that can't be processed
373
+ except (json.JSONDecodeError, OSError, Exception):
374
+ # Catch validation errors as well as file errors
375
+ pass
227
376
 
228
377
  def _should_timeout_job(
229
378
  self,
@@ -232,22 +381,20 @@ class JobManager:
232
381
  current_time: float,
233
382
  timeout_seconds: int,
234
383
  ) -> bool:
235
- """Determine if a job should be timed out."""
236
384
  return (
237
385
  progress_data.get("status") == "running"
238
386
  and current_time - progress_file.stat().st_mtime > timeout_seconds
239
387
  )
240
388
 
241
389
  def _timeout_job(self, progress_data: dict, progress_file: Path) -> None:
242
- """Mark a job as failed due to timeout."""
243
390
  progress_data["status"] = "failed"
244
391
  progress_data["message"] = "Job timed out (no updates for 30 minutes)"
245
392
 
246
393
  progress_file.write_text(json.dumps(progress_data, indent=2))
247
394
 
248
395
  job_id = progress_data.get("job_id", "unknown")
249
- console.print(f"[red]Job {job_id} timed out and marked as failed[/red]")
396
+ console.print(f"[red]Job {job_id} timed out and marked as failed[/ red]")
250
397
 
251
398
  def cleanup(self) -> None:
252
399
  self.is_running = False
253
- console.print("[blue]Job manager cleanup completed[/blue]")
400
+ console.print("[blue]Job manager cleanup completed[/ blue]")
@@ -1,3 +1,5 @@
1
+ import asyncio
2
+ import contextlib
1
3
  import signal
2
4
  import subprocess
3
5
  import tempfile
@@ -7,6 +9,8 @@ from pathlib import Path
7
9
  import uvicorn
8
10
  from rich.console import Console
9
11
 
12
+ from crackerjack.core.timeout_manager import get_timeout_manager
13
+
10
14
  from .app import create_websocket_app
11
15
  from .jobs import JobManager
12
16
 
@@ -20,6 +24,8 @@ class WebSocketServer:
20
24
  self.is_running = True
21
25
  self.job_manager: JobManager | None = None
22
26
  self.app = None
27
+ self.timeout_manager = get_timeout_manager()
28
+ self.server_task: asyncio.Task | None = None
23
29
 
24
30
  def setup(self) -> None:
25
31
  self.progress_dir.mkdir(exist_ok=True)
@@ -35,6 +41,31 @@ class WebSocketServer:
35
41
  console.print("\n[yellow]Shutting down WebSocket server...[/yellow]")
36
42
  self.is_running = False
37
43
 
44
+ # Cancel server task if running
45
+ if self.server_task and not self.server_task.done():
46
+ self.server_task.cancel()
47
+
48
+ # Clean up job manager connections
49
+ if self.job_manager:
50
+ with contextlib.suppress(Exception):
51
+ # Give existing connections 5 seconds to close
52
+ asyncio.create_task(self._graceful_shutdown())
53
+
54
+ async def _graceful_shutdown(self) -> None:
55
+ """Gracefully shutdown WebSocket connections."""
56
+ if self.job_manager:
57
+ try:
58
+ # Wait briefly for connections to close naturally
59
+ await asyncio.sleep(2.0)
60
+
61
+ # Force close any remaining connections
62
+ # Note: Implementation depends on JobManager API
63
+ console.print(
64
+ "[yellow]Forcing remaining WebSocket connections to close[/yellow]"
65
+ )
66
+ except Exception as e:
67
+ console.print(f"[red]Error during graceful shutdown: {e}[/red]")
68
+
38
69
  def run(self) -> None:
39
70
  try:
40
71
  self.setup()
@@ -42,24 +73,73 @@ class WebSocketServer:
42
73
  f"[green]Starting WebSocket server on port {self.port}[/green]",
43
74
  )
44
75
  console.print(f"Progress directory: {self.progress_dir}")
45
- console.print("Press Ctrl + C to stop")
76
+ console.print("Press Ctrl+C to stop")
46
77
 
47
78
  config = uvicorn.Config(
48
79
  app=self.app,
49
80
  port=self.port,
50
81
  host="127.0.0.1",
51
82
  log_level="info",
83
+ # Add timeout configurations
84
+ timeout_keep_alive=30, # Keep-alive timeout
85
+ timeout_graceful_shutdown=30, # Graceful shutdown timeout
52
86
  )
53
87
 
54
88
  server = uvicorn.Server(config)
55
- server.run()
89
+
90
+ # Use asyncio event loop for better control
91
+ try:
92
+ asyncio.run(self._run_with_timeout(server))
93
+ except KeyboardInterrupt:
94
+ console.print("\n[yellow]Server interrupted by user[/yellow]")
56
95
 
57
96
  except KeyboardInterrupt:
58
97
  console.print("\n[yellow]Server stopped by user[/yellow]")
59
98
  except Exception as e:
60
99
  console.print(f"[red]Server error: {e}[/red]")
61
100
  finally:
62
- pass # Cleanup handled by FastAPI lifespan
101
+ console.print("[green]WebSocket server shutdown complete[/green]")
102
+
103
+ async def _run_with_timeout(self, server: uvicorn.Server) -> None:
104
+ """Run the server with timeout protection."""
105
+ try:
106
+ # Start server as a background task
107
+ self.server_task = asyncio.create_task(server.serve())
108
+
109
+ # Monitor server health while running
110
+ while self.is_running and not self.server_task.done():
111
+ try:
112
+ # Check server health periodically
113
+ await asyncio.sleep(5.0)
114
+
115
+ # Optional: Add health checks here
116
+ # if not await self._server_health_check():
117
+ # console.print("[yellow]Server health check failed[/yellow]")
118
+ # break
119
+
120
+ except asyncio.CancelledError:
121
+ console.print("[yellow]Server monitoring cancelled[/yellow]")
122
+ break
123
+ except Exception as e:
124
+ console.print(f"[red]Server monitoring error: {e}[/red]")
125
+ break
126
+
127
+ # Wait for server task to complete
128
+ if self.server_task and not self.server_task.done():
129
+ try:
130
+ await asyncio.wait_for(self.server_task, timeout=30.0)
131
+ except TimeoutError:
132
+ console.print(
133
+ "[yellow]Server shutdown timeout, forcing termination[/yellow]"
134
+ )
135
+ self.server_task.cancel()
136
+ try:
137
+ await self.server_task
138
+ except asyncio.CancelledError:
139
+ pass
140
+
141
+ except Exception as e:
142
+ console.print(f"[red]Server runtime error: {e}[/red]")
63
143
 
64
144
 
65
145
  def handle_websocket_server_command(
@@ -68,10 +148,9 @@ def handle_websocket_server_command(
68
148
  restart: bool = False,
69
149
  port: int = 8675,
70
150
  ) -> None:
71
- """Handle WebSocket server start/stop/restart commands."""
72
151
  if stop or restart:
73
152
  console.print("[yellow]Stopping WebSocket servers...[/yellow]")
74
- # Kill any existing uvicorn processes running on the port
153
+
75
154
  try:
76
155
  result = subprocess.run(
77
156
  ["pkill", "-f", f"uvicorn.*:{port}"],
@@ -92,7 +171,6 @@ def handle_websocket_server_command(
92
171
  if stop:
93
172
  return
94
173
 
95
- # For restart, wait a moment before starting again
96
174
  time.sleep(2)
97
175
 
98
176
  if start or restart: