crackerjack 0.31.10__py3-none-any.whl → 0.31.12__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.

Warning: this release of crackerjack has been flagged as potentially problematic.

Files changed (155)
  1. crackerjack/CLAUDE.md +288 -705
  2. crackerjack/__main__.py +22 -8
  3. crackerjack/agents/__init__.py +0 -3
  4. crackerjack/agents/architect_agent.py +0 -43
  5. crackerjack/agents/base.py +1 -9
  6. crackerjack/agents/coordinator.py +2 -148
  7. crackerjack/agents/documentation_agent.py +109 -81
  8. crackerjack/agents/dry_agent.py +122 -97
  9. crackerjack/agents/formatting_agent.py +3 -16
  10. crackerjack/agents/import_optimization_agent.py +1174 -130
  11. crackerjack/agents/performance_agent.py +956 -188
  12. crackerjack/agents/performance_helpers.py +229 -0
  13. crackerjack/agents/proactive_agent.py +1 -48
  14. crackerjack/agents/refactoring_agent.py +516 -246
  15. crackerjack/agents/refactoring_helpers.py +282 -0
  16. crackerjack/agents/security_agent.py +393 -90
  17. crackerjack/agents/test_creation_agent.py +1776 -120
  18. crackerjack/agents/test_specialist_agent.py +59 -15
  19. crackerjack/agents/tracker.py +0 -102
  20. crackerjack/api.py +145 -37
  21. crackerjack/cli/handlers.py +48 -30
  22. crackerjack/cli/interactive.py +11 -11
  23. crackerjack/cli/options.py +66 -4
  24. crackerjack/code_cleaner.py +808 -148
  25. crackerjack/config/global_lock_config.py +110 -0
  26. crackerjack/config/hooks.py +43 -64
  27. crackerjack/core/async_workflow_orchestrator.py +247 -97
  28. crackerjack/core/autofix_coordinator.py +192 -109
  29. crackerjack/core/enhanced_container.py +46 -63
  30. crackerjack/core/file_lifecycle.py +549 -0
  31. crackerjack/core/performance.py +9 -8
  32. crackerjack/core/performance_monitor.py +395 -0
  33. crackerjack/core/phase_coordinator.py +281 -94
  34. crackerjack/core/proactive_workflow.py +9 -58
  35. crackerjack/core/resource_manager.py +501 -0
  36. crackerjack/core/service_watchdog.py +490 -0
  37. crackerjack/core/session_coordinator.py +4 -8
  38. crackerjack/core/timeout_manager.py +504 -0
  39. crackerjack/core/websocket_lifecycle.py +475 -0
  40. crackerjack/core/workflow_orchestrator.py +343 -209
  41. crackerjack/dynamic_config.py +47 -6
  42. crackerjack/errors.py +3 -4
  43. crackerjack/executors/async_hook_executor.py +63 -13
  44. crackerjack/executors/cached_hook_executor.py +14 -14
  45. crackerjack/executors/hook_executor.py +100 -37
  46. crackerjack/executors/hook_lock_manager.py +856 -0
  47. crackerjack/executors/individual_hook_executor.py +120 -86
  48. crackerjack/intelligence/__init__.py +0 -7
  49. crackerjack/intelligence/adaptive_learning.py +13 -86
  50. crackerjack/intelligence/agent_orchestrator.py +15 -78
  51. crackerjack/intelligence/agent_registry.py +12 -59
  52. crackerjack/intelligence/agent_selector.py +31 -92
  53. crackerjack/intelligence/integration.py +1 -41
  54. crackerjack/interactive.py +9 -9
  55. crackerjack/managers/async_hook_manager.py +25 -8
  56. crackerjack/managers/hook_manager.py +9 -9
  57. crackerjack/managers/publish_manager.py +57 -59
  58. crackerjack/managers/test_command_builder.py +6 -36
  59. crackerjack/managers/test_executor.py +9 -61
  60. crackerjack/managers/test_manager.py +17 -63
  61. crackerjack/managers/test_manager_backup.py +77 -127
  62. crackerjack/managers/test_progress.py +4 -23
  63. crackerjack/mcp/cache.py +5 -12
  64. crackerjack/mcp/client_runner.py +10 -10
  65. crackerjack/mcp/context.py +64 -6
  66. crackerjack/mcp/dashboard.py +14 -11
  67. crackerjack/mcp/enhanced_progress_monitor.py +55 -55
  68. crackerjack/mcp/file_monitor.py +72 -42
  69. crackerjack/mcp/progress_components.py +103 -84
  70. crackerjack/mcp/progress_monitor.py +122 -49
  71. crackerjack/mcp/rate_limiter.py +12 -12
  72. crackerjack/mcp/server_core.py +16 -22
  73. crackerjack/mcp/service_watchdog.py +26 -26
  74. crackerjack/mcp/state.py +15 -0
  75. crackerjack/mcp/tools/core_tools.py +95 -39
  76. crackerjack/mcp/tools/error_analyzer.py +6 -32
  77. crackerjack/mcp/tools/execution_tools.py +1 -56
  78. crackerjack/mcp/tools/execution_tools_backup.py +35 -131
  79. crackerjack/mcp/tools/intelligence_tool_registry.py +0 -36
  80. crackerjack/mcp/tools/intelligence_tools.py +2 -55
  81. crackerjack/mcp/tools/monitoring_tools.py +308 -145
  82. crackerjack/mcp/tools/proactive_tools.py +12 -42
  83. crackerjack/mcp/tools/progress_tools.py +23 -15
  84. crackerjack/mcp/tools/utility_tools.py +3 -40
  85. crackerjack/mcp/tools/workflow_executor.py +40 -60
  86. crackerjack/mcp/websocket/app.py +0 -3
  87. crackerjack/mcp/websocket/endpoints.py +206 -268
  88. crackerjack/mcp/websocket/jobs.py +213 -66
  89. crackerjack/mcp/websocket/server.py +84 -6
  90. crackerjack/mcp/websocket/websocket_handler.py +137 -29
  91. crackerjack/models/config_adapter.py +3 -16
  92. crackerjack/models/protocols.py +162 -3
  93. crackerjack/models/resource_protocols.py +454 -0
  94. crackerjack/models/task.py +3 -3
  95. crackerjack/monitoring/__init__.py +0 -0
  96. crackerjack/monitoring/ai_agent_watchdog.py +25 -71
  97. crackerjack/monitoring/regression_prevention.py +28 -87
  98. crackerjack/orchestration/advanced_orchestrator.py +44 -78
  99. crackerjack/orchestration/coverage_improvement.py +10 -60
  100. crackerjack/orchestration/execution_strategies.py +16 -16
  101. crackerjack/orchestration/test_progress_streamer.py +61 -53
  102. crackerjack/plugins/base.py +1 -1
  103. crackerjack/plugins/managers.py +22 -20
  104. crackerjack/py313.py +65 -21
  105. crackerjack/services/backup_service.py +467 -0
  106. crackerjack/services/bounded_status_operations.py +627 -0
  107. crackerjack/services/cache.py +7 -9
  108. crackerjack/services/config.py +35 -52
  109. crackerjack/services/config_integrity.py +5 -16
  110. crackerjack/services/config_merge.py +542 -0
  111. crackerjack/services/contextual_ai_assistant.py +17 -19
  112. crackerjack/services/coverage_ratchet.py +44 -73
  113. crackerjack/services/debug.py +25 -39
  114. crackerjack/services/dependency_monitor.py +52 -50
  115. crackerjack/services/enhanced_filesystem.py +14 -11
  116. crackerjack/services/file_hasher.py +1 -1
  117. crackerjack/services/filesystem.py +1 -12
  118. crackerjack/services/git.py +71 -47
  119. crackerjack/services/health_metrics.py +31 -27
  120. crackerjack/services/initialization.py +276 -428
  121. crackerjack/services/input_validator.py +760 -0
  122. crackerjack/services/log_manager.py +16 -16
  123. crackerjack/services/logging.py +7 -6
  124. crackerjack/services/metrics.py +43 -43
  125. crackerjack/services/pattern_cache.py +2 -31
  126. crackerjack/services/pattern_detector.py +26 -63
  127. crackerjack/services/performance_benchmarks.py +20 -45
  128. crackerjack/services/regex_patterns.py +2887 -0
  129. crackerjack/services/regex_utils.py +537 -0
  130. crackerjack/services/secure_path_utils.py +683 -0
  131. crackerjack/services/secure_status_formatter.py +534 -0
  132. crackerjack/services/secure_subprocess.py +605 -0
  133. crackerjack/services/security.py +47 -10
  134. crackerjack/services/security_logger.py +492 -0
  135. crackerjack/services/server_manager.py +109 -50
  136. crackerjack/services/smart_scheduling.py +8 -25
  137. crackerjack/services/status_authentication.py +603 -0
  138. crackerjack/services/status_security_manager.py +442 -0
  139. crackerjack/services/thread_safe_status_collector.py +546 -0
  140. crackerjack/services/tool_version_service.py +1 -23
  141. crackerjack/services/unified_config.py +36 -58
  142. crackerjack/services/validation_rate_limiter.py +269 -0
  143. crackerjack/services/version_checker.py +9 -40
  144. crackerjack/services/websocket_resource_limiter.py +572 -0
  145. crackerjack/slash_commands/__init__.py +52 -2
  146. crackerjack/tools/__init__.py +0 -0
  147. crackerjack/tools/validate_input_validator_patterns.py +262 -0
  148. crackerjack/tools/validate_regex_patterns.py +198 -0
  149. {crackerjack-0.31.10.dist-info → crackerjack-0.31.12.dist-info}/METADATA +197 -12
  150. crackerjack-0.31.12.dist-info/RECORD +178 -0
  151. crackerjack/cli/facade.py +0 -104
  152. crackerjack-0.31.10.dist-info/RECORD +0 -149
  153. {crackerjack-0.31.10.dist-info → crackerjack-0.31.12.dist-info}/WHEEL +0 -0
  154. {crackerjack-0.31.10.dist-info → crackerjack-0.31.12.dist-info}/entry_points.txt +0 -0
  155. {crackerjack-0.31.10.dist-info → crackerjack-0.31.12.dist-info}/licenses/LICENSE +0 -0
crackerjack/executors/hook_lock_manager.py (new file)
@@ -0,0 +1,856 @@
+"""Global hook lock management to prevent concurrent execution of specific hooks.
+
+This module provides a lock manager that ensures certain hooks
+(like complexipy) run sequentially rather than concurrently to prevent
+resource contention and hanging processes.
+
+This implements the HookLockManagerProtocol for dependency injection compatibility.
+
+Phase 2 implementation provides enhanced file-based global lock coordination
+across multiple crackerjack sessions with atomic operations, heartbeat monitoring,
+and comprehensive stale lock cleanup.
+"""
+
+import asyncio
+import json
+import logging
+import os
+import time
+import typing as t
+from collections import defaultdict
+from contextlib import asynccontextmanager, suppress
+from pathlib import Path
+
+from ..config.global_lock_config import GlobalLockConfig
+
+
+class HookLockManager:
+    """Manager for hook-specific locks to prevent concurrent execution.
+
+    Implements HookLockManagerProtocol for dependency injection compatibility.
+    Provides async locking with timeout protection and comprehensive monitoring.
+    """
+
+    _instance: t.Optional["HookLockManager"] = None
+    _initialized: bool = False
+
+    def __new__(cls) -> "HookLockManager":
+        if cls._instance is None:
+            cls._instance = super().__new__(cls)
+        return cls._instance
+
+    def __init__(self) -> None:
+        if self._initialized:
+            return
+
+        self._hooks_requiring_locks = {
+            "complexipy",  # Prevent multiple complexipy processes
+            # Add other hooks that should run sequentially
+        }
+
+        # Per-hook locks for sequential execution
+        self._hook_locks: dict[str, asyncio.Lock] = defaultdict(asyncio.Lock)
+
+        # Global lock configuration and state
+        self._global_config = GlobalLockConfig()
+        self._global_lock_enabled = self._global_config.enabled
+        self._active_global_locks: set[str] = set()
+        self._heartbeat_tasks: dict[str, asyncio.Task[None]] = {}
+
+        # Lock usage tracking for monitoring
+        self._lock_usage: dict[str, list[float]] = defaultdict(list)
+        self._lock_wait_times: dict[str, list[float]] = defaultdict(list)
+        self._lock_execution_times: dict[str, list[float]] = defaultdict(list)
+        self._max_history = 50  # Keep last 50 lock acquisitions
+
+        # Global lock statistics tracking
+        self._global_lock_attempts: dict[str, int] = defaultdict(int)
+        self._global_lock_successes: dict[str, int] = defaultdict(int)
+        self._global_lock_failures: dict[str, int] = defaultdict(int)
+        self._stale_locks_cleaned: dict[str, int] = defaultdict(int)
+        self._heartbeat_failures: dict[str, int] = defaultdict(int)
+
+        # Timeout protection
+        self._default_lock_timeout = 300.0  # 5 minutes default timeout
+        self._lock_timeouts: dict[str, float] = {}
+
+        # Error tracking
+        self._lock_failures: dict[str, int] = defaultdict(int)
+        self._timeout_failures: dict[str, int] = defaultdict(int)
+
+        self.logger = logging.getLogger(__name__)
+        self._initialized = True
+
+    def requires_lock(self, hook_name: str) -> bool:
+        """Check if a hook requires sequential execution."""
+        return hook_name in self._hooks_requiring_locks
+
+    @asynccontextmanager
+    async def acquire_hook_lock(self, hook_name: str) -> t.AsyncIterator[None]:
+        """Unified lock acquisition handling both hook-specific and global locks.
+
+        Args:
+            hook_name: Name of the hook to lock
+
+        Yields:
+            None when lock is acquired (or hook doesn't require lock)
+
+        Raises:
+            asyncio.TimeoutError: If lock acquisition times out
+
+        Example:
+            async with lock_manager.acquire_hook_lock("complexipy"):
+                # Only one complexipy process will run at a time
+                result = await execute_hook(hook)
+        """
+        if not self.requires_lock(hook_name):
+            # Hook doesn't require locking, proceed immediately
+            yield
+            return
+
+        if not self._global_lock_enabled:
+            # Use existing hook-specific locking only (legacy behavior)
+            async with self._acquire_existing_hook_lock(hook_name):
+                yield
+            return
+
+        # Global locking: coordinate across all crackerjack sessions
+        async with self._acquire_global_coordination_lock(hook_name):
+            # Then acquire hook-specific lock within global coordination
+            async with self._acquire_existing_hook_lock(hook_name):
+                yield
+
+    @asynccontextmanager
+    async def _acquire_existing_hook_lock(
+        self, hook_name: str
+    ) -> t.AsyncIterator[None]:
+        """Acquire hook-specific asyncio lock (original behavior)."""
+        lock = self._hook_locks[hook_name]
+        timeout = self._lock_timeouts.get(hook_name, self._default_lock_timeout)
+        start_time = time.time()
+
+        self.logger.debug(
+            f"Acquiring hook-specific lock: {hook_name} (timeout: {timeout}s)"
+        )
+
+        try:
+            # Use asyncio.wait_for to implement timeout for lock acquisition
+            await asyncio.wait_for(lock.acquire(), timeout=timeout)
+
+            try:
+                acquisition_time = time.time() - start_time
+                self.logger.info(
+                    f"Hook-specific lock acquired for {hook_name} after"
+                    f" {acquisition_time:.2f}s"
+                )
+
+                # Track lock usage for monitoring
+                self._track_lock_usage(hook_name, acquisition_time)
+
+                execution_start = time.time()
+                try:
+                    yield
+                finally:
+                    execution_time = time.time() - execution_start
+                    total_time = time.time() - start_time
+
+                    self._track_lock_execution(hook_name, execution_time, total_time)
+                    self.logger.debug(
+                        f"Hook-specific lock released for {hook_name} after"
+                        f" {total_time:.2f}s total"
+                    )
+
+            finally:
+                # Always release the lock, even if an exception occurred
+                lock.release()
+
+        except TimeoutError:
+            self._timeout_failures[hook_name] += 1
+            wait_time = time.time() - start_time
+            self.logger.error(
+                f"Hook-specific lock acquisition timeout for {hook_name} after"
+                f" {wait_time:.2f}s "
+                f"(timeout: {timeout}s, total failures: "
+                f"{self._timeout_failures[hook_name]})"
+            )
+            raise
+
+        except Exception as e:
+            self._lock_failures[hook_name] += 1
+            self.logger.error(
+                f"Hook-specific lock acquisition failed for {hook_name}: {e} "
+                f"(total failures: {self._lock_failures[hook_name]})"
+            )
+            raise
+
+    @asynccontextmanager
+    async def _acquire_global_coordination_lock(
+        self, hook_name: str
+    ) -> t.AsyncIterator[None]:
+        """Acquire global file-based coordination lock across crackerjack sessions."""
+        lock_path = self._global_config.get_lock_path(hook_name)
+        start_time = time.time()
+
+        self._global_lock_attempts[hook_name] += 1
+        self.logger.debug(
+            f"Attempting global lock acquisition for {hook_name}: {lock_path}"
+        )
+
+        # Clean up stale locks first
+        await self._cleanup_stale_lock_if_needed(hook_name)
+
+        try:
+            # Atomic lock acquisition with retry logic
+            await self._acquire_global_lock_file(hook_name, lock_path)
+            self._global_lock_successes[hook_name] += 1
+            self._active_global_locks.add(hook_name)
+
+            # Start heartbeat to keep lock alive
+            heartbeat_task = asyncio.create_task(self._maintain_heartbeat(hook_name))
+            self._heartbeat_tasks[hook_name] = heartbeat_task
+
+            acquisition_time = time.time() - start_time
+            self.logger.info(
+                f"Global lock acquired for {hook_name} after {acquisition_time:.2f}s"
+            )
+
+            try:
+                yield
+            finally:
+                # Cleanup: cancel heartbeat and remove lock file
+                await self._cleanup_global_lock(hook_name, heartbeat_task)
+
+        except Exception as e:
+            self._global_lock_failures[hook_name] += 1
+            self.logger.error(f"Global lock acquisition failed for {hook_name}: {e}")
+            raise
+
+    def _track_lock_usage(self, hook_name: str, acquisition_time: float) -> None:
+        """Track lock acquisition times for monitoring."""
+        usage_list = self._lock_usage[hook_name]
+        wait_list = self._lock_wait_times[hook_name]
+
+        usage_list.append(acquisition_time)
+        wait_list.append(acquisition_time)
+
+        # Keep only recent history
+        if len(usage_list) > self._max_history:
+            usage_list.pop(0)
+        if len(wait_list) > self._max_history:
+            wait_list.pop(0)
+
+    def _track_lock_execution(
+        self, hook_name: str, execution_time: float, total_time: float
+    ) -> None:
+        """Track lock execution times for monitoring."""
+        exec_list = self._lock_execution_times[hook_name]
+        exec_list.append(execution_time)
+
+        # Keep only recent history
+        if len(exec_list) > self._max_history:
+            exec_list.pop(0)
+
+        self.logger.debug(
+            f"Hook {hook_name} execution: {execution_time:.2f}s "
+            f"(total with lock: {total_time:.2f}s)"
+        )
+
+    async def _acquire_global_lock_file(self, hook_name: str, lock_path: Path) -> None:
+        """Atomic acquisition of global lock file with retry logic."""
+        for attempt in range(self._global_config.max_retry_attempts):
+            try:
+                await self._attempt_lock_acquisition(hook_name, lock_path)
+                return
+            except FileExistsError:
+                if attempt < self._global_config.max_retry_attempts - 1:
+                    # Exponential backoff with jitter
+                    delay = self._global_config.retry_delay_seconds * (2**attempt)
+                    jitter = delay * 0.1  # Add 10% jitter
+                    wait_time = delay + (jitter * (0.5 - os.urandom(1)[0] / 255))
+
+                    self.logger.debug(
+                        f"Global lock exists for {hook_name}, retrying in "
+                        f"{wait_time:.2f}s"
+                    )
+                    await asyncio.sleep(wait_time)
+                else:
+                    raise TimeoutError(
+                        f"Failed to acquire global lock for {hook_name} after"
+                        f" {self._global_config.max_retry_attempts} attempts"
+                    )
+
+    async def _attempt_lock_acquisition(self, hook_name: str, lock_path: Path) -> None:
+        """Single atomic lock acquisition attempt using temp file + rename pattern."""
+        temp_path = lock_path.with_suffix(".tmp")
+
+        lock_data = {
+            "session_id": self._global_config.session_id,
+            "hostname": self._global_config.hostname,
+            "pid": os.getpid(),
+            "hook_name": hook_name,
+            "acquired_at": time.time(),
+            "last_heartbeat": time.time(),
+            "crackerjack_version": "0.30.3",  # Could be made configurable
+        }
+
+        try:
+            # Use exclusive creation for atomic operation
+            with temp_path.open("x", encoding="utf-8") as f:
+                json.dump(lock_data, f, indent=2)
+
+            # Set restrictive permissions (owner only)
+            temp_path.chmod(0o600)
+
+            # Atomic rename - this is the critical section
+            try:
+                temp_path.rename(lock_path)
+                self.logger.debug(f"Successfully created global lock file: {lock_path}")
+            except FileExistsError:
+                # Another process won the race, clean up our temp file
+                with suppress(OSError):
+                    temp_path.unlink()
+                raise
+
+        except FileExistsError:
+            # Lock file already exists - convert to proper exception type
+            raise FileExistsError(f"Global lock already exists for {hook_name}")
+        except Exception as e:
+            # Clean up temp file on any error
+            with suppress(OSError):
+                temp_path.unlink()
+            self.logger.error(f"Failed to create global lock for {hook_name}: {e}")
+            raise
+
+    async def _maintain_heartbeat(self, hook_name: str) -> None:
+        """Maintain heartbeat updates to prevent stale lock detection."""
+        lock_path = self._global_config.get_lock_path(hook_name)
+        interval = self._global_config.session_heartbeat_interval
+
+        self.logger.debug(f"Starting heartbeat for {hook_name} every {interval}s")
+
+        while hook_name in self._active_global_locks:
+            try:
+                await asyncio.sleep(interval)
+
+                if hook_name not in self._active_global_locks:
+                    break
+
+                # Update heartbeat timestamp in lock file
+                await self._update_heartbeat_timestamp(hook_name, lock_path)
+
+            except asyncio.CancelledError:
+                self.logger.debug(f"Heartbeat cancelled for {hook_name}")
+                break
+            except Exception as e:
+                self._heartbeat_failures[hook_name] += 1
+                self.logger.warning(f"Heartbeat update failed for {hook_name}: {e}")
+
+                # If too many heartbeat failures, consider the lock compromised
+                if self._heartbeat_failures[hook_name] > 3:
+                    self.logger.error(
+                        f"Too many heartbeat failures for {hook_name},"
+                        f" stopping heartbeat"
+                    )
+                    break
+
+    async def _update_heartbeat_timestamp(
+        self, hook_name: str, lock_path: Path
+    ) -> None:
+        """Atomic update of heartbeat timestamp in existing lock file."""
+        if not lock_path.exists():
+            self.logger.warning(
+                f"Lock file disappeared for {hook_name}, stopping heartbeat"
+            )
+            self._active_global_locks.discard(hook_name)
+            return
+
+        temp_path = lock_path.with_suffix(".heartbeat_tmp")
+
+        try:
+            # Read existing lock data
+            with lock_path.open(encoding="utf-8") as f:
+                lock_data = json.load(f)
+
+            # Verify we still own this lock
+            if lock_data.get("session_id") != self._global_config.session_id:
+                self.logger.warning(
+                    f"Lock ownership changed for {hook_name}, stopping heartbeat"
+                )
+                self._active_global_locks.discard(hook_name)
+                return
+
+            # Update heartbeat timestamp
+            lock_data["last_heartbeat"] = time.time()
+
+            # Write updated data atomically
+            with temp_path.open("w", encoding="utf-8") as f:
+                json.dump(lock_data, f, indent=2)
+
+            temp_path.chmod(0o600)
+            temp_path.rename(lock_path)
+
+        except Exception as e:
+            with suppress(OSError):
+                temp_path.unlink()
+            raise RuntimeError(f"Failed to update heartbeat for {hook_name}: {e}")
+
+    async def _cleanup_global_lock(
+        self, hook_name: str, heartbeat_task: asyncio.Task[None] | None = None
+    ) -> None:
+        """Clean up global lock resources and remove lock file."""
+        self.logger.debug(f"Cleaning up global lock for {hook_name}")
+
+        # Stop tracking this lock
+        self._active_global_locks.discard(hook_name)
+
+        # Cancel heartbeat task
+        if heartbeat_task is None:
+            heartbeat_task = self._heartbeat_tasks.pop(hook_name, None)
+        else:
+            # Remove from task tracking
+            self._heartbeat_tasks.pop(hook_name, None)
+
+        if heartbeat_task:
+            heartbeat_task.cancel()
+            with suppress(asyncio.CancelledError):
+                await heartbeat_task
+
+        # Remove lock file
+        lock_path = self._global_config.get_lock_path(hook_name)
+        with suppress(OSError):
+            if lock_path.exists():
+                # Verify we still own the lock before deleting
+                try:
+                    with lock_path.open(encoding="utf-8") as f:
+                        lock_data = json.load(f)
+
+                    if lock_data.get("session_id") == self._global_config.session_id:
+                        lock_path.unlink()
+                        self.logger.debug(f"Removed global lock file: {lock_path}")
+                    else:
+                        self.logger.warning(
+                            f"Lock ownership changed, not removing file: {lock_path}"
+                        )
+
+                except Exception as e:
+                    self.logger.warning(
+                        f"Could not verify lock ownership for cleanup: {e}"
+                    )
+
+    async def _cleanup_stale_lock_if_needed(self, hook_name: str) -> None:
+        """Check for and remove stale lock if detected."""
+        lock_path = self._global_config.get_lock_path(hook_name)
+
+        if not lock_path.exists():
+            return
+
+        try:
+            # Check if lock is stale
+            with lock_path.open(encoding="utf-8") as f:
+                lock_data = json.load(f)
+
+            last_heartbeat = lock_data.get(
+                "last_heartbeat", lock_data.get("acquired_at", 0)
+            )
+            age_hours = (time.time() - last_heartbeat) / 3600
+
+            if age_hours > self._global_config.stale_lock_hours:
+                self.logger.warning(
+                    f"Removing stale lock for {hook_name} (age: {age_hours:.2f}h)"
+                )
+                lock_path.unlink()
+                self._stale_locks_cleaned[hook_name] += 1
+            else:
+                # Lock is not stale, someone else has it
+                owner = lock_data.get("session_id", "unknown")
+                self.logger.debug(
+                    f"Active lock exists for {hook_name} owned by {owner}"
+                )
+
+        except Exception as e:
+            self.logger.warning(f"Could not check lock staleness for {hook_name}: {e}")
+            # If we can't read the lock file, it might be corrupted - remove it
+            with suppress(OSError):
+                lock_path.unlink()
+                self._stale_locks_cleaned[hook_name] += 1
+
+    def get_lock_stats(self) -> dict[str, t.Any]:
+        """Get comprehensive statistics about lock usage for monitoring."""
+        stats = {}
+
+        for hook_name in self._hooks_requiring_locks:
+            wait_times = self._lock_wait_times[hook_name]
+            exec_times = self._lock_execution_times[hook_name]
+            usage_list = self._lock_usage[hook_name]
+
+            if not usage_list:
+                stats[hook_name] = {
+                    "total_acquisitions": 0,
+                    "avg_wait_time": 0.0,
+                    "max_wait_time": 0.0,
+                    "min_wait_time": 0.0,
+                    "avg_execution_time": 0.0,
+                    "max_execution_time": 0.0,
+                    "min_execution_time": 0.0,
+                    "currently_locked": self._hook_locks[hook_name].locked(),
+                    "lock_failures": self._lock_failures[hook_name],
+                    "timeout_failures": self._timeout_failures[hook_name],
+                    "success_rate": 1.0,
+                    "lock_timeout": self._lock_timeouts.get(
+                        hook_name, self._default_lock_timeout
+                    ),
+                }
+            else:
+                total_attempts = len(usage_list) + self._lock_failures[hook_name]
+                success_rate = (
+                    len(usage_list) / total_attempts if total_attempts > 0 else 1.0
+                )
+
+                base_stats = {
+                    "total_acquisitions": len(usage_list),
+                    "total_attempts": total_attempts,
+                    "currently_locked": self._hook_locks[hook_name].locked(),
+                    "lock_failures": self._lock_failures[hook_name],
+                    "timeout_failures": self._timeout_failures[hook_name],
+                    "success_rate": success_rate,
+                    "lock_timeout": self._lock_timeouts.get(
+                        hook_name, self._default_lock_timeout
+                    ),
+                }
+
+                # Wait time statistics
+                if wait_times:
+                    base_stats.update(
+                        {
+                            "avg_wait_time": sum(wait_times) / len(wait_times),
+                            "max_wait_time": max(wait_times),
+                            "min_wait_time": min(wait_times),
+                        }
+                    )
+                else:
+                    base_stats.update(
+                        {
+                            "avg_wait_time": 0.0,
+                            "max_wait_time": 0.0,
+                            "min_wait_time": 0.0,
+                        }
+                    )
+
+                # Execution time statistics
+                if exec_times:
+                    base_stats.update(
+                        {
+                            "avg_execution_time": sum(exec_times) / len(exec_times),
+                            "max_execution_time": max(exec_times),
+                            "min_execution_time": min(exec_times),
+                        }
+                    )
+                else:
+                    base_stats.update(
+                        {
+                            "avg_execution_time": 0.0,
+                            "max_execution_time": 0.0,
+                            "min_execution_time": 0.0,
+                        }
+                    )
+
+                stats[hook_name] = base_stats
+
+        return stats
+
+    def add_hook_to_lock_list(self, hook_name: str) -> None:
+        """Add a hook to the list requiring sequential execution."""
+        self._hooks_requiring_locks.add(hook_name)
+        self.logger.info(f"Added {hook_name} to hooks requiring locks")
+
+    def remove_hook_from_lock_list(self, hook_name: str) -> None:
+        """Remove a hook from the list requiring sequential execution."""
+        self._hooks_requiring_locks.discard(hook_name)
+        if hook_name in self._hook_locks:
+            del self._hook_locks[hook_name]
+        if hook_name in self._lock_usage:
+            del self._lock_usage[hook_name]
+        self.logger.info(f"Removed {hook_name} from hooks requiring locks")
+
+    def is_hook_currently_locked(self, hook_name: str) -> bool:
+        """Check if a hook is currently locked."""
+        if not self.requires_lock(hook_name):
+            return False
+        return self._hook_locks[hook_name].locked()
+
+    def set_hook_timeout(self, hook_name: str, timeout: float) -> None:
+        """Set custom timeout for a specific hook.
+
+        Args:
+            hook_name: Name of the hook
+            timeout: Timeout in seconds
+        """
+        self._lock_timeouts[hook_name] = timeout
+        self.logger.info(f"Set custom timeout for {hook_name}: {timeout}s")
+
+    def get_hook_timeout(self, hook_name: str) -> float:
+        """Get timeout for a specific hook.
+
+        Args:
+            hook_name: Name of the hook
+
+        Returns:
+            Timeout in seconds
+        """
+        return self._lock_timeouts.get(hook_name, self._default_lock_timeout)
+
+    # New protocol methods for global lock functionality
+
+    def enable_global_lock(self, enabled: bool = True) -> None:
+        """Enable or disable global lock functionality.
+
+        Args:
+            enabled: Whether to enable global locking
+        """
+        self._global_lock_enabled = enabled
+        self._global_config.enabled = enabled
+        self.logger.info(
+            f"Global lock functionality {'enabled' if enabled else 'disabled'}"
+        )
+
+    def is_global_lock_enabled(self) -> bool:
+        """Check if global lock functionality is enabled.
+
+        Returns:
+            True if global locking is enabled
+        """
+        return self._global_lock_enabled
+
+    def get_global_lock_path(self, hook_name: str) -> Path:
+        """Get the filesystem path for a hook's global lock file.
+
+        Args:
+            hook_name: Name of the hook
+
+        Returns:
+            Path to the lock file for the hook
+        """
+        return self._global_config.get_lock_path(hook_name)
+
+    def cleanup_stale_locks(self, max_age_hours: float = 2.0) -> int:
+        """Clean up stale lock files older than max_age_hours.
+
+        Args:
+            max_age_hours: Maximum age in hours before a lock is considered stale
+
+        Returns:
+            Number of stale locks cleaned up
+        """
+        locks_dir = self._global_config.lock_directory
+        if not locks_dir.exists():
+            return 0
+
+        cleaned_count = 0
+        current_time = time.time()
+
+        try:
+            for lock_file in locks_dir.glob("*.lock"):
+                cleaned_count += self._process_lock_file(
+                    lock_file, max_age_hours, current_time
+                )
+
+        except OSError as e:
+            self.logger.error(f"Could not access locks directory {locks_dir}: {e}")
+
+        if cleaned_count > 0:
+            self.logger.info(f"Cleaned up {cleaned_count} stale lock files")
+
+        return cleaned_count
+
+    def _process_lock_file(
+        self, lock_file: Path, max_age_hours: float, current_time: float
+    ) -> int:
+        """Process a single lock file and return number of files cleaned."""
+        try:
+            # Check file age
+            file_age_hours = (current_time - lock_file.stat().st_mtime) / 3600
+
+            if file_age_hours > max_age_hours:
+                return self._cleanup_stale_lock_file(
+                    lock_file, max_age_hours, current_time
+                )
+            return 0
+
+        except OSError as e:
+            self.logger.warning(f"Could not process lock file {lock_file}: {e}")
+            return 0
+
+    def _cleanup_stale_lock_file(
+        self, lock_file: Path, max_age_hours: float, current_time: float
+    ) -> int:
+        """Clean up a stale lock file and return 1 if successful."""
+        try:
+            with lock_file.open(encoding="utf-8") as f:
+                lock_data = json.load(f)
+
+            last_heartbeat = lock_data.get(
+                "last_heartbeat", lock_data.get("acquired_at", 0)
+            )
+            heartbeat_age_hours = (current_time - last_heartbeat) / 3600
+
+            if heartbeat_age_hours > max_age_hours:
+                lock_file.unlink()
+                hook_name = lock_file.stem
+                self._stale_locks_cleaned[hook_name] += 1
+                self.logger.info(
+                    f"Cleaned stale lock: {lock_file} (age: {heartbeat_age_hours:.2f}h)"
+                )
+                return 1
+
+        except (json.JSONDecodeError, KeyError):
+            # Corrupted lock file, remove it
+            lock_file.unlink()
+            self.logger.warning(f"Cleaned corrupted lock file: {lock_file}")
+            return 1
+
+        return 0
+
+    def get_global_lock_stats(self) -> dict[str, t.Any]:
+        """Get comprehensive statistics about global lock usage.
+
+        Returns:
+            Dictionary containing global lock statistics and metrics
+        """
+        stats: dict[str, t.Any] = {
+            "global_lock_enabled": self._global_lock_enabled,
+            "lock_directory": str(self._global_config.lock_directory),
+            "session_id": self._global_config.session_id,
+            "hostname": self._global_config.hostname,
+            "active_global_locks": list(self._active_global_locks),
+            "active_heartbeat_tasks": len(self._heartbeat_tasks),
+            "configuration": {
+                "timeout_seconds": self._global_config.timeout_seconds,
+                "stale_lock_hours": self._global_config.stale_lock_hours,
+                "heartbeat_interval": self._global_config.session_heartbeat_interval,
+                "max_retry_attempts": self._global_config.max_retry_attempts,
+                "retry_delay_seconds": self._global_config.retry_delay_seconds,
+                "enable_lock_monitoring": self._global_config.enable_lock_monitoring,
+            },
+            "statistics": {},
+        }
+
+        # Per-hook global lock statistics
+        all_hooks = (
+            set(self._global_lock_attempts.keys())
+            | set(self._global_lock_successes.keys())
+            | set(self._global_lock_failures.keys())
+        )
+
+        for hook_name in all_hooks:
+            attempts = self._global_lock_attempts[hook_name]
+            successes = self._global_lock_successes[hook_name]
+            failures = self._global_lock_failures[hook_name]
+            stale_cleaned = self._stale_locks_cleaned[hook_name]
+            heartbeat_failures = self._heartbeat_failures[hook_name]
+
+            success_rate = (successes / attempts) if attempts > 0 else 0.0
+
+            stats["statistics"][hook_name] = {
+                "attempts": attempts,
+                "successes": successes,
+                "failures": failures,
+                "success_rate": success_rate,
+                "stale_locks_cleaned": stale_cleaned,
+                "heartbeat_failures": heartbeat_failures,
+                "currently_locked": hook_name in self._active_global_locks,
+                "has_heartbeat_task": hook_name in self._heartbeat_tasks,
+            }
+
+        # Overall statistics
+        total_attempts = sum(self._global_lock_attempts.values())
+        total_successes = sum(self._global_lock_successes.values())
+        total_failures = sum(self._global_lock_failures.values())
+        total_stale_cleaned = sum(self._stale_locks_cleaned.values())
+        total_heartbeat_failures = sum(self._heartbeat_failures.values())
+
+        stats["totals"] = {
+            "total_attempts": total_attempts,
+            "total_successes": total_successes,
+            "total_failures": total_failures,
+            "overall_success_rate": (total_successes / total_attempts)
+            if total_attempts > 0
+            else 0.0,
+            "total_stale_locks_cleaned": total_stale_cleaned,
+            "total_heartbeat_failures": total_heartbeat_failures,
+        }
+
+        return stats
+
+    def configure_from_options(self, options: t.Any) -> None:
+        """Configure the lock manager from CLI options.
+
+        Args:
+            options: Options object containing CLI arguments
+        """
+        self._global_config = GlobalLockConfig.from_options(options)
+        self._global_lock_enabled = self._global_config.enabled
+
+        # Apply stale lock cleanup if requested
+        if hasattr(options, "global_lock_cleanup") and options.global_lock_cleanup:
+            self.cleanup_stale_locks()
+
+        self.logger.info(
+            f"Configured lock manager: global_locks={
+                'enabled' if self._global_lock_enabled else 'disabled'
+            },"
+            f" timeout={self._global_config.timeout_seconds}s, "
+            f"lock_dir={self._global_config.lock_directory}"
+        )
+
+    def reset_hook_stats(self, hook_name: str | None = None) -> None:
+        """Reset statistics for a specific hook or all hooks.
+
+        Args:
+            hook_name: Name of the hook to reset, or None for all hooks
+        """
+        if hook_name:
+            self._lock_usage[hook_name].clear()
+            self._lock_wait_times[hook_name].clear()
+            self._lock_execution_times[hook_name].clear()
+            self._lock_failures[hook_name] = 0
+            self._timeout_failures[hook_name] = 0
+            self.logger.info(f"Reset statistics for hook: {hook_name}")
+        else:
+            self._lock_usage.clear()
+            self._lock_wait_times.clear()
+            self._lock_execution_times.clear()
+            self._lock_failures.clear()
+            self._timeout_failures.clear()
+            self.logger.info("Reset statistics for all hooks")
+
+    def get_comprehensive_status(self) -> dict[str, t.Any]:
+        """Get comprehensive status including configuration and health."""
+        status = {
+            "hooks_requiring_locks": list(self._hooks_requiring_locks),
+            "default_timeout": self._default_lock_timeout,
+            "custom_timeouts": self._lock_timeouts.copy(),
+            "max_history": self._max_history,
+            "lock_statistics": self.get_lock_stats(),
+            "currently_locked_hooks": [
+                hook
+                for hook in self._hooks_requiring_locks
+                if self.is_hook_currently_locked(hook)
+            ],
+            "total_lock_failures": sum(self._lock_failures.values()),
+            "total_timeout_failures": sum(self._timeout_failures.values()),
+        }
+
+        # Add global lock information if enabled
+        if self._global_lock_enabled:
+            status["global_lock_stats"] = self.get_global_lock_stats()
+        else:
+            status["global_lock_stats"] = {
+                "global_lock_enabled": False,
+                "message": "Global locking is disabled",
+            }
+
+        return status
+
+
+# Singleton instance
+hook_lock_manager = HookLockManager()
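
For orientation, the new module above is consumed through the hook_lock_manager singleton it exports. The sketch below is not part of the diff; it adapts the docstring example from acquire_hook_lock to show the intended call pattern. run_complexipy is a hypothetical stand-in for whatever actually executes the hook and is not a crackerjack API.

# Usage sketch only, assuming the 0.31.12 wheel is installed.
import asyncio

from crackerjack.executors.hook_lock_manager import hook_lock_manager


async def run_complexipy() -> str:
    await asyncio.sleep(0.1)  # stand-in for the real complexipy subprocess
    return "complexipy ok"


async def main() -> None:
    # Optional per-hook timeout; the default is 300 seconds.
    hook_lock_manager.set_hook_timeout("complexipy", 120.0)

    # "complexipy" is in the lock list, so only one run proceeds at a time in
    # this process; with global locking enabled, lock files coordinate runs
    # across separate crackerjack sessions as well.
    async with hook_lock_manager.acquire_hook_lock("complexipy"):
        print(await run_complexipy())

    # Monitoring data accumulated by the manager.
    stats = hook_lock_manager.get_lock_stats()["complexipy"]
    print(stats["total_acquisitions"], stats["avg_execution_time"])


asyncio.run(main())

Whether global coordination is active depends on GlobalLockConfig defaults and any CLI options passed to configure_from_options, which live in crackerjack/config/global_lock_config.py and are outside the portion of the diff shown here.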