crackerjack 0.31.9__py3-none-any.whl → 0.31.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of crackerjack might be problematic. Click here for more details.

Files changed (155) hide show
  1. crackerjack/CLAUDE.md +288 -705
  2. crackerjack/__main__.py +22 -8
  3. crackerjack/agents/__init__.py +0 -3
  4. crackerjack/agents/architect_agent.py +0 -43
  5. crackerjack/agents/base.py +1 -9
  6. crackerjack/agents/coordinator.py +2 -148
  7. crackerjack/agents/documentation_agent.py +109 -81
  8. crackerjack/agents/dry_agent.py +122 -97
  9. crackerjack/agents/formatting_agent.py +3 -16
  10. crackerjack/agents/import_optimization_agent.py +1174 -130
  11. crackerjack/agents/performance_agent.py +956 -188
  12. crackerjack/agents/performance_helpers.py +229 -0
  13. crackerjack/agents/proactive_agent.py +1 -48
  14. crackerjack/agents/refactoring_agent.py +516 -246
  15. crackerjack/agents/refactoring_helpers.py +282 -0
  16. crackerjack/agents/security_agent.py +393 -90
  17. crackerjack/agents/test_creation_agent.py +1776 -120
  18. crackerjack/agents/test_specialist_agent.py +59 -15
  19. crackerjack/agents/tracker.py +0 -102
  20. crackerjack/api.py +145 -37
  21. crackerjack/cli/handlers.py +48 -30
  22. crackerjack/cli/interactive.py +11 -11
  23. crackerjack/cli/options.py +66 -4
  24. crackerjack/code_cleaner.py +808 -148
  25. crackerjack/config/global_lock_config.py +110 -0
  26. crackerjack/config/hooks.py +43 -64
  27. crackerjack/core/async_workflow_orchestrator.py +247 -97
  28. crackerjack/core/autofix_coordinator.py +192 -109
  29. crackerjack/core/enhanced_container.py +46 -63
  30. crackerjack/core/file_lifecycle.py +549 -0
  31. crackerjack/core/performance.py +9 -8
  32. crackerjack/core/performance_monitor.py +395 -0
  33. crackerjack/core/phase_coordinator.py +282 -95
  34. crackerjack/core/proactive_workflow.py +9 -58
  35. crackerjack/core/resource_manager.py +501 -0
  36. crackerjack/core/service_watchdog.py +490 -0
  37. crackerjack/core/session_coordinator.py +4 -8
  38. crackerjack/core/timeout_manager.py +504 -0
  39. crackerjack/core/websocket_lifecycle.py +475 -0
  40. crackerjack/core/workflow_orchestrator.py +355 -204
  41. crackerjack/dynamic_config.py +47 -6
  42. crackerjack/errors.py +3 -4
  43. crackerjack/executors/async_hook_executor.py +63 -13
  44. crackerjack/executors/cached_hook_executor.py +14 -14
  45. crackerjack/executors/hook_executor.py +100 -37
  46. crackerjack/executors/hook_lock_manager.py +856 -0
  47. crackerjack/executors/individual_hook_executor.py +120 -86
  48. crackerjack/intelligence/__init__.py +0 -7
  49. crackerjack/intelligence/adaptive_learning.py +13 -86
  50. crackerjack/intelligence/agent_orchestrator.py +15 -78
  51. crackerjack/intelligence/agent_registry.py +12 -59
  52. crackerjack/intelligence/agent_selector.py +31 -92
  53. crackerjack/intelligence/integration.py +1 -41
  54. crackerjack/interactive.py +9 -9
  55. crackerjack/managers/async_hook_manager.py +25 -8
  56. crackerjack/managers/hook_manager.py +9 -9
  57. crackerjack/managers/publish_manager.py +57 -59
  58. crackerjack/managers/test_command_builder.py +6 -36
  59. crackerjack/managers/test_executor.py +9 -61
  60. crackerjack/managers/test_manager.py +52 -62
  61. crackerjack/managers/test_manager_backup.py +77 -127
  62. crackerjack/managers/test_progress.py +4 -23
  63. crackerjack/mcp/cache.py +5 -12
  64. crackerjack/mcp/client_runner.py +10 -10
  65. crackerjack/mcp/context.py +64 -6
  66. crackerjack/mcp/dashboard.py +14 -11
  67. crackerjack/mcp/enhanced_progress_monitor.py +55 -55
  68. crackerjack/mcp/file_monitor.py +72 -42
  69. crackerjack/mcp/progress_components.py +103 -84
  70. crackerjack/mcp/progress_monitor.py +122 -49
  71. crackerjack/mcp/rate_limiter.py +12 -12
  72. crackerjack/mcp/server_core.py +16 -22
  73. crackerjack/mcp/service_watchdog.py +26 -26
  74. crackerjack/mcp/state.py +15 -0
  75. crackerjack/mcp/tools/core_tools.py +95 -39
  76. crackerjack/mcp/tools/error_analyzer.py +6 -32
  77. crackerjack/mcp/tools/execution_tools.py +1 -56
  78. crackerjack/mcp/tools/execution_tools_backup.py +35 -131
  79. crackerjack/mcp/tools/intelligence_tool_registry.py +0 -36
  80. crackerjack/mcp/tools/intelligence_tools.py +2 -55
  81. crackerjack/mcp/tools/monitoring_tools.py +308 -145
  82. crackerjack/mcp/tools/proactive_tools.py +12 -42
  83. crackerjack/mcp/tools/progress_tools.py +23 -15
  84. crackerjack/mcp/tools/utility_tools.py +3 -40
  85. crackerjack/mcp/tools/workflow_executor.py +40 -60
  86. crackerjack/mcp/websocket/app.py +0 -3
  87. crackerjack/mcp/websocket/endpoints.py +206 -268
  88. crackerjack/mcp/websocket/jobs.py +213 -66
  89. crackerjack/mcp/websocket/server.py +84 -6
  90. crackerjack/mcp/websocket/websocket_handler.py +137 -29
  91. crackerjack/models/config_adapter.py +3 -16
  92. crackerjack/models/protocols.py +162 -3
  93. crackerjack/models/resource_protocols.py +454 -0
  94. crackerjack/models/task.py +3 -3
  95. crackerjack/monitoring/__init__.py +0 -0
  96. crackerjack/monitoring/ai_agent_watchdog.py +25 -71
  97. crackerjack/monitoring/regression_prevention.py +28 -87
  98. crackerjack/orchestration/advanced_orchestrator.py +44 -78
  99. crackerjack/orchestration/coverage_improvement.py +10 -60
  100. crackerjack/orchestration/execution_strategies.py +16 -16
  101. crackerjack/orchestration/test_progress_streamer.py +61 -53
  102. crackerjack/plugins/base.py +1 -1
  103. crackerjack/plugins/managers.py +22 -20
  104. crackerjack/py313.py +65 -21
  105. crackerjack/services/backup_service.py +467 -0
  106. crackerjack/services/bounded_status_operations.py +627 -0
  107. crackerjack/services/cache.py +7 -9
  108. crackerjack/services/config.py +35 -52
  109. crackerjack/services/config_integrity.py +5 -16
  110. crackerjack/services/config_merge.py +542 -0
  111. crackerjack/services/contextual_ai_assistant.py +17 -19
  112. crackerjack/services/coverage_ratchet.py +51 -76
  113. crackerjack/services/debug.py +25 -39
  114. crackerjack/services/dependency_monitor.py +52 -50
  115. crackerjack/services/enhanced_filesystem.py +14 -11
  116. crackerjack/services/file_hasher.py +1 -1
  117. crackerjack/services/filesystem.py +1 -12
  118. crackerjack/services/git.py +78 -44
  119. crackerjack/services/health_metrics.py +31 -27
  120. crackerjack/services/initialization.py +281 -433
  121. crackerjack/services/input_validator.py +760 -0
  122. crackerjack/services/log_manager.py +16 -16
  123. crackerjack/services/logging.py +7 -6
  124. crackerjack/services/metrics.py +43 -43
  125. crackerjack/services/pattern_cache.py +2 -31
  126. crackerjack/services/pattern_detector.py +26 -63
  127. crackerjack/services/performance_benchmarks.py +20 -45
  128. crackerjack/services/regex_patterns.py +2887 -0
  129. crackerjack/services/regex_utils.py +537 -0
  130. crackerjack/services/secure_path_utils.py +683 -0
  131. crackerjack/services/secure_status_formatter.py +534 -0
  132. crackerjack/services/secure_subprocess.py +605 -0
  133. crackerjack/services/security.py +47 -10
  134. crackerjack/services/security_logger.py +492 -0
  135. crackerjack/services/server_manager.py +109 -50
  136. crackerjack/services/smart_scheduling.py +8 -25
  137. crackerjack/services/status_authentication.py +603 -0
  138. crackerjack/services/status_security_manager.py +442 -0
  139. crackerjack/services/thread_safe_status_collector.py +546 -0
  140. crackerjack/services/tool_version_service.py +1 -23
  141. crackerjack/services/unified_config.py +36 -58
  142. crackerjack/services/validation_rate_limiter.py +269 -0
  143. crackerjack/services/version_checker.py +9 -40
  144. crackerjack/services/websocket_resource_limiter.py +572 -0
  145. crackerjack/slash_commands/__init__.py +52 -2
  146. crackerjack/tools/__init__.py +0 -0
  147. crackerjack/tools/validate_input_validator_patterns.py +262 -0
  148. crackerjack/tools/validate_regex_patterns.py +198 -0
  149. {crackerjack-0.31.9.dist-info → crackerjack-0.31.12.dist-info}/METADATA +197 -12
  150. crackerjack-0.31.12.dist-info/RECORD +178 -0
  151. crackerjack/cli/facade.py +0 -104
  152. crackerjack-0.31.9.dist-info/RECORD +0 -149
  153. {crackerjack-0.31.9.dist-info → crackerjack-0.31.12.dist-info}/WHEEL +0 -0
  154. {crackerjack-0.31.9.dist-info → crackerjack-0.31.12.dist-info}/entry_points.txt +0 -0
  155. {crackerjack-0.31.9.dist-info → crackerjack-0.31.12.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,2887 @@
1
+ """
2
+ Centralized regex patterns with validation to prevent bad regex issues.
3
+
4
+ CRITICAL: All regex patterns in this codebase MUST be defined here with comprehensive
5
+ testing to prevent spacing and replacement syntax errors.
6
+
7
+ Optimized for performance, safety, and maintainability with:
8
+ - Thread-safe compiled pattern caching
9
+ - Iterative application for complex multi-word cases
10
+ - Safety limits to prevent catastrophic backtracking
11
+ - Performance monitoring capabilities
12
+ """
13
+
14
+ import re
15
+ import threading
16
+ import time
17
+ import typing as t
18
+ from dataclasses import dataclass, field
19
+ from re import Pattern
20
+
21
# Safety limits shared by the regex helpers in this module.
MAX_INPUT_SIZE = 10 * 1024**2  # upper bound on accepted input length (10 * 1024 * 1024)
MAX_ITERATIONS = 10  # default cap on passes when a pattern is applied repeatedly
PATTERN_CACHE_SIZE = 100  # maximum number of compiled patterns kept in the cache
25
+
26
+
27
class CompiledPatternCache:
    """Lock-protected, size-bounded cache of compiled regex patterns.

    All state is stored on the class, so every caller shares the same cache.
    Each public method acquires ``_lock`` for the duration of its work.
    """

    _lock = threading.RLock()
    _cache: dict[str, Pattern[str]] = {}
    _max_size = PATTERN_CACHE_SIZE

    @classmethod
    def get_compiled_pattern(cls, pattern: str) -> Pattern[str]:
        """Return the compiled form of *pattern* (no flags).

        The pattern text itself is used as the cache key.

        Raises:
            ValueError: if *pattern* is not a valid regular expression.
        """
        return cls.get_compiled_pattern_with_flags(pattern, pattern, 0)

    @classmethod
    def get_compiled_pattern_with_flags(
        cls, cache_key: str, pattern: str, flags: int
    ) -> Pattern[str]:
        """Return the compiled pattern stored under *cache_key*, compiling on a miss.

        Args:
            cache_key: Key the compiled object is stored under. The caller is
                responsible for making it unique per (pattern, flags) pair.
            pattern: Regular expression source.
            flags: ``re`` flag bits passed to ``re.compile``.

        Raises:
            ValueError: if *pattern* cannot be compiled; the original
                ``re.error`` is attached as ``__cause__``.
        """
        with cls._lock:
            if cache_key in cls._cache:
                return cls._cache[cache_key]

            try:
                compiled = re.compile(pattern, flags)
            except re.error as e:
                # Message format is kept stable; callers match on
                # "Invalid regex pattern". Chain the cause so the underlying
                # re.error stays visible in tracebacks.
                raise ValueError(f"Invalid regex pattern '{pattern}': {e}") from e

            if len(cls._cache) >= cls._max_size:
                # dicts iterate in insertion order, so the first key is the
                # oldest entry (simple FIFO eviction).
                oldest_key = next(iter(cls._cache))
                del cls._cache[oldest_key]

            cls._cache[cache_key] = compiled
            return compiled

    @classmethod
    def clear_cache(cls) -> None:
        """Drop every cached pattern (useful for testing)."""
        with cls._lock:
            cls._cache.clear()

    @classmethod
    def get_cache_stats(cls) -> dict[str, int | list[str]]:
        """Return the current size, the size limit and the list of cache keys."""
        with cls._lock:
            return {
                "size": len(cls._cache),
                "max_size": cls._max_size,
                "patterns": list(cls._cache.keys()),
            }
79
+
80
+
81
def _mask_regex_literals(pattern: str) -> str:
    """Return *pattern* with escapes and character classes blanked out.

    Every masked character becomes ``_``, so the result has the same length
    as *pattern* and indexes line up between the two strings. This keeps
    literal ``\\+`` or the members of ``[+*?]`` from being read as quantifiers.
    """
    masked: list[str] = []
    size = len(pattern)
    pos = 0
    while pos < size:
        char = pattern[pos]
        if char == "\\":
            end = pos + 2  # an escape always consumes the following character
        elif char == "[":
            end = pos + 1
            if pattern[end : end + 1] == "^":
                end += 1
            if pattern[end : end + 1] == "]":
                end += 1  # a leading "]" is a literal member of the class
            while end < size and pattern[end] != "]":
                end += 2 if pattern[end] == "\\" else 1
            end += 1  # include the closing bracket
        else:
            masked.append(char)
            pos += 1
            continue
        end = min(end, size)  # tolerate a dangling "\" or an unterminated class
        masked.append("_" * (end - pos))
        pos = end
    return "".join(masked)


def _find_nested_quantifiers(pattern: str) -> list[str]:
    """Return the fragments of *pattern* that stack one quantifier on another."""
    masked = _mask_regex_literals(pattern)

    # Directly stacked quantifiers such as "**" or "+?+". A single trailing
    # "?" ("+?", "*?", "??") is only the lazy modifier and is not reported.
    fragments: list[str] = re.findall(r"[+*?](?:[+*]|\?[+*?])", masked)

    # Groups that contain a "+"/"*" and are themselves repeated with "+"/"*",
    # e.g. "(a+)+" - the classic catastrophic-backtracking shape.
    group_starts: list[int] = []
    group_repeats: list[bool] = []
    for index, char in enumerate(masked):
        if char == "(":
            group_starts.append(index)
            group_repeats.append(False)
        elif char == ")" and group_starts:
            start = group_starts.pop()
            if group_repeats.pop():
                if masked[index + 1 : index + 2] in ("+", "*"):
                    fragments.append(pattern[start : index + 2])
                if group_repeats:
                    # The enclosing group now also contains a quantifier.
                    group_repeats[-1] = True
        elif char in ("+", "*") and group_repeats:
            group_repeats[-1] = True
    return fragments


def validate_pattern_safety(pattern: str) -> list[str]:
    """Return human-readable warnings about risky constructs in *pattern*.

    The checks are textual heuristics; the pattern is never compiled here.
    An empty list means nothing suspicious was found.

    Checks, in the order their warnings are emitted:
      * two adjacent ``.*`` or two adjacent ``.+`` constructs,
      * nested quantifiers: stacked quantifier characters, or a group that
        contains ``+``/``*`` and is itself repeated with ``+``/``*``
        (escaped characters, character classes and plain lazy quantifiers
        such as ``.+?`` are not reported),
      * more than ten ``|`` alternations.
    """
    warnings: list[str] = []

    if ".*.*" in pattern:
        warnings.append("Multiple .* constructs may cause performance issues")

    if ".+.+" in pattern:
        warnings.append("Multiple .+ constructs may cause performance issues")

    nested_quantifiers = _find_nested_quantifiers(pattern)
    if nested_quantifiers:
        warnings.append(f"Nested quantifiers detected: {nested_quantifiers}")

    if pattern.count("|") > 10:
        warnings.append("Many alternations may cause performance issues")

    return warnings
102
+
103
+
104
@dataclass
class ValidatedPattern:
    """A regex substitution that is checked against its own test cases on creation.

    Constructing an instance compiles ``pattern`` and runs every entry of
    ``test_cases`` through the substitution; any mismatch raises ``ValueError``.
    """

    name: str
    pattern: str
    replacement: str
    test_cases: list[tuple[str, str]]  # (input, expected_output)
    description: str = ""
    global_replace: bool = False  # If True, replace all matches
    flags: int = 0  # Regex flags (re.IGNORECASE, re.MULTILINE, etc.)
    # Not read or written by any method of this class; compiled patterns are
    # obtained from CompiledPatternCache instead.
    _compiled_pattern: Pattern[str] | None = field(default=None, init=False)

    def __post_init__(self) -> None:
        """Validate pattern on creation."""
        self._validate()

    def _validate(self) -> None:
        """Ensure the pattern compiles, the replacement is sane and all test cases pass.

        Raises:
            ValueError: on an invalid pattern, a malformed ``\\g<...>`` group
                reference in the replacement, or a failing test case.
        """
        try:
            self._get_compiled_pattern()
        except ValueError as e:
            # Maintain backward compatibility with the error message format:
            # report the pattern's name instead of its source text.
            if "Invalid regex pattern" in str(e):
                error_msg = str(e).replace(f"'{self.pattern}'", f"'{self.name}'")
                raise ValueError(error_msg) from e
            raise  # Re-raise other errors

        # Reject group references mangled by stray whitespace ("\g < 1 >",
        # "\g<1 >", "\g< 1>"). Only whitespace inside the reference counts, so
        # a replacement such as r"\1 > \2" is accepted.
        if re.search(r"\\g\s+<|\\g<\s|\\g<\w+\s+>", self.replacement):
            raise ValueError(
                f"Bad replacement syntax in '{self.name}': {self.replacement}. "
                "Use \\g<1> not \\g < 1 >"
            )

        # NOTE: warnings from validate_pattern_safety() are advisory only and
        # are not enforced (or collected) during validation.

        count = 0 if self.global_replace else 1
        for input_text, expected in self.test_cases:
            try:
                result = self._apply_internal(input_text, count)
            except re.error as e:
                raise ValueError(
                    f"Pattern '{self.name}' failed on '{input_text}': {e}"
                ) from e
            if result != expected:
                raise ValueError(
                    f"Pattern '{self.name}' failed test case: "
                    f"'{input_text}' -> '{result}' != expected '{expected}'"
                )

    def _get_compiled_pattern(self) -> Pattern[str]:
        """Get cached compiled pattern with flags."""
        # The cache key includes the flags so the same source text compiled
        # with different flags gets separate cache entries.
        cache_key = f"{self.pattern}|flags:{self.flags}"
        return CompiledPatternCache.get_compiled_pattern_with_flags(
            cache_key, self.pattern, self.flags
        )

    @staticmethod
    def _ensure_input_size(text: str) -> None:
        """Raise ``ValueError`` when *text* is longer than ``MAX_INPUT_SIZE``."""
        if len(text) > MAX_INPUT_SIZE:
            raise ValueError(
                f"Input text too large: {len(text)} bytes > {MAX_INPUT_SIZE}"
            )

    def _apply_internal(self, text: str, count: int = 1) -> str:
        """Substitute up to *count* matches in *text* (``0`` means all matches).

        Raises:
            ValueError: if *text* exceeds ``MAX_INPUT_SIZE``.
        """
        self._ensure_input_size(text)
        return self._get_compiled_pattern().sub(self.replacement, text, count=count)

    def apply(self, text: str) -> str:
        """Apply the pattern once: all matches if ``global_replace``, else the first."""
        count = 0 if self.global_replace else 1
        return self._apply_internal(text, count)

    def apply_iteratively(self, text: str, max_iterations: int = MAX_ITERATIONS) -> str:
        """
        Apply pattern repeatedly until no more changes occur.

        Useful for cases like 'pytest - hypothesis - specialist' -> 'pytest-hypothesis-specialist'
        where multiple passes are needed.

        If the text is still changing after *max_iterations* passes, the
        result of the last pass is returned as is.

        Raises:
            ValueError: if *max_iterations* is not positive.
        """
        if max_iterations <= 0:
            raise ValueError("max_iterations must be positive")

        result = text
        for _ in range(max_iterations):
            new_result = self.apply(result)
            if new_result == result:
                break  # converged
            result = new_result
        return result

    def apply_with_timeout(self, text: str, timeout_seconds: float = 1.0) -> str:
        """Apply pattern with timeout protection.

        Uses a ``SIGALRM`` interval timer, so it only works on Unix and in the
        main thread. Fractional timeouts are honoured (``signal.alarm`` would
        truncate e.g. 0.5 to 0, which disables the alarm entirely).

        NOTE(review): the handler only runs once the interpreter processes
        pending signals; confirm it can interrupt a long-running match.

        Raises:
            ValueError: if *timeout_seconds* is not positive.
            TimeoutError: if the timer fires before ``apply`` returns.
        """
        import signal

        if timeout_seconds <= 0:
            raise ValueError("timeout_seconds must be positive")

        def timeout_handler(signum: int, frame: t.Any) -> None:
            raise TimeoutError(
                f"Pattern '{self.name}' timed out after {timeout_seconds}s"
            )

        old_handler = signal.signal(signal.SIGALRM, timeout_handler)
        try:
            signal.setitimer(signal.ITIMER_REAL, timeout_seconds)
            return self.apply(text)
        finally:
            # Always disarm the timer and restore the previous handler.
            signal.setitimer(signal.ITIMER_REAL, 0)
            signal.signal(signal.SIGALRM, old_handler)

    def test(self, text: str) -> bool:
        """Test if pattern matches text without applying replacement.

        Unlike ``search`` and ``findall``, no input size limit is enforced here.
        """
        compiled = self._get_compiled_pattern()
        return bool(compiled.search(text))

    def search(self, text: str) -> re.Match[str] | None:
        """Search for the first match and return a Match object or None.

        Raises:
            ValueError: if *text* exceeds ``MAX_INPUT_SIZE``.
        """
        self._ensure_input_size(text)
        return self._get_compiled_pattern().search(text)

    def findall(self, text: str) -> list[str]:
        """Find all matches of the pattern in text safely.

        Returns whatever ``Pattern.findall`` returns; for patterns with more
        than one capture group that is a list of tuples rather than strings.

        Raises:
            ValueError: if *text* exceeds ``MAX_INPUT_SIZE``.
        """
        self._ensure_input_size(text)
        return self._get_compiled_pattern().findall(text)

    def get_performance_stats(
        self, text: str, iterations: int = 100
    ) -> dict[str, float]:
        """Time ``apply(text)`` *iterations* times and summarise the timings.

        Returns:
            Mapping with ``mean_time``, ``min_time``, ``max_time`` and
            ``total_time``, all in seconds as measured by ``time.perf_counter``.

        Raises:
            ValueError: if *iterations* is not positive.
        """
        if iterations <= 0:
            raise ValueError("iterations must be positive")

        times: list[float] = []
        for _ in range(iterations):
            start = time.perf_counter()
            self.apply(text)
            times.append(time.perf_counter() - start)

        total_time = sum(times)
        return {
            "mean_time": total_time / len(times),
            "min_time": min(times),
            "max_time": max(times),
            "total_time": total_time,
        }
267
+
268
+
269
+ # All validated patterns - ADD NEW PATTERNS HERE WITH TESTS
270
+ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
271
+ "fix_command_spacing": ValidatedPattern(
272
+ name="fix_command_spacing",
273
+ pattern=r"python\s*-\s*m\s+(\w+)",
274
+ replacement=r"python -m \1",
275
+ description="Fix spacing in 'python -m command' patterns",
276
+ test_cases=[
277
+ ("python - m crackerjack", "python -m crackerjack"),
278
+ ("python -m crackerjack", "python -m crackerjack"), # No change
279
+ ("python - m pytest", "python -m pytest"),
280
+ ("other python - m stuff", "other python -m stuff"),
281
+ ],
282
+ ),
283
+ "fix_long_flag_spacing": ValidatedPattern(
284
+ name="fix_long_flag_spacing",
285
+ pattern=r"-\s*-\s*(\w+(?:-\w+)*)",
286
+ replacement=r"--\1",
287
+ description="Fix spacing in long flags like '--help'",
288
+ test_cases=[
289
+ ("- - help", "--help"),
290
+ ("- - ai-agent", "--ai-agent"),
291
+ ("--help", "--help"), # No change
292
+ ("- - start-websocket-server", "--start-websocket-server"),
293
+ ],
294
+ ),
295
+ "fix_short_flag_spacing": ValidatedPattern(
296
+ name="fix_short_flag_spacing",
297
+ pattern=r"(?<!\w)-\s+(\w)(?!\w)",
298
+ replacement=r"-\1",
299
+ description="Fix spacing in short flags like '-t'",
300
+ test_cases=[
301
+ ("python -m crackerjack - t", "python -m crackerjack -t"),
302
+ ("- q", "-q"),
303
+ ("-t", "-t"), # No change
304
+ ("some - x flag", "some -x flag"),
305
+ ],
306
+ ),
307
+ "fix_hyphenated_names": ValidatedPattern(
308
+ name="fix_hyphenated_names",
309
+ pattern=r"(\w+)\s*-\s*(\w+)",
310
+ replacement=r"\1-\2",
311
+ description="Fix spacing in hyphenated names and identifiers",
312
+ test_cases=[
313
+ ("python - pro", "python-pro"),
314
+ (
315
+ "pytest - hypothesis - specialist",
316
+ "pytest-hypothesis - specialist",
317
+ ), # Only fixes first
318
+ ("backend - architect", "backend-architect"),
319
+ ("python-pro", "python-pro"), # No change
320
+ ("end - of - file-fixer", "end-of - file-fixer"), # Only fixes first
321
+ ],
322
+ ),
323
+ "fix_hyphenated_names_global": ValidatedPattern(
324
+ name="fix_hyphenated_names_global",
325
+ pattern=r"(\w+)\s+-\s+(\w+)",
326
+ replacement=r"\1-\2",
327
+ description="Globally fix spacing in hyphenated names (single pass only)",
328
+ global_replace=True,
329
+ test_cases=[
330
+ ("python - pro", "python-pro"),
331
+ ("end - of - file", "end-of - file"), # Single pass: only first match
332
+ ("already-hyphenated", "already-hyphenated"), # No change
333
+ ("start - middle - end", "start-middle - end"), # Single pass
334
+ ],
335
+ ),
336
+ "fix_spaced_hyphens": ValidatedPattern(
337
+ name="fix_spaced_hyphens",
338
+ pattern=r"(\w+)\s+-\s+(\w+)",
339
+ replacement=r"\1-\2",
340
+ description="Fix spaced hyphens with spaces around dashes (use apply_iteratively for multi-word)",
341
+ global_replace=True, # Apply to all matches in one pass
342
+ test_cases=[
343
+ ("python - pro", "python-pro"),
344
+ (
345
+ "pytest - hypothesis - specialist",
346
+ "pytest-hypothesis - specialist",
347
+ ), # Single pass: only first match
348
+ (
349
+ "end - of - file - fixer",
350
+ "end-of - file-fixer",
351
+ ), # Global finds: "end-of" and "file-fixer"
352
+ ("already-hyphenated", "already-hyphenated"), # No change
353
+ ("mixed-case with - spaces", "mixed-case with-spaces"), # Partial fix
354
+ ],
355
+ ),
356
+ "fix_debug_log_pattern": ValidatedPattern(
357
+ name="fix_debug_log_pattern",
358
+ pattern=r"crackerjack\s*-\s*debug",
359
+ replacement="crackerjack-debug",
360
+ description="Fix spacing in debug log patterns",
361
+ test_cases=[
362
+ ("crackerjack - debug-12345.log", "crackerjack-debug-12345.log"),
363
+ ("crackerjack-debug.log", "crackerjack-debug.log"), # No change
364
+ ("old crackerjack - debug files", "old crackerjack-debug files"),
365
+ ],
366
+ ),
367
+ "fix_job_file_pattern": ValidatedPattern(
368
+ name="fix_job_file_pattern",
369
+ pattern=r"job\s*-\s*(\{[^}]+\}|\w+)",
370
+ replacement=r"job-\1",
371
+ description="Fix spacing in job file patterns",
372
+ test_cases=[
373
+ ("job - {self.web_job_id}.json", "job-{self.web_job_id}.json"),
374
+ ("job - abc123.json", "job-abc123.json"),
375
+ ("job-existing.json", "job-existing.json"), # No change
376
+ ],
377
+ ),
378
+ "fix_markdown_bold": ValidatedPattern(
379
+ name="fix_markdown_bold",
380
+ pattern=r"\*\s+\*(.+?)\s*\*\s+\*",
381
+ replacement=r"**\1**",
382
+ description="Fix spacing in markdown bold patterns",
383
+ test_cases=[
384
+ ("* *Bold Text * *", "**Bold Text**"),
385
+ ("* *🧪 pytest-specialist * *", "**🧪 pytest-specialist**"),
386
+ ("**Already Bold**", "**Already Bold**"), # No change
387
+ ],
388
+ ),
389
+ # Security token masking patterns
390
+ "mask_pypi_token": ValidatedPattern(
391
+ name="mask_pypi_token",
392
+ pattern=r"\bpypi-[a-zA-Z0-9_-]{12,}\b",
393
+ replacement="pypi-****",
394
+ description="Mask PyPI authentication tokens (word boundaries to prevent"
395
+ " false matches)",
396
+ global_replace=True,
397
+ test_cases=[
398
+ ("pypi-AgEIcHlwaS5vcmcCJGE4M2Y3ZjI", "pypi-****"),
399
+ (
400
+ "Using token: pypi-AgEIcHlwaS5vcmcCJGE4M2Y3ZjI for upload",
401
+ "Using token: pypi-**** for upload",
402
+ ),
403
+ ("pypi-short", "pypi-short"), # Too short, no change
404
+ (
405
+ "not pypi-AgEIcHlwaS5vcmcCJGE4M2Y3ZjI",
406
+ "not pypi-****",
407
+ ), # Space-separated, should match pypi token
408
+ (
409
+ "Multiple pypi-token1234567890 and pypi-anothertokenhere",
410
+ "Multiple pypi-**** and pypi-****",
411
+ ),
412
+ ],
413
+ ),
414
+ "mask_github_token": ValidatedPattern(
415
+ name="mask_github_token",
416
+ pattern=r"\bghp_[a-zA-Z0-9]{36}\b",
417
+ replacement="ghp_****",
418
+ description="Mask GitHub personal access tokens (exactly 40 chars total"
419
+ " with word boundaries)",
420
+ global_replace=True,
421
+ test_cases=[
422
+ ("ghp_1234567890abcdef1234567890abcdef1234", "ghp_****"),
423
+ (
424
+ "GITHUB_TOKEN=ghp_1234567890abcdef1234567890abcdef1234",
425
+ "GITHUB_TOKEN=ghp_****",
426
+ ),
427
+ ("ghp_short", "ghp_short"), # Too short, no change
428
+ (
429
+ "ghp_1234567890abcdef1234567890abcdef12345",
430
+ "ghp_1234567890abcdef1234567890abcdef12345",
431
+ ), # Too long, no match due to word boundary
432
+ (
433
+ "Multiple ghp_1234567890abcdef1234567890abcdef1234 and"
434
+ " ghp_abcdef1234567890abcdef12345678901234",
435
+ "Multiple ghp_**** and ghp_****",
436
+ ),
437
+ ],
438
+ ),
439
+ "mask_generic_long_token": ValidatedPattern(
440
+ name="mask_generic_long_token",
441
+ pattern=r"\b[a-zA-Z0-9_-]{32,}\b",
442
+ replacement="****",
443
+ description="Mask generic long tokens (32+ chars, word boundaries to avoid"
444
+ " false positives)",
445
+ global_replace=True,
446
+ test_cases=[
447
+ ("secret_key=abcdef1234567890abcdef1234567890abcdef", "secret_key=****"),
448
+ (
449
+ "Short token abc123def456",
450
+ "Short token abc123def456",
451
+ ), # Too short, no change
452
+ (
453
+ "File path "
454
+ "/very/long/path/that/should/not/be/masked/even/though/its/long",
455
+ "File path "
456
+ "/very/long/path/that/should/not/be/masked/even/though/its/long",
457
+ ), # Contains slashes
458
+ ("API_KEY=verylongapikeyhere1234567890123456", "API_KEY=****"),
459
+ (
460
+ "Long-token_with-underscores_123456789012345678",
461
+ "****",
462
+ ), # Entire string matches as one long token
463
+ ],
464
+ ),
465
+ "mask_token_assignment": ValidatedPattern(
466
+ name="mask_token_assignment",
467
+ pattern=r"(?i)\b(token\s*[=:]\s*)['\"]([^'\"]{8,})['\"]",
468
+ replacement=r"\1'****'",
469
+ description="Mask token assignments in various formats (case insensitive)",
470
+ global_replace=True,
471
+ test_cases=[
472
+ ('token="abc123def456789"', "token='****'"),
473
+ ("token='long_secret_token_here'", "token='****'"),
474
+ ('token: "another_secret_token"', "token: '****'"),
475
+ ("token = 'spaced_assignment_token'", "token = '****'"),
476
+ ('token="short"', 'token="short"'), # Too short, no change
477
+ (
478
+ "not_token='should_not_be_masked'",
479
+ "not_token='should_not_be_masked'",
480
+ ), # Wrong key
481
+ ('TOKEN="UPPERCASE_TOKEN_HERE"', "TOKEN='****'"), # Case insensitive
482
+ ],
483
+ ),
484
+ "mask_password_assignment": ValidatedPattern(
485
+ name="mask_password_assignment",
486
+ pattern=r"(?i)\b(password\s*[=:]\s*)['\"]([^'\"]{8,})['\"]",
487
+ replacement=r"\1'****'",
488
+ description="Mask password assignments in various formats (case insensitive)",
489
+ global_replace=True,
490
+ test_cases=[
491
+ ('password="secret123456"', "password='****'"),
492
+ ("password='my_long_password'", "password='****'"),
493
+ ('password: "another_secret_password"', "password: '****'"),
494
+ ("password = 'spaced_password_assignment'", "password = '****'"),
495
+ ('password="short"', 'password="short"'), # Too short, no change
496
+ (
497
+ "not_password='should_not_be_masked'",
498
+ "not_password='should_not_be_masked'",
499
+ ), # Wrong key
500
+ ('PASSWORD="UPPERCASE_PASSWORD"', "PASSWORD='****'"), # Case insensitive
501
+ ],
502
+ ),
503
+ # Version management patterns
504
+ "update_pyproject_version": ValidatedPattern(
505
+ name="update_pyproject_version",
506
+ pattern=r'^(version\s*=\s*["\'])([^"\']+)(["\'])$',
507
+ replacement=r"\g<1>NEW_VERSION\g<3>",
508
+ description="Update version in pyproject.toml files (NEW_VERSION placeholder"
509
+ " replaced dynamically)",
510
+ test_cases=[
511
+ ('version = "1.2.3"', 'version = "NEW_VERSION"'),
512
+ ("version='0.1.0'", "version='NEW_VERSION'"),
513
+ ('version="1.0.0-beta"', 'version="NEW_VERSION"'),
514
+ ("version = '2.1.0'", "version = 'NEW_VERSION'"),
515
+ ("version='10.20.30'", "version='NEW_VERSION'"),
516
+ # Should not match non-version lines
517
+ ('name = "my-package"', 'name = "my-package"'), # No change
518
+ ],
519
+ ),
520
+ # Formatting agent patterns
521
+ "remove_trailing_whitespace": ValidatedPattern(
522
+ name="remove_trailing_whitespace",
523
+ pattern=r"[ \t]+$",
524
+ replacement="",
525
+ description="Remove trailing whitespace from lines",
526
+ global_replace=True,
527
+ test_cases=[
528
+ ("line with spaces ", "line with spaces"),
529
+ ("line with tabs\t\t", "line with tabs"),
530
+ ("normal line", "normal line"), # No change
531
+ ("mixed \t ", "mixed"),
532
+ ("", ""), # Empty lines
533
+ ],
534
+ ),
535
+ "normalize_multiple_newlines": ValidatedPattern(
536
+ name="normalize_multiple_newlines",
537
+ pattern=r"\n{3,}",
538
+ replacement="\n\n",
539
+ description="Normalize multiple consecutive newlines to maximum 2",
540
+ global_replace=True,
541
+ test_cases=[
542
+ ("line1\n\n\nline2", "line1\n\nline2"),
543
+ ("line1\n\n\n\n\nline2", "line1\n\nline2"),
544
+ ("line1\n\nline2", "line1\n\nline2"), # No change
545
+ ("line1\nline2", "line1\nline2"), # No change
546
+ ],
547
+ ),
548
+ # Security agent patterns - subprocess fixes
549
+ "fix_subprocess_run_shell": ValidatedPattern(
550
+ name="fix_subprocess_run_shell",
551
+ pattern=r"subprocess\.run\(([^,]+),\s*shell=True\)",
552
+ replacement=r"subprocess.run(\1.split())",
553
+ description="Remove shell=True from subprocess.run calls",
554
+ global_replace=True,
555
+ test_cases=[
556
+ ("subprocess.run(cmd, shell=True)", "subprocess.run(cmd.split())"),
557
+ (
558
+ "subprocess.run('ls -la', shell=True)",
559
+ "subprocess.run('ls -la'.split())",
560
+ ),
561
+ (
562
+ "subprocess.run(command, shell=False)",
563
+ "subprocess.run(command, shell=False)",
564
+ ), # No change
565
+ ],
566
+ ),
567
+ "fix_subprocess_call_shell": ValidatedPattern(
568
+ name="fix_subprocess_call_shell",
569
+ pattern=r"subprocess\.call\(([^,]+),\s*shell=True\)",
570
+ replacement=r"subprocess.call(\1.split())",
571
+ description="Remove shell=True from subprocess.call calls",
572
+ global_replace=True,
573
+ test_cases=[
574
+ ("subprocess.call(cmd, shell=True)", "subprocess.call(cmd.split())"),
575
+ (
576
+ "subprocess.call('ls -la', shell=True)",
577
+ "subprocess.call('ls -la'.split())",
578
+ ),
579
+ (
580
+ "subprocess.call(command, shell=False)",
581
+ "subprocess.call(command, shell=False)",
582
+ ), # No change
583
+ ],
584
+ ),
585
+ "fix_subprocess_popen_shell": ValidatedPattern(
586
+ name="fix_subprocess_popen_shell",
587
+ pattern=r"subprocess\.Popen\(([^,]+),\s*shell=True\)",
588
+ replacement=r"subprocess.Popen(\1.split())",
589
+ description="Remove shell=True from subprocess.Popen calls",
590
+ global_replace=True,
591
+ test_cases=[
592
+ ("subprocess.Popen(cmd, shell=True)", "subprocess.Popen(cmd.split())"),
593
+ (
594
+ "subprocess.Popen('ls -la', shell=True)",
595
+ "subprocess.Popen('ls -la'.split())",
596
+ ),
597
+ (
598
+ "subprocess.Popen(command, shell=False)",
599
+ "subprocess.Popen(command, shell=False)",
600
+ ), # No change
601
+ ],
602
+ ),
603
+ # Security agent patterns - unsafe library usage
604
+ "fix_unsafe_yaml_load": ValidatedPattern(
605
+ name="fix_unsafe_yaml_load",
606
+ pattern=r"\byaml\.load\(",
607
+ replacement="yaml.safe_load(",
608
+ description="Replace unsafe yaml.load with yaml.safe_load",
609
+ global_replace=True,
610
+ test_cases=[
611
+ ("yaml.load(file)", "yaml.safe_load(file)"),
612
+ ("data = yaml.load(content)", "data = yaml.safe_load(content)"),
613
+ ("yaml.safe_load(content)", "yaml.safe_load(content)"), # No change
614
+ (
615
+ "my_yaml.load(content)",
616
+ "my_yaml.load(content)",
617
+ ), # No change (not yaml module)
618
+ ],
619
+ ),
620
+ "fix_weak_md5_hash": ValidatedPattern(
621
+ name="fix_weak_md5_hash",
622
+ pattern=r"\bhashlib\.md5\(",
623
+ replacement="hashlib.sha256(",
624
+ description="Replace weak MD5 hashing with SHA256",
625
+ global_replace=True,
626
+ test_cases=[
627
+ ("hashlib.md5(data)", "hashlib.sha256(data)"),
628
+ ("hash = hashlib.md5(content)", "hash = hashlib.sha256(content)"),
629
+ ("hashlib.sha256(data)", "hashlib.sha256(data)"), # No change
630
+ ],
631
+ ),
632
+ "fix_weak_sha1_hash": ValidatedPattern(
633
+ name="fix_weak_sha1_hash",
634
+ pattern=r"\bhashlib\.sha1\(",
635
+ replacement="hashlib.sha256(",
636
+ description="Replace weak SHA1 hashing with SHA256",
637
+ global_replace=True,
638
+ test_cases=[
639
+ ("hashlib.sha1(data)", "hashlib.sha256(data)"),
640
+ ("hash = hashlib.sha1(content)", "hash = hashlib.sha256(content)"),
641
+ ("hashlib.sha256(data)", "hashlib.sha256(data)"), # No change
642
+ ],
643
+ ),
644
+ "fix_insecure_random_choice": ValidatedPattern(
645
+ name="fix_insecure_random_choice",
646
+ pattern=r"random\.choice\(([^)]+)\)",
647
+ replacement=r"secrets.choice(\1)",
648
+ description="Replace insecure random.choice with secrets.choice",
649
+ global_replace=True,
650
+ test_cases=[
651
+ ("random.choice(options)", "secrets.choice(options)"),
652
+ ("item = random.choice(items)", "item = secrets.choice(items)"),
653
+ ("secrets.choice(options)", "secrets.choice(options)"), # No change
654
+ ],
655
+ ),
656
+ "remove_debug_prints_with_secrets": ValidatedPattern(
657
+ name="remove_debug_prints_with_secrets",
658
+ pattern=r"print\s*\([^)]*(?:password|secret|key|token)[^)]*\)",
659
+ replacement="",
660
+ description="Remove debug print statements that contain sensitive information",
661
+ global_replace=True,
662
+ test_cases=[
663
+ ('print("password:", password)', ""),
664
+ ("print(f'Token: {token}')", ""),
665
+ ("print('Debug secret value')", ""),
666
+ (
667
+ "print('Normal debug message')",
668
+ "print('Normal debug message')",
669
+ ), # No change
670
+ ('print("API key is", key)', ""),
671
+ ],
672
+ ),
673
+ # Test specialist agent patterns
674
+ "normalize_assert_statements": ValidatedPattern(
675
+ name="normalize_assert_statements",
676
+ pattern=r"assert (.+?)\s*==\s*(.+)",
677
+ replacement=r"assert \1 == \2",
678
+ description="Normalize spacing around == in assert statements",
679
+ global_replace=True,
680
+ test_cases=[
681
+ ("assert result==expected", "assert result == expected"),
682
+ ("assert value == other", "assert value == other"),
683
+ ("assert result== expected", "assert result == expected"),
684
+ ("assert result ==expected", "assert result == expected"),
685
+ (
686
+ "assert result == expected",
687
+ "assert result == expected",
688
+ ), # No change (already spaced)
689
+ ],
690
+ ),
691
+ # Job ID validation patterns
692
+ "validate_job_id_alphanumeric": ValidatedPattern(
693
+ name="validate_job_id_alphanumeric",
694
+ pattern=r"^[a-zA-Z0-9_-]+$",
695
+ replacement="VALID", # Dummy replacement for validation patterns
696
+ description="Validate job ID contains only alphanumeric characters, "
697
+ "underscores, and hyphens",
698
+ test_cases=[
699
+ # For validation patterns, we test against strings that SHOULD match
700
+ ("valid_job-123", "VALID"), # Valid ID
701
+ ("another_valid-job_456", "VALID"), # Valid ID
702
+ ("job_123", "VALID"), # Valid ID
703
+ ],
704
+ ),
705
+ # Service configuration patterns
706
+ "remove_coverage_fail_under": ValidatedPattern(
707
+ name="remove_coverage_fail_under",
708
+ pattern=r"--cov-fail-under=\d+\.?\d*\s*",
709
+ replacement="",
710
+ description="Remove coverage fail-under flags from pytest addopts",
711
+ global_replace=True,
712
+ test_cases=[
713
+ ("--cov-fail-under=85 --verbose", "--verbose"),
714
+ ("--cov-fail-under=90.5 -x", "-x"),
715
+ ("--verbose --cov-fail-under=80 ", "--verbose "),
716
+ ("--no-cov", "--no-cov"), # No change
717
+ ],
718
+ ),
719
+ "update_coverage_requirement": ValidatedPattern(
720
+ name="update_coverage_requirement",
721
+ pattern=r"(--cov-fail-under=)\d+\.?\d*",
722
+ replacement=r"\1NEW_COVERAGE",
723
+ description="Update coverage fail-under requirement (NEW_COVERAGE placeholder"
724
+ " replaced dynamically)",
725
+ test_cases=[
726
+ ("--cov-fail-under=85", "--cov-fail-under=NEW_COVERAGE"),
727
+ ("--cov-fail-under=90.5", "--cov-fail-under=NEW_COVERAGE"),
728
+ ("--verbose", "--verbose"), # No change
729
+ ],
730
+ ),
731
+ # Path security validation patterns - designed for testing existence, not
732
+ # replacement
733
+ "detect_directory_traversal_basic": ValidatedPattern(
734
+ name="detect_directory_traversal_basic",
735
+ pattern=r"\.\./",
736
+ replacement="[TRAVERSAL]",
737
+ description="Detect basic directory traversal patterns (../)",
738
+ global_replace=True,
739
+ test_cases=[
740
+ ("../config.txt", "[TRAVERSAL]config.txt"),
741
+ ("normal/path", "normal/path"), # No change
742
+ ("../../etc/passwd", "[TRAVERSAL][TRAVERSAL]etc/passwd"),
743
+ ],
744
+ ),
745
+ "detect_directory_traversal_backslash": ValidatedPattern(
746
+ name="detect_directory_traversal_backslash",
747
+ pattern=r"\.\.[/\\]",
748
+ replacement="[TRAVERSAL]",
749
+ description="Detect directory traversal with forward/back slashes",
750
+ global_replace=True,
751
+ test_cases=[
752
+ ("..\\config.txt", "[TRAVERSAL]config.txt"),
753
+ ("../config.txt", "[TRAVERSAL]config.txt"),
754
+ ("normal/path", "normal/path"), # No change
755
+ ],
756
+ ),
757
+ "detect_url_encoded_traversal": ValidatedPattern(
758
+ name="detect_url_encoded_traversal",
759
+ pattern=r"%2e%2e%2f",
760
+ replacement="[TRAVERSAL]",
761
+ description="Detect URL encoded directory traversal (%2e%2e%2f = ../)",
762
+ global_replace=True,
763
+ test_cases=[
764
+ ("path/%2e%2e%2f/config", "path/[TRAVERSAL]/config"),
765
+ ("normal/path", "normal/path"), # No change
766
+ ("%2e%2e%2fpasswd", "[TRAVERSAL]passwd"),
767
+ ],
768
+ ),
769
+ "detect_double_url_encoded_traversal": ValidatedPattern(
770
+ name="detect_double_url_encoded_traversal",
771
+ pattern=r"%252e%252e%252f",
772
+ replacement="[TRAVERSAL]",
773
+ description="Detect double URL encoded directory traversal",
774
+ global_replace=True,
775
+ test_cases=[
776
+ ("path/%252e%252e%252f/config", "path/[TRAVERSAL]/config"),
777
+ ("normal/path", "normal/path"), # No change
778
+ ],
779
+ ),
780
+ "detect_null_bytes_url": ValidatedPattern(
781
+ name="detect_null_bytes_url",
782
+ pattern=r"%00",
783
+ replacement="[NULL]",
784
+ description="Detect URL encoded null bytes",
785
+ global_replace=True,
786
+ test_cases=[
787
+ ("file.txt%00.jpg", "file.txt[NULL].jpg"),
788
+ ("normal.txt", "normal.txt"), # No change
789
+ ],
790
+ ),
791
+ "detect_null_bytes_literal": ValidatedPattern(
792
+ name="detect_null_bytes_literal",
793
+ pattern=r"\\x00",
794
+ replacement="[NULL]",
795
+ description="Detect literal null byte patterns",
796
+ global_replace=True,
797
+ test_cases=[
798
+ ("file.txt\\x00", "file.txt[NULL]"),
799
+ ("normal.txt", "normal.txt"), # No change
800
+ ],
801
+ ),
802
+ "detect_utf8_overlong_null": ValidatedPattern(
803
+ name="detect_utf8_overlong_null",
804
+ pattern=r"%c0%80",
805
+ replacement="[NULL]",
806
+ description="Detect UTF-8 overlong null byte encoding",
807
+ global_replace=True,
808
+ test_cases=[
809
+ ("file.txt%c0%80", "file.txt[NULL]"),
810
+ ("normal.txt", "normal.txt"), # No change
811
+ ],
812
+ ),
813
+ "detect_sys_directory_pattern": ValidatedPattern(
814
+ name="detect_sys_directory_pattern",
815
+ pattern=r"^/sys/?.*",
816
+ replacement="[DANGER]",
817
+ description="Detect access to /sys directory",
818
+ test_cases=[
819
+ ("/sys/", "[DANGER]"),
820
+ ("/sys/devices", "[DANGER]"),
821
+ ("/usr/sys", "/usr/sys"), # No change
822
+ ],
823
+ ),
824
+ "detect_proc_directory_pattern": ValidatedPattern(
825
+ name="detect_proc_directory_pattern",
826
+ pattern=r"^/proc/?.*",
827
+ replacement="[DANGER]",
828
+ description="Detect access to /proc directory",
829
+ test_cases=[
830
+ ("/proc/", "[DANGER]"),
831
+ ("/proc/self", "[DANGER]"),
832
+ ("/usr/proc", "/usr/proc"), # No change
833
+ ],
834
+ ),
835
+ "detect_etc_directory_pattern": ValidatedPattern(
836
+ name="detect_etc_directory_pattern",
837
+ pattern=r"^/etc/?.*",
838
+ replacement="[DANGER]",
839
+ description="Detect access to /etc directory",
840
+ test_cases=[
841
+ ("/etc/", "[DANGER]"),
842
+ ("/etc/passwd", "[DANGER]"),
843
+ ("/usr/etc", "/usr/etc"), # No change
844
+ ],
845
+ ),
846
+ "detect_boot_directory_pattern": ValidatedPattern(
847
+ name="detect_boot_directory_pattern",
848
+ pattern=r"^/boot/?.*",
849
+ replacement="[DANGER]",
850
+ description="Detect access to /boot directory",
851
+ test_cases=[
852
+ ("/boot/", "[DANGER]"),
853
+ ("/boot/grub", "[DANGER]"),
854
+ ("/usr/boot", "/usr/boot"), # No change
855
+ ],
856
+ ),
857
+ "detect_dev_directory_pattern": ValidatedPattern(
858
+ name="detect_dev_directory_pattern",
859
+ pattern=r"^/dev/?.*",
860
+ replacement="[DANGER]",
861
+ description="Detect access to /dev directory",
862
+ test_cases=[
863
+ ("/dev/", "[DANGER]"),
864
+ ("/dev/null", "[DANGER]"),
865
+ ("/usr/dev", "/usr/dev"), # No change
866
+ ],
867
+ ),
868
+ "detect_root_directory_pattern": ValidatedPattern(
869
+ name="detect_root_directory_pattern",
870
+ pattern=r"^/root/?.*",
871
+ replacement="[DANGER]",
872
+ description="Detect access to /root directory",
873
+ test_cases=[
874
+ ("/root/", "[DANGER]"),
875
+ ("/root/.ssh", "[DANGER]"),
876
+ ("/usr/root", "/usr/root"), # No change
877
+ ],
878
+ ),
879
+ "detect_var_log_directory_pattern": ValidatedPattern(
880
+ name="detect_var_log_directory_pattern",
881
+ pattern=r"^/var/log/?.*",
882
+ replacement="[DANGER]",
883
+ description="Detect access to /var/log directory",
884
+ test_cases=[
885
+ ("/var/log/", "[DANGER]"),
886
+ ("/var/log/messages", "[DANGER]"),
887
+ ("/usr/var/log", "/usr/var/log"), # No change
888
+ ],
889
+ ),
890
+ "detect_bin_directory_pattern": ValidatedPattern(
891
+ name="detect_bin_directory_pattern",
892
+ pattern=r"^/(usr/)?bin/?.*",
893
+ replacement="[DANGER]",
894
+ description="Detect access to /bin or /usr/bin directories",
895
+ test_cases=[
896
+ ("/bin/", "[DANGER]"),
897
+ ("/usr/bin/", "[DANGER]"),
898
+ ("/usr/local/bin", "/usr/local/bin"), # No change
899
+ ],
900
+ ),
901
+ "detect_sbin_directory_pattern": ValidatedPattern(
902
+ name="detect_sbin_directory_pattern",
903
+ pattern=r"^/(usr/)?sbin/?.*",
904
+ replacement="[DANGER]",
905
+ description="Detect access to /sbin or /usr/sbin directories",
906
+ test_cases=[
907
+ ("/sbin/", "[DANGER]"),
908
+ ("/usr/sbin/", "[DANGER]"),
909
+ ("/usr/local/sbin", "/usr/local/sbin"), # No change
910
+ ],
911
+ ),
912
+ "detect_parent_directory_in_path": ValidatedPattern(
913
+ name="detect_parent_directory_in_path",
914
+ pattern=r"\.\.",
915
+ replacement="[PARENT]",
916
+ description="Detect parent directory references anywhere in path",
917
+ global_replace=True,
918
+ test_cases=[
919
+ ("../config", "[PARENT]/config"),
920
+ ("safe/path", "safe/path"), # No change
921
+ ("path/../other", "path/[PARENT]/other"),
922
+ ],
923
+ ),
924
+ "detect_suspicious_temp_traversal": ValidatedPattern(
925
+ name="detect_suspicious_temp_traversal",
926
+ pattern=r"/tmp/.*\.\./", # nosec B108
927
+ replacement="[SUSPICIOUS]",
928
+ description="Detect traversal attempts in temp directories",
929
+ test_cases=[
930
+ ("/tmp/safe/../etc/passwd", "[SUSPICIOUS]etc/passwd"), # nosec B108
931
+ ("/tmp/normal/file.txt", "/tmp/normal/file.txt"), # No change # nosec B108
932
+ ],
933
+ ),
934
+ "detect_suspicious_var_traversal": ValidatedPattern(
935
+ name="detect_suspicious_var_traversal",
936
+ pattern=r"/var/.*\.\./",
937
+ replacement="[SUSPICIOUS]",
938
+ description="Detect traversal attempts in var directories",
939
+ test_cases=[
940
+ ("/var/lib/../etc/passwd", "[SUSPICIOUS]etc/passwd"),
941
+ ("/var/lib/normal.txt", "/var/lib/normal.txt"), # No change
942
+ ],
943
+ ),
944
+ # Tool output parsing patterns - for development tool output processing
945
+ "ruff_check_error": ValidatedPattern(
946
+ name="ruff_check_error",
947
+ pattern=r"^(.+?): (\d+): (\d+): ([A-Z]\d+) (.+)$",
948
+ replacement=r"File: \1, Line: \2, Col: \3, Code: \4, Message: \5",
949
+ description="Parse ruff-check error output: file:line:col:code message",
950
+ test_cases=[
951
+ (
952
+ "crackerjack/core.py: 123: 45: E501 line too long",
953
+ "File: crackerjack/core.py, Line: 123, Col: 45, Code: E501, Message: "
954
+ "line too long",
955
+ ),
956
+ (
957
+ "./test.py: 1: 1: F401 unused import",
958
+ "File: ./test.py, Line: 1, Col: 1, Code: F401, Message: unused import",
959
+ ),
960
+ (
961
+ "src/main.py: 999: 80: W291 trailing whitespace",
962
+ "File: src/main.py, Line: 999, Col: 80, Code: W291, Message: trailing "
963
+ "whitespace",
964
+ ),
965
+ ],
966
+ ),
967
+ "ruff_check_summary": ValidatedPattern(
968
+ name="ruff_check_summary",
969
+ pattern=r"Found (\d+) error",
970
+ replacement=r"Found \1 error(s)",
971
+ description="Parse ruff-check summary line for error count",
972
+ test_cases=[
973
+ ("Found 5 error", "Found 5 error(s)"),
974
+ ("Found 1 error in 3 files", "Found 1 error(s) in 3 files"),
975
+ ("Found 42 error detected", "Found 42 error(s) detected"),
976
+ ],
977
+ ),
978
+ "pyright_error": ValidatedPattern(
979
+ name="pyright_error",
980
+ pattern=r"^(.+?): (\d+): (\d+) - error: (.+)$",
981
+ replacement=r"File: \1, Line: \2, Col: \3, Error: \4",
982
+ description="Parse pyright error output: file:line:col - error: message",
983
+ test_cases=[
984
+ (
985
+ "src/app.py: 45: 12 - error: Undefined variable",
986
+ "File: src/app.py, Line: 45, Col: 12, Error: Undefined variable",
987
+ ),
988
+ (
989
+ "test.py: 1: 1 - error: Type mismatch",
990
+ "File: test.py, Line: 1, Col: 1, Error: Type mismatch",
991
+ ),
992
+ (
993
+ "./main.py: 999: 50 - error: Missing return statement",
994
+ "File: ./main.py, Line: 999, Col: 50, Error: Missing return statement",
995
+ ),
996
+ ],
997
+ ),
998
+ "pyright_warning": ValidatedPattern(
999
+ name="pyright_warning",
1000
+ pattern=r"^(.+?): (\d+): (\d+) - warning: (.+)$",
1001
+ replacement=r"File: \1, Line: \2, Col: \3, Warning: \4",
1002
+ description="Parse pyright warning output: file:line:col - warning: message",
1003
+ test_cases=[
1004
+ (
1005
+ "src/app.py: 45: 12 - warning: Unused variable",
1006
+ "File: src/app.py, Line: 45, Col: 12, Warning: Unused variable",
1007
+ ),
1008
+ (
1009
+ "test.py: 1: 1 - warning: Deprecated API",
1010
+ "File: test.py, Line: 1, Col: 1, Warning: Deprecated API",
1011
+ ),
1012
+ (
1013
+ "./main.py: 999: 50 - warning: Type could be more specific",
1014
+ "File: ./main.py, Line: 999, Col: 50, Warning: Type could be more"
1015
+ " specific",
1016
+ ),
1017
+ ],
1018
+ ),
1019
+ "pyright_summary": ValidatedPattern(
1020
+ name="pyright_summary",
1021
+ pattern=r"(\d+) error[s]?, (\d+) warning[s]?",
1022
+ replacement=r"\1 errors, \2 warnings",
1023
+ description="Parse pyright summary with error and warning counts",
1024
+ test_cases=[
1025
+ ("5 errors, 3 warnings", "5 errors, 3 warnings"),
1026
+ ("1 error, 1 warning", "1 errors, 1 warnings"),
1027
+ ("0 errors, 10 warnings found", "0 errors, 10 warnings found"),
1028
+ ],
1029
+ ),
1030
+ "bandit_issue": ValidatedPattern(
1031
+ name="bandit_issue",
1032
+ pattern=r">> Issue: \[([A-Z]\d+): \w+\] (.+)",
1033
+ replacement=r"Security Issue [\1]: \2",
1034
+ description="Parse bandit security issue output with code and message",
1035
+ test_cases=[
1036
+ (
1037
+ ">> Issue: [B602: subprocess_popen_with_shell_equals_true] Use of "
1038
+ "shell=True",
1039
+ "Security Issue [B602]: Use of shell=True",
1040
+ ),
1041
+ (
1042
+ ">> Issue: [B101: assert_used] Use of assert detected",
1043
+ "Security Issue [B101]: Use of assert detected",
1044
+ ),
1045
+ (
1046
+ ">> Issue: [B301: pickle] Pickle library detected",
1047
+ "Security Issue [B301]: Pickle library detected",
1048
+ ),
1049
+ ],
1050
+ ),
1051
+ "bandit_location": ValidatedPattern(
1052
+ name="bandit_location",
1053
+ pattern=r"Location: (.+?): (\d+): (\d+)",
1054
+ replacement=r"Location: File \1, Line \2, Column \3",
1055
+ description="Parse bandit location information for security issues",
1056
+ test_cases=[
1057
+ (
1058
+ "Location: src/security.py: 123: 45",
1059
+ "Location: File src/security.py, Line 123, Column 45",
1060
+ ),
1061
+ ("Location: ./test.py: 1: 1", "Location: File ./test.py, Line 1, Column 1"),
1062
+ (
1063
+ "Location: crackerjack/core.py: 999: 80",
1064
+ "Location: File crackerjack/core.py, Line 999, Column 80",
1065
+ ),
1066
+ ],
1067
+ ),
1068
+ "bandit_confidence": ValidatedPattern(
1069
+ name="bandit_confidence",
1070
+ pattern=r"Confidence: (\w+)",
1071
+ replacement=r"Confidence Level: \1",
1072
+ description="Parse bandit confidence level for security issues",
1073
+ test_cases=[
1074
+ ("Confidence: HIGH", "Confidence Level: HIGH"),
1075
+ ("Confidence: MEDIUM", "Confidence Level: MEDIUM"),
1076
+ ("Confidence: LOW", "Confidence Level: LOW"),
1077
+ ],
1078
+ ),
1079
+ "bandit_severity": ValidatedPattern(
1080
+ name="bandit_severity",
1081
+ pattern=r"Severity: (\w+)",
1082
+ replacement=r"Severity Level: \1",
1083
+ description="Parse bandit severity level for security issues",
1084
+ test_cases=[
1085
+ ("Severity: HIGH", "Severity Level: HIGH"),
1086
+ ("Severity: MEDIUM", "Severity Level: MEDIUM"),
1087
+ ("Severity: LOW", "Severity Level: LOW"),
1088
+ ],
1089
+ ),
1090
+ "mypy_error": ValidatedPattern(
1091
+ name="mypy_error",
1092
+ pattern=r"^(.+?): (\d+): error: (.+)$",
1093
+ replacement=r"File: \1, Line: \2, Error: \3",
1094
+ description="Parse mypy error output: file:line: error: message",
1095
+ test_cases=[
1096
+ (
1097
+ "src/app.py: 45: error: Name 'undefined_var' is not defined",
1098
+ "File: src/app.py, Line: 45, Error: Name 'undefined_var' is not "
1099
+ "defined",
1100
+ ),
1101
+ (
1102
+ "test.py: 1: error: Incompatible return value type",
1103
+ "File: test.py, Line: 1, Error: Incompatible return value type",
1104
+ ),
1105
+ (
1106
+ "./main.py: 999: error: Argument has incompatible type",
1107
+ "File: ./main.py, Line: 999, Error: Argument has incompatible type",
1108
+ ),
1109
+ ],
1110
+ ),
1111
+ "mypy_note": ValidatedPattern(
1112
+ name="mypy_note",
1113
+ pattern=r"^(.+?): (\d+): note: (.+)$",
1114
+ replacement=r"File: \1, Line: \2, Note: \3",
1115
+ description="Parse mypy note output: file:line: note: message",
1116
+ test_cases=[
1117
+ (
1118
+ "src/app.py: 45: note: Expected type Union[int, str]",
1119
+ "File: src/app.py, Line: 45, Note: Expected type Union[int, str]",
1120
+ ),
1121
+ (
1122
+ "test.py: 1: note: See https://mypy.readthedocs.io/",
1123
+ "File: test.py, Line: 1, Note: See https://mypy.readthedocs.io/",
1124
+ ),
1125
+ (
1126
+ "./main.py: 999: note: Consider using Optional[...]",
1127
+ "File: ./main.py, Line: 999, Note: Consider using Optional[...]",
1128
+ ),
1129
+ ],
1130
+ ),
1131
+ "vulture_unused": ValidatedPattern(
1132
+ name="vulture_unused",
1133
+ pattern=r"^(.+?): (\d+): unused (.+) '(.+)'",
1134
+ replacement=r"File: \1, Line: \2, Unused \3: '\4'",
1135
+ description="Parse vulture unused code detection: file:line: unused type"
1136
+ " 'name'",
1137
+ test_cases=[
1138
+ (
1139
+ "src/app.py: 45: unused variable 'temp_var'",
1140
+ "File: src/app.py, Line: 45, Unused variable: 'temp_var'",
1141
+ ),
1142
+ (
1143
+ "test.py: 1: unused function 'helper'",
1144
+ "File: test.py, Line: 1, Unused function: 'helper'",
1145
+ ),
1146
+ (
1147
+ "./main.py: 999: unused import 'os'",
1148
+ "File: ./main.py, Line: 999, Unused import: 'os'",
1149
+ ),
1150
+ ],
1151
+ ),
1152
+ "complexipy_complex": ValidatedPattern(
1153
+ name="complexipy_complex",
1154
+ pattern=r"^(.+?): (\d+): (\d+) - (.+) is too complex \((\d+)\)",
1155
+ replacement=r"File: \1, Line: \2, Col: \3, Function: \4, Complexity: \5",
1156
+ description="Parse complexipy complexity detection: file:line:col - function "
1157
+ "is too complex (score)",
1158
+ test_cases=[
1159
+ (
1160
+ "src/app.py: 45: 1 - complex_function is too complex (15)",
1161
+ "File: src/app.py, Line: 45, Col: 1, Function: complex_function,"
1162
+ " Complexity: 15",
1163
+ ),
1164
+ (
1165
+ "test.py: 1: 1 - nested_loops is too complex (20)",
1166
+ "File: test.py, Line: 1, Col: 1, Function: nested_loops, "
1167
+ "Complexity: 20",
1168
+ ),
1169
+ (
1170
+ "./main.py: 999: 5 - process_data is too complex (18)",
1171
+ "File: ./main.py, Line: 999, Col: 5, Function: process_data, "
1172
+ "Complexity: 18",
1173
+ ),
1174
+ ],
1175
+ ),
1176
+ # Test output parsing patterns for test_progress_streamer.py
1177
+ # These patterns are used for matching/extraction, not replacement
1178
+ "pytest_test_start": ValidatedPattern(
1179
+ name="pytest_test_start",
1180
+ pattern=r"^(.+?):: ?(.+?):: ?(.+?) (PASSED|FAILED|SKIPPED|ERROR)$",
1181
+ replacement=r"\1::\2::\3", # Extract file::class::method
1182
+ description="Parse pytest test start line with file, class, and method "
1183
+ "(3-part format)",
1184
+ test_cases=[
1185
+ (
1186
+ "test_file.py::TestClass::test_method PASSED",
1187
+ "test_file.py::TestClass::test_method",
1188
+ ),
1189
+ (
1190
+ "tests/test_core.py::TestCore::test_function FAILED",
1191
+ "tests/test_core.py::TestCore::test_function",
1192
+ ),
1193
+ (
1194
+ "src/test.py::MyTest::test_case SKIPPED",
1195
+ "src/test.py::MyTest::test_case",
1196
+ ),
1197
+ ],
1198
+ ),
1199
+ "pytest_test_result": ValidatedPattern(
1200
+ name="pytest_test_result",
1201
+ pattern=r"^(.+?) (PASSED|FAILED|SKIPPED|ERROR)(?: \[.*?\])?\s*$",
1202
+ replacement=r"\1", # Extract just the test identifier
1203
+ description="Parse pytest test result line with test identifier",
1204
+ test_cases=[
1205
+ ("test_file.py::test_method PASSED", "test_file.py::test_method"),
1206
+ (
1207
+ "tests/test_core.py::test_func FAILED [100%]",
1208
+ "tests/test_core.py::test_func",
1209
+ ),
1210
+ ("src/test.py::test_case SKIPPED ", "src/test.py::test_case"),
1211
+ ],
1212
+ ),
1213
+ "pytest_collection_count": ValidatedPattern(
1214
+ name="pytest_collection_count",
1215
+ pattern=r"collected (\d+) items?",
1216
+ replacement=r"\1", # Extract just the count
1217
+ description="Parse pytest test collection count",
1218
+ test_cases=[
1219
+ ("collected 5 items", "5"),
1220
+ ("collected 1 item", "1"),
1221
+ (
1222
+ "collected 42 items for execution",
1223
+ "42 for execution",
1224
+ ), # Only the match is replaced
1225
+ ],
1226
+ ),
1227
+ "pytest_session_start": ValidatedPattern(
1228
+ name="pytest_session_start",
1229
+ pattern=r"test session starts",
1230
+ replacement=r"test session starts", # Identity replacement
1231
+ description="Match pytest session start indicator",
1232
+ test_cases=[
1233
+ ("test session starts", "test session starts"),
1234
+ ("pytest test session starts", "pytest test session starts"),
1235
+ ],
1236
+ ),
1237
+ "pytest_coverage_total": ValidatedPattern(
1238
+ name="pytest_coverage_total",
1239
+ pattern=r"TOTAL\s+\d+\s+\d+\s+(\d+)%",
1240
+ replacement=r"\1", # Extract just the percentage
1241
+ description="Parse pytest coverage total percentage",
1242
+ test_cases=[
1243
+ ("TOTAL 123 45 85%", "85"),
1244
+ ("TOTAL 1000 250 75%", "75"),
1245
+ ("TOTAL 50 0 100%", "100"),
1246
+ ],
1247
+ ),
1248
+ "pytest_detailed_test": ValidatedPattern(
1249
+ name="pytest_detailed_test",
1250
+ pattern=r"^(.+\.py)::(.+) (PASSED|FAILED|SKIPPED|ERROR)",
1251
+ replacement=r"\1::\2", # Extract file and test name
1252
+ description="Parse detailed pytest test output with file, test name, and "
1253
+ "status",
1254
+ test_cases=[
1255
+ (
1256
+ "test_file.py::test_method PASSED [50%]",
1257
+ "test_file.py::test_method [50%]", # Only the matched part is replaced
1258
+ ),
1259
+ (
1260
+ "tests/core.py::TestClass::test_func FAILED [75%] [0.1s]",
1261
+ "tests/core.py::TestClass::test_func [75%] [0.1s]",
1262
+ ),
1263
+ (
1264
+ "src/test.py::test_case SKIPPED",
1265
+ "src/test.py::test_case",
1266
+ ),
1267
+ ],
1268
+ ),
1269
+ # Code cleaning patterns (from code_cleaner.py)
1270
+ "docstring_triple_double": ValidatedPattern(
1271
+ name="docstring_triple_double",
1272
+ pattern=r'^\s*""".*?"""\s*$',
1273
+ replacement=r"",
1274
+ flags=re.MULTILINE | re.DOTALL,
1275
+ description="Remove triple-quoted docstrings with double quotes",
1276
+ test_cases=[
1277
+ (' """This is a docstring""" ', ""),
1278
+ ('"""Module docstring"""', ""),
1279
+ (' """\n Multi-line\n docstring\n """', ""),
1280
+ (
1281
+ 'regular_code = "not a docstring"',
1282
+ 'regular_code = "not a docstring"',
1283
+ ), # No change
1284
+ ],
1285
+ ),
1286
+ "docstring_triple_single": ValidatedPattern(
1287
+ name="docstring_triple_single",
1288
+ pattern=r"^\s*'''.*?'''\s*$",
1289
+ replacement=r"",
1290
+ flags=re.MULTILINE | re.DOTALL,
1291
+ description="Remove triple-quoted docstrings with single quotes",
1292
+ test_cases=[
1293
+ (" '''This is a docstring''' ", ""),
1294
+ ("'''Module docstring'''", ""),
1295
+ (" '''\n Multi-line\n docstring\n '''", ""),
1296
+ (
1297
+ "regular_code = 'not a docstring'",
1298
+ "regular_code = 'not a docstring'",
1299
+ ), # No change
1300
+ ],
1301
+ ),
1302
+ "spacing_after_comma": ValidatedPattern(
1303
+ name="spacing_after_comma",
1304
+ pattern=r",([^ \n])",
1305
+ replacement=r", \1",
1306
+ global_replace=True,
1307
+ description="Add space after comma if missing",
1308
+ test_cases=[
1309
+ ("def func(a,b,c):", "def func(a, b, c):"),
1310
+ ("items = [1,2,3,4]", "items = [1, 2, 3, 4]"),
1311
+ ("already, spaced, properly", "already, spaced, properly"), # No change
1312
+ ("mixed,spacing, here", "mixed, spacing, here"),
1313
+ ],
1314
+ ),
1315
+ "spacing_after_colon": ValidatedPattern(
1316
+ name="spacing_after_colon",
1317
+ pattern=r"(?<!:):([^ \n:])",
1318
+ replacement=r": \1",
1319
+ global_replace=True,
1320
+ description="Add space after colon if missing (avoid double colons)",
1321
+ test_cases=[
1322
+ ("def func(x:int, y:str):", "def func(x: int, y: str):"),
1323
+ ("dict_item = {'key':'value'}", "dict_item = {'key': 'value'}"),
1324
+ ("already: spaced: properly", "already: spaced: properly"), # No change
1325
+ ("class::method", "class::method"), # No change (double colon)
1326
+ ],
1327
+ ),
1328
+ "multiple_spaces": ValidatedPattern(
1329
+ name="multiple_spaces",
1330
+ pattern=r" {2,}",
1331
+ replacement=r" ",
1332
+ description="Replace multiple spaces with single space",
1333
+ global_replace=True,
1334
+ test_cases=[
1335
+ ("def func( x, y ):", "def func( x, y ):"),
1336
+ ("single space only", "single space only"), # No change
1337
+ ("lots of spaces", "lots of spaces"),
1338
+ ("\tkeep\ttabs\tbut fix spaces", "\tkeep\ttabs\tbut fix spaces"),
1339
+ ],
1340
+ ),
1341
+ "preserved_comments": ValidatedPattern(
1342
+ name="preserved_comments",
1343
+ pattern=r"(#.*?(?: coding: | encoding: | type: | noqa | pragma).*)",
1344
+ replacement=r"\1", # Identity replacement - used for matching only
1345
+ description="Match preserved code comments (encoding, type hints, etc.)",
1346
+ test_cases=[
1347
+ ("# coding: utf-8", "# coding: utf-8"), # No change - identity replacement
1348
+ (
1349
+ "# encoding: latin-1",
1350
+ "# encoding: latin-1",
1351
+ ), # No change - identity replacement
1352
+ ("# type: ignore", "# type: ignore"), # No change - identity replacement
1353
+ ("# noqa: E501", "# noqa: E501"), # No change - identity replacement
1354
+ (
1355
+ "# pragma: no cover",
1356
+ "# pragma: no cover",
1357
+ ), # No change - identity replacement
1358
+ ("# regular comment", "# regular comment"), # No change - no match
1359
+ ],
1360
+ ),
1361
+ "todo_pattern": ValidatedPattern(
1362
+ name="todo_pattern",
1363
+ pattern=r"(#.*?TODO.*)",
1364
+ replacement=r"\1", # Identity replacement - used for matching only
1365
+ flags=re.IGNORECASE,
1366
+ description="Match TODO comments for validation",
1367
+ test_cases=[
1368
+ (
1369
+ "# TODO: Fix this bug",
1370
+ "# TODO: Fix this bug",
1371
+ ), # No change - identity replacement
1372
+ (
1373
+ "# todo: implement later",
1374
+ "# todo: implement later",
1375
+ ), # No change - identity replacement
1376
+ (
1377
+ "# TODO refactor this method",
1378
+ "# TODO refactor this method",
1379
+ ), # No change - identity replacement
1380
+ (
1381
+ "# FIXME: another issue",
1382
+ "# FIXME: another issue",
1383
+ ), # No change - no match
1384
+ ("# regular comment", "# regular comment"), # No change - no match
1385
+ ],
1386
+ ),
1387
+ # DRY agent patterns - for code duplication detection
1388
+ "detect_error_response_patterns": ValidatedPattern(
1389
+ name="detect_error_response_patterns",
1390
+ pattern=r'return\s+.*[\'\"]\{.*[\'\""]error[\'\""].*\}.*[\'\""]',
1391
+ replacement=r"MATCH", # Dummy replacement for detection patterns
1392
+ description="Detect error response patterns in Python code for DRY violations",
1393
+ test_cases=[
1394
+ ('return \'{"error": "msg"}\'', "MATCH"),
1395
+ ('return f\'{"error": "msg"}\'', "MATCH"),
1396
+ ('return {"success": True}', 'return {"success": True}'), # No match
1397
+ ('return \'{"error": "test message", "code": 500}\'', "MATCH"),
1398
+ ],
1399
+ ),
1400
+ "detect_path_conversion_patterns": ValidatedPattern(
1401
+ name="detect_path_conversion_patterns",
1402
+ pattern=r"Path\([^)]+\)\s+if\s+isinstance\([^)]+,\s*str\)\s+else\s+[^)]+",
1403
+ replacement=r"MATCH", # Dummy replacement for detection patterns
1404
+ description="Detect path conversion patterns in Python code for DRY violations",
1405
+ test_cases=[
1406
+ ("Path(value) if isinstance(value, str) else value", "MATCH"),
1407
+ ("Path(path) if isinstance(path, str) else path", "MATCH"),
1408
+ ("Path('/tmp/file')", "Path('/tmp/file')"), # No match
1409
+ (
1410
+ "Path(input_path) if isinstance(input_path, str) else input_path",
1411
+ "MATCH",
1412
+ ),
1413
+ ],
1414
+ ),
1415
+ "detect_file_existence_patterns": ValidatedPattern(
1416
+ name="detect_file_existence_patterns",
1417
+ pattern=r"if\s+not\s+\w+\.exists\(\):",
1418
+ replacement=r"MATCH", # Dummy replacement for detection patterns
1419
+ description="Detect file existence check patterns in Python code for DRY"
1420
+ " violations",
1421
+ test_cases=[
1422
+ ("if not file.exists():", "MATCH"),
1423
+ ("if not path.exists():", "MATCH"),
1424
+ ("if not file_path.exists():", "MATCH"),
1425
+ ("if file.exists():", "if file.exists():"), # No match
1426
+ ],
1427
+ ),
1428
+ "detect_exception_patterns": ValidatedPattern(
1429
+ name="detect_exception_patterns",
1430
+ pattern=r"except\s+\w*Exception\s+as\s+\w+:",
1431
+ replacement=r"MATCH", # Dummy replacement for detection patterns
1432
+ description="Detect exception handling patterns for base Exception class in Python code for DRY violations",
1433
+ test_cases=[
1434
+ ("except Exception as e:", "MATCH"),
1435
+ ("except BaseException as error:", "MATCH"),
1436
+ (
1437
+ "except ValueError as error:",
1438
+ "except ValueError as error:",
1439
+ ), # No match - doesn't match pattern
1440
+ ("try:", "try:"), # No match
1441
+ ],
1442
+ ),
1443
+ "fix_path_conversion_with_ensure_path": ValidatedPattern(
1444
+ name="fix_path_conversion_with_ensure_path",
1445
+ pattern=r"Path\([^)]+\)\s+if\s+isinstance\([^)]+,\s*str\)\s+else\s+([^)]+)",
1446
+ replacement=r"_ensure_path(\1)",
1447
+ description="Replace path conversion patterns with _ensure_path utility "
1448
+ "function",
1449
+ test_cases=[
1450
+ ("Path(value) if isinstance(value, str) else value", "_ensure_path(value)"),
1451
+ ("Path(path) if isinstance(path, str) else path", "_ensure_path(path)"),
1452
+ (
1453
+ "Path(input_path) if isinstance(input_path, str) else input_path",
1454
+ "_ensure_path(input_path)",
1455
+ ),
1456
+ ],
1457
+ ),
1458
+ "fix_path_conversion_simple": ValidatedPattern(
1459
+ name="fix_path_conversion_simple",
1460
+ pattern=r"Path\(([^)]+)\)\s+if\s+isinstance\(\1,\s*str\)\s+else\s+\1",
1461
+ replacement=r"_ensure_path(\1)",
1462
+ description="Replace simple path conversion patterns with _ensure_path utility "
1463
+ "function",
1464
+ test_cases=[
1465
+ ("Path(value) if isinstance(value, str) else value", "_ensure_path(value)"),
1466
+ ("Path(path) if isinstance(path, str) else path", "_ensure_path(path)"),
1467
+ (
1468
+ "Path(file_path) if isinstance(file_path, str) else file_path",
1469
+ "_ensure_path(file_path)",
1470
+ ),
1471
+ ],
1472
+ ),
1473
+ # Security agent patterns - NEW PATTERNS FOR SECURITY_AGENT.PY
1474
+ "detect_security_keywords": ValidatedPattern(
1475
+ name="detect_security_keywords",
1476
+ pattern=r"(?i)(bandit|security|vulnerability|hardcoded|"
1477
+ r"shell=true|b108|b602|b301|b506|unsafe|injection)",
1478
+ replacement=r"MATCH", # Dummy replacement for detection patterns
1479
+ description="Detect security-related keywords in issue messages "
1480
+ "(case insensitive)",
1481
+ flags=re.IGNORECASE,
1482
+ test_cases=[
1483
+ ("Bandit security issue found", "MATCH security issue found"),
1484
+ ("VULNERABILITY detected", "MATCH detected"),
1485
+ ("hardcoded path found", "MATCH path found"),
1486
+ ("shell=True usage", "MATCH usage"),
1487
+ ("B108 violation", "MATCH violation"),
1488
+ ("normal message", "normal message"), # No match
1489
+ ],
1490
+ ),
1491
+ "detect_hardcoded_temp_paths_basic": ValidatedPattern(
1492
+ name="detect_hardcoded_temp_paths_basic",
1493
+ pattern=r"(?:/tmp/|/temp/|C:\\temp\\|C:\\tmp\\)", # nosec B108
1494
+ replacement="[TEMP_PATH]/",
1495
+ description="Detect hardcoded temporary directory paths",
1496
+ global_replace=True,
1497
+ test_cases=[
1498
+ ("/tmp/myfile.txt", "[TEMP_PATH]/myfile.txt"), # nosec B108
1499
+ (r"C:\tmp\data.log", "[TEMP_PATH]/data.log"),
1500
+ ("/temp/cache", "[TEMP_PATH]/cache"),
1501
+ (r"C:\temp\work", "[TEMP_PATH]/work"),
1502
+ ("/regular/path", "/regular/path"), # No change
1503
+ ],
1504
+ ),
1505
+ "replace_hardcoded_temp_paths": ValidatedPattern(
1506
+ name="replace_hardcoded_temp_paths",
1507
+ pattern=r'Path\("/tmp/([^"]+)"\)',
1508
+ replacement=r'Path(tempfile.gettempdir()) / "\1"',
1509
+ description="Replace hardcoded /tmp paths with tempfile.gettempdir()",
1510
+ global_replace=True,
1511
+ test_cases=[
1512
+ ('Path("/tmp/myfile.txt")', 'Path(tempfile.gettempdir()) / "myfile.txt"'),
1513
+ ('Path("/tmp/data.log")', 'Path(tempfile.gettempdir()) / "data.log"'),
1514
+ ('Path("/regular/path")', 'Path("/regular/path")'), # No change
1515
+ ],
1516
+ ),
1517
+ "replace_hardcoded_temp_strings": ValidatedPattern(
1518
+ name="replace_hardcoded_temp_strings",
1519
+ pattern=r'"/tmp/([^"]+)"',
1520
+ replacement=r'str(Path(tempfile.gettempdir()) / "\1")',
1521
+ description="Replace hardcoded /tmp string paths with tempfile equivalent",
1522
+ global_replace=True,
1523
+ test_cases=[
1524
+ ('"/tmp/myfile.txt"', 'str(Path(tempfile.gettempdir()) / "myfile.txt")'),
1525
+ ('"/tmp/data.log"', 'str(Path(tempfile.gettempdir()) / "data.log")'),
1526
+ ('"/regular/path"', '"/regular/path"'), # No change
1527
+ ],
1528
+ ),
1529
+ "replace_hardcoded_temp_single_quotes": ValidatedPattern(
1530
+ name="replace_hardcoded_temp_single_quotes",
1531
+ pattern=r"'/tmp/([^']+)'",
1532
+ replacement=r"str(Path(tempfile.gettempdir()) / '\1')",
1533
+ description="Replace hardcoded /tmp paths (single quotes) with tempfile"
1534
+ " equivalent",
1535
+ global_replace=True,
1536
+ test_cases=[
1537
+ ("'/tmp/myfile.txt'", "str(Path(tempfile.gettempdir()) / 'myfile.txt')"),
1538
+ ("'/tmp/data.log'", "str(Path(tempfile.gettempdir()) / 'data.log')"),
1539
+ ("'/regular/path'", "'/regular/path'"), # No change
1540
+ ],
1541
+ ),
1542
+ "replace_test_path_patterns": ValidatedPattern(
1543
+ name="replace_test_path_patterns",
1544
+ pattern=r'Path\("/test/path"\)',
1545
+ replacement=r"Path(tempfile.gettempdir()) / 'test-path'",
1546
+ description="Replace hardcoded /test/path patterns with tempfile equivalent",
1547
+ test_cases=[
1548
+ ('Path("/test/path")', "Path(tempfile.gettempdir()) / 'test-path'"),
1549
+ ('Path("/other/path")', 'Path("/other/path")'), # No change
1550
+ ],
1551
+ ),
1552
+ "detect_hardcoded_secrets": ValidatedPattern(
1553
+ name="detect_hardcoded_secrets",
1554
+ pattern=r'\b\w*(password|secret|key|token)\w*\s*=\s*[\'"][^\'"]+[\'"]',
1555
+ replacement="[SECRET_DETECTED]",
1556
+ description="Detect hardcoded secrets in assignments (case insensitive)",
1557
+ flags=re.IGNORECASE,
1558
+ global_replace=True,
1559
+ test_cases=[
1560
+ ('password = "secret123"', "[SECRET_DETECTED]"),
1561
+ ("api_key = 'abc123def'", "[SECRET_DETECTED]"),
1562
+ ('TOKEN = "my-token-here"', "[SECRET_DETECTED]"),
1563
+ ("username = 'user123'", "username = 'user123'"), # No match
1564
+ ],
1565
+ ),
1566
+ "extract_variable_name_from_assignment": ValidatedPattern(
1567
+ name="extract_variable_name_from_assignment",
1568
+ pattern=r"\s*(\w+)\s*=.*",
1569
+ replacement=r"\1",
1570
+ description="Extract variable name from assignment statement",
1571
+ test_cases=[
1572
+ ("password = 'secret'", "password"),
1573
+ ("api_key = 'value'", "api_key"),
1574
+ (" token =", "token"), # Matches just the word part
1575
+ ("complex_variable_name = value", "complex_variable_name"),
1576
+ ],
1577
+ ),
1578
+ "detect_insecure_random_usage": ValidatedPattern(
1579
+ name="detect_insecure_random_usage",
1580
+ pattern=r"\brandom\.(?:random|choice)\([^)]*\)",
1581
+ replacement="[INSECURE_RANDOM]()",
1582
+ description="Detect insecure random module usage",
1583
+ global_replace=True,
1584
+ test_cases=[
1585
+ ("random.random()", "[INSECURE_RANDOM]()"),
1586
+ ("random.choice(options)", "[INSECURE_RANDOM]()"),
1587
+ ("secrets.choice(options)", "secrets.choice(options)"), # No change
1588
+ ("my_random.choice()", "my_random.choice()"), # No change
1589
+ ],
1590
+ ),
1591
+ # Input validation patterns for security-critical validation
1592
+ "validate_sql_injection_patterns": ValidatedPattern(
1593
+ name="validate_sql_injection_patterns",
1594
+ pattern=r"\b(union|select|insert|update|delete|drop|create|alter|"
1595
+ r"exec|execute)\b",
1596
+ replacement="[SQL_INJECTION]",
1597
+ flags=re.IGNORECASE,
1598
+ description="Detect SQL injection patterns in input validation "
1599
+ "(case insensitive)",
1600
+ global_replace=True,
1601
+ test_cases=[
1602
+ ("UNION SELECT", "[SQL_INJECTION] [SQL_INJECTION]"),
1603
+ ("drop table", "[SQL_INJECTION] table"),
1604
+ ("normal text", "normal text"), # No change
1605
+ ("exec command", "[SQL_INJECTION] command"),
1606
+ ("execute procedure", "[SQL_INJECTION] procedure"),
1607
+ ],
1608
+ ),
1609
+ "validate_sql_comment_patterns": ValidatedPattern(
1610
+ name="validate_sql_comment_patterns",
1611
+ pattern=r"(-{2,}|\/\*|\*\/)",
1612
+ replacement="[SQL_COMMENT]",
1613
+ description="Detect SQL comment patterns in input validation",
1614
+ global_replace=True,
1615
+ test_cases=[
1616
+ ("--comment", "[SQL_COMMENT]comment"),
1617
+ ("/* comment */", "[SQL_COMMENT] comment [SQL_COMMENT]"),
1618
+ ("normal-text", "normal-text"), # No change (single hyphen)
1619
+ ("---triple", "[SQL_COMMENT]triple"),
1620
+ ],
1621
+ ),
1622
+ "validate_sql_boolean_injection": ValidatedPattern(
1623
+ name="validate_sql_boolean_injection",
1624
+ pattern=r"\b(or|and)\b.*=",
1625
+ replacement="[BOOLEAN_INJECTION]",
1626
+ flags=re.IGNORECASE,
1627
+ description="Detect boolean-based SQL injection patterns (case insensitive)",
1628
+ global_replace=True,
1629
+ test_cases=[
1630
+ ("or 1=1", "[BOOLEAN_INJECTION]1"),
1631
+ ("AND password=", "[BOOLEAN_INJECTION]"),
1632
+ ("normal or text", "normal or text"), # No change (no equals)
1633
+ ("value=test", "value=test"), # No change (no boolean operator)
1634
+ ],
1635
+ ),
1636
+ "validate_sql_server_specific": ValidatedPattern(
1637
+ name="validate_sql_server_specific",
1638
+ pattern=r"\b(xp_cmdshell|sp_executesql)\b",
1639
+ replacement="[SQLSERVER_EXPLOIT]",
1640
+ flags=re.IGNORECASE,
1641
+ description="Detect SQL Server specific attack patterns (case insensitive)",
1642
+ global_replace=True,
1643
+ test_cases=[
1644
+ ("xp_cmdshell", "[SQLSERVER_EXPLOIT]"),
1645
+ ("SP_EXECUTESQL", "[SQLSERVER_EXPLOIT]"),
1646
+ ("normal text", "normal text"), # No change
1647
+ ],
1648
+ ),
1649
+ "validate_code_eval_injection": ValidatedPattern(
1650
+ name="validate_code_eval_injection",
1651
+ pattern=r"\b(eval|exec|execfile)\s*\(",
1652
+ replacement="[CODE_EVAL](",
1653
+ description="Detect Python code evaluation injection patterns",
1654
+ global_replace=True,
1655
+ test_cases=[
1656
+ ("eval(code)", "[CODE_EVAL](code)"),
1657
+ ("exec(command)", "[CODE_EVAL](command)"),
1658
+ ("execfile(script)", "[CODE_EVAL](script)"),
1659
+ ("evaluate()", "evaluate()"), # No change (not exact match)
1660
+ ],
1661
+ ),
1662
+ "validate_code_dynamic_access": ValidatedPattern(
1663
+ name="validate_code_dynamic_access",
1664
+ pattern=r"\b(__import__|getattr|setattr|delattr)\b",
1665
+ replacement="[DYNAMIC_ACCESS]",
1666
+ description="Detect dynamic attribute access patterns for code injection",
1667
+ global_replace=True,
1668
+ test_cases=[
1669
+ ("__import__", "[DYNAMIC_ACCESS]"),
1670
+ ("getattr(obj, name)", "[DYNAMIC_ACCESS](obj, name)"),
1671
+ ("setattr(obj, name)", "[DYNAMIC_ACCESS](obj, name)"),
1672
+ ("delattr(obj, name)", "[DYNAMIC_ACCESS](obj, name)"),
1673
+ ("mygetattr", "mygetattr"), # No change (not exact match)
1674
+ ],
1675
+ ),
1676
+ "validate_code_system_commands": ValidatedPattern(
1677
+ name="validate_code_system_commands",
1678
+ pattern=r"\b(subprocess|os\.system|os\.popen|commands\.)",
1679
+ replacement="[SYSTEM_COMMAND]",
1680
+ description="Detect system command execution patterns for code injection",
1681
+ global_replace=True,
1682
+ test_cases=[
1683
+ ("subprocess.run", "[SYSTEM_COMMAND].run"),
1684
+ ("os.system(cmd)", "[SYSTEM_COMMAND](cmd)"),
1685
+ ("os.popen(cmd)", "[SYSTEM_COMMAND](cmd)"),
1686
+ ("commands.getoutput", "[SYSTEM_COMMAND]getoutput"),
1687
+ ("mysubprocess", "mysubprocess"), # No change (not exact match)
1688
+ ],
1689
+ ),
1690
+ "validate_code_compilation": ValidatedPattern(
1691
+ name="validate_code_compilation",
1692
+ pattern=r"\bcompile\s*\(|code\.compile",
1693
+ replacement="[CODE_COMPILE]",
1694
+ description="Detect code compilation patterns for injection",
1695
+ global_replace=True,
1696
+ test_cases=[
1697
+ ("compile(source)", "[CODE_COMPILE]source)"),
1698
+ ("code.compile(source)", "[CODE_COMPILE](source)"),
1699
+ ("compiled", "compiled"), # No change (not exact match)
1700
+ ],
1701
+ ),
1702
+ "validate_job_id_format": ValidatedPattern(
1703
+ name="validate_job_id_format",
1704
+ pattern=r"^[a-zA-Z0-9\-_]+$",
1705
+ replacement="VALID_JOB_ID",
1706
+ description="Validate job ID format - alphanumeric with hyphens and"
1707
+ " underscores only",
1708
+ test_cases=[
1709
+ ("valid_job-123", "VALID_JOB_ID"),
1710
+ ("another-valid_job_456", "VALID_JOB_ID"),
1711
+ ("job_123", "VALID_JOB_ID"),
1712
+ ("UPPERCASE_JOB-ID", "VALID_JOB_ID"),
1713
+ ("hyphen-underscore_combo", "VALID_JOB_ID"),
1714
+ ],
1715
+ ),
1716
+ "validate_env_var_name_format": ValidatedPattern(
1717
+ name="validate_env_var_name_format",
1718
+ pattern=r"^[A-Z_][A-Z0-9_]*$",
1719
+ replacement="VALID_ENV_VAR_NAME",
1720
+ description="Validate environment variable name format - uppercase letters,"
1721
+ " numbers, underscores only, must start with letter or underscore",
1722
+ test_cases=[
1723
+ ("VALID_VAR", "VALID_ENV_VAR_NAME"),
1724
+ ("_VALID_VAR", "VALID_ENV_VAR_NAME"),
1725
+ ("API_KEY_123", "VALID_ENV_VAR_NAME"),
1726
+ ("DATABASE_URL", "VALID_ENV_VAR_NAME"),
1727
+ ("_PRIVATE_VAR", "VALID_ENV_VAR_NAME"),
1728
+ ],
1729
+ ),
1730
+ # Config file update patterns
1731
+ "update_repo_revision": ValidatedPattern(
1732
+ name="update_repo_revision",
1733
+ pattern=r'("repo": "[^"]+?".*?"rev": )"([^"]+)"',
1734
+ replacement=r'\1"NEW_REVISION"',
1735
+ description="Update repository revision in config files (NEW_REVISION"
1736
+ " placeholder replaced dynamically)",
1737
+ flags=re.DOTALL,
1738
+ test_cases=[
1739
+ (
1740
+ '"repo": "https://github.com/user/repo".*"rev": "old_rev"',
1741
+ '"repo": "https://github.com/user/repo".*"rev": "NEW_REVISION"',
1742
+ ),
1743
+ (
1744
+ '"repo": "git@github.com:user/repo.git", "branch": "main", "rev": '
1745
+ '"abc123"',
1746
+ '"repo": "git@github.com:user/repo.git", "branch": "main", "rev":'
1747
+ ' "NEW_REVISION"',
1748
+ ),
1749
+ (
1750
+ '{"repo": "https://example.com/repo", "description": "test", "rev": '
1751
+ '"456def"}',
1752
+ '{"repo": "https://example.com/repo", "description": "test", "rev":'
1753
+ ' "NEW_REVISION"}',
1754
+ ),
1755
+ ],
1756
+ ),
1757
+ # URL sanitization patterns for security
1758
+ "sanitize_localhost_urls": ValidatedPattern(
1759
+ name="sanitize_localhost_urls",
1760
+ pattern=r"https?://localhost:\d+[^\s]*",
1761
+ replacement="[INTERNAL_URL]",
1762
+ description="Sanitize localhost URLs with ports for security",
1763
+ global_replace=True,
1764
+ test_cases=[
1765
+ ("http://localhost:8000/api/test", "[INTERNAL_URL]"),
1766
+ ("https://localhost:3000/dashboard", "[INTERNAL_URL]"),
1767
+ (
1768
+ "Visit http://localhost:8080/admin for details",
1769
+ "Visit [INTERNAL_URL] for details",
1770
+ ),
1771
+ ("https://example.com/test", "https://example.com/test"), # No change
1772
+ ],
1773
+ ),
1774
+ "sanitize_127_urls": ValidatedPattern(
1775
+ name="sanitize_127_urls",
1776
+ pattern=r"https?://127\.0\.0\.1:\d+[^\s]*",
1777
+ replacement="[INTERNAL_URL]",
1778
+ description="Sanitize 127.0.0.1 URLs with ports for security",
1779
+ global_replace=True,
1780
+ test_cases=[
1781
+ ("http://127.0.0.1:8000/api", "[INTERNAL_URL]"),
1782
+ ("https://127.0.0.1:3000/test", "[INTERNAL_URL]"),
1783
+ ("Connect to http://127.0.0.1:5000/status", "Connect to [INTERNAL_URL]"),
1784
+ (
1785
+ "https://192.168.1.1:8080/test",
1786
+ "https://192.168.1.1:8080/test",
1787
+ ), # No change
1788
+ ],
1789
+ ),
1790
+ "sanitize_any_localhost_urls": ValidatedPattern(
1791
+ name="sanitize_any_localhost_urls",
1792
+ pattern=r"https?://0\.0\.0\.0:\d+[^\s]*",
1793
+ replacement="[INTERNAL_URL]",
1794
+ description="Sanitize 0.0.0.0 URLs with ports for security",
1795
+ global_replace=True,
1796
+ test_cases=[
1797
+ ("http://0.0.0.0:8000/api", "[INTERNAL_URL]"),
1798
+ ("https://0.0.0.0:3000/test", "[INTERNAL_URL]"),
1799
+ ("https://1.1.1.1:8080/test", "https://1.1.1.1:8080/test"), # No change
1800
+ ],
1801
+ ),
1802
+ "sanitize_ws_localhost_urls": ValidatedPattern(
1803
+ name="sanitize_ws_localhost_urls",
1804
+ pattern=r"ws://localhost:\d+[^\s]*",
1805
+ replacement="[INTERNAL_URL]",
1806
+ description="Sanitize WebSocket localhost URLs with ports for security",
1807
+ global_replace=True,
1808
+ test_cases=[
1809
+ ("ws://localhost:8675/websocket", "[INTERNAL_URL]"),
1810
+ ("ws://localhost:3000/socket", "[INTERNAL_URL]"),
1811
+ ("Connect to ws://localhost:8000/ws", "Connect to [INTERNAL_URL]"),
1812
+ (
1813
+ "wss://example.com:443/socket",
1814
+ "wss://example.com:443/socket",
1815
+ ), # No change
1816
+ ],
1817
+ ),
1818
+ "sanitize_ws_127_urls": ValidatedPattern(
1819
+ name="sanitize_ws_127_urls",
1820
+ pattern=r"ws://127\.0\.0\.1:\d+[^\s]*",
1821
+ replacement="[INTERNAL_URL]",
1822
+ description="Sanitize WebSocket 127.0.0.1 URLs with ports for security",
1823
+ global_replace=True,
1824
+ test_cases=[
1825
+ ("ws://127.0.0.1:8675/websocket", "[INTERNAL_URL]"),
1826
+ ("ws://127.0.0.1:3000/socket", "[INTERNAL_URL]"),
1827
+ (
1828
+ "ws://192.168.1.1:8080/socket",
1829
+ "ws://192.168.1.1:8080/socket",
1830
+ ), # No change
1831
+ ],
1832
+ ),
1833
+ "sanitize_simple_localhost_urls": ValidatedPattern(
1834
+ name="sanitize_simple_localhost_urls",
1835
+ pattern=r"http://localhost[^\s]*",
1836
+ replacement="[INTERNAL_URL]",
1837
+ description="Sanitize simple localhost URLs without explicit ports for security",
1838
+ global_replace=True,
1839
+ test_cases=[
1840
+ ("http://localhost/api/test", "[INTERNAL_URL]"),
1841
+ ("http://localhost/dashboard", "[INTERNAL_URL]"),
1842
+ ("Visit http://localhost/admin", "Visit [INTERNAL_URL]"),
1843
+ (
1844
+ "https://localhost:443/test",
1845
+ "https://localhost:443/test",
1846
+ ), # No change (different pattern)
1847
+ ],
1848
+ ),
1849
+ "sanitize_simple_ws_localhost_urls": ValidatedPattern(
1850
+ name="sanitize_simple_ws_localhost_urls",
1851
+ pattern=r"ws://localhost[^\s]*",
1852
+ replacement="[INTERNAL_URL]",
1853
+ description="Sanitize simple WebSocket localhost URLs without explicit ports"
1854
+ " for security",
1855
+ global_replace=True,
1856
+ test_cases=[
1857
+ ("ws://localhost/websocket", "[INTERNAL_URL]"),
1858
+ ("ws://localhost/socket", "[INTERNAL_URL]"),
1859
+ ("Connect to ws://localhost/ws", "Connect to [INTERNAL_URL]"),
1860
+ (
1861
+ "wss://localhost:443/socket",
1862
+ "wss://localhost:443/socket",
1863
+ ), # No change (different pattern)
1864
+ ],
1865
+ ),
1866
+ # Integration script patterns for resource management
1867
+ "detect_tempfile_usage": ValidatedPattern(
1868
+ name="detect_tempfile_usage",
1869
+ pattern=r"tempfile\.(?:mkdtemp|NamedTemporaryFile|TemporaryDirectory)",
1870
+ replacement="MATCH", # Dummy replacement for detection patterns
1871
+ test_cases=[
1872
+ ("tempfile.mkdtemp()", "MATCH()"),
1873
+ ("tempfile.NamedTemporaryFile()", "MATCH()"),
1874
+ ("tempfile.TemporaryDirectory()", "MATCH()"),
1875
+ (
1876
+ "not_tempfile.other()",
1877
+ "not_tempfile.other()",
1878
+ ), # No match leaves original
1879
+ ],
1880
+ description="Detect tempfile module usage for resource management integration",
1881
+ ),
1882
+ "detect_subprocess_usage": ValidatedPattern(
1883
+ name="detect_subprocess_usage",
1884
+ pattern=r"subprocess\.(?:Popen|run)",
1885
+ replacement="MATCH", # Dummy replacement for detection patterns
1886
+ test_cases=[
1887
+ ("subprocess.Popen(cmd)", "MATCH(cmd)"),
1888
+ ("subprocess.run(['cmd'])", "MATCH(['cmd'])"),
1889
+ ("not_subprocess.other()", "not_subprocess.other()"),
1890
+ ],
1891
+ description="Detect subprocess module usage for resource management integration",
1892
+ ),
1893
+ "detect_asyncio_create_task": ValidatedPattern(
1894
+ name="detect_asyncio_create_task",
1895
+ pattern=r"asyncio\.create_task",
1896
+ replacement="MATCH", # Dummy replacement for detection patterns
1897
+ test_cases=[
1898
+ ("asyncio.create_task(coro)", "MATCH(coro)"),
1899
+ ("not_asyncio.other()", "not_asyncio.other()"),
1900
+ ],
1901
+ description="Detect asyncio.create_task usage for resource management"
1902
+ " integration",
1903
+ ),
1904
+ "detect_file_open_operations": ValidatedPattern(
1905
+ name="detect_file_open_operations",
1906
+ pattern=r"(\.open\(|with open\()",
1907
+ replacement=r"MATCH", # Dummy replacement for detection patterns
1908
+ test_cases=[
1909
+ ("file.open()", "fileMATCH)"),
1910
+ ("with open('file.txt'):", "MATCH'file.txt'):"),
1911
+ ("other_method()", "other_method()"), # No change
1912
+ ],
1913
+ description="Detect file open operations for resource management integration",
1914
+ ),
1915
+ "match_async_function_definition": ValidatedPattern(
1916
+ name="match_async_function_definition",
1917
+ pattern=r"(async def \w+\([^)]*\)[^:]*:)",
1918
+ replacement=r"\1",
1919
+ test_cases=[
1920
+ ("async def foo():", "async def foo():"),
1921
+ ("async def bar(a, b) -> None:", "async def bar(a, b) -> None:"),
1922
+ ("def sync_func():", "def sync_func():"),
1923
+ ],
1924
+ description="Match async function definitions for resource management"
1925
+ " integration",
1926
+ ),
1927
+ "match_class_definition": ValidatedPattern(
1928
+ name="match_class_definition",
1929
+ pattern=r"class (\w+).*:",
1930
+ replacement=r"\1",
1931
+ test_cases=[
1932
+ ("class MyClass:", "MyClass"),
1933
+ ("class MyClass(BaseClass):", "MyClass"),
1934
+ ("class MyClass(Base, Mixin):", "MyClass"),
1935
+ ("def not_class():", "def not_class():"),
1936
+ ],
1937
+ description="Match class definitions for resource management integration",
1938
+ ),
1939
+ "replace_subprocess_popen_basic": ValidatedPattern(
1940
+ name="replace_subprocess_popen_basic",
1941
+ pattern=r"subprocess\.Popen\(",
1942
+ replacement="managed_proc = resource_ctx.managed_process(subprocess.Popen(",
1943
+ test_cases=[
1944
+ (
1945
+ "subprocess.Popen(cmd)",
1946
+ "managed_proc = resource_ctx.managed_process(subprocess.Popen(cmd)",
1947
+ ),
1948
+ (
1949
+ "result = subprocess.Popen(['ls'])",
1950
+ "result = managed_proc = resource_ctx.managed_process("
1951
+ "subprocess.Popen(['ls'])",
1952
+ ),
1953
+ ],
1954
+ description="Replace subprocess.Popen with managed version",
1955
+ ),
1956
+ "replace_subprocess_popen_assignment": ValidatedPattern(
1957
+ name="replace_subprocess_popen_assignment",
1958
+ pattern=r"(\w+)\s*=\s*subprocess\.Popen\(",
1959
+ replacement=r"process = subprocess.Popen(",
1960
+ test_cases=[
1961
+ ("proc = subprocess.Popen(cmd)", "process = subprocess.Popen(cmd)"),
1962
+ (
1963
+ "my_process = subprocess.Popen(['ls'])",
1964
+ "process = subprocess.Popen(['ls'])",
1965
+ ),
1966
+ ],
1967
+ description="Replace subprocess.Popen assignment with standard variable name",
1968
+ ),
1969
+ "replace_path_open_write": ValidatedPattern(
1970
+ name="replace_path_open_write",
1971
+ pattern=r'(\w+)\.open\(["\']wb?["\'][^)]*\)',
1972
+ replacement=r"atomic_file_write(\1)",
1973
+ test_cases=[
1974
+ ("path.open('w')", "atomic_file_write(path)"),
1975
+ ("file.open('wb')", "atomic_file_write(file)"),
1976
+ ],
1977
+ description="Replace file.open() with atomic_file_write",
1978
+ ),
1979
+ "replace_path_write_text": ValidatedPattern(
1980
+ name="replace_path_write_text",
1981
+ pattern=r"(\w+)\.write_text\(([^)]+)\)",
1982
+ replacement=r"await SafeFileOperations.safe_write_text(\1, \2, atomic=True)",
1983
+ test_cases=[
1984
+ (
1985
+ "path.write_text(content)",
1986
+ "await SafeFileOperations.safe_write_text(path, content, atomic=True)",
1987
+ ),
1988
+ (
1989
+ "file.write_text(data, encoding='utf-8')",
1990
+ "await SafeFileOperations.safe_write_text(file, data, encoding='utf-8',"
1991
+ " atomic=True)",
1992
+ ),
1993
+ ],
1994
+ description="Replace path.write_text with SafeFileOperations.safe_write_text",
1995
+ ),
1996
+ # Agent-specific patterns - DocumentationAgent
1997
+ "agent_count_pattern": ValidatedPattern(
1998
+ name="agent_count_pattern",
1999
+ pattern=r"(\d+)\s+agents",
2000
+ replacement=r"\1 agents",
2001
+ test_cases=[
2002
+ ("9 agents", "9 agents"),
2003
+ ("12 agents", "12 agents"),
2004
+ ("5 agents", "5 agents"),
2005
+ ],
2006
+ description="Match agent count patterns for documentation consistency",
2007
+ flags=re.IGNORECASE,
2008
+ ),
2009
+ "specialized_agent_count_pattern": ValidatedPattern(
2010
+ name="specialized_agent_count_pattern",
2011
+ pattern=r"(\d+)\s+specialized\s+agents",
2012
+ replacement=r"\1 specialized agents",
2013
+ test_cases=[
2014
+ ("9 specialized agents", "9 specialized agents"),
2015
+ ("12 specialized agents", "12 specialized agents"),
2016
+ ("5 specialized agents", "5 specialized agents"),
2017
+ ],
2018
+ description="Match specialized agent count patterns for documentation "
2019
+ "consistency",
2020
+ flags=re.IGNORECASE,
2021
+ ),
2022
+ "total_agents_config_pattern": ValidatedPattern(
2023
+ name="total_agents_config_pattern",
2024
+ pattern=r'total_agents["\'][\s]*:\s*(\d+)',
2025
+ replacement=r'total_agents": \1',
2026
+ test_cases=[
2027
+ ('total_agents": 9', 'total_agents": 9'),
2028
+ ("total_agents': 12", 'total_agents": 12'),
2029
+ ('total_agents" : 5', 'total_agents": 5'),
2030
+ ],
2031
+ description="Match total agents configuration patterns",
2032
+ flags=re.IGNORECASE,
2033
+ ),
2034
+ "sub_agent_count_pattern": ValidatedPattern(
2035
+ name="sub_agent_count_pattern",
2036
+ pattern=r"(\d+)\s+sub-agents",
2037
+ replacement=r"\1 sub-agents",
2038
+ test_cases=[
2039
+ ("9 sub-agents", "9 sub-agents"),
2040
+ ("12 sub-agents", "12 sub-agents"),
2041
+ ("5 sub-agents", "5 sub-agents"),
2042
+ ],
2043
+ description="Match sub-agent count patterns for documentation consistency",
2044
+ flags=re.IGNORECASE,
2045
+ ),
2046
+ "update_agent_count": ValidatedPattern(
2047
+ name="update_agent_count",
2048
+ pattern=r"\b(\d+)\s+agents\b",
2049
+ replacement=r"NEW_COUNT agents",
2050
+ test_cases=[
2051
+ ("9 agents working", "NEW_COUNT agents working"),
2052
+ ("We have 12 agents ready", "We have NEW_COUNT agents ready"),
2053
+ ("All 5 agents are active", "All NEW_COUNT agents are active"),
2054
+ ],
2055
+ description="Update agent count references (NEW_COUNT replaced dynamically)",
2056
+ ),
2057
+ "update_specialized_agent_count": ValidatedPattern(
2058
+ name="update_specialized_agent_count",
2059
+ pattern=r"\b(\d+)\s+specialized\s+agents\b",
2060
+ replacement=r"NEW_COUNT specialized agents",
2061
+ test_cases=[
2062
+ (
2063
+ "9 specialized agents available",
2064
+ "NEW_COUNT specialized agents available",
2065
+ ),
2066
+ ("We have 12 specialized agents", "We have NEW_COUNT specialized agents"),
2067
+ ("All 5 specialized agents work", "All NEW_COUNT specialized agents work"),
2068
+ ],
2069
+ description="Update specialized agent count references (NEW_COUNT replaced"
2070
+ " dynamically)",
2071
+ ),
2072
+ "update_total_agents_config": ValidatedPattern(
2073
+ name="update_total_agents_config",
2074
+ pattern=r'total_agents["\'][\s]*:\s*\d+',
2075
+ replacement=r'total_agents": NEW_COUNT',
2076
+ test_cases=[
2077
+ ('total_agents": 9', 'total_agents": NEW_COUNT'),
2078
+ ("total_agents': 12", 'total_agents": NEW_COUNT'),
2079
+ ('total_agents" : 5', 'total_agents": NEW_COUNT'),
2080
+ ],
2081
+ description="Update total agents configuration (NEW_COUNT replaced"
2082
+ " dynamically)",
2083
+ ),
2084
+ "update_sub_agent_count": ValidatedPattern(
2085
+ name="update_sub_agent_count",
2086
+ pattern=r"\b(\d+)\s+sub-agents\b",
2087
+ replacement=r"NEW_COUNT sub-agents",
2088
+ test_cases=[
2089
+ ("9 sub-agents working", "NEW_COUNT sub-agents working"),
2090
+ ("We have 12 sub-agents ready", "We have NEW_COUNT sub-agents ready"),
2091
+ ("All 5 sub-agents are active", "All NEW_COUNT sub-agents are active"),
2092
+ ],
2093
+ description="Update sub-agent count references (NEW_COUNT replaced"
2094
+ " dynamically)",
2095
+ ),
2096
+ # Agent-specific patterns - TestSpecialistAgent
2097
+ "fixture_not_found_pattern": ValidatedPattern(
2098
+ name="fixture_not_found_pattern",
2099
+ pattern=r"fixture '(\w+)' not found",
2100
+ replacement=r"fixture '\1' not found",
2101
+ test_cases=[
2102
+ ("fixture 'temp_pkg_path' not found", "fixture 'temp_pkg_path' not found"),
2103
+ ("fixture 'console' not found", "fixture 'console' not found"),
2104
+ ("fixture 'tmp_path' not found", "fixture 'tmp_path' not found"),
2105
+ ],
2106
+ description="Match pytest fixture not found error patterns",
2107
+ ),
2108
+ "import_error_pattern": ValidatedPattern(
2109
+ name="import_error_pattern",
2110
+ pattern=r"ImportError|ModuleNotFoundError",
2111
+ replacement=r"ImportError",
2112
+ test_cases=[
2113
+ ("ImportError: No module named", "ImportError: No module named"),
2114
+ ("ModuleNotFoundError: No module", "ImportError: No module"),
2115
+ ("Other error types", "Other error types"), # No change
2116
+ ],
2117
+ description="Match import error patterns in test failures",
2118
+ ),
2119
+ "assertion_error_pattern": ValidatedPattern(
2120
+ name="assertion_error_pattern",
2121
+ pattern=r"assert .+ ==",
2122
+ replacement=r"AssertionError",
2123
+ test_cases=[
2124
+ (
2125
+ "AssertionError: Values differ",
2126
+ "AssertionError: Values differ",
2127
+ ), # No change
2128
+ ("assert result == expected", "AssertionError expected"),
2129
+ ("Normal code", "Normal code"), # No change
2130
+ ],
2131
+ description="Match assertion error patterns in test failures",
2132
+ ),
2133
+ "attribute_error_pattern": ValidatedPattern(
2134
+ name="attribute_error_pattern",
2135
+ pattern=r"AttributeError: .+ has no attribute",
2136
+ replacement=r"AttributeError: has no attribute",
2137
+ test_cases=[
2138
+ (
2139
+ "AttributeError: 'Mock' has no attribute 'test'",
2140
+ "AttributeError: has no attribute 'test'",
2141
+ ),
2142
+ (
2143
+ "AttributeError: 'NoneType' has no attribute 'value'",
2144
+ "AttributeError: has no attribute 'value'",
2145
+ ),
2146
+ ("Normal error", "Normal error"), # No change
2147
+ ],
2148
+ description="Match attribute error patterns in test failures",
2149
+ ),
2150
+ "mock_spec_error_pattern": ValidatedPattern(
2151
+ name="mock_spec_error_pattern",
2152
+ pattern=r"MockSpec|spec.*Mock",
2153
+ replacement=r"MockSpec",
2154
+ test_cases=[
2155
+ ("MockSpec error occurred", "MockSpec error occurred"),
2156
+ ("spec for Mock failed", "MockSpec failed"),
2157
+ ("Normal mock usage", "Normal mock usage"), # No change
2158
+ ],
2159
+ description="Match mock specification error patterns in test failures",
2160
+ ),
2161
+ "hardcoded_path_pattern": ValidatedPattern(
2162
+ name="hardcoded_path_pattern",
2163
+ pattern=r"'/test/path'|/test/path",
2164
+ replacement=r"str(tmp_path)",
2165
+ test_cases=[
2166
+ ("'/test/path'", "str(tmp_path)"),
2167
+ ("/test/path", "str(tmp_path)"),
2168
+ ("'/other/path'", "'/other/path'"), # No change
2169
+ ],
2170
+ description="Match hardcoded test path patterns that should use tmp_path",
2171
+ ),
2172
+ "missing_name_pattern": ValidatedPattern(
2173
+ name="missing_name_pattern",
2174
+ pattern=r"name '(\w+)' is not defined",
2175
+ replacement=r"name '\1' is not defined",
2176
+ test_cases=[
2177
+ ("name 'pytest' is not defined", "name 'pytest' is not defined"),
2178
+ ("name 'Mock' is not defined", "name 'Mock' is not defined"),
2179
+ ("name 'Path' is not defined", "name 'Path' is not defined"),
2180
+ ],
2181
+ description="Match undefined name patterns in test failures",
2182
+ ),
2183
+ "pydantic_validation_pattern": ValidatedPattern(
2184
+ name="pydantic_validation_pattern",
2185
+ pattern=r"ValidationError|validation error",
2186
+ replacement=r"ValidationError",
2187
+ test_cases=[
2188
+ ("ValidationError: field required", "ValidationError: field required"),
2189
+ ("validation error in field", "ValidationError in field"),
2190
+ ("Normal validation", "Normal validation"), # No change
2191
+ ],
2192
+ description="Match Pydantic validation error patterns in test failures",
2193
+ ),
2194
+ # Agent-specific patterns - PerformanceAgent
2195
+ "list_append_inefficiency_pattern": ValidatedPattern(
2196
+ name="list_append_inefficiency_pattern",
2197
+ pattern=r"(\s*)(\w+)\s*\+=\s*\[([^]]+)\]",
2198
+ replacement=r"\1\2.append(\3)",
2199
+ test_cases=[
2200
+ (" items += [new_item]", " items.append(new_item)"),
2201
+ ("results += [result]", "results.append(result)"),
2202
+ (" data += [value, other]", " data.append(value, other)"),
2203
+ ],
2204
+ description="Replace inefficient list concatenation with append for"
2205
+ " performance",
2206
+ ),
2207
+ "string_concatenation_pattern": ValidatedPattern(
2208
+ name="string_concatenation_pattern",
2209
+ pattern=r"(\s*)(\w+)\s*\+=\s*(.+)",
2210
+ replacement=r"\1\2_parts.append(\3)",
2211
+ test_cases=[
2212
+ (" text += new_text", " text_parts.append(new_text)"),
2213
+ ("result += line", "result_parts.append(line)"),
2214
+ (" output += data", " output_parts.append(data)"),
2215
+ ],
2216
+ description="Replace string concatenation with list append for performance "
2217
+ "optimization",
2218
+ ),
2219
+ # Enhanced performance patterns for PerformanceAgent optimization
2220
+ "nested_loop_detection_pattern": ValidatedPattern(
2221
+ name="nested_loop_detection_pattern",
2222
+ pattern=r"(\s*)(for\s+\w+\s+in\s+.*:)",
2223
+ replacement=r"\1# Performance: Potential nested loop - check complexity\n\1\2",
2224
+ test_cases=[
2225
+ (
2226
+ " for j in other:",
2227
+ " # Performance: Potential nested loop - check complexity\n "
2228
+ " for j in other:",
2229
+ ),
2230
+ (
2231
+ "for i in items:",
2232
+ "# Performance: Potential nested loop - check complexity\nfor i"
2233
+ " in items:",
2234
+ ),
2235
+ ],
2236
+ description="Detect loop patterns that might be nested creating O(n²)"
2237
+ " complexity",
2238
+ flags=re.MULTILINE,
2239
+ ),
2240
+ "list_extend_optimization_pattern": ValidatedPattern(
2241
+ name="list_extend_optimization_pattern",
2242
+ pattern=r"(\s*)(\w+)\s*\+=\s*\[([^]]+(?:,\s*[^]]+)*)\]",
2243
+ replacement=r"\1\2.extend([\3])",
2244
+ test_cases=[
2245
+ (" items += [a, b, c]", " items.extend([a, b, c])"),
2246
+ ("results += [x, y]", "results.extend([x, y])"),
2247
+ (" data += [single_item]", " data.extend([single_item])"),
2248
+ ],
2249
+ description="Replace list concatenation with extend for better performance with multiple items",
2250
+ ),
2251
+ "inefficient_string_join_pattern": ValidatedPattern(
2252
+ name="inefficient_string_join_pattern",
2253
+ pattern=r"(\s*)(\w+)\s*=\s*([\"'])([\"'])\s*\.\s*join\(\s*\[\s*\]\s*\)",
2254
+ replacement=r"\1\2 = \3\4 # Performance: Use empty string directly instead"
2255
+ r" of join",
2256
+ test_cases=[
2257
+ (
2258
+ ' text = "".join([])',
2259
+ ' text = "" # Performance: Use empty string directly instead of'
2260
+ " join",
2261
+ ),
2262
+ (
2263
+ "result = ''.join([])",
2264
+ "result = '' # Performance: Use empty string directly instead of join",
2265
+ ),
2266
+ ],
2267
+ description="Replace inefficient empty list join with direct empty string"
2268
+ " assignment",
2269
+ ),
2270
+ "repeated_len_in_loop_pattern": ValidatedPattern(
2271
+ name="repeated_len_in_loop_pattern",
2272
+ pattern=r"(\s*)(len\(\s*(\w+)\s*\))",
2273
+ replacement=r"\1# Performance: Consider caching len(\3) if used "
2274
+ r"repeatedly\n\1\2",
2275
+ test_cases=[
2276
+ (
2277
+ " len(items)",
2278
+ " # Performance: Consider caching len(items) if used repeatedly\n"
2279
+ " len(items)",
2280
+ ),
2281
+ (
2282
+ "len(data)",
2283
+ "# Performance: Consider caching len(data) if used "
2284
+ "repeatedly\nlen(data)",
2285
+ ),
2286
+ ],
2287
+ description="Suggest caching len() calls that might be repeated",
2288
+ ),
2289
+ "list_comprehension_optimization_pattern": ValidatedPattern(
2290
+ name="list_comprehension_optimization_pattern",
2291
+ pattern=r"(\s*)(\w+)\.append\(([^)]+)\)",
2292
+ replacement=r"\1# Performance: Consider list comprehension if this is in a "
2293
+ r"simple loop\n\1\2.append(\3)",
2294
+ test_cases=[
2295
+ (
2296
+ " results.append(item * 2)",
2297
+ " # Performance: Consider list comprehension if this is in a "
2298
+ "simple loop\n results.append(item * 2)",
2299
+ ),
2300
+ (
2301
+ "data.append(value)",
2302
+ "# Performance: Consider list comprehension if this is in a simple"
2303
+ " loop\ndata.append(value)",
2304
+ ),
2305
+ ],
2306
+ description="Suggest list comprehensions for simple append patterns",
2307
+ ),
2308
+ # Enhanced security patterns for improved SecurityAgent capabilities
2309
+ "detect_crypto_weak_algorithms": ValidatedPattern(
2310
+ name="detect_crypto_weak_algorithms",
2311
+ pattern=r"\b(?:md4|md5|sha1|des|3des|rc4)\b",
2312
+ replacement="[WEAK_CRYPTO_ALGORITHM]",
2313
+ description="Detect weak cryptographic algorithms",
2314
+ flags=re.IGNORECASE,
2315
+ global_replace=True,
2316
+ test_cases=[
2317
+ ("hashlib.md5()", "hashlib.[WEAK_CRYPTO_ALGORITHM]()"),
2318
+ ("using DES encryption", "using [WEAK_CRYPTO_ALGORITHM] encryption"),
2319
+ ("SHA256 is good", "SHA256 is good"), # No change
2320
+ ("MD4 hashing", "[WEAK_CRYPTO_ALGORITHM] hashing"),
2321
+ ],
2322
+ ),
2323
+ "detect_hardcoded_credentials_advanced": ValidatedPattern(
2324
+ name="detect_hardcoded_credentials_advanced",
2325
+ pattern=r"(?i)\b(?:password|passwd|pwd|secret|key|token|api_key|"
2326
+ r'apikey)\s*[:=]\s*["\'][^"\']{3,}["\']',
2327
+ replacement="[HARDCODED_CREDENTIAL_DETECTED]",
2328
+ description="Detect hardcoded credentials in various formats "
2329
+ "(case insensitive)",
2330
+ flags=re.IGNORECASE,
2331
+ global_replace=True,
2332
+ test_cases=[
2333
+ ('password="secret123"', "[HARDCODED_CREDENTIAL_DETECTED]"),
2334
+ ("API_KEY = 'abc-123-def'", "[HARDCODED_CREDENTIAL_DETECTED]"),
2335
+ ('token: "my-secret-token"', "[HARDCODED_CREDENTIAL_DETECTED]"),
2336
+ (
2337
+ 'username = "user"',
2338
+ 'username = "user"',
2339
+ ), # No match - not a credential field
2340
+ ],
2341
+ ),
2342
+ "detect_subprocess_shell_injection": ValidatedPattern(
2343
+ name="detect_subprocess_shell_injection",
2344
+ pattern=r"\bsubprocess\.\w+\([^)]*shell\s*=\s*True[^)]*\)",
2345
+ replacement="[SHELL_INJECTION_RISK]",
2346
+ description="Detect subprocess calls with shell=True",
2347
+ global_replace=True,
2348
+ test_cases=[
2349
+ ("subprocess.run(cmd, shell=True)", "[SHELL_INJECTION_RISK]"),
2350
+ ("subprocess.call(command, shell=True)", "[SHELL_INJECTION_RISK]"),
2351
+ (
2352
+ "subprocess.run(cmd, shell=False)",
2353
+ "subprocess.run(cmd, shell=False)",
2354
+ ), # No change
2355
+ ],
2356
+ ),
2357
+ "detect_regex_redos_vulnerable": ValidatedPattern(
2358
+ name="detect_regex_redos_vulnerable",
2359
+ pattern=r"\([^)]+\)[\*\+]",
2360
+ replacement="[REDOS_VULNERABLE_PATTERN]",
2361
+ description="Detect regex patterns vulnerable to ReDoS attacks (simplified"
2362
+ " detection)",
2363
+ global_replace=True,
2364
+ test_cases=[
2365
+ ("(a+)*", "[REDOS_VULNERABLE_PATTERN]"),
2366
+ ("(a*)+", "[REDOS_VULNERABLE_PATTERN]"),
2367
+ ("(abc)+", "[REDOS_VULNERABLE_PATTERN]"),
2368
+ ("simple+", "simple+"), # No change - not vulnerable
2369
+ ],
2370
+ ),
2371
+ "fix_hardcoded_jwt_secret": ValidatedPattern(
2372
+ name="fix_hardcoded_jwt_secret",
2373
+ pattern=r'(JWT_SECRET|jwt_secret)\s*=\s*["\'][^"\']+["\']',
2374
+ replacement=r'\1 = os.getenv("JWT_SECRET", "")',
2375
+ description="Replace hardcoded JWT secrets with environment variables",
2376
+ global_replace=True,
2377
+ test_cases=[
2378
+ (
2379
+ 'JWT_SECRET = "hardcoded-secret"',
2380
+ 'JWT_SECRET = os.getenv("JWT_SECRET", "")',
2381
+ ),
2382
+ ('jwt_secret = "my-secret"', 'jwt_secret = os.getenv("JWT_SECRET", "")'),
2383
+ ('other_var = "value"', 'other_var = "value"'), # No change
2384
+ ],
2385
+ ),
2386
+ "detect_unsafe_pickle_usage": ValidatedPattern(
2387
+ name="detect_unsafe_pickle_usage",
2388
+ pattern=r"\bpickle\.(loads?)\s*\(",
2389
+ replacement=r"[UNSAFE_PICKLE_USAGE].\1(",
2390
+ description="Detect potentially unsafe pickle usage",
2391
+ global_replace=True,
2392
+ test_cases=[
2393
+ ("pickle.load(file)", "[UNSAFE_PICKLE_USAGE].load(file)"),
2394
+ ("pickle.loads(data)", "[UNSAFE_PICKLE_USAGE].loads(data)"),
2395
+ ("my_pickle.load(file)", "my_pickle.load(file)"), # No change
2396
+ ],
2397
+ ),
2398
+ # Agent-specific patterns for validation and analysis
2399
+ "extract_range_size": ValidatedPattern(
2400
+ name="extract_range_size",
2401
+ pattern=r"range\((\d+)\)",
2402
+ replacement=r"\1",
2403
+ description="Extract numeric size from range() calls",
2404
+ test_cases=[
2405
+ ("range(1000)", "1000"),
2406
+ ("range(50)", "50"),
2407
+ ("for i in range(100):", "for i in 100:"),
2408
+ ("other_func(10)", "other_func(10)"), # No change
2409
+ ],
2410
+ ),
2411
+ "match_error_code_patterns": ValidatedPattern(
2412
+ name="match_error_code_patterns",
2413
+ pattern=r"F\d{3}|I\d{3}|E\d{3}|W\d{3}",
2414
+ replacement=r"\g<0>",
2415
+ description="Match standard error codes like F403, I001, etc.",
2416
+ test_cases=[
2417
+ ("F403", "F403"),
2418
+ ("I001", "I001"),
2419
+ ("E302", "E302"),
2420
+ ("W291", "W291"),
2421
+ ("ABC123", "ABC123"), # No change
2422
+ ],
2423
+ ),
2424
+ "match_validation_patterns": ValidatedPattern(
2425
+ name="match_validation_patterns",
2426
+ pattern=r"if\s+not\s+\w+\s*:|if\s+\w+\s+is\s+None\s*:|if\s+len\(\w+\)\s*[<>=]",
2427
+ replacement=r"\g<0>",
2428
+ description="Match common validation patterns for extraction",
2429
+ test_cases=[
2430
+ ("if not var:", "if not var:"),
2431
+ ("if item is None:", "if item is None:"),
2432
+ ("if len(items) >", "if len(items) >"),
2433
+ ("other code", "other code"), # No change
2434
+ ],
2435
+ ),
2436
+ "match_loop_patterns": ValidatedPattern(
2437
+ name="match_loop_patterns",
2438
+ pattern=r"\s*for\s+.*:\s*$|\s*while\s+.*:\s*$",
2439
+ replacement=r"\g<0>",
2440
+ description="Match for/while loop patterns",
2441
+ test_cases=[
2442
+ (" for i in items:", " for i in items:"),
2443
+ (" while condition:", " while condition:"),
2444
+ ("regular line", "regular line"), # No change
2445
+ ],
2446
+ ),
2447
+ "match_star_import": ValidatedPattern(
2448
+ name="match_star_import",
2449
+ pattern=r"from\s+\w+\s+import\s+\*",
2450
+ replacement=r"\g<0>",
2451
+ description="Match star import statements",
2452
+ test_cases=[
2453
+ ("from module import *", "from module import *"),
2454
+ ("from my_pkg import *", "from my_pkg import *"),
2455
+ ("from module import specific", "from module import specific"), # No change
2456
+ ],
2457
+ ),
2458
+ "clean_unused_import": ValidatedPattern(
2459
+ name="clean_unused_import",
2460
+ pattern=r"^\s*import\s+unused_module\s*$",
2461
+ replacement=r"",
2462
+ description="Remove unused import statements (example with unused_module)",
2463
+ test_cases=[
2464
+ (" import unused_module", ""),
2465
+ (
2466
+ "import other_module",
2467
+ "import other_module",
2468
+ ), # No change for different module
2469
+ ],
2470
+ ),
2471
+ "clean_unused_from_import": ValidatedPattern(
2472
+ name="clean_unused_from_import",
2473
+ pattern=r"^\s*from\s+\w+\s+import\s+.*\bunused_item\b",
2474
+ replacement=r"\g<0>",
2475
+ description="Match from import statements with unused items (example with "
2476
+ "unused_item)",
2477
+ test_cases=[
2478
+ (
2479
+ "from module import used, unused_item",
2480
+ "from module import used, unused_item",
2481
+ ),
2482
+ ("from other import needed", "from other import needed"), # No change
2483
+ ],
2484
+ ),
2485
+ "clean_import_commas": ValidatedPattern(
2486
+ name="clean_import_commas",
2487
+ pattern=r",\s*,",
2488
+ replacement=r",",
2489
+ description="Clean double commas in import statements",
2490
+ test_cases=[
2491
+ ("from module import a, , b", "from module import a, b"),
2492
+ ("items = [a, , b]", "items = [a, b]"),
2493
+ ("normal, list", "normal, list"), # No change
2494
+ ],
2495
+ ),
2496
+ "clean_trailing_import_comma": ValidatedPattern(
2497
+ name="clean_trailing_import_comma",
2498
+ pattern=r",\s*$",
2499
+ replacement=r"",
2500
+ description="Remove trailing commas from lines",
2501
+ test_cases=[
2502
+ ("from module import a, b,", "from module import a, b"),
2503
+ ("import item,", "import item"),
2504
+ ("normal line", "normal line"), # No change
2505
+ ],
2506
+ ),
2507
+ "clean_import_prefix": ValidatedPattern(
2508
+ name="clean_import_prefix",
2509
+ pattern=r"import\s*,\s*",
2510
+ replacement=r"import ",
2511
+ description="Clean malformed import statements with leading comma",
2512
+ test_cases=[
2513
+ ("import ,module", "import module"),
2514
+ ("from pkg import ,item", "from pkg import item"),
2515
+ ("import normal", "import normal"), # No change
2516
+ ],
2517
+ ),
2518
+ "extract_unused_import_name": ValidatedPattern(
2519
+ name="extract_unused_import_name",
2520
+ pattern=r"unused import ['\"]([^'\"]+)['\"]",
2521
+ replacement=r"\1",
2522
+ description="Extract import name from vulture unused import messages",
2523
+ test_cases=[
2524
+ ("unused import 'module_name'", "module_name"),
2525
+ ('unused import "other_module"', "other_module"),
2526
+ ("some other text", "some other text"), # No change
2527
+ ],
2528
+ ),
2529
+ "normalize_whitespace": ValidatedPattern(
2530
+ name="normalize_whitespace",
2531
+ pattern=r"\s+",
2532
+ replacement=r" ",
2533
+ description="Normalize multiple whitespace to single space",
2534
+ global_replace=True,
2535
+ test_cases=[
2536
+ ("import module", "import module"),
2537
+ ("from pkg import item", "from pkg import item"),
2538
+ ("normal text", "normal text"), # No change with single spaces
2539
+ ],
2540
+ ),
2541
+ }
2542
+
2543
+
2544
def validate_all_patterns() -> dict[str, bool]:
    """Validate every pattern registered in ``SAFE_PATTERNS``.

    Each pattern's ``_validate()`` is invoked; a ``ValueError`` marks the
    pattern as failed and the failure message is printed.

    Returns:
        Mapping of pattern name to ``True`` (validated) or ``False``
        (``_validate()`` raised ``ValueError``).
    """
    # A single accumulator is used for both writing and returning; the
    # previous code wrote to an undefined name and raised NameError.
    results: dict[str, bool] = {}
    for name, pattern in SAFE_PATTERNS.items():
        try:
            pattern._validate()
        except ValueError as e:
            results[name] = False
            print(f"Pattern '{name}' failed validation: {e}")
        else:
            results[name] = True
    return results
2555
+
2556
+
2557
def find_pattern_for_text(text: str) -> list[str]:
    """Return the names of all registered patterns whose ``test()`` accepts ``text``.

    Names are reported in ``SAFE_PATTERNS`` iteration order.
    """
    matching_names = []
    for pattern_name, candidate in SAFE_PATTERNS.items():
        if candidate.test(text):
            matching_names.append(pattern_name)
    return matching_names
2560
+
2561
+
2562
def apply_safe_replacement(text: str, pattern_name: str) -> str:
    """Run the registered pattern called ``pattern_name`` over ``text``.

    Args:
        text: Input passed to the pattern's ``apply()``.
        pattern_name: Key into ``SAFE_PATTERNS``.

    Returns:
        Whatever the pattern's ``apply()`` returns for ``text``.

    Raises:
        ValueError: If ``pattern_name`` is not a registered pattern.
    """
    selected = SAFE_PATTERNS.get(pattern_name)
    if selected is None:
        raise ValueError(f"Unknown pattern: {pattern_name}")
    return selected.apply(text)
2568
+
2569
+
2570
def get_pattern_description(pattern_name: str) -> str:
    """Look up the ``description`` of a registered pattern.

    Returns the literal string ``"Unknown pattern"`` when ``pattern_name`` is
    not a key of ``SAFE_PATTERNS``; no exception is raised in that case.
    """
    try:
        entry = SAFE_PATTERNS[pattern_name]
    except KeyError:
        return "Unknown pattern"
    return entry.description
2576
+
2577
+
2578
def fix_multi_word_hyphenation(text: str) -> str:
    """Repair multi-word spaced hyphenation such as 'pytest - hypothesis - specialist'.

    Delegates to ``apply_iteratively`` on the ``fix_spaced_hyphens`` pattern so
    that inputs with several hyphenated words are handled by repeated passes.
    """
    spaced_hyphens = SAFE_PATTERNS["fix_spaced_hyphens"]
    return spaced_hyphens.apply_iteratively(text)
2585
+
2586
+
2587
def update_pyproject_version(content: str, new_version: str) -> str:
    """Rewrite the version in pyproject.toml text using the registered regex.

    Args:
        content: Full text of a pyproject.toml file.
        new_version: Version string to substitute in.

    Returns:
        ``content`` with the registered ``update_pyproject_version`` regex
        substituted in MULTILINE mode.
    """
    import re

    registered = SAFE_PATTERNS["update_pyproject_version"]

    # Throwaway pattern carrying the concrete version in its replacement.
    # NOTE(review): presumably ValidatedPattern checks test_cases when it is
    # constructed - confirm against the class definition.
    versioned = ValidatedPattern(
        name="temp_version_update",
        pattern=registered.pattern,
        replacement=f"\\g<1>{new_version}\\g<3>",
        description=f"Update version to {new_version}",
        test_cases=[
            ('version = "1.2.3"', f'version = "{new_version}"'),
        ],
    )

    # MULTILINE so that line anchors in the registered regex work per line.
    return re.sub(
        registered.pattern,
        versioned.replacement,
        content,
        flags=re.MULTILINE,
    )
2616
+
2617
+
2618
def apply_formatting_fixes(content: str) -> str:
    """Run the standard formatting clean-ups over ``content``.

    Two passes, in order:
      1. ``remove_trailing_whitespace`` substituted with ``re.MULTILINE``.
      2. ``normalize_multiple_newlines`` via the pattern's own ``apply()``.
    """
    import re

    trailing_ws = SAFE_PATTERNS["remove_trailing_whitespace"]
    stripped = re.sub(
        trailing_ws.pattern,
        trailing_ws.replacement,
        content,
        flags=re.MULTILINE,
    )

    return SAFE_PATTERNS["normalize_multiple_newlines"].apply(stripped)
2632
+
2633
+
2634
def apply_security_fixes(content: str) -> str:
    """Apply every security-related fix pattern to ``content`` in a fixed order.

    Each named pattern's ``apply()`` receives the output of the previous one.
    """
    ordered_fixes = (
        # subprocess shell injections
        "fix_subprocess_run_shell",
        "fix_subprocess_call_shell",
        "fix_subprocess_popen_shell",
        # unsafe library usage
        "fix_unsafe_yaml_load",
        "fix_weak_md5_hash",
        "fix_weak_sha1_hash",
        "fix_insecure_random_choice",
        # debug prints with secrets
        "remove_debug_prints_with_secrets",
    )

    fixed = content
    for fix_name in ordered_fixes:
        fixed = SAFE_PATTERNS[fix_name].apply(fixed)
    return fixed
2651
+
2652
+
2653
def apply_test_fixes(content: str) -> str:
    """Apply the ``normalize_assert_statements`` pattern to ``content``."""
    assert_normalizer = SAFE_PATTERNS["normalize_assert_statements"]
    return assert_normalizer.apply(content)
2656
+
2657
+
2658
def is_valid_job_id(job_id: str) -> bool:
    """Report whether ``job_id`` passes the ``validate_job_id_alphanumeric`` pattern."""
    job_id_pattern = SAFE_PATTERNS["validate_job_id_alphanumeric"]
    return job_id_pattern.test(job_id)
2661
+
2662
+
2663
def remove_coverage_fail_under(addopts: str) -> str:
    """Apply the ``remove_coverage_fail_under`` pattern to a pytest addopts string."""
    fail_under_remover = SAFE_PATTERNS["remove_coverage_fail_under"]
    return fail_under_remover.apply(addopts)
2666
+
2667
+
2668
def update_coverage_requirement(content: str, new_coverage: float) -> str:
    """Update the coverage requirement found in ``content``.

    Args:
        content: Text searched with the registered
            ``update_coverage_requirement`` regex.
        new_coverage: New threshold; formatted with ``:.0f`` (no decimals)
            before being written.

    Returns:
        ``content`` with every match replaced by group 1 followed by the
        formatted coverage value.
    """
    import re

    pattern_obj = SAFE_PATTERNS["update_coverage_requirement"]
    # ``\g<1>`` rather than ``\1``: the coverage digits follow the group
    # reference directly, and ``\1`` + digits would be parsed by ``re`` as a
    # two-digit group number or an octal escape (e.g. ``\185`` -> group 18).
    replacement = f"\\g<1>{new_coverage:.0f}"

    # Create a temporary pattern with the actual coverage value
    temp_pattern = ValidatedPattern(
        name="temp_coverage_update",
        pattern=pattern_obj.pattern,
        replacement=replacement,
        description=f"Update coverage to {new_coverage}",
        test_cases=[
            ("--cov-fail-under=85", f"--cov-fail-under={new_coverage:.0f}"),
        ],
    )

    return re.compile(pattern_obj.pattern).sub(temp_pattern.replacement, content)
2685
+
2686
+
2687
def update_repo_revision(content: str, repo_url: str, new_revision: str) -> str:
    """
    Update repository revision in config content with safe regex.

    Args:
        content: The config file content (JSON-like format)
        repo_url: The repository URL to find and update
        new_revision: The new revision to set

    Returns:
        Updated content with new revision; ``content`` unchanged when no
        ``"repo": "<repo_url>" ... "rev": "<value>"`` sequence is found.
    """
    import re

    # Escape the URL so characters such as "." match literally.
    escaped_url = re.escape(repo_url)
    pattern = rf'("repo": "{escaped_url}".*?"rev": )"([^"]+)"'

    def _with_new_revision(match: "re.Match[str]") -> str:
        # A callable replacement inserts ``new_revision`` verbatim; a template
        # string would interpret backslashes / ``\g<n>`` inside the revision.
        return f'{match.group(1)}"{new_revision}"'

    # DOTALL lets ``.*?`` span the lines between the "repo" and "rev" keys.
    return re.compile(pattern, re.DOTALL).sub(_with_new_revision, content)
2708
+
2709
+
2710
def sanitize_internal_urls(text: str) -> str:
    """
    Run every URL-sanitization pattern over ``text``, one after another.

    Args:
        text: Text that may contain internal URLs

    Returns:
        The text after all listed sanitization patterns have been applied
        in sequence (each pattern sees the previous pattern's output).
    """
    sanitizer_names = (
        "sanitize_localhost_urls",
        "sanitize_127_urls",
        "sanitize_any_localhost_urls",
        "sanitize_ws_localhost_urls",
        "sanitize_ws_127_urls",
        "sanitize_simple_localhost_urls",
        "sanitize_simple_ws_localhost_urls",
    )

    sanitized = text
    for sanitizer in sanitizer_names:
        sanitized = SAFE_PATTERNS[sanitizer].apply(sanitized)
    return sanitized
2736
+
2737
+
2738
def apply_pattern_iteratively(
    text: str, pattern_name: str, max_iterations: int = MAX_ITERATIONS
) -> str:
    """Delegate to the named pattern's ``apply_iteratively``.

    Args:
        text: Input text.
        pattern_name: Key into ``SAFE_PATTERNS``.
        max_iterations: Forwarded to ``apply_iteratively``.

    Raises:
        ValueError: If ``pattern_name`` is not a registered pattern.
    """
    chosen = SAFE_PATTERNS.get(pattern_name)
    if chosen is None:
        raise ValueError(f"Unknown pattern: {pattern_name}")
    return chosen.apply_iteratively(text, max_iterations)
2746
+
2747
+
2748
def get_all_pattern_stats() -> dict[str, dict[str, int | float]]:
    """Collect ``get_performance_stats`` output for every registered pattern.

    Each pattern is measured against one fixed sample string with
    ``iterations=10``. If a pattern raises, its entry becomes
    ``{"error": <message>}`` instead of statistics.
    """
    sample = "python - m crackerjack - t with pytest - hypothesis - specialist"
    collected = {}

    for pattern_name, entry in SAFE_PATTERNS.items():
        try:
            collected[pattern_name] = entry.get_performance_stats(
                sample, iterations=10
            )
        except Exception as e:
            # Best effort: record the failure and keep measuring the rest.
            collected[pattern_name] = {"error": str(e)}

    return collected
2761
+
2762
+
2763
def clear_all_caches() -> None:
    """Reset the compiled-pattern cache via ``CompiledPatternCache.clear_cache()``.

    Intended for tests and for releasing memory.
    """
    pattern_cache = CompiledPatternCache
    pattern_cache.clear_cache()
2766
+
2767
+
2768
def get_cache_info() -> dict[str, int | list[str]]:
    """Return the statistics reported by ``CompiledPatternCache.get_cache_stats()``."""
    cache_stats = CompiledPatternCache.get_cache_stats()
    return cache_stats
2771
+
2772
+
2773
+ # Security validation functions
2774
def detect_path_traversal_patterns(path_str: str) -> list[str]:
    """
    Check ``path_str`` against the directory-traversal detection patterns.

    Returns the names of the patterns whose ``test()`` accepts the path,
    in the order they are checked.
    """
    candidate_names = (
        "detect_directory_traversal_basic",
        "detect_directory_traversal_backslash",
        "detect_url_encoded_traversal",
        "detect_double_url_encoded_traversal",
    )
    return [
        candidate
        for candidate in candidate_names
        if SAFE_PATTERNS[candidate].test(path_str)
    ]
2794
+
2795
+
2796
def detect_null_byte_patterns(path_str: str) -> list[str]:
    """
    Check ``path_str`` against the null-byte detection patterns.

    Returns the names of the patterns whose ``test()`` accepts the path,
    in the order they are checked.
    """
    candidate_names = (
        "detect_null_bytes_url",
        "detect_null_bytes_literal",
        "detect_utf8_overlong_null",
    )
    return [
        candidate
        for candidate in candidate_names
        if SAFE_PATTERNS[candidate].test(path_str)
    ]
2815
+
2816
+
2817
def detect_dangerous_directory_patterns(path_str: str) -> list[str]:
    """
    Check ``path_str`` against the dangerous-directory detection patterns.

    Returns the names of the patterns whose ``test()`` accepts the path,
    in the order they are checked.
    """
    candidate_names = (
        "detect_sys_directory_pattern",
        "detect_proc_directory_pattern",
        "detect_etc_directory_pattern",
        "detect_boot_directory_pattern",
        "detect_dev_directory_pattern",
        "detect_root_directory_pattern",
        "detect_var_log_directory_pattern",
        "detect_bin_directory_pattern",
        "detect_sbin_directory_pattern",
    )
    return [
        candidate
        for candidate in candidate_names
        if SAFE_PATTERNS[candidate].test(path_str)
    ]
2842
+
2843
+
2844
def detect_suspicious_path_patterns(path_str: str) -> list[str]:
    """
    Check ``path_str`` against the suspicious-path detection patterns.

    Returns the names of the patterns whose ``test()`` accepts the path,
    in the order they are checked.
    """
    candidate_names = (
        "detect_parent_directory_in_path",
        "detect_suspicious_temp_traversal",
        "detect_suspicious_var_traversal",
    )
    return [
        candidate
        for candidate in candidate_names
        if SAFE_PATTERNS[candidate].test(path_str)
    ]
2863
+
2864
+
2865
def validate_path_security(path_str: str) -> dict[str, list[str]]:
    """
    Run all four path detectors over ``path_str`` and group their findings.

    Returns a dict keyed by category (``traversal_patterns``, ``null_bytes``,
    ``dangerous_directories``, ``suspicious_patterns``); each value is the
    list of pattern names that detector reported.
    """
    detectors = (
        ("traversal_patterns", detect_path_traversal_patterns),
        ("null_bytes", detect_null_byte_patterns),
        ("dangerous_directories", detect_dangerous_directory_patterns),
        ("suspicious_patterns", detect_suspicious_path_patterns),
    )
    return {category: detect(path_str) for category, detect in detectors}
2877
+
2878
+
2879
# Validate every registered pattern when this module is executed as a script.
# This block does not run when the module is merely imported.
if __name__ == "__main__":
    results = validate_all_patterns()
    if all(results.values()):
        print("✅ All regex patterns validated successfully!")
    else:
        failed = [name for name, success in results.items() if not success]
        print(f"❌ Pattern validation failed for: {failed}")
        # Raise SystemExit directly: the bare ``exit()`` helper is injected by
        # the ``site`` module for interactive use and is not guaranteed to
        # exist (e.g. under ``python -S``).
        raise SystemExit(1)