crackerjack 0.31.10__py3-none-any.whl → 0.31.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of crackerjack might be problematic. Click here for more details.
- crackerjack/CLAUDE.md +288 -705
- crackerjack/__main__.py +22 -8
- crackerjack/agents/__init__.py +0 -3
- crackerjack/agents/architect_agent.py +0 -43
- crackerjack/agents/base.py +1 -9
- crackerjack/agents/coordinator.py +2 -148
- crackerjack/agents/documentation_agent.py +109 -81
- crackerjack/agents/dry_agent.py +122 -97
- crackerjack/agents/formatting_agent.py +3 -16
- crackerjack/agents/import_optimization_agent.py +1174 -130
- crackerjack/agents/performance_agent.py +956 -188
- crackerjack/agents/performance_helpers.py +229 -0
- crackerjack/agents/proactive_agent.py +1 -48
- crackerjack/agents/refactoring_agent.py +516 -246
- crackerjack/agents/refactoring_helpers.py +282 -0
- crackerjack/agents/security_agent.py +393 -90
- crackerjack/agents/test_creation_agent.py +1776 -120
- crackerjack/agents/test_specialist_agent.py +59 -15
- crackerjack/agents/tracker.py +0 -102
- crackerjack/api.py +145 -37
- crackerjack/cli/handlers.py +48 -30
- crackerjack/cli/interactive.py +11 -11
- crackerjack/cli/options.py +66 -4
- crackerjack/code_cleaner.py +808 -148
- crackerjack/config/global_lock_config.py +110 -0
- crackerjack/config/hooks.py +43 -64
- crackerjack/core/async_workflow_orchestrator.py +247 -97
- crackerjack/core/autofix_coordinator.py +192 -109
- crackerjack/core/enhanced_container.py +46 -63
- crackerjack/core/file_lifecycle.py +549 -0
- crackerjack/core/performance.py +9 -8
- crackerjack/core/performance_monitor.py +395 -0
- crackerjack/core/phase_coordinator.py +281 -94
- crackerjack/core/proactive_workflow.py +9 -58
- crackerjack/core/resource_manager.py +501 -0
- crackerjack/core/service_watchdog.py +490 -0
- crackerjack/core/session_coordinator.py +4 -8
- crackerjack/core/timeout_manager.py +504 -0
- crackerjack/core/websocket_lifecycle.py +475 -0
- crackerjack/core/workflow_orchestrator.py +343 -209
- crackerjack/dynamic_config.py +50 -9
- crackerjack/errors.py +3 -4
- crackerjack/executors/async_hook_executor.py +63 -13
- crackerjack/executors/cached_hook_executor.py +14 -14
- crackerjack/executors/hook_executor.py +100 -37
- crackerjack/executors/hook_lock_manager.py +856 -0
- crackerjack/executors/individual_hook_executor.py +120 -86
- crackerjack/intelligence/__init__.py +0 -7
- crackerjack/intelligence/adaptive_learning.py +13 -86
- crackerjack/intelligence/agent_orchestrator.py +15 -78
- crackerjack/intelligence/agent_registry.py +12 -59
- crackerjack/intelligence/agent_selector.py +31 -92
- crackerjack/intelligence/integration.py +1 -41
- crackerjack/interactive.py +9 -9
- crackerjack/managers/async_hook_manager.py +25 -8
- crackerjack/managers/hook_manager.py +9 -9
- crackerjack/managers/publish_manager.py +57 -59
- crackerjack/managers/test_command_builder.py +6 -36
- crackerjack/managers/test_executor.py +9 -61
- crackerjack/managers/test_manager.py +17 -63
- crackerjack/managers/test_manager_backup.py +77 -127
- crackerjack/managers/test_progress.py +4 -23
- crackerjack/mcp/cache.py +5 -12
- crackerjack/mcp/client_runner.py +10 -10
- crackerjack/mcp/context.py +64 -6
- crackerjack/mcp/dashboard.py +14 -11
- crackerjack/mcp/enhanced_progress_monitor.py +55 -55
- crackerjack/mcp/file_monitor.py +72 -42
- crackerjack/mcp/progress_components.py +103 -84
- crackerjack/mcp/progress_monitor.py +122 -49
- crackerjack/mcp/rate_limiter.py +12 -12
- crackerjack/mcp/server_core.py +16 -22
- crackerjack/mcp/service_watchdog.py +26 -26
- crackerjack/mcp/state.py +15 -0
- crackerjack/mcp/tools/core_tools.py +95 -39
- crackerjack/mcp/tools/error_analyzer.py +6 -32
- crackerjack/mcp/tools/execution_tools.py +1 -56
- crackerjack/mcp/tools/execution_tools_backup.py +35 -131
- crackerjack/mcp/tools/intelligence_tool_registry.py +0 -36
- crackerjack/mcp/tools/intelligence_tools.py +2 -55
- crackerjack/mcp/tools/monitoring_tools.py +308 -145
- crackerjack/mcp/tools/proactive_tools.py +12 -42
- crackerjack/mcp/tools/progress_tools.py +23 -15
- crackerjack/mcp/tools/utility_tools.py +3 -40
- crackerjack/mcp/tools/workflow_executor.py +40 -60
- crackerjack/mcp/websocket/app.py +0 -3
- crackerjack/mcp/websocket/endpoints.py +206 -268
- crackerjack/mcp/websocket/jobs.py +213 -66
- crackerjack/mcp/websocket/server.py +84 -6
- crackerjack/mcp/websocket/websocket_handler.py +137 -29
- crackerjack/models/config_adapter.py +3 -16
- crackerjack/models/protocols.py +162 -3
- crackerjack/models/resource_protocols.py +454 -0
- crackerjack/models/task.py +3 -3
- crackerjack/monitoring/__init__.py +0 -0
- crackerjack/monitoring/ai_agent_watchdog.py +25 -71
- crackerjack/monitoring/regression_prevention.py +28 -87
- crackerjack/orchestration/advanced_orchestrator.py +44 -78
- crackerjack/orchestration/coverage_improvement.py +10 -60
- crackerjack/orchestration/execution_strategies.py +16 -16
- crackerjack/orchestration/test_progress_streamer.py +61 -53
- crackerjack/plugins/base.py +1 -1
- crackerjack/plugins/managers.py +22 -20
- crackerjack/py313.py +65 -21
- crackerjack/services/backup_service.py +467 -0
- crackerjack/services/bounded_status_operations.py +627 -0
- crackerjack/services/cache.py +7 -9
- crackerjack/services/config.py +35 -52
- crackerjack/services/config_integrity.py +5 -16
- crackerjack/services/config_merge.py +542 -0
- crackerjack/services/contextual_ai_assistant.py +17 -19
- crackerjack/services/coverage_ratchet.py +44 -73
- crackerjack/services/debug.py +25 -39
- crackerjack/services/dependency_monitor.py +52 -50
- crackerjack/services/enhanced_filesystem.py +14 -11
- crackerjack/services/file_hasher.py +1 -1
- crackerjack/services/filesystem.py +1 -12
- crackerjack/services/git.py +71 -47
- crackerjack/services/health_metrics.py +31 -27
- crackerjack/services/initialization.py +276 -428
- crackerjack/services/input_validator.py +760 -0
- crackerjack/services/log_manager.py +16 -16
- crackerjack/services/logging.py +7 -6
- crackerjack/services/metrics.py +43 -43
- crackerjack/services/pattern_cache.py +2 -31
- crackerjack/services/pattern_detector.py +26 -63
- crackerjack/services/performance_benchmarks.py +20 -45
- crackerjack/services/regex_patterns.py +2887 -0
- crackerjack/services/regex_utils.py +537 -0
- crackerjack/services/secure_path_utils.py +683 -0
- crackerjack/services/secure_status_formatter.py +534 -0
- crackerjack/services/secure_subprocess.py +605 -0
- crackerjack/services/security.py +47 -10
- crackerjack/services/security_logger.py +492 -0
- crackerjack/services/server_manager.py +109 -50
- crackerjack/services/smart_scheduling.py +8 -25
- crackerjack/services/status_authentication.py +603 -0
- crackerjack/services/status_security_manager.py +442 -0
- crackerjack/services/thread_safe_status_collector.py +546 -0
- crackerjack/services/tool_version_service.py +1 -23
- crackerjack/services/unified_config.py +36 -58
- crackerjack/services/validation_rate_limiter.py +269 -0
- crackerjack/services/version_checker.py +9 -40
- crackerjack/services/websocket_resource_limiter.py +572 -0
- crackerjack/slash_commands/__init__.py +52 -2
- crackerjack/tools/__init__.py +0 -0
- crackerjack/tools/validate_input_validator_patterns.py +262 -0
- crackerjack/tools/validate_regex_patterns.py +198 -0
- {crackerjack-0.31.10.dist-info → crackerjack-0.31.13.dist-info}/METADATA +197 -12
- crackerjack-0.31.13.dist-info/RECORD +178 -0
- crackerjack/cli/facade.py +0 -104
- crackerjack-0.31.10.dist-info/RECORD +0 -149
- {crackerjack-0.31.10.dist-info → crackerjack-0.31.13.dist-info}/WHEEL +0 -0
- {crackerjack-0.31.10.dist-info → crackerjack-0.31.13.dist-info}/entry_points.txt +0 -0
- {crackerjack-0.31.10.dist-info → crackerjack-0.31.13.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,2887 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Centralized regex patterns with validation to prevent bad regex issues.
|
|
3
|
+
|
|
4
|
+
CRITICAL: All regex patterns in this codebase MUST be defined here with comprehensive
|
|
5
|
+
testing to prevent spacing and replacement syntax errors.
|
|
6
|
+
|
|
7
|
+
Optimized for performance, safety, and maintainability with:
|
|
8
|
+
- Thread-safe compiled pattern caching
|
|
9
|
+
- Iterative application for complex multi-word cases
|
|
10
|
+
- Safety limits to prevent catastrophic backtracking
|
|
11
|
+
- Performance monitoring capabilities
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
import re
|
|
15
|
+
import threading
|
|
16
|
+
import time
|
|
17
|
+
import typing as t
|
|
18
|
+
from dataclasses import dataclass, field
|
|
19
|
+
from re import Pattern
|
|
20
|
+
|
|
21
|
+
# Safety constants
|
|
22
|
+
MAX_INPUT_SIZE = 10 * 1024 * 1024 # 10MB max input size
|
|
23
|
+
MAX_ITERATIONS = 10 # Max iterations for iterative application
|
|
24
|
+
PATTERN_CACHE_SIZE = 100 # Max cached compiled patterns
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class CompiledPatternCache:
    """Thread-safe cache for compiled regex patterns.

    Patterns are compiled once and reused across calls; a simple FIFO
    eviction policy bounds the cache at PATTERN_CACHE_SIZE entries.
    """

    _lock = threading.RLock()
    _cache: dict[str, Pattern[str]] = {}
    _max_size = PATTERN_CACHE_SIZE

    @classmethod
    def get_compiled_pattern(cls, pattern: str) -> Pattern[str]:
        """Get compiled pattern from cache, compiling if necessary."""
        # No flags: the pattern string itself doubles as the cache key.
        return cls.get_compiled_pattern_with_flags(pattern, pattern, 0)

    @classmethod
    def get_compiled_pattern_with_flags(
        cls, cache_key: str, pattern: str, flags: int
    ) -> Pattern[str]:
        """Get compiled pattern with flags from cache, compiling if necessary.

        Args:
            cache_key: Lookup key (callers encode flags into it when needed).
            pattern: Raw regex source to compile on a cache miss.
            flags: Flags passed through to ``re.compile``.

        Raises:
            ValueError: If the pattern does not compile.
        """
        with cls._lock:
            if cache_key in cls._cache:
                return cls._cache[cache_key]

            # Compile new pattern
            try:
                compiled = re.compile(pattern, flags)
            except re.error as e:
                # Maintain backward compatibility with the existing error
                # message format, but chain the re.error for debuggability.
                raise ValueError(f"Invalid regex pattern '{pattern}': {e}") from e

            # Add to cache with size limit: evict the oldest entry (FIFO).
            # Dicts preserve insertion order, so the first key is the oldest.
            if len(cls._cache) >= cls._max_size:
                oldest_key = next(iter(cls._cache))
                del cls._cache[oldest_key]

            cls._cache[cache_key] = compiled
            return compiled

    @classmethod
    def clear_cache(cls) -> None:
        """Clear the pattern cache (useful for testing)."""
        with cls._lock:
            cls._cache.clear()

    @classmethod
    def get_cache_stats(cls) -> dict[str, int | list[str]]:
        """Get cache statistics for monitoring."""
        with cls._lock:
            return {
                "size": len(cls._cache),
                "max_size": cls._max_size,
                "patterns": list(cls._cache.keys()),
            }
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def validate_pattern_safety(pattern: str) -> list[str]:
    """Return heuristic warnings about potentially risky regex constructs.

    Flags repeated greedy wildcards, nested quantifiers, and heavy use of
    alternation. An empty list means nothing suspicious was detected.
    """
    issues: list[str] = []

    # Doubled greedy wildcards (.*.* / .+.+) are a classic source of
    # catastrophic backtracking.
    for construct in (".*", ".+"):
        if construct * 2 in pattern:
            issues.append(
                f"Multiple {construct} constructs may cause performance issues"
            )

    # Two quantifiers in a row (optionally separated by a lazy '?') signal
    # nested quantification, another backtracking hazard.
    nested = re.findall(r"[+*?]\??[+*?]", pattern)
    if nested:
        issues.append(f"Nested quantifiers detected: {nested}")

    # A long alternation chain can degrade match performance.
    # (count > 10 already implies '|' is present, so no membership test needed.)
    if pattern.count("|") > 10:
        issues.append("Many alternations may cause performance issues")

    return issues
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
@dataclass
class ValidatedPattern:
    """A regex pattern that has been tested and validated.

    Each pattern carries its own test cases, which are executed at
    construction time in ``__post_init__``; a pattern that fails to compile,
    uses malformed replacement syntax, or fails any test case cannot be
    instantiated.
    """

    name: str
    pattern: str
    replacement: str
    test_cases: list[tuple[str, str]]  # (input, expected_output)
    description: str = ""
    global_replace: bool = False  # If True, replace all matches
    flags: int = 0  # Regex flags (re.IGNORECASE, re.MULTILINE, etc.)
    _compiled_pattern: Pattern[str] | None = field(default=None, init=False)

    def __post_init__(self) -> None:
        """Validate pattern on creation."""
        self._validate()

    def _validate(self) -> None:
        """Ensure pattern compiles, replacement syntax is sane, and every
        test case produces its expected output.

        Raises:
            ValueError: On a compile failure (reported under this pattern's
                name), bad replacement syntax, or a failing test case.
        """
        try:
            # Use cached compilation for validation
            self._get_compiled_pattern()
        except ValueError as e:
            # Maintain backward compatibility with error message format
            if "Invalid regex pattern" in str(e):
                # Replace the pattern string with the name in the error message
                error_msg = str(e).replace(f"'{self.pattern}'", f"'{self.name}'")
                raise ValueError(error_msg) from e
            raise  # Re-raise other errors

        # Check for forbidden replacement syntax (spaces inside \g<...>)
        if r"\g < " in self.replacement or r" >" in self.replacement:
            raise ValueError(
                f"Bad replacement syntax in '{self.name}': {self.replacement}. "
                "Use \\g<1> not \\g < 1 >"
            )

        # Check for safety warnings
        warnings = validate_pattern_safety(self.pattern)
        if warnings:
            # For now, just store warnings - could log them in the future
            pass

        # Validate all test cases
        for input_text, expected in self.test_cases:
            try:
                count = 0 if self.global_replace else 1
                result = self._apply_internal(input_text, count)
                if result != expected:
                    raise ValueError(
                        f"Pattern '{self.name}' failed test case: "
                        f"'{input_text}' -> '{result}' != expected '{expected}'"
                    )
            except re.error as e:
                # Chain the regex error so the root cause is preserved.
                raise ValueError(
                    f"Pattern '{self.name}' failed on '{input_text}': {e}"
                ) from e

    def _get_compiled_pattern(self) -> Pattern[str]:
        """Get cached compiled pattern with flags."""
        # Create cache key that includes flags so the same pattern source
        # compiled with different flags does not collide in the cache.
        cache_key = f"{self.pattern}|flags:{self.flags}"
        return CompiledPatternCache.get_compiled_pattern_with_flags(
            cache_key, self.pattern, self.flags
        )

    def _apply_internal(self, text: str, count: int = 1) -> str:
        """Internal method for applying pattern with compiled regex.

        Raises:
            ValueError: If *text* exceeds MAX_INPUT_SIZE.
        """
        if len(text) > MAX_INPUT_SIZE:
            raise ValueError(
                f"Input text too large: {len(text)} bytes > {MAX_INPUT_SIZE}"
            )

        return self._get_compiled_pattern().sub(self.replacement, text, count=count)

    def apply(self, text: str) -> str:
        """Apply the validated pattern safely.

        Replaces all matches when ``global_replace`` is set, otherwise only
        the first match.
        """
        count = 0 if self.global_replace else 1
        return self._apply_internal(text, count)

    def apply_iteratively(self, text: str, max_iterations: int = MAX_ITERATIONS) -> str:
        """
        Apply pattern repeatedly until no more changes occur.

        Useful for cases like 'pytest - hypothesis - specialist' -> 'pytest-hypothesis-specialist'
        where multiple passes are needed.

        Raises:
            ValueError: If *max_iterations* is not positive.
        """
        if max_iterations <= 0:
            raise ValueError("max_iterations must be positive")

        result = text
        for _ in range(max_iterations):
            new_result = self.apply(result)
            if new_result == result:
                # No more changes, done
                break
            result = new_result
        else:
            # Reached max iterations without convergence.
            # This might indicate a problematic pattern, but we return the
            # current result rather than failing.
            pass

        return result

    def apply_with_timeout(self, text: str, timeout_seconds: float = 1.0) -> str:
        """Apply pattern with timeout protection.

        Raises:
            TimeoutError: If the replacement does not finish in time.
        """
        import signal

        def timeout_handler(signum: int, frame: t.Any) -> None:
            raise TimeoutError(
                f"Pattern '{self.name}' timed out after {timeout_seconds}s"
            )

        # Note: signal-based timeout only works on Unix and in main thread
        # For broader compatibility, we could use threading.Timer instead
        old_handler = signal.signal(signal.SIGALRM, timeout_handler)
        signal.alarm(int(timeout_seconds))

        try:
            result = self.apply(text)
        finally:
            # Always cancel the alarm and restore the previous handler.
            signal.alarm(0)
            signal.signal(signal.SIGALRM, old_handler)

        return result

    def test(self, text: str) -> bool:
        """Test if pattern matches text without applying replacement."""
        compiled = self._get_compiled_pattern()
        return bool(compiled.search(text))

    def search(self, text: str) -> re.Match[str] | None:
        """Search for the first match and return a Match object or None.

        Raises:
            ValueError: If *text* exceeds MAX_INPUT_SIZE.
        """
        if len(text) > MAX_INPUT_SIZE:
            raise ValueError(
                f"Input text too large: {len(text)} bytes > {MAX_INPUT_SIZE}"
            )
        return self._get_compiled_pattern().search(text)

    def findall(self, text: str) -> list[str]:
        """Find all matches of the pattern in text safely.

        Raises:
            ValueError: If *text* exceeds MAX_INPUT_SIZE.
        """
        if len(text) > MAX_INPUT_SIZE:
            raise ValueError(
                f"Input text too large: {len(text)} bytes > {MAX_INPUT_SIZE}"
            )
        return self._get_compiled_pattern().findall(text)

    def get_performance_stats(
        self, text: str, iterations: int = 100
    ) -> dict[str, float]:
        """Get performance statistics for this pattern on given text.

        Applies the pattern *iterations* times and reports wall-clock timing
        (mean/min/max/total seconds) measured with ``time.perf_counter``.
        """
        times = []

        for _ in range(iterations):
            start = time.perf_counter()
            self.apply(text)
            end = time.perf_counter()
            times.append(end - start)

        return {
            "mean_time": sum(times) / len(times),
            "min_time": min(times),
            "max_time": max(times),
            "total_time": sum(times),
        }
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
# All validated patterns - ADD NEW PATTERNS HERE WITH TESTS
|
|
270
|
+
SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
271
|
+
"fix_command_spacing": ValidatedPattern(
|
|
272
|
+
name="fix_command_spacing",
|
|
273
|
+
pattern=r"python\s*-\s*m\s+(\w+)",
|
|
274
|
+
replacement=r"python -m \1",
|
|
275
|
+
description="Fix spacing in 'python -m command' patterns",
|
|
276
|
+
test_cases=[
|
|
277
|
+
("python - m crackerjack", "python -m crackerjack"),
|
|
278
|
+
("python -m crackerjack", "python -m crackerjack"), # No change
|
|
279
|
+
("python - m pytest", "python -m pytest"),
|
|
280
|
+
("other python - m stuff", "other python -m stuff"),
|
|
281
|
+
],
|
|
282
|
+
),
|
|
283
|
+
"fix_long_flag_spacing": ValidatedPattern(
|
|
284
|
+
name="fix_long_flag_spacing",
|
|
285
|
+
pattern=r"-\s*-\s*(\w+(?:-\w+)*)",
|
|
286
|
+
replacement=r"--\1",
|
|
287
|
+
description="Fix spacing in long flags like '--help'",
|
|
288
|
+
test_cases=[
|
|
289
|
+
("- - help", "--help"),
|
|
290
|
+
("- - ai-agent", "--ai-agent"),
|
|
291
|
+
("--help", "--help"), # No change
|
|
292
|
+
("- - start-websocket-server", "--start-websocket-server"),
|
|
293
|
+
],
|
|
294
|
+
),
|
|
295
|
+
"fix_short_flag_spacing": ValidatedPattern(
|
|
296
|
+
name="fix_short_flag_spacing",
|
|
297
|
+
pattern=r"(?<!\w)-\s+(\w)(?!\w)",
|
|
298
|
+
replacement=r"-\1",
|
|
299
|
+
description="Fix spacing in short flags like '-t'",
|
|
300
|
+
test_cases=[
|
|
301
|
+
("python -m crackerjack - t", "python -m crackerjack -t"),
|
|
302
|
+
("- q", "-q"),
|
|
303
|
+
("-t", "-t"), # No change
|
|
304
|
+
("some - x flag", "some -x flag"),
|
|
305
|
+
],
|
|
306
|
+
),
|
|
307
|
+
"fix_hyphenated_names": ValidatedPattern(
|
|
308
|
+
name="fix_hyphenated_names",
|
|
309
|
+
pattern=r"(\w+)\s*-\s*(\w+)",
|
|
310
|
+
replacement=r"\1-\2",
|
|
311
|
+
description="Fix spacing in hyphenated names and identifiers",
|
|
312
|
+
test_cases=[
|
|
313
|
+
("python - pro", "python-pro"),
|
|
314
|
+
(
|
|
315
|
+
"pytest - hypothesis - specialist",
|
|
316
|
+
"pytest-hypothesis - specialist",
|
|
317
|
+
), # Only fixes first
|
|
318
|
+
("backend - architect", "backend-architect"),
|
|
319
|
+
("python-pro", "python-pro"), # No change
|
|
320
|
+
("end - of - file-fixer", "end-of - file-fixer"), # Only fixes first
|
|
321
|
+
],
|
|
322
|
+
),
|
|
323
|
+
"fix_hyphenated_names_global": ValidatedPattern(
|
|
324
|
+
name="fix_hyphenated_names_global",
|
|
325
|
+
pattern=r"(\w+)\s+-\s+(\w+)",
|
|
326
|
+
replacement=r"\1-\2",
|
|
327
|
+
description="Globally fix spacing in hyphenated names (single pass only)",
|
|
328
|
+
global_replace=True,
|
|
329
|
+
test_cases=[
|
|
330
|
+
("python - pro", "python-pro"),
|
|
331
|
+
("end - of - file", "end-of - file"), # Single pass: only first match
|
|
332
|
+
("already-hyphenated", "already-hyphenated"), # No change
|
|
333
|
+
("start - middle - end", "start-middle - end"), # Single pass
|
|
334
|
+
],
|
|
335
|
+
),
|
|
336
|
+
"fix_spaced_hyphens": ValidatedPattern(
|
|
337
|
+
name="fix_spaced_hyphens",
|
|
338
|
+
pattern=r"(\w+)\s+-\s+(\w+)",
|
|
339
|
+
replacement=r"\1-\2",
|
|
340
|
+
description="Fix spaced hyphens with spaces around dashes (use apply_iteratively for multi-word)",
|
|
341
|
+
global_replace=True, # Apply to all matches in one pass
|
|
342
|
+
test_cases=[
|
|
343
|
+
("python - pro", "python-pro"),
|
|
344
|
+
(
|
|
345
|
+
"pytest - hypothesis - specialist",
|
|
346
|
+
"pytest-hypothesis - specialist",
|
|
347
|
+
), # Single pass: only first match
|
|
348
|
+
(
|
|
349
|
+
"end - of - file - fixer",
|
|
350
|
+
"end-of - file-fixer",
|
|
351
|
+
), # Global finds: "end-of" and "file-fixer"
|
|
352
|
+
("already-hyphenated", "already-hyphenated"), # No change
|
|
353
|
+
("mixed-case with - spaces", "mixed-case with-spaces"), # Partial fix
|
|
354
|
+
],
|
|
355
|
+
),
|
|
356
|
+
"fix_debug_log_pattern": ValidatedPattern(
|
|
357
|
+
name="fix_debug_log_pattern",
|
|
358
|
+
pattern=r"crackerjack\s*-\s*debug",
|
|
359
|
+
replacement="crackerjack-debug",
|
|
360
|
+
description="Fix spacing in debug log patterns",
|
|
361
|
+
test_cases=[
|
|
362
|
+
("crackerjack - debug-12345.log", "crackerjack-debug-12345.log"),
|
|
363
|
+
("crackerjack-debug.log", "crackerjack-debug.log"), # No change
|
|
364
|
+
("old crackerjack - debug files", "old crackerjack-debug files"),
|
|
365
|
+
],
|
|
366
|
+
),
|
|
367
|
+
"fix_job_file_pattern": ValidatedPattern(
|
|
368
|
+
name="fix_job_file_pattern",
|
|
369
|
+
pattern=r"job\s*-\s*(\{[^}]+\}|\w+)",
|
|
370
|
+
replacement=r"job-\1",
|
|
371
|
+
description="Fix spacing in job file patterns",
|
|
372
|
+
test_cases=[
|
|
373
|
+
("job - {self.web_job_id}.json", "job-{self.web_job_id}.json"),
|
|
374
|
+
("job - abc123.json", "job-abc123.json"),
|
|
375
|
+
("job-existing.json", "job-existing.json"), # No change
|
|
376
|
+
],
|
|
377
|
+
),
|
|
378
|
+
"fix_markdown_bold": ValidatedPattern(
|
|
379
|
+
name="fix_markdown_bold",
|
|
380
|
+
pattern=r"\*\s+\*(.+?)\s*\*\s+\*",
|
|
381
|
+
replacement=r"**\1**",
|
|
382
|
+
description="Fix spacing in markdown bold patterns",
|
|
383
|
+
test_cases=[
|
|
384
|
+
("* *Bold Text * *", "**Bold Text**"),
|
|
385
|
+
("* *🧪 pytest-specialist * *", "**🧪 pytest-specialist**"),
|
|
386
|
+
("**Already Bold**", "**Already Bold**"), # No change
|
|
387
|
+
],
|
|
388
|
+
),
|
|
389
|
+
# Security token masking patterns
|
|
390
|
+
"mask_pypi_token": ValidatedPattern(
|
|
391
|
+
name="mask_pypi_token",
|
|
392
|
+
pattern=r"\bpypi-[a-zA-Z0-9_-]{12,}\b",
|
|
393
|
+
replacement="pypi-****",
|
|
394
|
+
description="Mask PyPI authentication tokens (word boundaries to prevent"
|
|
395
|
+
" false matches)",
|
|
396
|
+
global_replace=True,
|
|
397
|
+
test_cases=[
|
|
398
|
+
("pypi-AgEIcHlwaS5vcmcCJGE4M2Y3ZjI", "pypi-****"),
|
|
399
|
+
(
|
|
400
|
+
"Using token: pypi-AgEIcHlwaS5vcmcCJGE4M2Y3ZjI for upload",
|
|
401
|
+
"Using token: pypi-**** for upload",
|
|
402
|
+
),
|
|
403
|
+
("pypi-short", "pypi-short"), # Too short, no change
|
|
404
|
+
(
|
|
405
|
+
"not pypi-AgEIcHlwaS5vcmcCJGE4M2Y3ZjI",
|
|
406
|
+
"not pypi-****",
|
|
407
|
+
), # Space-separated, should match pypi token
|
|
408
|
+
(
|
|
409
|
+
"Multiple pypi-token1234567890 and pypi-anothertokenhere",
|
|
410
|
+
"Multiple pypi-**** and pypi-****",
|
|
411
|
+
),
|
|
412
|
+
],
|
|
413
|
+
),
|
|
414
|
+
"mask_github_token": ValidatedPattern(
|
|
415
|
+
name="mask_github_token",
|
|
416
|
+
pattern=r"\bghp_[a-zA-Z0-9]{36}\b",
|
|
417
|
+
replacement="ghp_****",
|
|
418
|
+
description="Mask GitHub personal access tokens (exactly 40 chars total"
|
|
419
|
+
" with word boundaries)",
|
|
420
|
+
global_replace=True,
|
|
421
|
+
test_cases=[
|
|
422
|
+
("ghp_1234567890abcdef1234567890abcdef1234", "ghp_****"),
|
|
423
|
+
(
|
|
424
|
+
"GITHUB_TOKEN=ghp_1234567890abcdef1234567890abcdef1234",
|
|
425
|
+
"GITHUB_TOKEN=ghp_****",
|
|
426
|
+
),
|
|
427
|
+
("ghp_short", "ghp_short"), # Too short, no change
|
|
428
|
+
(
|
|
429
|
+
"ghp_1234567890abcdef1234567890abcdef12345",
|
|
430
|
+
"ghp_1234567890abcdef1234567890abcdef12345",
|
|
431
|
+
), # Too long, no match due to word boundary
|
|
432
|
+
(
|
|
433
|
+
"Multiple ghp_1234567890abcdef1234567890abcdef1234 and"
|
|
434
|
+
" ghp_abcdef1234567890abcdef12345678901234",
|
|
435
|
+
"Multiple ghp_**** and ghp_****",
|
|
436
|
+
),
|
|
437
|
+
],
|
|
438
|
+
),
|
|
439
|
+
"mask_generic_long_token": ValidatedPattern(
|
|
440
|
+
name="mask_generic_long_token",
|
|
441
|
+
pattern=r"\b[a-zA-Z0-9_-]{32,}\b",
|
|
442
|
+
replacement="****",
|
|
443
|
+
description="Mask generic long tokens (32+ chars, word boundaries to avoid"
|
|
444
|
+
" false positives)",
|
|
445
|
+
global_replace=True,
|
|
446
|
+
test_cases=[
|
|
447
|
+
("secret_key=abcdef1234567890abcdef1234567890abcdef", "secret_key=****"),
|
|
448
|
+
(
|
|
449
|
+
"Short token abc123def456",
|
|
450
|
+
"Short token abc123def456",
|
|
451
|
+
), # Too short, no change
|
|
452
|
+
(
|
|
453
|
+
"File path "
|
|
454
|
+
"/very/long/path/that/should/not/be/masked/even/though/its/long",
|
|
455
|
+
"File path "
|
|
456
|
+
"/very/long/path/that/should/not/be/masked/even/though/its/long",
|
|
457
|
+
), # Contains slashes
|
|
458
|
+
("API_KEY=verylongapikeyhere1234567890123456", "API_KEY=****"),
|
|
459
|
+
(
|
|
460
|
+
"Long-token_with-underscores_123456789012345678",
|
|
461
|
+
"****",
|
|
462
|
+
), # Entire string matches as one long token
|
|
463
|
+
],
|
|
464
|
+
),
|
|
465
|
+
"mask_token_assignment": ValidatedPattern(
|
|
466
|
+
name="mask_token_assignment",
|
|
467
|
+
pattern=r"(?i)\b(token\s*[=:]\s*)['\"]([^'\"]{8,})['\"]",
|
|
468
|
+
replacement=r"\1'****'",
|
|
469
|
+
description="Mask token assignments in various formats (case insensitive)",
|
|
470
|
+
global_replace=True,
|
|
471
|
+
test_cases=[
|
|
472
|
+
('token="abc123def456789"', "token='****'"),
|
|
473
|
+
("token='long_secret_token_here'", "token='****'"),
|
|
474
|
+
('token: "another_secret_token"', "token: '****'"),
|
|
475
|
+
("token = 'spaced_assignment_token'", "token = '****'"),
|
|
476
|
+
('token="short"', 'token="short"'), # Too short, no change
|
|
477
|
+
(
|
|
478
|
+
"not_token='should_not_be_masked'",
|
|
479
|
+
"not_token='should_not_be_masked'",
|
|
480
|
+
), # Wrong key
|
|
481
|
+
('TOKEN="UPPERCASE_TOKEN_HERE"', "TOKEN='****'"), # Case insensitive
|
|
482
|
+
],
|
|
483
|
+
),
|
|
484
|
+
"mask_password_assignment": ValidatedPattern(
|
|
485
|
+
name="mask_password_assignment",
|
|
486
|
+
pattern=r"(?i)\b(password\s*[=:]\s*)['\"]([^'\"]{8,})['\"]",
|
|
487
|
+
replacement=r"\1'****'",
|
|
488
|
+
description="Mask password assignments in various formats (case insensitive)",
|
|
489
|
+
global_replace=True,
|
|
490
|
+
test_cases=[
|
|
491
|
+
('password="secret123456"', "password='****'"),
|
|
492
|
+
("password='my_long_password'", "password='****'"),
|
|
493
|
+
('password: "another_secret_password"', "password: '****'"),
|
|
494
|
+
("password = 'spaced_password_assignment'", "password = '****'"),
|
|
495
|
+
('password="short"', 'password="short"'), # Too short, no change
|
|
496
|
+
(
|
|
497
|
+
"not_password='should_not_be_masked'",
|
|
498
|
+
"not_password='should_not_be_masked'",
|
|
499
|
+
), # Wrong key
|
|
500
|
+
('PASSWORD="UPPERCASE_PASSWORD"', "PASSWORD='****'"), # Case insensitive
|
|
501
|
+
],
|
|
502
|
+
),
|
|
503
|
+
# Version management patterns
|
|
504
|
+
"update_pyproject_version": ValidatedPattern(
|
|
505
|
+
name="update_pyproject_version",
|
|
506
|
+
pattern=r'^(version\s*=\s*["\'])([^"\']+)(["\'])$',
|
|
507
|
+
replacement=r"\g<1>NEW_VERSION\g<3>",
|
|
508
|
+
description="Update version in pyproject.toml files (NEW_VERSION placeholder"
|
|
509
|
+
" replaced dynamically)",
|
|
510
|
+
test_cases=[
|
|
511
|
+
('version = "1.2.3"', 'version = "NEW_VERSION"'),
|
|
512
|
+
("version='0.1.0'", "version='NEW_VERSION'"),
|
|
513
|
+
('version="1.0.0-beta"', 'version="NEW_VERSION"'),
|
|
514
|
+
("version = '2.1.0'", "version = 'NEW_VERSION'"),
|
|
515
|
+
("version='10.20.30'", "version='NEW_VERSION'"),
|
|
516
|
+
# Should not match non-version lines
|
|
517
|
+
('name = "my-package"', 'name = "my-package"'), # No change
|
|
518
|
+
],
|
|
519
|
+
),
|
|
520
|
+
# Formatting agent patterns
|
|
521
|
+
"remove_trailing_whitespace": ValidatedPattern(
|
|
522
|
+
name="remove_trailing_whitespace",
|
|
523
|
+
pattern=r"[ \t]+$",
|
|
524
|
+
replacement="",
|
|
525
|
+
description="Remove trailing whitespace from lines",
|
|
526
|
+
global_replace=True,
|
|
527
|
+
test_cases=[
|
|
528
|
+
("line with spaces ", "line with spaces"),
|
|
529
|
+
("line with tabs\t\t", "line with tabs"),
|
|
530
|
+
("normal line", "normal line"), # No change
|
|
531
|
+
("mixed \t ", "mixed"),
|
|
532
|
+
("", ""), # Empty lines
|
|
533
|
+
],
|
|
534
|
+
),
|
|
535
|
+
"normalize_multiple_newlines": ValidatedPattern(
|
|
536
|
+
name="normalize_multiple_newlines",
|
|
537
|
+
pattern=r"\n{3,}",
|
|
538
|
+
replacement="\n\n",
|
|
539
|
+
description="Normalize multiple consecutive newlines to maximum 2",
|
|
540
|
+
global_replace=True,
|
|
541
|
+
test_cases=[
|
|
542
|
+
("line1\n\n\nline2", "line1\n\nline2"),
|
|
543
|
+
("line1\n\n\n\n\nline2", "line1\n\nline2"),
|
|
544
|
+
("line1\n\nline2", "line1\n\nline2"), # No change
|
|
545
|
+
("line1\nline2", "line1\nline2"), # No change
|
|
546
|
+
],
|
|
547
|
+
),
|
|
548
|
+
# Security agent patterns - subprocess fixes
|
|
549
|
+
"fix_subprocess_run_shell": ValidatedPattern(
|
|
550
|
+
name="fix_subprocess_run_shell",
|
|
551
|
+
pattern=r"subprocess\.run\(([^,]+),\s*shell=True\)",
|
|
552
|
+
replacement=r"subprocess.run(\1.split())",
|
|
553
|
+
description="Remove shell=True from subprocess.run calls",
|
|
554
|
+
global_replace=True,
|
|
555
|
+
test_cases=[
|
|
556
|
+
("subprocess.run(cmd, shell=True)", "subprocess.run(cmd.split())"),
|
|
557
|
+
(
|
|
558
|
+
"subprocess.run('ls -la', shell=True)",
|
|
559
|
+
"subprocess.run('ls -la'.split())",
|
|
560
|
+
),
|
|
561
|
+
(
|
|
562
|
+
"subprocess.run(command, shell=False)",
|
|
563
|
+
"subprocess.run(command, shell=False)",
|
|
564
|
+
), # No change
|
|
565
|
+
],
|
|
566
|
+
),
|
|
567
|
+
"fix_subprocess_call_shell": ValidatedPattern(
|
|
568
|
+
name="fix_subprocess_call_shell",
|
|
569
|
+
pattern=r"subprocess\.call\(([^,]+),\s*shell=True\)",
|
|
570
|
+
replacement=r"subprocess.call(\1.split())",
|
|
571
|
+
description="Remove shell=True from subprocess.call calls",
|
|
572
|
+
global_replace=True,
|
|
573
|
+
test_cases=[
|
|
574
|
+
("subprocess.call(cmd, shell=True)", "subprocess.call(cmd.split())"),
|
|
575
|
+
(
|
|
576
|
+
"subprocess.call('ls -la', shell=True)",
|
|
577
|
+
"subprocess.call('ls -la'.split())",
|
|
578
|
+
),
|
|
579
|
+
(
|
|
580
|
+
"subprocess.call(command, shell=False)",
|
|
581
|
+
"subprocess.call(command, shell=False)",
|
|
582
|
+
), # No change
|
|
583
|
+
],
|
|
584
|
+
),
|
|
585
|
+
"fix_subprocess_popen_shell": ValidatedPattern(
|
|
586
|
+
name="fix_subprocess_popen_shell",
|
|
587
|
+
pattern=r"subprocess\.Popen\(([^,]+),\s*shell=True\)",
|
|
588
|
+
replacement=r"subprocess.Popen(\1.split())",
|
|
589
|
+
description="Remove shell=True from subprocess.Popen calls",
|
|
590
|
+
global_replace=True,
|
|
591
|
+
test_cases=[
|
|
592
|
+
("subprocess.Popen(cmd, shell=True)", "subprocess.Popen(cmd.split())"),
|
|
593
|
+
(
|
|
594
|
+
"subprocess.Popen('ls -la', shell=True)",
|
|
595
|
+
"subprocess.Popen('ls -la'.split())",
|
|
596
|
+
),
|
|
597
|
+
(
|
|
598
|
+
"subprocess.Popen(command, shell=False)",
|
|
599
|
+
"subprocess.Popen(command, shell=False)",
|
|
600
|
+
), # No change
|
|
601
|
+
],
|
|
602
|
+
),
|
|
603
|
+
# Security agent patterns - unsafe library usage
|
|
604
|
+
"fix_unsafe_yaml_load": ValidatedPattern(
|
|
605
|
+
name="fix_unsafe_yaml_load",
|
|
606
|
+
pattern=r"\byaml\.load\(",
|
|
607
|
+
replacement="yaml.safe_load(",
|
|
608
|
+
description="Replace unsafe yaml.load with yaml.safe_load",
|
|
609
|
+
global_replace=True,
|
|
610
|
+
test_cases=[
|
|
611
|
+
("yaml.load(file)", "yaml.safe_load(file)"),
|
|
612
|
+
("data = yaml.load(content)", "data = yaml.safe_load(content)"),
|
|
613
|
+
("yaml.safe_load(content)", "yaml.safe_load(content)"), # No change
|
|
614
|
+
(
|
|
615
|
+
"my_yaml.load(content)",
|
|
616
|
+
"my_yaml.load(content)",
|
|
617
|
+
), # No change (not yaml module)
|
|
618
|
+
],
|
|
619
|
+
),
|
|
620
|
+
"fix_weak_md5_hash": ValidatedPattern(
|
|
621
|
+
name="fix_weak_md5_hash",
|
|
622
|
+
pattern=r"\bhashlib\.md5\(",
|
|
623
|
+
replacement="hashlib.sha256(",
|
|
624
|
+
description="Replace weak MD5 hashing with SHA256",
|
|
625
|
+
global_replace=True,
|
|
626
|
+
test_cases=[
|
|
627
|
+
("hashlib.md5(data)", "hashlib.sha256(data)"),
|
|
628
|
+
("hash = hashlib.md5(content)", "hash = hashlib.sha256(content)"),
|
|
629
|
+
("hashlib.sha256(data)", "hashlib.sha256(data)"), # No change
|
|
630
|
+
],
|
|
631
|
+
),
|
|
632
|
+
"fix_weak_sha1_hash": ValidatedPattern(
|
|
633
|
+
name="fix_weak_sha1_hash",
|
|
634
|
+
pattern=r"\bhashlib\.sha1\(",
|
|
635
|
+
replacement="hashlib.sha256(",
|
|
636
|
+
description="Replace weak SHA1 hashing with SHA256",
|
|
637
|
+
global_replace=True,
|
|
638
|
+
test_cases=[
|
|
639
|
+
("hashlib.sha1(data)", "hashlib.sha256(data)"),
|
|
640
|
+
("hash = hashlib.sha1(content)", "hash = hashlib.sha256(content)"),
|
|
641
|
+
("hashlib.sha256(data)", "hashlib.sha256(data)"), # No change
|
|
642
|
+
],
|
|
643
|
+
),
|
|
644
|
+
"fix_insecure_random_choice": ValidatedPattern(
|
|
645
|
+
name="fix_insecure_random_choice",
|
|
646
|
+
pattern=r"random\.choice\(([^)]+)\)",
|
|
647
|
+
replacement=r"secrets.choice(\1)",
|
|
648
|
+
description="Replace insecure random.choice with secrets.choice",
|
|
649
|
+
global_replace=True,
|
|
650
|
+
test_cases=[
|
|
651
|
+
("random.choice(options)", "secrets.choice(options)"),
|
|
652
|
+
("item = random.choice(items)", "item = secrets.choice(items)"),
|
|
653
|
+
("secrets.choice(options)", "secrets.choice(options)"), # No change
|
|
654
|
+
],
|
|
655
|
+
),
|
|
656
|
+
"remove_debug_prints_with_secrets": ValidatedPattern(
|
|
657
|
+
name="remove_debug_prints_with_secrets",
|
|
658
|
+
pattern=r"print\s*\([^)]*(?:password|secret|key|token)[^)]*\)",
|
|
659
|
+
replacement="",
|
|
660
|
+
description="Remove debug print statements that contain sensitive information",
|
|
661
|
+
global_replace=True,
|
|
662
|
+
test_cases=[
|
|
663
|
+
('print("password:", password)', ""),
|
|
664
|
+
("print(f'Token: {token}')", ""),
|
|
665
|
+
("print('Debug secret value')", ""),
|
|
666
|
+
(
|
|
667
|
+
"print('Normal debug message')",
|
|
668
|
+
"print('Normal debug message')",
|
|
669
|
+
), # No change
|
|
670
|
+
('print("API key is", key)', ""),
|
|
671
|
+
],
|
|
672
|
+
),
|
|
673
|
+
# Test specialist agent patterns
|
|
674
|
+
"normalize_assert_statements": ValidatedPattern(
|
|
675
|
+
name="normalize_assert_statements",
|
|
676
|
+
pattern=r"assert (.+?)\s*==\s*(.+)",
|
|
677
|
+
replacement=r"assert \1 == \2",
|
|
678
|
+
description="Normalize spacing around == in assert statements",
|
|
679
|
+
global_replace=True,
|
|
680
|
+
test_cases=[
|
|
681
|
+
("assert result==expected", "assert result == expected"),
|
|
682
|
+
("assert value == other", "assert value == other"),
|
|
683
|
+
("assert result== expected", "assert result == expected"),
|
|
684
|
+
("assert result ==expected", "assert result == expected"),
|
|
685
|
+
(
|
|
686
|
+
"assert result == expected",
|
|
687
|
+
"assert result == expected",
|
|
688
|
+
), # No change (already spaced)
|
|
689
|
+
],
|
|
690
|
+
),
|
|
691
|
+
# Job ID validation patterns
|
|
692
|
+
"validate_job_id_alphanumeric": ValidatedPattern(
|
|
693
|
+
name="validate_job_id_alphanumeric",
|
|
694
|
+
pattern=r"^[a-zA-Z0-9_-]+$",
|
|
695
|
+
replacement="VALID", # Dummy replacement for validation patterns
|
|
696
|
+
description="Validate job ID contains only alphanumeric characters, "
|
|
697
|
+
"underscores, and hyphens",
|
|
698
|
+
test_cases=[
|
|
699
|
+
# For validation patterns, we test against strings that SHOULD match
|
|
700
|
+
("valid_job-123", "VALID"), # Valid ID
|
|
701
|
+
("another_valid-job_456", "VALID"), # Valid ID
|
|
702
|
+
("job_123", "VALID"), # Valid ID
|
|
703
|
+
],
|
|
704
|
+
),
|
|
705
|
+
# Service configuration patterns
|
|
706
|
+
"remove_coverage_fail_under": ValidatedPattern(
|
|
707
|
+
name="remove_coverage_fail_under",
|
|
708
|
+
pattern=r"--cov-fail-under=\d+\.?\d*\s*",
|
|
709
|
+
replacement="",
|
|
710
|
+
description="Remove coverage fail-under flags from pytest addopts",
|
|
711
|
+
global_replace=True,
|
|
712
|
+
test_cases=[
|
|
713
|
+
("--cov-fail-under=85 --verbose", "--verbose"),
|
|
714
|
+
("--cov-fail-under=90.5 -x", "-x"),
|
|
715
|
+
("--verbose --cov-fail-under=80 ", "--verbose "),
|
|
716
|
+
("--no-cov", "--no-cov"), # No change
|
|
717
|
+
],
|
|
718
|
+
),
|
|
719
|
+
"update_coverage_requirement": ValidatedPattern(
|
|
720
|
+
name="update_coverage_requirement",
|
|
721
|
+
pattern=r"(--cov-fail-under=)\d+\.?\d*",
|
|
722
|
+
replacement=r"\1NEW_COVERAGE",
|
|
723
|
+
description="Update coverage fail-under requirement (NEW_COVERAGE placeholder"
|
|
724
|
+
" replaced dynamically)",
|
|
725
|
+
test_cases=[
|
|
726
|
+
("--cov-fail-under=85", "--cov-fail-under=NEW_COVERAGE"),
|
|
727
|
+
("--cov-fail-under=90.5", "--cov-fail-under=NEW_COVERAGE"),
|
|
728
|
+
("--verbose", "--verbose"), # No change
|
|
729
|
+
],
|
|
730
|
+
),
|
|
731
|
+
# Path security validation patterns - designed for testing existence, not
|
|
732
|
+
# replacement
|
|
733
|
+
"detect_directory_traversal_basic": ValidatedPattern(
|
|
734
|
+
name="detect_directory_traversal_basic",
|
|
735
|
+
pattern=r"\.\./",
|
|
736
|
+
replacement="[TRAVERSAL]",
|
|
737
|
+
description="Detect basic directory traversal patterns (../)",
|
|
738
|
+
global_replace=True,
|
|
739
|
+
test_cases=[
|
|
740
|
+
("../config.txt", "[TRAVERSAL]config.txt"),
|
|
741
|
+
("normal/path", "normal/path"), # No change
|
|
742
|
+
("../../etc/passwd", "[TRAVERSAL][TRAVERSAL]etc/passwd"),
|
|
743
|
+
],
|
|
744
|
+
),
|
|
745
|
+
"detect_directory_traversal_backslash": ValidatedPattern(
|
|
746
|
+
name="detect_directory_traversal_backslash",
|
|
747
|
+
pattern=r"\.\.[/\\]",
|
|
748
|
+
replacement="[TRAVERSAL]",
|
|
749
|
+
description="Detect directory traversal with forward/back slashes",
|
|
750
|
+
global_replace=True,
|
|
751
|
+
test_cases=[
|
|
752
|
+
("..\\config.txt", "[TRAVERSAL]config.txt"),
|
|
753
|
+
("../config.txt", "[TRAVERSAL]config.txt"),
|
|
754
|
+
("normal/path", "normal/path"), # No change
|
|
755
|
+
],
|
|
756
|
+
),
|
|
757
|
+
"detect_url_encoded_traversal": ValidatedPattern(
|
|
758
|
+
name="detect_url_encoded_traversal",
|
|
759
|
+
pattern=r"%2e%2e%2f",
|
|
760
|
+
replacement="[TRAVERSAL]",
|
|
761
|
+
description="Detect URL encoded directory traversal (%2e%2e%2f = ../)",
|
|
762
|
+
global_replace=True,
|
|
763
|
+
test_cases=[
|
|
764
|
+
("path/%2e%2e%2f/config", "path/[TRAVERSAL]/config"),
|
|
765
|
+
("normal/path", "normal/path"), # No change
|
|
766
|
+
("%2e%2e%2fpasswd", "[TRAVERSAL]passwd"),
|
|
767
|
+
],
|
|
768
|
+
),
|
|
769
|
+
"detect_double_url_encoded_traversal": ValidatedPattern(
|
|
770
|
+
name="detect_double_url_encoded_traversal",
|
|
771
|
+
pattern=r"%252e%252e%252f",
|
|
772
|
+
replacement="[TRAVERSAL]",
|
|
773
|
+
description="Detect double URL encoded directory traversal",
|
|
774
|
+
global_replace=True,
|
|
775
|
+
test_cases=[
|
|
776
|
+
("path/%252e%252e%252f/config", "path/[TRAVERSAL]/config"),
|
|
777
|
+
("normal/path", "normal/path"), # No change
|
|
778
|
+
],
|
|
779
|
+
),
|
|
780
|
+
"detect_null_bytes_url": ValidatedPattern(
|
|
781
|
+
name="detect_null_bytes_url",
|
|
782
|
+
pattern=r"%00",
|
|
783
|
+
replacement="[NULL]",
|
|
784
|
+
description="Detect URL encoded null bytes",
|
|
785
|
+
global_replace=True,
|
|
786
|
+
test_cases=[
|
|
787
|
+
("file.txt%00.jpg", "file.txt[NULL].jpg"),
|
|
788
|
+
("normal.txt", "normal.txt"), # No change
|
|
789
|
+
],
|
|
790
|
+
),
|
|
791
|
+
"detect_null_bytes_literal": ValidatedPattern(
|
|
792
|
+
name="detect_null_bytes_literal",
|
|
793
|
+
pattern=r"\\x00",
|
|
794
|
+
replacement="[NULL]",
|
|
795
|
+
description="Detect literal null byte patterns",
|
|
796
|
+
global_replace=True,
|
|
797
|
+
test_cases=[
|
|
798
|
+
("file.txt\\x00", "file.txt[NULL]"),
|
|
799
|
+
("normal.txt", "normal.txt"), # No change
|
|
800
|
+
],
|
|
801
|
+
),
|
|
802
|
+
"detect_utf8_overlong_null": ValidatedPattern(
|
|
803
|
+
name="detect_utf8_overlong_null",
|
|
804
|
+
pattern=r"%c0%80",
|
|
805
|
+
replacement="[NULL]",
|
|
806
|
+
description="Detect UTF-8 overlong null byte encoding",
|
|
807
|
+
global_replace=True,
|
|
808
|
+
test_cases=[
|
|
809
|
+
("file.txt%c0%80", "file.txt[NULL]"),
|
|
810
|
+
("normal.txt", "normal.txt"), # No change
|
|
811
|
+
],
|
|
812
|
+
),
|
|
813
|
+
"detect_sys_directory_pattern": ValidatedPattern(
|
|
814
|
+
name="detect_sys_directory_pattern",
|
|
815
|
+
pattern=r"^/sys/?.*",
|
|
816
|
+
replacement="[DANGER]",
|
|
817
|
+
description="Detect access to /sys directory",
|
|
818
|
+
test_cases=[
|
|
819
|
+
("/sys/", "[DANGER]"),
|
|
820
|
+
("/sys/devices", "[DANGER]"),
|
|
821
|
+
("/usr/sys", "/usr/sys"), # No change
|
|
822
|
+
],
|
|
823
|
+
),
|
|
824
|
+
"detect_proc_directory_pattern": ValidatedPattern(
|
|
825
|
+
name="detect_proc_directory_pattern",
|
|
826
|
+
pattern=r"^/proc/?.*",
|
|
827
|
+
replacement="[DANGER]",
|
|
828
|
+
description="Detect access to /proc directory",
|
|
829
|
+
test_cases=[
|
|
830
|
+
("/proc/", "[DANGER]"),
|
|
831
|
+
("/proc/self", "[DANGER]"),
|
|
832
|
+
("/usr/proc", "/usr/proc"), # No change
|
|
833
|
+
],
|
|
834
|
+
),
|
|
835
|
+
"detect_etc_directory_pattern": ValidatedPattern(
|
|
836
|
+
name="detect_etc_directory_pattern",
|
|
837
|
+
pattern=r"^/etc/?.*",
|
|
838
|
+
replacement="[DANGER]",
|
|
839
|
+
description="Detect access to /etc directory",
|
|
840
|
+
test_cases=[
|
|
841
|
+
("/etc/", "[DANGER]"),
|
|
842
|
+
("/etc/passwd", "[DANGER]"),
|
|
843
|
+
("/usr/etc", "/usr/etc"), # No change
|
|
844
|
+
],
|
|
845
|
+
),
|
|
846
|
+
"detect_boot_directory_pattern": ValidatedPattern(
|
|
847
|
+
name="detect_boot_directory_pattern",
|
|
848
|
+
pattern=r"^/boot/?.*",
|
|
849
|
+
replacement="[DANGER]",
|
|
850
|
+
description="Detect access to /boot directory",
|
|
851
|
+
test_cases=[
|
|
852
|
+
("/boot/", "[DANGER]"),
|
|
853
|
+
("/boot/grub", "[DANGER]"),
|
|
854
|
+
("/usr/boot", "/usr/boot"), # No change
|
|
855
|
+
],
|
|
856
|
+
),
|
|
857
|
+
"detect_dev_directory_pattern": ValidatedPattern(
|
|
858
|
+
name="detect_dev_directory_pattern",
|
|
859
|
+
pattern=r"^/dev/?.*",
|
|
860
|
+
replacement="[DANGER]",
|
|
861
|
+
description="Detect access to /dev directory",
|
|
862
|
+
test_cases=[
|
|
863
|
+
("/dev/", "[DANGER]"),
|
|
864
|
+
("/dev/null", "[DANGER]"),
|
|
865
|
+
("/usr/dev", "/usr/dev"), # No change
|
|
866
|
+
],
|
|
867
|
+
),
|
|
868
|
+
"detect_root_directory_pattern": ValidatedPattern(
|
|
869
|
+
name="detect_root_directory_pattern",
|
|
870
|
+
pattern=r"^/root/?.*",
|
|
871
|
+
replacement="[DANGER]",
|
|
872
|
+
description="Detect access to /root directory",
|
|
873
|
+
test_cases=[
|
|
874
|
+
("/root/", "[DANGER]"),
|
|
875
|
+
("/root/.ssh", "[DANGER]"),
|
|
876
|
+
("/usr/root", "/usr/root"), # No change
|
|
877
|
+
],
|
|
878
|
+
),
|
|
879
|
+
"detect_var_log_directory_pattern": ValidatedPattern(
|
|
880
|
+
name="detect_var_log_directory_pattern",
|
|
881
|
+
pattern=r"^/var/log/?.*",
|
|
882
|
+
replacement="[DANGER]",
|
|
883
|
+
description="Detect access to /var/log directory",
|
|
884
|
+
test_cases=[
|
|
885
|
+
("/var/log/", "[DANGER]"),
|
|
886
|
+
("/var/log/messages", "[DANGER]"),
|
|
887
|
+
("/usr/var/log", "/usr/var/log"), # No change
|
|
888
|
+
],
|
|
889
|
+
),
|
|
890
|
+
"detect_bin_directory_pattern": ValidatedPattern(
|
|
891
|
+
name="detect_bin_directory_pattern",
|
|
892
|
+
pattern=r"^/(usr/)?bin/?.*",
|
|
893
|
+
replacement="[DANGER]",
|
|
894
|
+
description="Detect access to /bin or /usr/bin directories",
|
|
895
|
+
test_cases=[
|
|
896
|
+
("/bin/", "[DANGER]"),
|
|
897
|
+
("/usr/bin/", "[DANGER]"),
|
|
898
|
+
("/usr/local/bin", "/usr/local/bin"), # No change
|
|
899
|
+
],
|
|
900
|
+
),
|
|
901
|
+
"detect_sbin_directory_pattern": ValidatedPattern(
|
|
902
|
+
name="detect_sbin_directory_pattern",
|
|
903
|
+
pattern=r"^/(usr/)?sbin/?.*",
|
|
904
|
+
replacement="[DANGER]",
|
|
905
|
+
description="Detect access to /sbin or /usr/sbin directories",
|
|
906
|
+
test_cases=[
|
|
907
|
+
("/sbin/", "[DANGER]"),
|
|
908
|
+
("/usr/sbin/", "[DANGER]"),
|
|
909
|
+
("/usr/local/sbin", "/usr/local/sbin"), # No change
|
|
910
|
+
],
|
|
911
|
+
),
|
|
912
|
+
"detect_parent_directory_in_path": ValidatedPattern(
|
|
913
|
+
name="detect_parent_directory_in_path",
|
|
914
|
+
pattern=r"\.\.",
|
|
915
|
+
replacement="[PARENT]",
|
|
916
|
+
description="Detect parent directory references anywhere in path",
|
|
917
|
+
global_replace=True,
|
|
918
|
+
test_cases=[
|
|
919
|
+
("../config", "[PARENT]/config"),
|
|
920
|
+
("safe/path", "safe/path"), # No change
|
|
921
|
+
("path/../other", "path/[PARENT]/other"),
|
|
922
|
+
],
|
|
923
|
+
),
|
|
924
|
+
"detect_suspicious_temp_traversal": ValidatedPattern(
|
|
925
|
+
name="detect_suspicious_temp_traversal",
|
|
926
|
+
pattern=r"/tmp/.*\.\./", # nosec B108
|
|
927
|
+
replacement="[SUSPICIOUS]",
|
|
928
|
+
description="Detect traversal attempts in temp directories",
|
|
929
|
+
test_cases=[
|
|
930
|
+
("/tmp/safe/../etc/passwd", "[SUSPICIOUS]etc/passwd"), # nosec B108
|
|
931
|
+
("/tmp/normal/file.txt", "/tmp/normal/file.txt"), # No change # nosec B108
|
|
932
|
+
],
|
|
933
|
+
),
|
|
934
|
+
"detect_suspicious_var_traversal": ValidatedPattern(
|
|
935
|
+
name="detect_suspicious_var_traversal",
|
|
936
|
+
pattern=r"/var/.*\.\./",
|
|
937
|
+
replacement="[SUSPICIOUS]",
|
|
938
|
+
description="Detect traversal attempts in var directories",
|
|
939
|
+
test_cases=[
|
|
940
|
+
("/var/lib/../etc/passwd", "[SUSPICIOUS]etc/passwd"),
|
|
941
|
+
("/var/lib/normal.txt", "/var/lib/normal.txt"), # No change
|
|
942
|
+
],
|
|
943
|
+
),
|
|
944
|
+
# Tool output parsing patterns - for development tool output processing
|
|
945
|
+
"ruff_check_error": ValidatedPattern(
|
|
946
|
+
name="ruff_check_error",
|
|
947
|
+
pattern=r"^(.+?): (\d+): (\d+): ([A-Z]\d+) (.+)$",
|
|
948
|
+
replacement=r"File: \1, Line: \2, Col: \3, Code: \4, Message: \5",
|
|
949
|
+
description="Parse ruff-check error output: file:line:col:code message",
|
|
950
|
+
test_cases=[
|
|
951
|
+
(
|
|
952
|
+
"crackerjack/core.py: 123: 45: E501 line too long",
|
|
953
|
+
"File: crackerjack/core.py, Line: 123, Col: 45, Code: E501, Message: "
|
|
954
|
+
"line too long",
|
|
955
|
+
),
|
|
956
|
+
(
|
|
957
|
+
"./test.py: 1: 1: F401 unused import",
|
|
958
|
+
"File: ./test.py, Line: 1, Col: 1, Code: F401, Message: unused import",
|
|
959
|
+
),
|
|
960
|
+
(
|
|
961
|
+
"src/main.py: 999: 80: W291 trailing whitespace",
|
|
962
|
+
"File: src/main.py, Line: 999, Col: 80, Code: W291, Message: trailing "
|
|
963
|
+
"whitespace",
|
|
964
|
+
),
|
|
965
|
+
],
|
|
966
|
+
),
|
|
967
|
+
"ruff_check_summary": ValidatedPattern(
|
|
968
|
+
name="ruff_check_summary",
|
|
969
|
+
pattern=r"Found (\d+) error",
|
|
970
|
+
replacement=r"Found \1 error(s)",
|
|
971
|
+
description="Parse ruff-check summary line for error count",
|
|
972
|
+
test_cases=[
|
|
973
|
+
("Found 5 error", "Found 5 error(s)"),
|
|
974
|
+
("Found 1 error in 3 files", "Found 1 error(s) in 3 files"),
|
|
975
|
+
("Found 42 error detected", "Found 42 error(s) detected"),
|
|
976
|
+
],
|
|
977
|
+
),
|
|
978
|
+
"pyright_error": ValidatedPattern(
|
|
979
|
+
name="pyright_error",
|
|
980
|
+
pattern=r"^(.+?): (\d+): (\d+) - error: (.+)$",
|
|
981
|
+
replacement=r"File: \1, Line: \2, Col: \3, Error: \4",
|
|
982
|
+
description="Parse pyright error output: file:line:col - error: message",
|
|
983
|
+
test_cases=[
|
|
984
|
+
(
|
|
985
|
+
"src/app.py: 45: 12 - error: Undefined variable",
|
|
986
|
+
"File: src/app.py, Line: 45, Col: 12, Error: Undefined variable",
|
|
987
|
+
),
|
|
988
|
+
(
|
|
989
|
+
"test.py: 1: 1 - error: Type mismatch",
|
|
990
|
+
"File: test.py, Line: 1, Col: 1, Error: Type mismatch",
|
|
991
|
+
),
|
|
992
|
+
(
|
|
993
|
+
"./main.py: 999: 50 - error: Missing return statement",
|
|
994
|
+
"File: ./main.py, Line: 999, Col: 50, Error: Missing return statement",
|
|
995
|
+
),
|
|
996
|
+
],
|
|
997
|
+
),
|
|
998
|
+
"pyright_warning": ValidatedPattern(
|
|
999
|
+
name="pyright_warning",
|
|
1000
|
+
pattern=r"^(.+?): (\d+): (\d+) - warning: (.+)$",
|
|
1001
|
+
replacement=r"File: \1, Line: \2, Col: \3, Warning: \4",
|
|
1002
|
+
description="Parse pyright warning output: file:line:col - warning: message",
|
|
1003
|
+
test_cases=[
|
|
1004
|
+
(
|
|
1005
|
+
"src/app.py: 45: 12 - warning: Unused variable",
|
|
1006
|
+
"File: src/app.py, Line: 45, Col: 12, Warning: Unused variable",
|
|
1007
|
+
),
|
|
1008
|
+
(
|
|
1009
|
+
"test.py: 1: 1 - warning: Deprecated API",
|
|
1010
|
+
"File: test.py, Line: 1, Col: 1, Warning: Deprecated API",
|
|
1011
|
+
),
|
|
1012
|
+
(
|
|
1013
|
+
"./main.py: 999: 50 - warning: Type could be more specific",
|
|
1014
|
+
"File: ./main.py, Line: 999, Col: 50, Warning: Type could be more"
|
|
1015
|
+
" specific",
|
|
1016
|
+
),
|
|
1017
|
+
],
|
|
1018
|
+
),
|
|
1019
|
+
"pyright_summary": ValidatedPattern(
|
|
1020
|
+
name="pyright_summary",
|
|
1021
|
+
pattern=r"(\d+) error[s]?, (\d+) warning[s]?",
|
|
1022
|
+
replacement=r"\1 errors, \2 warnings",
|
|
1023
|
+
description="Parse pyright summary with error and warning counts",
|
|
1024
|
+
test_cases=[
|
|
1025
|
+
("5 errors, 3 warnings", "5 errors, 3 warnings"),
|
|
1026
|
+
("1 error, 1 warning", "1 errors, 1 warnings"),
|
|
1027
|
+
("0 errors, 10 warnings found", "0 errors, 10 warnings found"),
|
|
1028
|
+
],
|
|
1029
|
+
),
|
|
1030
|
+
"bandit_issue": ValidatedPattern(
|
|
1031
|
+
name="bandit_issue",
|
|
1032
|
+
pattern=r">> Issue: \[([A-Z]\d+): \w+\] (.+)",
|
|
1033
|
+
replacement=r"Security Issue [\1]: \2",
|
|
1034
|
+
description="Parse bandit security issue output with code and message",
|
|
1035
|
+
test_cases=[
|
|
1036
|
+
(
|
|
1037
|
+
">> Issue: [B602: subprocess_popen_with_shell_equals_true] Use of "
|
|
1038
|
+
"shell=True",
|
|
1039
|
+
"Security Issue [B602]: Use of shell=True",
|
|
1040
|
+
),
|
|
1041
|
+
(
|
|
1042
|
+
">> Issue: [B101: assert_used] Use of assert detected",
|
|
1043
|
+
"Security Issue [B101]: Use of assert detected",
|
|
1044
|
+
),
|
|
1045
|
+
(
|
|
1046
|
+
">> Issue: [B301: pickle] Pickle library detected",
|
|
1047
|
+
"Security Issue [B301]: Pickle library detected",
|
|
1048
|
+
),
|
|
1049
|
+
],
|
|
1050
|
+
),
|
|
1051
|
+
"bandit_location": ValidatedPattern(
|
|
1052
|
+
name="bandit_location",
|
|
1053
|
+
pattern=r"Location: (.+?): (\d+): (\d+)",
|
|
1054
|
+
replacement=r"Location: File \1, Line \2, Column \3",
|
|
1055
|
+
description="Parse bandit location information for security issues",
|
|
1056
|
+
test_cases=[
|
|
1057
|
+
(
|
|
1058
|
+
"Location: src/security.py: 123: 45",
|
|
1059
|
+
"Location: File src/security.py, Line 123, Column 45",
|
|
1060
|
+
),
|
|
1061
|
+
("Location: ./test.py: 1: 1", "Location: File ./test.py, Line 1, Column 1"),
|
|
1062
|
+
(
|
|
1063
|
+
"Location: crackerjack/core.py: 999: 80",
|
|
1064
|
+
"Location: File crackerjack/core.py, Line 999, Column 80",
|
|
1065
|
+
),
|
|
1066
|
+
],
|
|
1067
|
+
),
|
|
1068
|
+
"bandit_confidence": ValidatedPattern(
|
|
1069
|
+
name="bandit_confidence",
|
|
1070
|
+
pattern=r"Confidence: (\w+)",
|
|
1071
|
+
replacement=r"Confidence Level: \1",
|
|
1072
|
+
description="Parse bandit confidence level for security issues",
|
|
1073
|
+
test_cases=[
|
|
1074
|
+
("Confidence: HIGH", "Confidence Level: HIGH"),
|
|
1075
|
+
("Confidence: MEDIUM", "Confidence Level: MEDIUM"),
|
|
1076
|
+
("Confidence: LOW", "Confidence Level: LOW"),
|
|
1077
|
+
],
|
|
1078
|
+
),
|
|
1079
|
+
"bandit_severity": ValidatedPattern(
|
|
1080
|
+
name="bandit_severity",
|
|
1081
|
+
pattern=r"Severity: (\w+)",
|
|
1082
|
+
replacement=r"Severity Level: \1",
|
|
1083
|
+
description="Parse bandit severity level for security issues",
|
|
1084
|
+
test_cases=[
|
|
1085
|
+
("Severity: HIGH", "Severity Level: HIGH"),
|
|
1086
|
+
("Severity: MEDIUM", "Severity Level: MEDIUM"),
|
|
1087
|
+
("Severity: LOW", "Severity Level: LOW"),
|
|
1088
|
+
],
|
|
1089
|
+
),
|
|
1090
|
+
"mypy_error": ValidatedPattern(
|
|
1091
|
+
name="mypy_error",
|
|
1092
|
+
pattern=r"^(.+?): (\d+): error: (.+)$",
|
|
1093
|
+
replacement=r"File: \1, Line: \2, Error: \3",
|
|
1094
|
+
description="Parse mypy error output: file:line: error: message",
|
|
1095
|
+
test_cases=[
|
|
1096
|
+
(
|
|
1097
|
+
"src/app.py: 45: error: Name 'undefined_var' is not defined",
|
|
1098
|
+
"File: src/app.py, Line: 45, Error: Name 'undefined_var' is not "
|
|
1099
|
+
"defined",
|
|
1100
|
+
),
|
|
1101
|
+
(
|
|
1102
|
+
"test.py: 1: error: Incompatible return value type",
|
|
1103
|
+
"File: test.py, Line: 1, Error: Incompatible return value type",
|
|
1104
|
+
),
|
|
1105
|
+
(
|
|
1106
|
+
"./main.py: 999: error: Argument has incompatible type",
|
|
1107
|
+
"File: ./main.py, Line: 999, Error: Argument has incompatible type",
|
|
1108
|
+
),
|
|
1109
|
+
],
|
|
1110
|
+
),
|
|
1111
|
+
"mypy_note": ValidatedPattern(
|
|
1112
|
+
name="mypy_note",
|
|
1113
|
+
pattern=r"^(.+?): (\d+): note: (.+)$",
|
|
1114
|
+
replacement=r"File: \1, Line: \2, Note: \3",
|
|
1115
|
+
description="Parse mypy note output: file:line: note: message",
|
|
1116
|
+
test_cases=[
|
|
1117
|
+
(
|
|
1118
|
+
"src/app.py: 45: note: Expected type Union[int, str]",
|
|
1119
|
+
"File: src/app.py, Line: 45, Note: Expected type Union[int, str]",
|
|
1120
|
+
),
|
|
1121
|
+
(
|
|
1122
|
+
"test.py: 1: note: See https://mypy.readthedocs.io/",
|
|
1123
|
+
"File: test.py, Line: 1, Note: See https://mypy.readthedocs.io/",
|
|
1124
|
+
),
|
|
1125
|
+
(
|
|
1126
|
+
"./main.py: 999: note: Consider using Optional[...]",
|
|
1127
|
+
"File: ./main.py, Line: 999, Note: Consider using Optional[...]",
|
|
1128
|
+
),
|
|
1129
|
+
],
|
|
1130
|
+
),
|
|
1131
|
+
"vulture_unused": ValidatedPattern(
|
|
1132
|
+
name="vulture_unused",
|
|
1133
|
+
pattern=r"^(.+?): (\d+): unused (.+) '(.+)'",
|
|
1134
|
+
replacement=r"File: \1, Line: \2, Unused \3: '\4'",
|
|
1135
|
+
description="Parse vulture unused code detection: file:line: unused type"
|
|
1136
|
+
" 'name'",
|
|
1137
|
+
test_cases=[
|
|
1138
|
+
(
|
|
1139
|
+
"src/app.py: 45: unused variable 'temp_var'",
|
|
1140
|
+
"File: src/app.py, Line: 45, Unused variable: 'temp_var'",
|
|
1141
|
+
),
|
|
1142
|
+
(
|
|
1143
|
+
"test.py: 1: unused function 'helper'",
|
|
1144
|
+
"File: test.py, Line: 1, Unused function: 'helper'",
|
|
1145
|
+
),
|
|
1146
|
+
(
|
|
1147
|
+
"./main.py: 999: unused import 'os'",
|
|
1148
|
+
"File: ./main.py, Line: 999, Unused import: 'os'",
|
|
1149
|
+
),
|
|
1150
|
+
],
|
|
1151
|
+
),
|
|
1152
|
+
"complexipy_complex": ValidatedPattern(
|
|
1153
|
+
name="complexipy_complex",
|
|
1154
|
+
pattern=r"^(.+?): (\d+): (\d+) - (.+) is too complex \((\d+)\)",
|
|
1155
|
+
replacement=r"File: \1, Line: \2, Col: \3, Function: \4, Complexity: \5",
|
|
1156
|
+
description="Parse complexipy complexity detection: file:line:col - function "
|
|
1157
|
+
"is too complex (score)",
|
|
1158
|
+
test_cases=[
|
|
1159
|
+
(
|
|
1160
|
+
"src/app.py: 45: 1 - complex_function is too complex (15)",
|
|
1161
|
+
"File: src/app.py, Line: 45, Col: 1, Function: complex_function,"
|
|
1162
|
+
" Complexity: 15",
|
|
1163
|
+
),
|
|
1164
|
+
(
|
|
1165
|
+
"test.py: 1: 1 - nested_loops is too complex (20)",
|
|
1166
|
+
"File: test.py, Line: 1, Col: 1, Function: nested_loops, "
|
|
1167
|
+
"Complexity: 20",
|
|
1168
|
+
),
|
|
1169
|
+
(
|
|
1170
|
+
"./main.py: 999: 5 - process_data is too complex (18)",
|
|
1171
|
+
"File: ./main.py, Line: 999, Col: 5, Function: process_data, "
|
|
1172
|
+
"Complexity: 18",
|
|
1173
|
+
),
|
|
1174
|
+
],
|
|
1175
|
+
),
|
|
1176
|
+
# Test output parsing patterns for test_progress_streamer.py
|
|
1177
|
+
# These patterns are used for matching/extraction, not replacement
|
|
1178
|
+
"pytest_test_start": ValidatedPattern(
|
|
1179
|
+
name="pytest_test_start",
|
|
1180
|
+
pattern=r"^(.+?):: ?(.+?):: ?(.+?) (PASSED|FAILED|SKIPPED|ERROR)$",
|
|
1181
|
+
replacement=r"\1::\2::\3", # Extract file::class::method
|
|
1182
|
+
description="Parse pytest test start line with file, class, and method "
|
|
1183
|
+
"(3-part format)",
|
|
1184
|
+
test_cases=[
|
|
1185
|
+
(
|
|
1186
|
+
"test_file.py::TestClass::test_method PASSED",
|
|
1187
|
+
"test_file.py::TestClass::test_method",
|
|
1188
|
+
),
|
|
1189
|
+
(
|
|
1190
|
+
"tests/test_core.py::TestCore::test_function FAILED",
|
|
1191
|
+
"tests/test_core.py::TestCore::test_function",
|
|
1192
|
+
),
|
|
1193
|
+
(
|
|
1194
|
+
"src/test.py::MyTest::test_case SKIPPED",
|
|
1195
|
+
"src/test.py::MyTest::test_case",
|
|
1196
|
+
),
|
|
1197
|
+
],
|
|
1198
|
+
),
|
|
1199
|
+
"pytest_test_result": ValidatedPattern(
|
|
1200
|
+
name="pytest_test_result",
|
|
1201
|
+
pattern=r"^(.+?) (PASSED|FAILED|SKIPPED|ERROR)(?: \[.*?\])?\s*$",
|
|
1202
|
+
replacement=r"\1", # Extract just the test identifier
|
|
1203
|
+
description="Parse pytest test result line with test identifier",
|
|
1204
|
+
test_cases=[
|
|
1205
|
+
("test_file.py::test_method PASSED", "test_file.py::test_method"),
|
|
1206
|
+
(
|
|
1207
|
+
"tests/test_core.py::test_func FAILED [100%]",
|
|
1208
|
+
"tests/test_core.py::test_func",
|
|
1209
|
+
),
|
|
1210
|
+
("src/test.py::test_case SKIPPED ", "src/test.py::test_case"),
|
|
1211
|
+
],
|
|
1212
|
+
),
|
|
1213
|
+
"pytest_collection_count": ValidatedPattern(
|
|
1214
|
+
name="pytest_collection_count",
|
|
1215
|
+
pattern=r"collected (\d+) items?",
|
|
1216
|
+
replacement=r"\1", # Extract just the count
|
|
1217
|
+
description="Parse pytest test collection count",
|
|
1218
|
+
test_cases=[
|
|
1219
|
+
("collected 5 items", "5"),
|
|
1220
|
+
("collected 1 item", "1"),
|
|
1221
|
+
(
|
|
1222
|
+
"collected 42 items for execution",
|
|
1223
|
+
"42 for execution",
|
|
1224
|
+
), # Only the match is replaced
|
|
1225
|
+
],
|
|
1226
|
+
),
|
|
1227
|
+
"pytest_session_start": ValidatedPattern(
|
|
1228
|
+
name="pytest_session_start",
|
|
1229
|
+
pattern=r"test session starts",
|
|
1230
|
+
replacement=r"test session starts", # Identity replacement
|
|
1231
|
+
description="Match pytest session start indicator",
|
|
1232
|
+
test_cases=[
|
|
1233
|
+
("test session starts", "test session starts"),
|
|
1234
|
+
("pytest test session starts", "pytest test session starts"),
|
|
1235
|
+
],
|
|
1236
|
+
),
|
|
1237
|
+
"pytest_coverage_total": ValidatedPattern(
|
|
1238
|
+
name="pytest_coverage_total",
|
|
1239
|
+
pattern=r"TOTAL\s+\d+\s+\d+\s+(\d+)%",
|
|
1240
|
+
replacement=r"\1", # Extract just the percentage
|
|
1241
|
+
description="Parse pytest coverage total percentage",
|
|
1242
|
+
test_cases=[
|
|
1243
|
+
("TOTAL 123 45 85%", "85"),
|
|
1244
|
+
("TOTAL 1000 250 75%", "75"),
|
|
1245
|
+
("TOTAL 50 0 100%", "100"),
|
|
1246
|
+
],
|
|
1247
|
+
),
|
|
1248
|
+
"pytest_detailed_test": ValidatedPattern(
|
|
1249
|
+
name="pytest_detailed_test",
|
|
1250
|
+
pattern=r"^(.+\.py)::(.+) (PASSED|FAILED|SKIPPED|ERROR)",
|
|
1251
|
+
replacement=r"\1::\2", # Extract file and test name
|
|
1252
|
+
description="Parse detailed pytest test output with file, test name, and "
|
|
1253
|
+
"status",
|
|
1254
|
+
test_cases=[
|
|
1255
|
+
(
|
|
1256
|
+
"test_file.py::test_method PASSED [50%]",
|
|
1257
|
+
"test_file.py::test_method [50%]", # Only the matched part is replaced
|
|
1258
|
+
),
|
|
1259
|
+
(
|
|
1260
|
+
"tests/core.py::TestClass::test_func FAILED [75%] [0.1s]",
|
|
1261
|
+
"tests/core.py::TestClass::test_func [75%] [0.1s]",
|
|
1262
|
+
),
|
|
1263
|
+
(
|
|
1264
|
+
"src/test.py::test_case SKIPPED",
|
|
1265
|
+
"src/test.py::test_case",
|
|
1266
|
+
),
|
|
1267
|
+
],
|
|
1268
|
+
),
|
|
1269
|
+
# Code cleaning patterns (from code_cleaner.py)
|
|
1270
|
+
"docstring_triple_double": ValidatedPattern(
|
|
1271
|
+
name="docstring_triple_double",
|
|
1272
|
+
pattern=r'^\s*""".*?"""\s*$',
|
|
1273
|
+
replacement=r"",
|
|
1274
|
+
flags=re.MULTILINE | re.DOTALL,
|
|
1275
|
+
description="Remove triple-quoted docstrings with double quotes",
|
|
1276
|
+
test_cases=[
|
|
1277
|
+
(' """This is a docstring""" ', ""),
|
|
1278
|
+
('"""Module docstring"""', ""),
|
|
1279
|
+
(' """\n Multi-line\n docstring\n """', ""),
|
|
1280
|
+
(
|
|
1281
|
+
'regular_code = "not a docstring"',
|
|
1282
|
+
'regular_code = "not a docstring"',
|
|
1283
|
+
), # No change
|
|
1284
|
+
],
|
|
1285
|
+
),
|
|
1286
|
+
"docstring_triple_single": ValidatedPattern(
|
|
1287
|
+
name="docstring_triple_single",
|
|
1288
|
+
pattern=r"^\s*'''.*?'''\s*$",
|
|
1289
|
+
replacement=r"",
|
|
1290
|
+
flags=re.MULTILINE | re.DOTALL,
|
|
1291
|
+
description="Remove triple-quoted docstrings with single quotes",
|
|
1292
|
+
test_cases=[
|
|
1293
|
+
(" '''This is a docstring''' ", ""),
|
|
1294
|
+
("'''Module docstring'''", ""),
|
|
1295
|
+
(" '''\n Multi-line\n docstring\n '''", ""),
|
|
1296
|
+
(
|
|
1297
|
+
"regular_code = 'not a docstring'",
|
|
1298
|
+
"regular_code = 'not a docstring'",
|
|
1299
|
+
), # No change
|
|
1300
|
+
],
|
|
1301
|
+
),
|
|
1302
|
+
"spacing_after_comma": ValidatedPattern(
|
|
1303
|
+
name="spacing_after_comma",
|
|
1304
|
+
pattern=r",([^ \n])",
|
|
1305
|
+
replacement=r", \1",
|
|
1306
|
+
global_replace=True,
|
|
1307
|
+
description="Add space after comma if missing",
|
|
1308
|
+
test_cases=[
|
|
1309
|
+
("def func(a,b,c):", "def func(a, b, c):"),
|
|
1310
|
+
("items = [1,2,3,4]", "items = [1, 2, 3, 4]"),
|
|
1311
|
+
("already, spaced, properly", "already, spaced, properly"), # No change
|
|
1312
|
+
("mixed,spacing, here", "mixed, spacing, here"),
|
|
1313
|
+
],
|
|
1314
|
+
),
|
|
1315
|
+
"spacing_after_colon": ValidatedPattern(
|
|
1316
|
+
name="spacing_after_colon",
|
|
1317
|
+
pattern=r"(?<!:):([^ \n:])",
|
|
1318
|
+
replacement=r": \1",
|
|
1319
|
+
global_replace=True,
|
|
1320
|
+
description="Add space after colon if missing (avoid double colons)",
|
|
1321
|
+
test_cases=[
|
|
1322
|
+
("def func(x:int, y:str):", "def func(x: int, y: str):"),
|
|
1323
|
+
("dict_item = {'key':'value'}", "dict_item = {'key': 'value'}"),
|
|
1324
|
+
("already: spaced: properly", "already: spaced: properly"), # No change
|
|
1325
|
+
("class::method", "class::method"), # No change (double colon)
|
|
1326
|
+
],
|
|
1327
|
+
),
|
|
1328
|
+
"multiple_spaces": ValidatedPattern(
|
|
1329
|
+
name="multiple_spaces",
|
|
1330
|
+
pattern=r" {2,}",
|
|
1331
|
+
replacement=r" ",
|
|
1332
|
+
description="Replace multiple spaces with single space",
|
|
1333
|
+
global_replace=True,
|
|
1334
|
+
test_cases=[
|
|
1335
|
+
("def func( x, y ):", "def func( x, y ):"),
|
|
1336
|
+
("single space only", "single space only"), # No change
|
|
1337
|
+
("lots of spaces", "lots of spaces"),
|
|
1338
|
+
("\tkeep\ttabs\tbut fix spaces", "\tkeep\ttabs\tbut fix spaces"),
|
|
1339
|
+
],
|
|
1340
|
+
),
|
|
1341
|
+
"preserved_comments": ValidatedPattern(
|
|
1342
|
+
name="preserved_comments",
|
|
1343
|
+
pattern=r"(#.*?(?: coding: | encoding: | type: | noqa | pragma).*)",
|
|
1344
|
+
replacement=r"\1", # Identity replacement - used for matching only
|
|
1345
|
+
description="Match preserved code comments (encoding, type hints, etc.)",
|
|
1346
|
+
test_cases=[
|
|
1347
|
+
("# coding: utf-8", "# coding: utf-8"), # No change - identity replacement
|
|
1348
|
+
(
|
|
1349
|
+
"# encoding: latin-1",
|
|
1350
|
+
"# encoding: latin-1",
|
|
1351
|
+
), # No change - identity replacement
|
|
1352
|
+
("# type: ignore", "# type: ignore"), # No change - identity replacement
|
|
1353
|
+
("# noqa: E501", "# noqa: E501"), # No change - identity replacement
|
|
1354
|
+
(
|
|
1355
|
+
"# pragma: no cover",
|
|
1356
|
+
"# pragma: no cover",
|
|
1357
|
+
), # No change - identity replacement
|
|
1358
|
+
("# regular comment", "# regular comment"), # No change - no match
|
|
1359
|
+
],
|
|
1360
|
+
),
|
|
1361
|
+
"todo_pattern": ValidatedPattern(
|
|
1362
|
+
name="todo_pattern",
|
|
1363
|
+
pattern=r"(#.*?TODO.*)",
|
|
1364
|
+
replacement=r"\1", # Identity replacement - used for matching only
|
|
1365
|
+
flags=re.IGNORECASE,
|
|
1366
|
+
description="Match TODO comments for validation",
|
|
1367
|
+
test_cases=[
|
|
1368
|
+
(
|
|
1369
|
+
"# TODO: Fix this bug",
|
|
1370
|
+
"# TODO: Fix this bug",
|
|
1371
|
+
), # No change - identity replacement
|
|
1372
|
+
(
|
|
1373
|
+
"# todo: implement later",
|
|
1374
|
+
"# todo: implement later",
|
|
1375
|
+
), # No change - identity replacement
|
|
1376
|
+
(
|
|
1377
|
+
"# TODO refactor this method",
|
|
1378
|
+
"# TODO refactor this method",
|
|
1379
|
+
), # No change - identity replacement
|
|
1380
|
+
(
|
|
1381
|
+
"# FIXME: another issue",
|
|
1382
|
+
"# FIXME: another issue",
|
|
1383
|
+
), # No change - no match
|
|
1384
|
+
("# regular comment", "# regular comment"), # No change - no match
|
|
1385
|
+
],
|
|
1386
|
+
),
|
|
1387
|
+
# DRY agent patterns - for code duplication detection
|
|
1388
|
+
"detect_error_response_patterns": ValidatedPattern(
|
|
1389
|
+
name="detect_error_response_patterns",
|
|
1390
|
+
pattern=r'return\s+.*[\'\"]\{.*[\'\""]error[\'\""].*\}.*[\'\""]',
|
|
1391
|
+
replacement=r"MATCH", # Dummy replacement for detection patterns
|
|
1392
|
+
description="Detect error response patterns in Python code for DRY violations",
|
|
1393
|
+
test_cases=[
|
|
1394
|
+
('return \'{"error": "msg"}\'', "MATCH"),
|
|
1395
|
+
('return f\'{"error": "msg"}\'', "MATCH"),
|
|
1396
|
+
('return {"success": True}', 'return {"success": True}'), # No match
|
|
1397
|
+
('return \'{"error": "test message", "code": 500}\'', "MATCH"),
|
|
1398
|
+
],
|
|
1399
|
+
),
|
|
1400
|
+
"detect_path_conversion_patterns": ValidatedPattern(
|
|
1401
|
+
name="detect_path_conversion_patterns",
|
|
1402
|
+
pattern=r"Path\([^)]+\)\s+if\s+isinstance\([^)]+,\s*str\)\s+else\s+[^)]+",
|
|
1403
|
+
replacement=r"MATCH", # Dummy replacement for detection patterns
|
|
1404
|
+
description="Detect path conversion patterns in Python code for DRY violations",
|
|
1405
|
+
test_cases=[
|
|
1406
|
+
("Path(value) if isinstance(value, str) else value", "MATCH"),
|
|
1407
|
+
("Path(path) if isinstance(path, str) else path", "MATCH"),
|
|
1408
|
+
("Path('/tmp/file')", "Path('/tmp/file')"), # No match
|
|
1409
|
+
(
|
|
1410
|
+
"Path(input_path) if isinstance(input_path, str) else input_path",
|
|
1411
|
+
"MATCH",
|
|
1412
|
+
),
|
|
1413
|
+
],
|
|
1414
|
+
),
|
|
1415
|
+
"detect_file_existence_patterns": ValidatedPattern(
|
|
1416
|
+
name="detect_file_existence_patterns",
|
|
1417
|
+
pattern=r"if\s+not\s+\w+\.exists\(\):",
|
|
1418
|
+
replacement=r"MATCH", # Dummy replacement for detection patterns
|
|
1419
|
+
description="Detect file existence check patterns in Python code for DRY"
|
|
1420
|
+
" violations",
|
|
1421
|
+
test_cases=[
|
|
1422
|
+
("if not file.exists():", "MATCH"),
|
|
1423
|
+
("if not path.exists():", "MATCH"),
|
|
1424
|
+
("if not file_path.exists():", "MATCH"),
|
|
1425
|
+
("if file.exists():", "if file.exists():"), # No match
|
|
1426
|
+
],
|
|
1427
|
+
),
|
|
1428
|
+
"detect_exception_patterns": ValidatedPattern(
|
|
1429
|
+
name="detect_exception_patterns",
|
|
1430
|
+
pattern=r"except\s+\w*Exception\s+as\s+\w+:",
|
|
1431
|
+
replacement=r"MATCH", # Dummy replacement for detection patterns
|
|
1432
|
+
description="Detect exception handling patterns for base Exception class in Python code for DRY violations",
|
|
1433
|
+
test_cases=[
|
|
1434
|
+
("except Exception as e:", "MATCH"),
|
|
1435
|
+
("except BaseException as error:", "MATCH"),
|
|
1436
|
+
(
|
|
1437
|
+
"except ValueError as error:",
|
|
1438
|
+
"except ValueError as error:",
|
|
1439
|
+
), # No match - doesn't match pattern
|
|
1440
|
+
("try:", "try:"), # No match
|
|
1441
|
+
],
|
|
1442
|
+
),
|
|
1443
|
+
"fix_path_conversion_with_ensure_path": ValidatedPattern(
|
|
1444
|
+
name="fix_path_conversion_with_ensure_path",
|
|
1445
|
+
pattern=r"Path\([^)]+\)\s+if\s+isinstance\([^)]+,\s*str\)\s+else\s+([^)]+)",
|
|
1446
|
+
replacement=r"_ensure_path(\1)",
|
|
1447
|
+
description="Replace path conversion patterns with _ensure_path utility "
|
|
1448
|
+
"function",
|
|
1449
|
+
test_cases=[
|
|
1450
|
+
("Path(value) if isinstance(value, str) else value", "_ensure_path(value)"),
|
|
1451
|
+
("Path(path) if isinstance(path, str) else path", "_ensure_path(path)"),
|
|
1452
|
+
(
|
|
1453
|
+
"Path(input_path) if isinstance(input_path, str) else input_path",
|
|
1454
|
+
"_ensure_path(input_path)",
|
|
1455
|
+
),
|
|
1456
|
+
],
|
|
1457
|
+
),
|
|
1458
|
+
"fix_path_conversion_simple": ValidatedPattern(
|
|
1459
|
+
name="fix_path_conversion_simple",
|
|
1460
|
+
pattern=r"Path\(([^)]+)\)\s+if\s+isinstance\(\1,\s*str\)\s+else\s+\1",
|
|
1461
|
+
replacement=r"_ensure_path(\1)",
|
|
1462
|
+
description="Replace simple path conversion patterns with _ensure_path utility "
|
|
1463
|
+
"function",
|
|
1464
|
+
test_cases=[
|
|
1465
|
+
("Path(value) if isinstance(value, str) else value", "_ensure_path(value)"),
|
|
1466
|
+
("Path(path) if isinstance(path, str) else path", "_ensure_path(path)"),
|
|
1467
|
+
(
|
|
1468
|
+
"Path(file_path) if isinstance(file_path, str) else file_path",
|
|
1469
|
+
"_ensure_path(file_path)",
|
|
1470
|
+
),
|
|
1471
|
+
],
|
|
1472
|
+
),
|
|
1473
|
+
# Security agent patterns - NEW PATTERNS FOR SECURITY_AGENT.PY
|
|
1474
|
+
"detect_security_keywords": ValidatedPattern(
|
|
1475
|
+
name="detect_security_keywords",
|
|
1476
|
+
pattern=r"(?i)(bandit|security|vulnerability|hardcoded|"
|
|
1477
|
+
r"shell=true|b108|b602|b301|b506|unsafe|injection)",
|
|
1478
|
+
replacement=r"MATCH", # Dummy replacement for detection patterns
|
|
1479
|
+
description="Detect security-related keywords in issue messages "
|
|
1480
|
+
"(case insensitive)",
|
|
1481
|
+
flags=re.IGNORECASE,
|
|
1482
|
+
test_cases=[
|
|
1483
|
+
("Bandit security issue found", "MATCH security issue found"),
|
|
1484
|
+
("VULNERABILITY detected", "MATCH detected"),
|
|
1485
|
+
("hardcoded path found", "MATCH path found"),
|
|
1486
|
+
("shell=True usage", "MATCH usage"),
|
|
1487
|
+
("B108 violation", "MATCH violation"),
|
|
1488
|
+
("normal message", "normal message"), # No match
|
|
1489
|
+
],
|
|
1490
|
+
),
|
|
1491
|
+
"detect_hardcoded_temp_paths_basic": ValidatedPattern(
|
|
1492
|
+
name="detect_hardcoded_temp_paths_basic",
|
|
1493
|
+
pattern=r"(?:/tmp/|/temp/|C:\\temp\\|C:\\tmp\\)", # nosec B108
|
|
1494
|
+
replacement="[TEMP_PATH]/",
|
|
1495
|
+
description="Detect hardcoded temporary directory paths",
|
|
1496
|
+
global_replace=True,
|
|
1497
|
+
test_cases=[
|
|
1498
|
+
("/tmp/myfile.txt", "[TEMP_PATH]/myfile.txt"), # nosec B108
|
|
1499
|
+
(r"C:\tmp\data.log", "[TEMP_PATH]/data.log"),
|
|
1500
|
+
("/temp/cache", "[TEMP_PATH]/cache"),
|
|
1501
|
+
(r"C:\temp\work", "[TEMP_PATH]/work"),
|
|
1502
|
+
("/regular/path", "/regular/path"), # No change
|
|
1503
|
+
],
|
|
1504
|
+
),
|
|
1505
|
+
"replace_hardcoded_temp_paths": ValidatedPattern(
|
|
1506
|
+
name="replace_hardcoded_temp_paths",
|
|
1507
|
+
pattern=r'Path\("/tmp/([^"]+)"\)',
|
|
1508
|
+
replacement=r'Path(tempfile.gettempdir()) / "\1"',
|
|
1509
|
+
description="Replace hardcoded /tmp paths with tempfile.gettempdir()",
|
|
1510
|
+
global_replace=True,
|
|
1511
|
+
test_cases=[
|
|
1512
|
+
('Path("/tmp/myfile.txt")', 'Path(tempfile.gettempdir()) / "myfile.txt"'),
|
|
1513
|
+
('Path("/tmp/data.log")', 'Path(tempfile.gettempdir()) / "data.log"'),
|
|
1514
|
+
('Path("/regular/path")', 'Path("/regular/path")'), # No change
|
|
1515
|
+
],
|
|
1516
|
+
),
|
|
1517
|
+
"replace_hardcoded_temp_strings": ValidatedPattern(
|
|
1518
|
+
name="replace_hardcoded_temp_strings",
|
|
1519
|
+
pattern=r'"/tmp/([^"]+)"',
|
|
1520
|
+
replacement=r'str(Path(tempfile.gettempdir()) / "\1")',
|
|
1521
|
+
description="Replace hardcoded /tmp string paths with tempfile equivalent",
|
|
1522
|
+
global_replace=True,
|
|
1523
|
+
test_cases=[
|
|
1524
|
+
('"/tmp/myfile.txt"', 'str(Path(tempfile.gettempdir()) / "myfile.txt")'),
|
|
1525
|
+
('"/tmp/data.log"', 'str(Path(tempfile.gettempdir()) / "data.log")'),
|
|
1526
|
+
('"/regular/path"', '"/regular/path"'), # No change
|
|
1527
|
+
],
|
|
1528
|
+
),
|
|
1529
|
+
"replace_hardcoded_temp_single_quotes": ValidatedPattern(
|
|
1530
|
+
name="replace_hardcoded_temp_single_quotes",
|
|
1531
|
+
pattern=r"'/tmp/([^']+)'",
|
|
1532
|
+
replacement=r"str(Path(tempfile.gettempdir()) / '\1')",
|
|
1533
|
+
description="Replace hardcoded /tmp paths (single quotes) with tempfile"
|
|
1534
|
+
" equivalent",
|
|
1535
|
+
global_replace=True,
|
|
1536
|
+
test_cases=[
|
|
1537
|
+
("'/tmp/myfile.txt'", "str(Path(tempfile.gettempdir()) / 'myfile.txt')"),
|
|
1538
|
+
("'/tmp/data.log'", "str(Path(tempfile.gettempdir()) / 'data.log')"),
|
|
1539
|
+
("'/regular/path'", "'/regular/path'"), # No change
|
|
1540
|
+
],
|
|
1541
|
+
),
|
|
1542
|
+
"replace_test_path_patterns": ValidatedPattern(
|
|
1543
|
+
name="replace_test_path_patterns",
|
|
1544
|
+
pattern=r'Path\("/test/path"\)',
|
|
1545
|
+
replacement=r"Path(tempfile.gettempdir()) / 'test-path'",
|
|
1546
|
+
description="Replace hardcoded /test/path patterns with tempfile equivalent",
|
|
1547
|
+
test_cases=[
|
|
1548
|
+
('Path("/test/path")', "Path(tempfile.gettempdir()) / 'test-path'"),
|
|
1549
|
+
('Path("/other/path")', 'Path("/other/path")'), # No change
|
|
1550
|
+
],
|
|
1551
|
+
),
|
|
1552
|
+
"detect_hardcoded_secrets": ValidatedPattern(
|
|
1553
|
+
name="detect_hardcoded_secrets",
|
|
1554
|
+
pattern=r'\b\w*(password|secret|key|token)\w*\s*=\s*[\'"][^\'"]+[\'"]',
|
|
1555
|
+
replacement="[SECRET_DETECTED]",
|
|
1556
|
+
description="Detect hardcoded secrets in assignments (case insensitive)",
|
|
1557
|
+
flags=re.IGNORECASE,
|
|
1558
|
+
global_replace=True,
|
|
1559
|
+
test_cases=[
|
|
1560
|
+
('password = "secret123"', "[SECRET_DETECTED]"),
|
|
1561
|
+
("api_key = 'abc123def'", "[SECRET_DETECTED]"),
|
|
1562
|
+
('TOKEN = "my-token-here"', "[SECRET_DETECTED]"),
|
|
1563
|
+
("username = 'user123'", "username = 'user123'"), # No match
|
|
1564
|
+
],
|
|
1565
|
+
),
|
|
1566
|
+
"extract_variable_name_from_assignment": ValidatedPattern(
|
|
1567
|
+
name="extract_variable_name_from_assignment",
|
|
1568
|
+
pattern=r"\s*(\w+)\s*=.*",
|
|
1569
|
+
replacement=r"\1",
|
|
1570
|
+
description="Extract variable name from assignment statement",
|
|
1571
|
+
test_cases=[
|
|
1572
|
+
("password = 'secret'", "password"),
|
|
1573
|
+
("api_key = 'value'", "api_key"),
|
|
1574
|
+
(" token =", "token"), # Matches just the word part
|
|
1575
|
+
("complex_variable_name = value", "complex_variable_name"),
|
|
1576
|
+
],
|
|
1577
|
+
),
|
|
1578
|
+
"detect_insecure_random_usage": ValidatedPattern(
|
|
1579
|
+
name="detect_insecure_random_usage",
|
|
1580
|
+
pattern=r"\brandom\.(?:random|choice)\([^)]*\)",
|
|
1581
|
+
replacement="[INSECURE_RANDOM]()",
|
|
1582
|
+
description="Detect insecure random module usage",
|
|
1583
|
+
global_replace=True,
|
|
1584
|
+
test_cases=[
|
|
1585
|
+
("random.random()", "[INSECURE_RANDOM]()"),
|
|
1586
|
+
("random.choice(options)", "[INSECURE_RANDOM]()"),
|
|
1587
|
+
("secrets.choice(options)", "secrets.choice(options)"), # No change
|
|
1588
|
+
("my_random.choice()", "my_random.choice()"), # No change
|
|
1589
|
+
],
|
|
1590
|
+
),
|
|
1591
|
+
# Input validation patterns for security-critical validation
|
|
1592
|
+
"validate_sql_injection_patterns": ValidatedPattern(
|
|
1593
|
+
name="validate_sql_injection_patterns",
|
|
1594
|
+
pattern=r"\b(union|select|insert|update|delete|drop|create|alter|"
|
|
1595
|
+
r"exec|execute)\b",
|
|
1596
|
+
replacement="[SQL_INJECTION]",
|
|
1597
|
+
flags=re.IGNORECASE,
|
|
1598
|
+
description="Detect SQL injection patterns in input validation "
|
|
1599
|
+
"(case insensitive)",
|
|
1600
|
+
global_replace=True,
|
|
1601
|
+
test_cases=[
|
|
1602
|
+
("UNION SELECT", "[SQL_INJECTION] [SQL_INJECTION]"),
|
|
1603
|
+
("drop table", "[SQL_INJECTION] table"),
|
|
1604
|
+
("normal text", "normal text"), # No change
|
|
1605
|
+
("exec command", "[SQL_INJECTION] command"),
|
|
1606
|
+
("execute procedure", "[SQL_INJECTION] procedure"),
|
|
1607
|
+
],
|
|
1608
|
+
),
|
|
1609
|
+
"validate_sql_comment_patterns": ValidatedPattern(
|
|
1610
|
+
name="validate_sql_comment_patterns",
|
|
1611
|
+
pattern=r"(-{2,}|\/\*|\*\/)",
|
|
1612
|
+
replacement="[SQL_COMMENT]",
|
|
1613
|
+
description="Detect SQL comment patterns in input validation",
|
|
1614
|
+
global_replace=True,
|
|
1615
|
+
test_cases=[
|
|
1616
|
+
("--comment", "[SQL_COMMENT]comment"),
|
|
1617
|
+
("/* comment */", "[SQL_COMMENT] comment [SQL_COMMENT]"),
|
|
1618
|
+
("normal-text", "normal-text"), # No change (single hyphen)
|
|
1619
|
+
("---triple", "[SQL_COMMENT]triple"),
|
|
1620
|
+
],
|
|
1621
|
+
),
|
|
1622
|
+
"validate_sql_boolean_injection": ValidatedPattern(
|
|
1623
|
+
name="validate_sql_boolean_injection",
|
|
1624
|
+
pattern=r"\b(or|and)\b.*=",
|
|
1625
|
+
replacement="[BOOLEAN_INJECTION]",
|
|
1626
|
+
flags=re.IGNORECASE,
|
|
1627
|
+
description="Detect boolean-based SQL injection patterns (case insensitive)",
|
|
1628
|
+
global_replace=True,
|
|
1629
|
+
test_cases=[
|
|
1630
|
+
("or 1=1", "[BOOLEAN_INJECTION]1"),
|
|
1631
|
+
("AND password=", "[BOOLEAN_INJECTION]"),
|
|
1632
|
+
("normal or text", "normal or text"), # No change (no equals)
|
|
1633
|
+
("value=test", "value=test"), # No change (no boolean operator)
|
|
1634
|
+
],
|
|
1635
|
+
),
|
|
1636
|
+
"validate_sql_server_specific": ValidatedPattern(
|
|
1637
|
+
name="validate_sql_server_specific",
|
|
1638
|
+
pattern=r"\b(xp_cmdshell|sp_executesql)\b",
|
|
1639
|
+
replacement="[SQLSERVER_EXPLOIT]",
|
|
1640
|
+
flags=re.IGNORECASE,
|
|
1641
|
+
description="Detect SQL Server specific attack patterns (case insensitive)",
|
|
1642
|
+
global_replace=True,
|
|
1643
|
+
test_cases=[
|
|
1644
|
+
("xp_cmdshell", "[SQLSERVER_EXPLOIT]"),
|
|
1645
|
+
("SP_EXECUTESQL", "[SQLSERVER_EXPLOIT]"),
|
|
1646
|
+
("normal text", "normal text"), # No change
|
|
1647
|
+
],
|
|
1648
|
+
),
|
|
1649
|
+
"validate_code_eval_injection": ValidatedPattern(
|
|
1650
|
+
name="validate_code_eval_injection",
|
|
1651
|
+
pattern=r"\b(eval|exec|execfile)\s*\(",
|
|
1652
|
+
replacement="[CODE_EVAL](",
|
|
1653
|
+
description="Detect Python code evaluation injection patterns",
|
|
1654
|
+
global_replace=True,
|
|
1655
|
+
test_cases=[
|
|
1656
|
+
("eval(code)", "[CODE_EVAL](code)"),
|
|
1657
|
+
("exec(command)", "[CODE_EVAL](command)"),
|
|
1658
|
+
("execfile(script)", "[CODE_EVAL](script)"),
|
|
1659
|
+
("evaluate()", "evaluate()"), # No change (not exact match)
|
|
1660
|
+
],
|
|
1661
|
+
),
|
|
1662
|
+
"validate_code_dynamic_access": ValidatedPattern(
|
|
1663
|
+
name="validate_code_dynamic_access",
|
|
1664
|
+
pattern=r"\b(__import__|getattr|setattr|delattr)\b",
|
|
1665
|
+
replacement="[DYNAMIC_ACCESS]",
|
|
1666
|
+
description="Detect dynamic attribute access patterns for code injection",
|
|
1667
|
+
global_replace=True,
|
|
1668
|
+
test_cases=[
|
|
1669
|
+
("__import__", "[DYNAMIC_ACCESS]"),
|
|
1670
|
+
("getattr(obj, name)", "[DYNAMIC_ACCESS](obj, name)"),
|
|
1671
|
+
("setattr(obj, name)", "[DYNAMIC_ACCESS](obj, name)"),
|
|
1672
|
+
("delattr(obj, name)", "[DYNAMIC_ACCESS](obj, name)"),
|
|
1673
|
+
("mygetattr", "mygetattr"), # No change (not exact match)
|
|
1674
|
+
],
|
|
1675
|
+
),
|
|
1676
|
+
"validate_code_system_commands": ValidatedPattern(
|
|
1677
|
+
name="validate_code_system_commands",
|
|
1678
|
+
pattern=r"\b(subprocess|os\.system|os\.popen|commands\.)",
|
|
1679
|
+
replacement="[SYSTEM_COMMAND]",
|
|
1680
|
+
description="Detect system command execution patterns for code injection",
|
|
1681
|
+
global_replace=True,
|
|
1682
|
+
test_cases=[
|
|
1683
|
+
("subprocess.run", "[SYSTEM_COMMAND].run"),
|
|
1684
|
+
("os.system(cmd)", "[SYSTEM_COMMAND](cmd)"),
|
|
1685
|
+
("os.popen(cmd)", "[SYSTEM_COMMAND](cmd)"),
|
|
1686
|
+
("commands.getoutput", "[SYSTEM_COMMAND]getoutput"),
|
|
1687
|
+
("mysubprocess", "mysubprocess"), # No change (not exact match)
|
|
1688
|
+
],
|
|
1689
|
+
),
|
|
1690
|
+
"validate_code_compilation": ValidatedPattern(
|
|
1691
|
+
name="validate_code_compilation",
|
|
1692
|
+
pattern=r"\bcompile\s*\(|code\.compile",
|
|
1693
|
+
replacement="[CODE_COMPILE]",
|
|
1694
|
+
description="Detect code compilation patterns for injection",
|
|
1695
|
+
global_replace=True,
|
|
1696
|
+
test_cases=[
|
|
1697
|
+
("compile(source)", "[CODE_COMPILE]source)"),
|
|
1698
|
+
("code.compile(source)", "[CODE_COMPILE](source)"),
|
|
1699
|
+
("compiled", "compiled"), # No change (not exact match)
|
|
1700
|
+
],
|
|
1701
|
+
),
|
|
1702
|
+
"validate_job_id_format": ValidatedPattern(
|
|
1703
|
+
name="validate_job_id_format",
|
|
1704
|
+
pattern=r"^[a-zA-Z0-9\-_]+$",
|
|
1705
|
+
replacement="VALID_JOB_ID",
|
|
1706
|
+
description="Validate job ID format - alphanumeric with hyphens and"
|
|
1707
|
+
" underscores only",
|
|
1708
|
+
test_cases=[
|
|
1709
|
+
("valid_job-123", "VALID_JOB_ID"),
|
|
1710
|
+
("another-valid_job_456", "VALID_JOB_ID"),
|
|
1711
|
+
("job_123", "VALID_JOB_ID"),
|
|
1712
|
+
("UPPERCASE_JOB-ID", "VALID_JOB_ID"),
|
|
1713
|
+
("hyphen-underscore_combo", "VALID_JOB_ID"),
|
|
1714
|
+
],
|
|
1715
|
+
),
|
|
1716
|
+
"validate_env_var_name_format": ValidatedPattern(
|
|
1717
|
+
name="validate_env_var_name_format",
|
|
1718
|
+
pattern=r"^[A-Z_][A-Z0-9_]*$",
|
|
1719
|
+
replacement="VALID_ENV_VAR_NAME",
|
|
1720
|
+
description="Validate environment variable name format - uppercase letters,"
|
|
1721
|
+
" numbers, underscores only, must start with letter or underscore",
|
|
1722
|
+
test_cases=[
|
|
1723
|
+
("VALID_VAR", "VALID_ENV_VAR_NAME"),
|
|
1724
|
+
("_VALID_VAR", "VALID_ENV_VAR_NAME"),
|
|
1725
|
+
("API_KEY_123", "VALID_ENV_VAR_NAME"),
|
|
1726
|
+
("DATABASE_URL", "VALID_ENV_VAR_NAME"),
|
|
1727
|
+
("_PRIVATE_VAR", "VALID_ENV_VAR_NAME"),
|
|
1728
|
+
],
|
|
1729
|
+
),
|
|
1730
|
+
# Config file update patterns
|
|
1731
|
+
"update_repo_revision": ValidatedPattern(
|
|
1732
|
+
name="update_repo_revision",
|
|
1733
|
+
pattern=r'("repo": "[^"]+?".*?"rev": )"([^"]+)"',
|
|
1734
|
+
replacement=r'\1"NEW_REVISION"',
|
|
1735
|
+
description="Update repository revision in config files (NEW_REVISION"
|
|
1736
|
+
" placeholder replaced dynamically)",
|
|
1737
|
+
flags=re.DOTALL,
|
|
1738
|
+
test_cases=[
|
|
1739
|
+
(
|
|
1740
|
+
'"repo": "https://github.com/user/repo".*"rev": "old_rev"',
|
|
1741
|
+
'"repo": "https://github.com/user/repo".*"rev": "NEW_REVISION"',
|
|
1742
|
+
),
|
|
1743
|
+
(
|
|
1744
|
+
'"repo": "git@github.com:user/repo.git", "branch": "main", "rev": '
|
|
1745
|
+
'"abc123"',
|
|
1746
|
+
'"repo": "git@github.com:user/repo.git", "branch": "main", "rev":'
|
|
1747
|
+
' "NEW_REVISION"',
|
|
1748
|
+
),
|
|
1749
|
+
(
|
|
1750
|
+
'{"repo": "https://example.com/repo", "description": "test", "rev": '
|
|
1751
|
+
'"456def"}',
|
|
1752
|
+
'{"repo": "https://example.com/repo", "description": "test", "rev":'
|
|
1753
|
+
' "NEW_REVISION"}',
|
|
1754
|
+
),
|
|
1755
|
+
],
|
|
1756
|
+
),
|
|
1757
|
+
# URL sanitization patterns for security
|
|
1758
|
+
"sanitize_localhost_urls": ValidatedPattern(
|
|
1759
|
+
name="sanitize_localhost_urls",
|
|
1760
|
+
pattern=r"https?://localhost:\d+[^\s]*",
|
|
1761
|
+
replacement="[INTERNAL_URL]",
|
|
1762
|
+
description="Sanitize localhost URLs with ports for security",
|
|
1763
|
+
global_replace=True,
|
|
1764
|
+
test_cases=[
|
|
1765
|
+
("http://localhost:8000/api/test", "[INTERNAL_URL]"),
|
|
1766
|
+
("https://localhost:3000/dashboard", "[INTERNAL_URL]"),
|
|
1767
|
+
(
|
|
1768
|
+
"Visit http://localhost:8080/admin for details",
|
|
1769
|
+
"Visit [INTERNAL_URL] for details",
|
|
1770
|
+
),
|
|
1771
|
+
("https://example.com/test", "https://example.com/test"), # No change
|
|
1772
|
+
],
|
|
1773
|
+
),
|
|
1774
|
+
"sanitize_127_urls": ValidatedPattern(
|
|
1775
|
+
name="sanitize_127_urls",
|
|
1776
|
+
pattern=r"https?://127\.0\.0\.1:\d+[^\s]*",
|
|
1777
|
+
replacement="[INTERNAL_URL]",
|
|
1778
|
+
description="Sanitize 127.0.0.1 URLs with ports for security",
|
|
1779
|
+
global_replace=True,
|
|
1780
|
+
test_cases=[
|
|
1781
|
+
("http://127.0.0.1:8000/api", "[INTERNAL_URL]"),
|
|
1782
|
+
("https://127.0.0.1:3000/test", "[INTERNAL_URL]"),
|
|
1783
|
+
("Connect to http://127.0.0.1:5000/status", "Connect to [INTERNAL_URL]"),
|
|
1784
|
+
(
|
|
1785
|
+
"https://192.168.1.1:8080/test",
|
|
1786
|
+
"https://192.168.1.1:8080/test",
|
|
1787
|
+
), # No change
|
|
1788
|
+
],
|
|
1789
|
+
),
|
|
1790
|
+
"sanitize_any_localhost_urls": ValidatedPattern(
|
|
1791
|
+
name="sanitize_any_localhost_urls",
|
|
1792
|
+
pattern=r"https?://0\.0\.0\.0:\d+[^\s]*",
|
|
1793
|
+
replacement="[INTERNAL_URL]",
|
|
1794
|
+
description="Sanitize 0.0.0.0 URLs with ports for security",
|
|
1795
|
+
global_replace=True,
|
|
1796
|
+
test_cases=[
|
|
1797
|
+
("http://0.0.0.0:8000/api", "[INTERNAL_URL]"),
|
|
1798
|
+
("https://0.0.0.0:3000/test", "[INTERNAL_URL]"),
|
|
1799
|
+
("https://1.1.1.1:8080/test", "https://1.1.1.1:8080/test"), # No change
|
|
1800
|
+
],
|
|
1801
|
+
),
|
|
1802
|
+
"sanitize_ws_localhost_urls": ValidatedPattern(
|
|
1803
|
+
name="sanitize_ws_localhost_urls",
|
|
1804
|
+
pattern=r"ws://localhost:\d+[^\s]*",
|
|
1805
|
+
replacement="[INTERNAL_URL]",
|
|
1806
|
+
description="Sanitize WebSocket localhost URLs with ports for security",
|
|
1807
|
+
global_replace=True,
|
|
1808
|
+
test_cases=[
|
|
1809
|
+
("ws://localhost:8675/websocket", "[INTERNAL_URL]"),
|
|
1810
|
+
("ws://localhost:3000/socket", "[INTERNAL_URL]"),
|
|
1811
|
+
("Connect to ws://localhost:8000/ws", "Connect to [INTERNAL_URL]"),
|
|
1812
|
+
(
|
|
1813
|
+
"wss://example.com:443/socket",
|
|
1814
|
+
"wss://example.com:443/socket",
|
|
1815
|
+
), # No change
|
|
1816
|
+
],
|
|
1817
|
+
),
|
|
1818
|
+
"sanitize_ws_127_urls": ValidatedPattern(
|
|
1819
|
+
name="sanitize_ws_127_urls",
|
|
1820
|
+
pattern=r"ws://127\.0\.0\.1:\d+[^\s]*",
|
|
1821
|
+
replacement="[INTERNAL_URL]",
|
|
1822
|
+
description="Sanitize WebSocket 127.0.0.1 URLs with ports for security",
|
|
1823
|
+
global_replace=True,
|
|
1824
|
+
test_cases=[
|
|
1825
|
+
("ws://127.0.0.1:8675/websocket", "[INTERNAL_URL]"),
|
|
1826
|
+
("ws://127.0.0.1:3000/socket", "[INTERNAL_URL]"),
|
|
1827
|
+
(
|
|
1828
|
+
"ws://192.168.1.1:8080/socket",
|
|
1829
|
+
"ws://192.168.1.1:8080/socket",
|
|
1830
|
+
), # No change
|
|
1831
|
+
],
|
|
1832
|
+
),
|
|
1833
|
+
"sanitize_simple_localhost_urls": ValidatedPattern(
|
|
1834
|
+
name="sanitize_simple_localhost_urls",
|
|
1835
|
+
pattern=r"http://localhost[^\s]*",
|
|
1836
|
+
replacement="[INTERNAL_URL]",
|
|
1837
|
+
description="Sanitize simple localhost URLs without explicit ports for security",
|
|
1838
|
+
global_replace=True,
|
|
1839
|
+
test_cases=[
|
|
1840
|
+
("http://localhost/api/test", "[INTERNAL_URL]"),
|
|
1841
|
+
("http://localhost/dashboard", "[INTERNAL_URL]"),
|
|
1842
|
+
("Visit http://localhost/admin", "Visit [INTERNAL_URL]"),
|
|
1843
|
+
(
|
|
1844
|
+
"https://localhost:443/test",
|
|
1845
|
+
"https://localhost:443/test",
|
|
1846
|
+
), # No change (different pattern)
|
|
1847
|
+
],
|
|
1848
|
+
),
|
|
1849
|
+
"sanitize_simple_ws_localhost_urls": ValidatedPattern(
|
|
1850
|
+
name="sanitize_simple_ws_localhost_urls",
|
|
1851
|
+
pattern=r"ws://localhost[^\s]*",
|
|
1852
|
+
replacement="[INTERNAL_URL]",
|
|
1853
|
+
description="Sanitize simple WebSocket localhost URLs without explicit ports"
|
|
1854
|
+
" for security",
|
|
1855
|
+
global_replace=True,
|
|
1856
|
+
test_cases=[
|
|
1857
|
+
("ws://localhost/websocket", "[INTERNAL_URL]"),
|
|
1858
|
+
("ws://localhost/socket", "[INTERNAL_URL]"),
|
|
1859
|
+
("Connect to ws://localhost/ws", "Connect to [INTERNAL_URL]"),
|
|
1860
|
+
(
|
|
1861
|
+
"wss://localhost:443/socket",
|
|
1862
|
+
"wss://localhost:443/socket",
|
|
1863
|
+
), # No change (different pattern)
|
|
1864
|
+
],
|
|
1865
|
+
),
|
|
1866
|
+
# Integration script patterns for resource management
|
|
1867
|
+
"detect_tempfile_usage": ValidatedPattern(
|
|
1868
|
+
name="detect_tempfile_usage",
|
|
1869
|
+
pattern=r"tempfile\.(?:mkdtemp|NamedTemporaryFile|TemporaryDirectory)",
|
|
1870
|
+
replacement="MATCH", # Dummy replacement for detection patterns
|
|
1871
|
+
test_cases=[
|
|
1872
|
+
("tempfile.mkdtemp()", "MATCH()"),
|
|
1873
|
+
("tempfile.NamedTemporaryFile()", "MATCH()"),
|
|
1874
|
+
("tempfile.TemporaryDirectory()", "MATCH()"),
|
|
1875
|
+
(
|
|
1876
|
+
"not_tempfile.other()",
|
|
1877
|
+
"not_tempfile.other()",
|
|
1878
|
+
), # No match leaves original
|
|
1879
|
+
],
|
|
1880
|
+
description="Detect tempfile module usage for resource management integration",
|
|
1881
|
+
),
|
|
1882
|
+
"detect_subprocess_usage": ValidatedPattern(
|
|
1883
|
+
name="detect_subprocess_usage",
|
|
1884
|
+
pattern=r"subprocess\.(?:Popen|run)",
|
|
1885
|
+
replacement="MATCH", # Dummy replacement for detection patterns
|
|
1886
|
+
test_cases=[
|
|
1887
|
+
("subprocess.Popen(cmd)", "MATCH(cmd)"),
|
|
1888
|
+
("subprocess.run(['cmd'])", "MATCH(['cmd'])"),
|
|
1889
|
+
("not_subprocess.other()", "not_subprocess.other()"),
|
|
1890
|
+
],
|
|
1891
|
+
description="Detect subprocess module usage for resource management integration",
|
|
1892
|
+
),
|
|
1893
|
+
"detect_asyncio_create_task": ValidatedPattern(
|
|
1894
|
+
name="detect_asyncio_create_task",
|
|
1895
|
+
pattern=r"asyncio\.create_task",
|
|
1896
|
+
replacement="MATCH", # Dummy replacement for detection patterns
|
|
1897
|
+
test_cases=[
|
|
1898
|
+
("asyncio.create_task(coro)", "MATCH(coro)"),
|
|
1899
|
+
("not_asyncio.other()", "not_asyncio.other()"),
|
|
1900
|
+
],
|
|
1901
|
+
description="Detect asyncio.create_task usage for resource management"
|
|
1902
|
+
" integration",
|
|
1903
|
+
),
|
|
1904
|
+
"detect_file_open_operations": ValidatedPattern(
|
|
1905
|
+
name="detect_file_open_operations",
|
|
1906
|
+
pattern=r"(\.open\(|with open\()",
|
|
1907
|
+
replacement=r"MATCH", # Dummy replacement for detection patterns
|
|
1908
|
+
test_cases=[
|
|
1909
|
+
("file.open()", "fileMATCH)"),
|
|
1910
|
+
("with open('file.txt'):", "MATCH'file.txt'):"),
|
|
1911
|
+
("other_method()", "other_method()"), # No change
|
|
1912
|
+
],
|
|
1913
|
+
description="Detect file open operations for resource management integration",
|
|
1914
|
+
),
|
|
1915
|
+
"match_async_function_definition": ValidatedPattern(
|
|
1916
|
+
name="match_async_function_definition",
|
|
1917
|
+
pattern=r"(async def \w+\([^)]*\)[^:]*:)",
|
|
1918
|
+
replacement=r"\1",
|
|
1919
|
+
test_cases=[
|
|
1920
|
+
("async def foo():", "async def foo():"),
|
|
1921
|
+
("async def bar(a, b) -> None:", "async def bar(a, b) -> None:"),
|
|
1922
|
+
("def sync_func():", "def sync_func():"),
|
|
1923
|
+
],
|
|
1924
|
+
description="Match async function definitions for resource management"
|
|
1925
|
+
" integration",
|
|
1926
|
+
),
|
|
1927
|
+
"match_class_definition": ValidatedPattern(
|
|
1928
|
+
name="match_class_definition",
|
|
1929
|
+
pattern=r"class (\w+).*:",
|
|
1930
|
+
replacement=r"\1",
|
|
1931
|
+
test_cases=[
|
|
1932
|
+
("class MyClass:", "MyClass"),
|
|
1933
|
+
("class MyClass(BaseClass):", "MyClass"),
|
|
1934
|
+
("class MyClass(Base, Mixin):", "MyClass"),
|
|
1935
|
+
("def not_class():", "def not_class():"),
|
|
1936
|
+
],
|
|
1937
|
+
description="Match class definitions for resource management integration",
|
|
1938
|
+
),
|
|
1939
|
+
"replace_subprocess_popen_basic": ValidatedPattern(
|
|
1940
|
+
name="replace_subprocess_popen_basic",
|
|
1941
|
+
pattern=r"subprocess\.Popen\(",
|
|
1942
|
+
replacement="managed_proc = resource_ctx.managed_process(subprocess.Popen(",
|
|
1943
|
+
test_cases=[
|
|
1944
|
+
(
|
|
1945
|
+
"subprocess.Popen(cmd)",
|
|
1946
|
+
"managed_proc = resource_ctx.managed_process(subprocess.Popen(cmd)",
|
|
1947
|
+
),
|
|
1948
|
+
(
|
|
1949
|
+
"result = subprocess.Popen(['ls'])",
|
|
1950
|
+
"result = managed_proc = resource_ctx.managed_process("
|
|
1951
|
+
"subprocess.Popen(['ls'])",
|
|
1952
|
+
),
|
|
1953
|
+
],
|
|
1954
|
+
description="Replace subprocess.Popen with managed version",
|
|
1955
|
+
),
|
|
1956
|
+
"replace_subprocess_popen_assignment": ValidatedPattern(
|
|
1957
|
+
name="replace_subprocess_popen_assignment",
|
|
1958
|
+
pattern=r"(\w+)\s*=\s*subprocess\.Popen\(",
|
|
1959
|
+
replacement=r"process = subprocess.Popen(",
|
|
1960
|
+
test_cases=[
|
|
1961
|
+
("proc = subprocess.Popen(cmd)", "process = subprocess.Popen(cmd)"),
|
|
1962
|
+
(
|
|
1963
|
+
"my_process = subprocess.Popen(['ls'])",
|
|
1964
|
+
"process = subprocess.Popen(['ls'])",
|
|
1965
|
+
),
|
|
1966
|
+
],
|
|
1967
|
+
description="Replace subprocess.Popen assignment with standard variable name",
|
|
1968
|
+
),
|
|
1969
|
+
"replace_path_open_write": ValidatedPattern(
|
|
1970
|
+
name="replace_path_open_write",
|
|
1971
|
+
pattern=r'(\w+)\.open\(["\']wb?["\'][^)]*\)',
|
|
1972
|
+
replacement=r"atomic_file_write(\1)",
|
|
1973
|
+
test_cases=[
|
|
1974
|
+
("path.open('w')", "atomic_file_write(path)"),
|
|
1975
|
+
("file.open('wb')", "atomic_file_write(file)"),
|
|
1976
|
+
],
|
|
1977
|
+
description="Replace file.open() with atomic_file_write",
|
|
1978
|
+
),
|
|
1979
|
+
"replace_path_write_text": ValidatedPattern(
|
|
1980
|
+
name="replace_path_write_text",
|
|
1981
|
+
pattern=r"(\w+)\.write_text\(([^)]+)\)",
|
|
1982
|
+
replacement=r"await SafeFileOperations.safe_write_text(\1, \2, atomic=True)",
|
|
1983
|
+
test_cases=[
|
|
1984
|
+
(
|
|
1985
|
+
"path.write_text(content)",
|
|
1986
|
+
"await SafeFileOperations.safe_write_text(path, content, atomic=True)",
|
|
1987
|
+
),
|
|
1988
|
+
(
|
|
1989
|
+
"file.write_text(data, encoding='utf-8')",
|
|
1990
|
+
"await SafeFileOperations.safe_write_text(file, data, encoding='utf-8',"
|
|
1991
|
+
" atomic=True)",
|
|
1992
|
+
),
|
|
1993
|
+
],
|
|
1994
|
+
description="Replace path.write_text with SafeFileOperations.safe_write_text",
|
|
1995
|
+
),
|
|
1996
|
+
# Agent-specific patterns - DocumentationAgent
|
|
1997
|
+
"agent_count_pattern": ValidatedPattern(
|
|
1998
|
+
name="agent_count_pattern",
|
|
1999
|
+
pattern=r"(\d+)\s+agents",
|
|
2000
|
+
replacement=r"\1 agents",
|
|
2001
|
+
test_cases=[
|
|
2002
|
+
("9 agents", "9 agents"),
|
|
2003
|
+
("12 agents", "12 agents"),
|
|
2004
|
+
("5 agents", "5 agents"),
|
|
2005
|
+
],
|
|
2006
|
+
description="Match agent count patterns for documentation consistency",
|
|
2007
|
+
flags=re.IGNORECASE,
|
|
2008
|
+
),
|
|
2009
|
+
"specialized_agent_count_pattern": ValidatedPattern(
|
|
2010
|
+
name="specialized_agent_count_pattern",
|
|
2011
|
+
pattern=r"(\d+)\s+specialized\s+agents",
|
|
2012
|
+
replacement=r"\1 specialized agents",
|
|
2013
|
+
test_cases=[
|
|
2014
|
+
("9 specialized agents", "9 specialized agents"),
|
|
2015
|
+
("12 specialized agents", "12 specialized agents"),
|
|
2016
|
+
("5 specialized agents", "5 specialized agents"),
|
|
2017
|
+
],
|
|
2018
|
+
description="Match specialized agent count patterns for documentation "
|
|
2019
|
+
"consistency",
|
|
2020
|
+
flags=re.IGNORECASE,
|
|
2021
|
+
),
|
|
2022
|
+
"total_agents_config_pattern": ValidatedPattern(
|
|
2023
|
+
name="total_agents_config_pattern",
|
|
2024
|
+
pattern=r'total_agents["\'][\s]*:\s*(\d+)',
|
|
2025
|
+
replacement=r'total_agents": \1',
|
|
2026
|
+
test_cases=[
|
|
2027
|
+
('total_agents": 9', 'total_agents": 9'),
|
|
2028
|
+
("total_agents': 12", 'total_agents": 12'),
|
|
2029
|
+
('total_agents" : 5', 'total_agents": 5'),
|
|
2030
|
+
],
|
|
2031
|
+
description="Match total agents configuration patterns",
|
|
2032
|
+
flags=re.IGNORECASE,
|
|
2033
|
+
),
|
|
2034
|
+
"sub_agent_count_pattern": ValidatedPattern(
|
|
2035
|
+
name="sub_agent_count_pattern",
|
|
2036
|
+
pattern=r"(\d+)\s+sub-agents",
|
|
2037
|
+
replacement=r"\1 sub-agents",
|
|
2038
|
+
test_cases=[
|
|
2039
|
+
("9 sub-agents", "9 sub-agents"),
|
|
2040
|
+
("12 sub-agents", "12 sub-agents"),
|
|
2041
|
+
("5 sub-agents", "5 sub-agents"),
|
|
2042
|
+
],
|
|
2043
|
+
description="Match sub-agent count patterns for documentation consistency",
|
|
2044
|
+
flags=re.IGNORECASE,
|
|
2045
|
+
),
|
|
2046
|
+
"update_agent_count": ValidatedPattern(
|
|
2047
|
+
name="update_agent_count",
|
|
2048
|
+
pattern=r"\b(\d+)\s+agents\b",
|
|
2049
|
+
replacement=r"NEW_COUNT agents",
|
|
2050
|
+
test_cases=[
|
|
2051
|
+
("9 agents working", "NEW_COUNT agents working"),
|
|
2052
|
+
("We have 12 agents ready", "We have NEW_COUNT agents ready"),
|
|
2053
|
+
("All 5 agents are active", "All NEW_COUNT agents are active"),
|
|
2054
|
+
],
|
|
2055
|
+
description="Update agent count references (NEW_COUNT replaced dynamically)",
|
|
2056
|
+
),
|
|
2057
|
+
"update_specialized_agent_count": ValidatedPattern(
|
|
2058
|
+
name="update_specialized_agent_count",
|
|
2059
|
+
pattern=r"\b(\d+)\s+specialized\s+agents\b",
|
|
2060
|
+
replacement=r"NEW_COUNT specialized agents",
|
|
2061
|
+
test_cases=[
|
|
2062
|
+
(
|
|
2063
|
+
"9 specialized agents available",
|
|
2064
|
+
"NEW_COUNT specialized agents available",
|
|
2065
|
+
),
|
|
2066
|
+
("We have 12 specialized agents", "We have NEW_COUNT specialized agents"),
|
|
2067
|
+
("All 5 specialized agents work", "All NEW_COUNT specialized agents work"),
|
|
2068
|
+
],
|
|
2069
|
+
description="Update specialized agent count references (NEW_COUNT replaced"
|
|
2070
|
+
" dynamically)",
|
|
2071
|
+
),
|
|
2072
|
+
"update_total_agents_config": ValidatedPattern(
|
|
2073
|
+
name="update_total_agents_config",
|
|
2074
|
+
pattern=r'total_agents["\'][\s]*:\s*\d+',
|
|
2075
|
+
replacement=r'total_agents": NEW_COUNT',
|
|
2076
|
+
test_cases=[
|
|
2077
|
+
('total_agents": 9', 'total_agents": NEW_COUNT'),
|
|
2078
|
+
("total_agents': 12", 'total_agents": NEW_COUNT'),
|
|
2079
|
+
('total_agents" : 5', 'total_agents": NEW_COUNT'),
|
|
2080
|
+
],
|
|
2081
|
+
description="Update total agents configuration (NEW_COUNT replaced"
|
|
2082
|
+
" dynamically)",
|
|
2083
|
+
),
|
|
2084
|
+
"update_sub_agent_count": ValidatedPattern(
|
|
2085
|
+
name="update_sub_agent_count",
|
|
2086
|
+
pattern=r"\b(\d+)\s+sub-agents\b",
|
|
2087
|
+
replacement=r"NEW_COUNT sub-agents",
|
|
2088
|
+
test_cases=[
|
|
2089
|
+
("9 sub-agents working", "NEW_COUNT sub-agents working"),
|
|
2090
|
+
("We have 12 sub-agents ready", "We have NEW_COUNT sub-agents ready"),
|
|
2091
|
+
("All 5 sub-agents are active", "All NEW_COUNT sub-agents are active"),
|
|
2092
|
+
],
|
|
2093
|
+
description="Update sub-agent count references (NEW_COUNT replaced"
|
|
2094
|
+
" dynamically)",
|
|
2095
|
+
),
|
|
2096
|
+
# Agent-specific patterns - TestSpecialistAgent
|
|
2097
|
+
"fixture_not_found_pattern": ValidatedPattern(
|
|
2098
|
+
name="fixture_not_found_pattern",
|
|
2099
|
+
pattern=r"fixture '(\w+)' not found",
|
|
2100
|
+
replacement=r"fixture '\1' not found",
|
|
2101
|
+
test_cases=[
|
|
2102
|
+
("fixture 'temp_pkg_path' not found", "fixture 'temp_pkg_path' not found"),
|
|
2103
|
+
("fixture 'console' not found", "fixture 'console' not found"),
|
|
2104
|
+
("fixture 'tmp_path' not found", "fixture 'tmp_path' not found"),
|
|
2105
|
+
],
|
|
2106
|
+
description="Match pytest fixture not found error patterns",
|
|
2107
|
+
),
|
|
2108
|
+
"import_error_pattern": ValidatedPattern(
|
|
2109
|
+
name="import_error_pattern",
|
|
2110
|
+
pattern=r"ImportError|ModuleNotFoundError",
|
|
2111
|
+
replacement=r"ImportError",
|
|
2112
|
+
test_cases=[
|
|
2113
|
+
("ImportError: No module named", "ImportError: No module named"),
|
|
2114
|
+
("ModuleNotFoundError: No module", "ImportError: No module"),
|
|
2115
|
+
("Other error types", "Other error types"), # No change
|
|
2116
|
+
],
|
|
2117
|
+
description="Match import error patterns in test failures",
|
|
2118
|
+
),
|
|
2119
|
+
"assertion_error_pattern": ValidatedPattern(
|
|
2120
|
+
name="assertion_error_pattern",
|
|
2121
|
+
pattern=r"assert .+ ==",
|
|
2122
|
+
replacement=r"AssertionError",
|
|
2123
|
+
test_cases=[
|
|
2124
|
+
(
|
|
2125
|
+
"AssertionError: Values differ",
|
|
2126
|
+
"AssertionError: Values differ",
|
|
2127
|
+
), # No change
|
|
2128
|
+
("assert result == expected", "AssertionError expected"),
|
|
2129
|
+
("Normal code", "Normal code"), # No change
|
|
2130
|
+
],
|
|
2131
|
+
description="Match assertion error patterns in test failures",
|
|
2132
|
+
),
|
|
2133
|
+
"attribute_error_pattern": ValidatedPattern(
|
|
2134
|
+
name="attribute_error_pattern",
|
|
2135
|
+
pattern=r"AttributeError: .+ has no attribute",
|
|
2136
|
+
replacement=r"AttributeError: has no attribute",
|
|
2137
|
+
test_cases=[
|
|
2138
|
+
(
|
|
2139
|
+
"AttributeError: 'Mock' has no attribute 'test'",
|
|
2140
|
+
"AttributeError: has no attribute 'test'",
|
|
2141
|
+
),
|
|
2142
|
+
(
|
|
2143
|
+
"AttributeError: 'NoneType' has no attribute 'value'",
|
|
2144
|
+
"AttributeError: has no attribute 'value'",
|
|
2145
|
+
),
|
|
2146
|
+
("Normal error", "Normal error"), # No change
|
|
2147
|
+
],
|
|
2148
|
+
description="Match attribute error patterns in test failures",
|
|
2149
|
+
),
|
|
2150
|
+
"mock_spec_error_pattern": ValidatedPattern(
|
|
2151
|
+
name="mock_spec_error_pattern",
|
|
2152
|
+
pattern=r"MockSpec|spec.*Mock",
|
|
2153
|
+
replacement=r"MockSpec",
|
|
2154
|
+
test_cases=[
|
|
2155
|
+
("MockSpec error occurred", "MockSpec error occurred"),
|
|
2156
|
+
("spec for Mock failed", "MockSpec failed"),
|
|
2157
|
+
("Normal mock usage", "Normal mock usage"), # No change
|
|
2158
|
+
],
|
|
2159
|
+
description="Match mock specification error patterns in test failures",
|
|
2160
|
+
),
|
|
2161
|
+
"hardcoded_path_pattern": ValidatedPattern(
|
|
2162
|
+
name="hardcoded_path_pattern",
|
|
2163
|
+
pattern=r"'/test/path'|/test/path",
|
|
2164
|
+
replacement=r"str(tmp_path)",
|
|
2165
|
+
test_cases=[
|
|
2166
|
+
("'/test/path'", "str(tmp_path)"),
|
|
2167
|
+
("/test/path", "str(tmp_path)"),
|
|
2168
|
+
("'/other/path'", "'/other/path'"), # No change
|
|
2169
|
+
],
|
|
2170
|
+
description="Match hardcoded test path patterns that should use tmp_path",
|
|
2171
|
+
),
|
|
2172
|
+
"missing_name_pattern": ValidatedPattern(
|
|
2173
|
+
name="missing_name_pattern",
|
|
2174
|
+
pattern=r"name '(\w+)' is not defined",
|
|
2175
|
+
replacement=r"name '\1' is not defined",
|
|
2176
|
+
test_cases=[
|
|
2177
|
+
("name 'pytest' is not defined", "name 'pytest' is not defined"),
|
|
2178
|
+
("name 'Mock' is not defined", "name 'Mock' is not defined"),
|
|
2179
|
+
("name 'Path' is not defined", "name 'Path' is not defined"),
|
|
2180
|
+
],
|
|
2181
|
+
description="Match undefined name patterns in test failures",
|
|
2182
|
+
),
|
|
2183
|
+
"pydantic_validation_pattern": ValidatedPattern(
|
|
2184
|
+
name="pydantic_validation_pattern",
|
|
2185
|
+
pattern=r"ValidationError|validation error",
|
|
2186
|
+
replacement=r"ValidationError",
|
|
2187
|
+
test_cases=[
|
|
2188
|
+
("ValidationError: field required", "ValidationError: field required"),
|
|
2189
|
+
("validation error in field", "ValidationError in field"),
|
|
2190
|
+
("Normal validation", "Normal validation"), # No change
|
|
2191
|
+
],
|
|
2192
|
+
description="Match Pydantic validation error patterns in test failures",
|
|
2193
|
+
),
|
|
2194
|
+
# Agent-specific patterns - PerformanceAgent
|
|
2195
|
+
"list_append_inefficiency_pattern": ValidatedPattern(
|
|
2196
|
+
name="list_append_inefficiency_pattern",
|
|
2197
|
+
pattern=r"(\s*)(\w+)\s*\+=\s*\[([^]]+)\]",
|
|
2198
|
+
replacement=r"\1\2.append(\3)",
|
|
2199
|
+
test_cases=[
|
|
2200
|
+
(" items += [new_item]", " items.append(new_item)"),
|
|
2201
|
+
("results += [result]", "results.append(result)"),
|
|
2202
|
+
(" data += [value, other]", " data.append(value, other)"),
|
|
2203
|
+
],
|
|
2204
|
+
description="Replace inefficient list concatenation with append for"
|
|
2205
|
+
" performance",
|
|
2206
|
+
),
|
|
2207
|
+
"string_concatenation_pattern": ValidatedPattern(
|
|
2208
|
+
name="string_concatenation_pattern",
|
|
2209
|
+
pattern=r"(\s*)(\w+)\s*\+=\s*(.+)",
|
|
2210
|
+
replacement=r"\1\2_parts.append(\3)",
|
|
2211
|
+
test_cases=[
|
|
2212
|
+
(" text += new_text", " text_parts.append(new_text)"),
|
|
2213
|
+
("result += line", "result_parts.append(line)"),
|
|
2214
|
+
(" output += data", " output_parts.append(data)"),
|
|
2215
|
+
],
|
|
2216
|
+
description="Replace string concatenation with list append for performance "
|
|
2217
|
+
"optimization",
|
|
2218
|
+
),
|
|
2219
|
+
# Enhanced performance patterns for PerformanceAgent optimization
|
|
2220
|
+
"nested_loop_detection_pattern": ValidatedPattern(
|
|
2221
|
+
name="nested_loop_detection_pattern",
|
|
2222
|
+
pattern=r"(\s*)(for\s+\w+\s+in\s+.*:)",
|
|
2223
|
+
replacement=r"\1# Performance: Potential nested loop - check complexity\n\1\2",
|
|
2224
|
+
test_cases=[
|
|
2225
|
+
(
|
|
2226
|
+
" for j in other:",
|
|
2227
|
+
" # Performance: Potential nested loop - check complexity\n "
|
|
2228
|
+
" for j in other:",
|
|
2229
|
+
),
|
|
2230
|
+
(
|
|
2231
|
+
"for i in items:",
|
|
2232
|
+
"# Performance: Potential nested loop - check complexity\nfor i"
|
|
2233
|
+
" in items:",
|
|
2234
|
+
),
|
|
2235
|
+
],
|
|
2236
|
+
description="Detect loop patterns that might be nested creating O(n²)"
|
|
2237
|
+
" complexity",
|
|
2238
|
+
flags=re.MULTILINE,
|
|
2239
|
+
),
|
|
2240
|
+
"list_extend_optimization_pattern": ValidatedPattern(
|
|
2241
|
+
name="list_extend_optimization_pattern",
|
|
2242
|
+
pattern=r"(\s*)(\w+)\s*\+=\s*\[([^]]+(?:,\s*[^]]+)*)\]",
|
|
2243
|
+
replacement=r"\1\2.extend([\3])",
|
|
2244
|
+
test_cases=[
|
|
2245
|
+
(" items += [a, b, c]", " items.extend([a, b, c])"),
|
|
2246
|
+
("results += [x, y]", "results.extend([x, y])"),
|
|
2247
|
+
(" data += [single_item]", " data.extend([single_item])"),
|
|
2248
|
+
],
|
|
2249
|
+
description="Replace list concatenation with extend for better performance with multiple items",
|
|
2250
|
+
),
|
|
2251
|
+
"inefficient_string_join_pattern": ValidatedPattern(
|
|
2252
|
+
name="inefficient_string_join_pattern",
|
|
2253
|
+
pattern=r"(\s*)(\w+)\s*=\s*([\"'])([\"'])\s*\.\s*join\(\s*\[\s*\]\s*\)",
|
|
2254
|
+
replacement=r"\1\2 = \3\4 # Performance: Use empty string directly instead"
|
|
2255
|
+
r" of join",
|
|
2256
|
+
test_cases=[
|
|
2257
|
+
(
|
|
2258
|
+
' text = "".join([])',
|
|
2259
|
+
' text = "" # Performance: Use empty string directly instead of'
|
|
2260
|
+
" join",
|
|
2261
|
+
),
|
|
2262
|
+
(
|
|
2263
|
+
"result = ''.join([])",
|
|
2264
|
+
"result = '' # Performance: Use empty string directly instead of join",
|
|
2265
|
+
),
|
|
2266
|
+
],
|
|
2267
|
+
description="Replace inefficient empty list join with direct empty string"
|
|
2268
|
+
" assignment",
|
|
2269
|
+
),
|
|
2270
|
+
"repeated_len_in_loop_pattern": ValidatedPattern(
|
|
2271
|
+
name="repeated_len_in_loop_pattern",
|
|
2272
|
+
pattern=r"(\s*)(len\(\s*(\w+)\s*\))",
|
|
2273
|
+
replacement=r"\1# Performance: Consider caching len(\3) if used "
|
|
2274
|
+
r"repeatedly\n\1\2",
|
|
2275
|
+
test_cases=[
|
|
2276
|
+
(
|
|
2277
|
+
" len(items)",
|
|
2278
|
+
" # Performance: Consider caching len(items) if used repeatedly\n"
|
|
2279
|
+
" len(items)",
|
|
2280
|
+
),
|
|
2281
|
+
(
|
|
2282
|
+
"len(data)",
|
|
2283
|
+
"# Performance: Consider caching len(data) if used "
|
|
2284
|
+
"repeatedly\nlen(data)",
|
|
2285
|
+
),
|
|
2286
|
+
],
|
|
2287
|
+
description="Suggest caching len() calls that might be repeated",
|
|
2288
|
+
),
|
|
2289
|
+
"list_comprehension_optimization_pattern": ValidatedPattern(
|
|
2290
|
+
name="list_comprehension_optimization_pattern",
|
|
2291
|
+
pattern=r"(\s*)(\w+)\.append\(([^)]+)\)",
|
|
2292
|
+
replacement=r"\1# Performance: Consider list comprehension if this is in a "
|
|
2293
|
+
r"simple loop\n\1\2.append(\3)",
|
|
2294
|
+
test_cases=[
|
|
2295
|
+
(
|
|
2296
|
+
" results.append(item * 2)",
|
|
2297
|
+
" # Performance: Consider list comprehension if this is in a "
|
|
2298
|
+
"simple loop\n results.append(item * 2)",
|
|
2299
|
+
),
|
|
2300
|
+
(
|
|
2301
|
+
"data.append(value)",
|
|
2302
|
+
"# Performance: Consider list comprehension if this is in a simple"
|
|
2303
|
+
" loop\ndata.append(value)",
|
|
2304
|
+
),
|
|
2305
|
+
],
|
|
2306
|
+
description="Suggest list comprehensions for simple append patterns",
|
|
2307
|
+
),
|
|
2308
|
+
# Enhanced security patterns for improved SecurityAgent capabilities
|
|
2309
|
+
"detect_crypto_weak_algorithms": ValidatedPattern(
|
|
2310
|
+
name="detect_crypto_weak_algorithms",
|
|
2311
|
+
pattern=r"\b(?:md4|md5|sha1|des|3des|rc4)\b",
|
|
2312
|
+
replacement="[WEAK_CRYPTO_ALGORITHM]",
|
|
2313
|
+
description="Detect weak cryptographic algorithms",
|
|
2314
|
+
flags=re.IGNORECASE,
|
|
2315
|
+
global_replace=True,
|
|
2316
|
+
test_cases=[
|
|
2317
|
+
("hashlib.md5()", "hashlib.[WEAK_CRYPTO_ALGORITHM]()"),
|
|
2318
|
+
("using DES encryption", "using [WEAK_CRYPTO_ALGORITHM] encryption"),
|
|
2319
|
+
("SHA256 is good", "SHA256 is good"), # No change
|
|
2320
|
+
("MD4 hashing", "[WEAK_CRYPTO_ALGORITHM] hashing"),
|
|
2321
|
+
],
|
|
2322
|
+
),
|
|
2323
|
+
"detect_hardcoded_credentials_advanced": ValidatedPattern(
|
|
2324
|
+
name="detect_hardcoded_credentials_advanced",
|
|
2325
|
+
pattern=r"(?i)\b(?:password|passwd|pwd|secret|key|token|api_key|"
|
|
2326
|
+
r'apikey)\s*[:=]\s*["\'][^"\']{3,}["\']',
|
|
2327
|
+
replacement="[HARDCODED_CREDENTIAL_DETECTED]",
|
|
2328
|
+
description="Detect hardcoded credentials in various formats "
|
|
2329
|
+
"(case insensitive)",
|
|
2330
|
+
flags=re.IGNORECASE,
|
|
2331
|
+
global_replace=True,
|
|
2332
|
+
test_cases=[
|
|
2333
|
+
('password="secret123"', "[HARDCODED_CREDENTIAL_DETECTED]"),
|
|
2334
|
+
("API_KEY = 'abc-123-def'", "[HARDCODED_CREDENTIAL_DETECTED]"),
|
|
2335
|
+
('token: "my-secret-token"', "[HARDCODED_CREDENTIAL_DETECTED]"),
|
|
2336
|
+
(
|
|
2337
|
+
'username = "user"',
|
|
2338
|
+
'username = "user"',
|
|
2339
|
+
), # No match - not a credential field
|
|
2340
|
+
],
|
|
2341
|
+
),
|
|
2342
|
+
"detect_subprocess_shell_injection": ValidatedPattern(
|
|
2343
|
+
name="detect_subprocess_shell_injection",
|
|
2344
|
+
pattern=r"\bsubprocess\.\w+\([^)]*shell\s*=\s*True[^)]*\)",
|
|
2345
|
+
replacement="[SHELL_INJECTION_RISK]",
|
|
2346
|
+
description="Detect subprocess calls with shell=True",
|
|
2347
|
+
global_replace=True,
|
|
2348
|
+
test_cases=[
|
|
2349
|
+
("subprocess.run(cmd, shell=True)", "[SHELL_INJECTION_RISK]"),
|
|
2350
|
+
("subprocess.call(command, shell=True)", "[SHELL_INJECTION_RISK]"),
|
|
2351
|
+
(
|
|
2352
|
+
"subprocess.run(cmd, shell=False)",
|
|
2353
|
+
"subprocess.run(cmd, shell=False)",
|
|
2354
|
+
), # No change
|
|
2355
|
+
],
|
|
2356
|
+
),
|
|
2357
|
+
"detect_regex_redos_vulnerable": ValidatedPattern(
|
|
2358
|
+
name="detect_regex_redos_vulnerable",
|
|
2359
|
+
pattern=r"\([^)]+\)[\*\+]",
|
|
2360
|
+
replacement="[REDOS_VULNERABLE_PATTERN]",
|
|
2361
|
+
description="Detect regex patterns vulnerable to ReDoS attacks (simplified"
|
|
2362
|
+
" detection)",
|
|
2363
|
+
global_replace=True,
|
|
2364
|
+
test_cases=[
|
|
2365
|
+
("(a+)*", "[REDOS_VULNERABLE_PATTERN]"),
|
|
2366
|
+
("(a*)+", "[REDOS_VULNERABLE_PATTERN]"),
|
|
2367
|
+
("(abc)+", "[REDOS_VULNERABLE_PATTERN]"),
|
|
2368
|
+
("simple+", "simple+"), # No change - not vulnerable
|
|
2369
|
+
],
|
|
2370
|
+
),
|
|
2371
|
+
"fix_hardcoded_jwt_secret": ValidatedPattern(
|
|
2372
|
+
name="fix_hardcoded_jwt_secret",
|
|
2373
|
+
pattern=r'(JWT_SECRET|jwt_secret)\s*=\s*["\'][^"\']+["\']',
|
|
2374
|
+
replacement=r'\1 = os.getenv("JWT_SECRET", "")',
|
|
2375
|
+
description="Replace hardcoded JWT secrets with environment variables",
|
|
2376
|
+
global_replace=True,
|
|
2377
|
+
test_cases=[
|
|
2378
|
+
(
|
|
2379
|
+
'JWT_SECRET = "hardcoded-secret"',
|
|
2380
|
+
'JWT_SECRET = os.getenv("JWT_SECRET", "")',
|
|
2381
|
+
),
|
|
2382
|
+
('jwt_secret = "my-secret"', 'jwt_secret = os.getenv("JWT_SECRET", "")'),
|
|
2383
|
+
('other_var = "value"', 'other_var = "value"'), # No change
|
|
2384
|
+
],
|
|
2385
|
+
),
|
|
2386
|
+
"detect_unsafe_pickle_usage": ValidatedPattern(
|
|
2387
|
+
name="detect_unsafe_pickle_usage",
|
|
2388
|
+
pattern=r"\bpickle\.(loads?)\s*\(",
|
|
2389
|
+
replacement=r"[UNSAFE_PICKLE_USAGE].\1(",
|
|
2390
|
+
description="Detect potentially unsafe pickle usage",
|
|
2391
|
+
global_replace=True,
|
|
2392
|
+
test_cases=[
|
|
2393
|
+
("pickle.load(file)", "[UNSAFE_PICKLE_USAGE].load(file)"),
|
|
2394
|
+
("pickle.loads(data)", "[UNSAFE_PICKLE_USAGE].loads(data)"),
|
|
2395
|
+
("my_pickle.load(file)", "my_pickle.load(file)"), # No change
|
|
2396
|
+
],
|
|
2397
|
+
),
|
|
2398
|
+
# Agent-specific patterns for validation and analysis
|
|
2399
|
+
"extract_range_size": ValidatedPattern(
|
|
2400
|
+
name="extract_range_size",
|
|
2401
|
+
pattern=r"range\((\d+)\)",
|
|
2402
|
+
replacement=r"\1",
|
|
2403
|
+
description="Extract numeric size from range() calls",
|
|
2404
|
+
test_cases=[
|
|
2405
|
+
("range(1000)", "1000"),
|
|
2406
|
+
("range(50)", "50"),
|
|
2407
|
+
("for i in range(100):", "for i in 100:"),
|
|
2408
|
+
("other_func(10)", "other_func(10)"), # No change
|
|
2409
|
+
],
|
|
2410
|
+
),
|
|
2411
|
+
"match_error_code_patterns": ValidatedPattern(
|
|
2412
|
+
name="match_error_code_patterns",
|
|
2413
|
+
pattern=r"F\d{3}|I\d{3}|E\d{3}|W\d{3}",
|
|
2414
|
+
replacement=r"\g<0>",
|
|
2415
|
+
description="Match standard error codes like F403, I001, etc.",
|
|
2416
|
+
test_cases=[
|
|
2417
|
+
("F403", "F403"),
|
|
2418
|
+
("I001", "I001"),
|
|
2419
|
+
("E302", "E302"),
|
|
2420
|
+
("W291", "W291"),
|
|
2421
|
+
("ABC123", "ABC123"), # No change
|
|
2422
|
+
],
|
|
2423
|
+
),
|
|
2424
|
+
"match_validation_patterns": ValidatedPattern(
|
|
2425
|
+
name="match_validation_patterns",
|
|
2426
|
+
pattern=r"if\s+not\s+\w+\s*:|if\s+\w+\s+is\s+None\s*:|if\s+len\(\w+\)\s*[<>=]",
|
|
2427
|
+
replacement=r"\g<0>",
|
|
2428
|
+
description="Match common validation patterns for extraction",
|
|
2429
|
+
test_cases=[
|
|
2430
|
+
("if not var:", "if not var:"),
|
|
2431
|
+
("if item is None:", "if item is None:"),
|
|
2432
|
+
("if len(items) >", "if len(items) >"),
|
|
2433
|
+
("other code", "other code"), # No change
|
|
2434
|
+
],
|
|
2435
|
+
),
|
|
2436
|
+
"match_loop_patterns": ValidatedPattern(
|
|
2437
|
+
name="match_loop_patterns",
|
|
2438
|
+
pattern=r"\s*for\s+.*:\s*$|\s*while\s+.*:\s*$",
|
|
2439
|
+
replacement=r"\g<0>",
|
|
2440
|
+
description="Match for/while loop patterns",
|
|
2441
|
+
test_cases=[
|
|
2442
|
+
(" for i in items:", " for i in items:"),
|
|
2443
|
+
(" while condition:", " while condition:"),
|
|
2444
|
+
("regular line", "regular line"), # No change
|
|
2445
|
+
],
|
|
2446
|
+
),
|
|
2447
|
+
"match_star_import": ValidatedPattern(
|
|
2448
|
+
name="match_star_import",
|
|
2449
|
+
pattern=r"from\s+\w+\s+import\s+\*",
|
|
2450
|
+
replacement=r"\g<0>",
|
|
2451
|
+
description="Match star import statements",
|
|
2452
|
+
test_cases=[
|
|
2453
|
+
("from module import *", "from module import *"),
|
|
2454
|
+
("from my_pkg import *", "from my_pkg import *"),
|
|
2455
|
+
("from module import specific", "from module import specific"), # No change
|
|
2456
|
+
],
|
|
2457
|
+
),
|
|
2458
|
+
"clean_unused_import": ValidatedPattern(
|
|
2459
|
+
name="clean_unused_import",
|
|
2460
|
+
pattern=r"^\s*import\s+unused_module\s*$",
|
|
2461
|
+
replacement=r"",
|
|
2462
|
+
description="Remove unused import statements (example with unused_module)",
|
|
2463
|
+
test_cases=[
|
|
2464
|
+
(" import unused_module", ""),
|
|
2465
|
+
(
|
|
2466
|
+
"import other_module",
|
|
2467
|
+
"import other_module",
|
|
2468
|
+
), # No change for different module
|
|
2469
|
+
],
|
|
2470
|
+
),
|
|
2471
|
+
"clean_unused_from_import": ValidatedPattern(
|
|
2472
|
+
name="clean_unused_from_import",
|
|
2473
|
+
pattern=r"^\s*from\s+\w+\s+import\s+.*\bunused_item\b",
|
|
2474
|
+
replacement=r"\g<0>",
|
|
2475
|
+
description="Match from import statements with unused items (example with "
|
|
2476
|
+
"unused_item)",
|
|
2477
|
+
test_cases=[
|
|
2478
|
+
(
|
|
2479
|
+
"from module import used, unused_item",
|
|
2480
|
+
"from module import used, unused_item",
|
|
2481
|
+
),
|
|
2482
|
+
("from other import needed", "from other import needed"), # No change
|
|
2483
|
+
],
|
|
2484
|
+
),
|
|
2485
|
+
"clean_import_commas": ValidatedPattern(
|
|
2486
|
+
name="clean_import_commas",
|
|
2487
|
+
pattern=r",\s*,",
|
|
2488
|
+
replacement=r",",
|
|
2489
|
+
description="Clean double commas in import statements",
|
|
2490
|
+
test_cases=[
|
|
2491
|
+
("from module import a, , b", "from module import a, b"),
|
|
2492
|
+
("items = [a, , b]", "items = [a, b]"),
|
|
2493
|
+
("normal, list", "normal, list"), # No change
|
|
2494
|
+
],
|
|
2495
|
+
),
|
|
2496
|
+
"clean_trailing_import_comma": ValidatedPattern(
|
|
2497
|
+
name="clean_trailing_import_comma",
|
|
2498
|
+
pattern=r",\s*$",
|
|
2499
|
+
replacement=r"",
|
|
2500
|
+
description="Remove trailing commas from lines",
|
|
2501
|
+
test_cases=[
|
|
2502
|
+
("from module import a, b,", "from module import a, b"),
|
|
2503
|
+
("import item,", "import item"),
|
|
2504
|
+
("normal line", "normal line"), # No change
|
|
2505
|
+
],
|
|
2506
|
+
),
|
|
2507
|
+
"clean_import_prefix": ValidatedPattern(
|
|
2508
|
+
name="clean_import_prefix",
|
|
2509
|
+
pattern=r"import\s*,\s*",
|
|
2510
|
+
replacement=r"import ",
|
|
2511
|
+
description="Clean malformed import statements with leading comma",
|
|
2512
|
+
test_cases=[
|
|
2513
|
+
("import ,module", "import module"),
|
|
2514
|
+
("from pkg import ,item", "from pkg import item"),
|
|
2515
|
+
("import normal", "import normal"), # No change
|
|
2516
|
+
],
|
|
2517
|
+
),
|
|
2518
|
+
"extract_unused_import_name": ValidatedPattern(
|
|
2519
|
+
name="extract_unused_import_name",
|
|
2520
|
+
pattern=r"unused import ['\"]([^'\"]+)['\"]",
|
|
2521
|
+
replacement=r"\1",
|
|
2522
|
+
description="Extract import name from vulture unused import messages",
|
|
2523
|
+
test_cases=[
|
|
2524
|
+
("unused import 'module_name'", "module_name"),
|
|
2525
|
+
('unused import "other_module"', "other_module"),
|
|
2526
|
+
("some other text", "some other text"), # No change
|
|
2527
|
+
],
|
|
2528
|
+
),
|
|
2529
|
+
"normalize_whitespace": ValidatedPattern(
|
|
2530
|
+
name="normalize_whitespace",
|
|
2531
|
+
pattern=r"\s+",
|
|
2532
|
+
replacement=r" ",
|
|
2533
|
+
description="Normalize multiple whitespace to single space",
|
|
2534
|
+
global_replace=True,
|
|
2535
|
+
test_cases=[
|
|
2536
|
+
("import module", "import module"),
|
|
2537
|
+
("from pkg import item", "from pkg import item"),
|
|
2538
|
+
("normal text", "normal text"), # No change with single spaces
|
|
2539
|
+
],
|
|
2540
|
+
),
|
|
2541
|
+
}
|
|
2542
|
+
|
|
2543
|
+
|
|
2544
|
+
def validate_all_patterns() -> dict[str, bool]:
    """Validate every registered pattern.

    Returns:
        Mapping of pattern name -> True if the pattern passed its
        self-validation, False otherwise. Failures are also reported
        to stdout so they are visible in CI logs.
    """
    # Bug fix: the original initialized `validate_results` but assigned into
    # an undefined `results` inside the loop, raising NameError on the first
    # pattern. Use a single, consistent name throughout.
    results: dict[str, bool] = {}
    for name, pattern in SAFE_PATTERNS.items():
        try:
            pattern._validate()
            results[name] = True
        except ValueError as e:
            # Record the failure but keep validating the remaining patterns.
            results[name] = False
            print(f"Pattern '{name}' failed validation: {e}")
    return results
|
|
2555
|
+
|
|
2556
|
+
|
|
2557
|
+
def find_pattern_for_text(text: str) -> list[str]:
    """Return the names of every registered pattern that matches *text*."""
    matching: list[str] = []
    for name, pattern in SAFE_PATTERNS.items():
        if pattern.test(text):
            matching.append(name)
    return matching
|
|
2560
|
+
|
|
2561
|
+
|
|
2562
|
+
def apply_safe_replacement(text: str, pattern_name: str) -> str:
    """Apply the named safe replacement pattern to *text*.

    Raises:
        ValueError: If no pattern with that name is registered.
    """
    pattern = SAFE_PATTERNS.get(pattern_name)
    if pattern is None:
        raise ValueError(f"Unknown pattern: {pattern_name}")
    return pattern.apply(text)
|
|
2568
|
+
|
|
2569
|
+
|
|
2570
|
+
def get_pattern_description(pattern_name: str) -> str:
    """Return the named pattern's description, or "Unknown pattern"."""
    pattern = SAFE_PATTERNS.get(pattern_name)
    return "Unknown pattern" if pattern is None else pattern.description
|
|
2576
|
+
|
|
2577
|
+
|
|
2578
|
+
def fix_multi_word_hyphenation(text: str) -> str:
    """Collapse spaced hyphens in multi-word chains.

    Handles inputs such as 'pytest - hypothesis - specialist' by applying the
    'fix_spaced_hyphens' pattern repeatedly until no further change occurs.
    """
    spaced_hyphens = SAFE_PATTERNS["fix_spaced_hyphens"]
    return spaced_hyphens.apply_iteratively(text)
|
|
2585
|
+
|
|
2586
|
+
|
|
2587
|
+
def update_pyproject_version(content: str, new_version: str) -> str:
    """
    Update version in pyproject.toml content with safe regex.

    Args:
        content: The pyproject.toml file content
        new_version: The new version to set

    Returns:
        Updated content with new version
    """
    import re

    base = SAFE_PATTERNS["update_pyproject_version"]
    # Construct a throwaway ValidatedPattern so the version-specific
    # replacement is checked against its test case before being applied.
    checked = ValidatedPattern(
        name="temp_version_update",
        pattern=base.pattern,
        replacement=f"\\g<1>{new_version}\\g<3>",
        description=f"Update version to {new_version}",
        test_cases=[
            ('version = "1.2.3"', f'version = "{new_version}"'),
        ],
    )

    # MULTILINE so the version assignment is matched on its own line.
    compiled = re.compile(base.pattern, re.MULTILINE)
    return compiled.sub(checked.replacement, content)
|
|
2616
|
+
|
|
2617
|
+
|
|
2618
|
+
def apply_formatting_fixes(content: str) -> str:
    """Apply standard formatting fixes to content."""
    import re

    # Strip trailing whitespace; MULTILINE anchors the pattern per line.
    trailing = SAFE_PATTERNS["remove_trailing_whitespace"]
    content = re.sub(
        trailing.pattern, trailing.replacement, content, flags=re.MULTILINE
    )

    # Collapse runs of blank lines down to the allowed maximum.
    return SAFE_PATTERNS["normalize_multiple_newlines"].apply(content)
|
|
2632
|
+
|
|
2633
|
+
|
|
2634
|
+
def apply_security_fixes(content: str) -> str:
    """Apply all security-related fixes to content."""
    security_pattern_names = (
        # Subprocess shell-injection fixes
        "fix_subprocess_run_shell",
        "fix_subprocess_call_shell",
        "fix_subprocess_popen_shell",
        # Unsafe library usage
        "fix_unsafe_yaml_load",
        "fix_weak_md5_hash",
        "fix_weak_sha1_hash",
        "fix_insecure_random_choice",
        # Debug prints that may leak secrets
        "remove_debug_prints_with_secrets",
    )
    for name in security_pattern_names:
        content = SAFE_PATTERNS[name].apply(content)
    return content
|
|
2651
|
+
|
|
2652
|
+
|
|
2653
|
+
def apply_test_fixes(content: str) -> str:
    """Apply test-related fixes to content."""
    normalizer = SAFE_PATTERNS["normalize_assert_statements"]
    return normalizer.apply(content)
|
|
2656
|
+
|
|
2657
|
+
|
|
2658
|
+
def is_valid_job_id(job_id: str) -> bool:
    """Validate *job_id* against the safe alphanumeric job-id pattern."""
    validator = SAFE_PATTERNS["validate_job_id_alphanumeric"]
    return validator.test(job_id)
|
|
2661
|
+
|
|
2662
|
+
|
|
2663
|
+
def remove_coverage_fail_under(addopts: str) -> str:
    """Strip coverage fail-under flags from pytest addopts."""
    stripper = SAFE_PATTERNS["remove_coverage_fail_under"]
    return stripper.apply(addopts)
|
|
2666
|
+
|
|
2667
|
+
|
|
2668
|
+
def update_coverage_requirement(content: str, new_coverage: float) -> str:
    """Update the --cov-fail-under coverage requirement in *content*."""
    import re

    base = SAFE_PATTERNS["update_coverage_requirement"]
    target = f"{new_coverage:.0f}"
    # Construct a throwaway ValidatedPattern so the coverage-specific
    # replacement is checked against its test case before being applied.
    checked = ValidatedPattern(
        name="temp_coverage_update",
        pattern=base.pattern,
        replacement=f"\\1{target}",
        description=f"Update coverage to {new_coverage}",
        test_cases=[
            ("--cov-fail-under=85", f"--cov-fail-under={target}"),
        ],
    )

    return re.sub(base.pattern, checked.replacement, content)
|
|
2685
|
+
|
|
2686
|
+
|
|
2687
|
+
def update_repo_revision(content: str, repo_url: str, new_revision: str) -> str:
    """
    Update a repository's revision inside JSON-like config content.

    Args:
        content: The config file content (JSON-like format)
        repo_url: The repository URL whose entry should be updated
        new_revision: The revision string to write

    Returns:
        The content with the matching ``"rev"`` value replaced.
    """
    import re

    # Escape the URL so any regex metacharacters in it match literally.
    escaped_url = re.escape(repo_url)
    # DOTALL lets ".*?" span newlines between the repo line and its rev line.
    rev_regex = re.compile(
        rf'("repo": "{escaped_url}".*?"rev": )"([^"]+)"',
        re.DOTALL,
    )
    return rev_regex.sub(rf'\1"{new_revision}"', content)
|
|
2708
|
+
|
|
2709
|
+
|
|
2710
|
+
def sanitize_internal_urls(text: str) -> str:
    """
    Replace internal URLs in *text* with [INTERNAL_URL] for security.

    Args:
        text: Text that may contain internal (localhost/loopback) URLs

    Returns:
        Text with internal URLs sanitized.
    """
    # Every localhost/loopback sanitization pattern, applied in sequence.
    sanitizers = (
        "sanitize_localhost_urls",
        "sanitize_127_urls",
        "sanitize_any_localhost_urls",
        "sanitize_ws_localhost_urls",
        "sanitize_ws_127_urls",
        "sanitize_simple_localhost_urls",
        "sanitize_simple_ws_localhost_urls",
    )
    sanitized = text
    for sanitizer_name in sanitizers:
        sanitized = SAFE_PATTERNS[sanitizer_name].apply(sanitized)
    return sanitized
|
|
2736
|
+
|
|
2737
|
+
|
|
2738
|
+
def apply_pattern_iteratively(
    text: str, pattern_name: str, max_iterations: int = MAX_ITERATIONS
) -> str:
    """Repeatedly apply a named pattern until it stops changing the text.

    Raises:
        ValueError: If *pattern_name* is not a registered safe pattern.
    """
    if pattern_name in SAFE_PATTERNS:
        return SAFE_PATTERNS[pattern_name].apply_iteratively(text, max_iterations)
    raise ValueError(f"Unknown pattern: {pattern_name}")
|
|
2746
|
+
|
|
2747
|
+
|
|
2748
|
+
def get_all_pattern_stats() -> dict[str, dict[str, int | float]]:
    """Benchmark every registered pattern against a fixed sample text.

    Patterns whose benchmarking raises are reported with an ``"error"`` entry
    instead of aborting the whole sweep.
    """
    sample_text = "python - m crackerjack - t with pytest - hypothesis - specialist"
    results: dict[str, dict[str, int | float]] = {}
    for pattern_name, safe_pattern in SAFE_PATTERNS.items():
        try:
            results[pattern_name] = safe_pattern.get_performance_stats(
                sample_text, iterations=10
            )
        except Exception as exc:
            results[pattern_name] = {"error": str(exc)}
    return results
|
|
2761
|
+
|
|
2762
|
+
|
|
2763
|
+
def clear_all_caches() -> None:
    """Drop every compiled-pattern cache entry.

    Useful for tests and for reclaiming memory after large batch runs.
    """
    CompiledPatternCache.clear_cache()
|
|
2766
|
+
|
|
2767
|
+
|
|
2768
|
+
def get_cache_info() -> dict[str, int | list[str]]:
    """Return usage statistics for the shared compiled-pattern cache."""
    cache_stats = CompiledPatternCache.get_cache_stats()
    return cache_stats
|
|
2771
|
+
|
|
2772
|
+
|
|
2773
|
+
# Security validation functions
|
|
2774
|
+
def detect_path_traversal_patterns(path_str: str) -> list[str]:
    """
    Detect directory-traversal patterns in a path string.

    Returns the names of every traversal pattern that matched, in check order.
    """
    traversal_checks = (
        "detect_directory_traversal_basic",
        "detect_directory_traversal_backslash",
        "detect_url_encoded_traversal",
        "detect_double_url_encoded_traversal",
    )
    return [name for name in traversal_checks if SAFE_PATTERNS[name].test(path_str)]
|
|
2794
|
+
|
|
2795
|
+
|
|
2796
|
+
def detect_null_byte_patterns(path_str: str) -> list[str]:
    """
    Detect null-byte injection patterns in a path string.

    Returns the names of every null-byte pattern that matched, in check order.
    """
    null_checks = (
        "detect_null_bytes_url",
        "detect_null_bytes_literal",
        "detect_utf8_overlong_null",
    )
    return [name for name in null_checks if SAFE_PATTERNS[name].test(path_str)]
|
|
2815
|
+
|
|
2816
|
+
|
|
2817
|
+
def detect_dangerous_directory_patterns(path_str: str) -> list[str]:
    """
    Detect access patterns targeting sensitive system directories.

    Returns the names of every dangerous-directory pattern that matched,
    in check order.
    """
    dangerous_checks = (
        "detect_sys_directory_pattern",
        "detect_proc_directory_pattern",
        "detect_etc_directory_pattern",
        "detect_boot_directory_pattern",
        "detect_dev_directory_pattern",
        "detect_root_directory_pattern",
        "detect_var_log_directory_pattern",
        "detect_bin_directory_pattern",
        "detect_sbin_directory_pattern",
    )
    return [name for name in dangerous_checks if SAFE_PATTERNS[name].test(path_str)]
|
|
2842
|
+
|
|
2843
|
+
|
|
2844
|
+
def detect_suspicious_path_patterns(path_str: str) -> list[str]:
    """
    Detect suspicious path patterns that might indicate attack attempts.

    Returns the names of every suspicious pattern that matched, in check order.
    """
    suspicious_checks = (
        "detect_parent_directory_in_path",
        "detect_suspicious_temp_traversal",
        "detect_suspicious_var_traversal",
    )
    return [name for name in suspicious_checks if SAFE_PATTERNS[name].test(path_str)]
|
|
2863
|
+
|
|
2864
|
+
|
|
2865
|
+
def validate_path_security(path_str: str) -> dict[str, list[str]]:
    """
    Run every path-security detector against *path_str*.

    Returns a dict mapping each issue category to the list of pattern
    names that matched within it.
    """
    traversal_hits = detect_path_traversal_patterns(path_str)
    null_byte_hits = detect_null_byte_patterns(path_str)
    dangerous_dir_hits = detect_dangerous_directory_patterns(path_str)
    suspicious_hits = detect_suspicious_path_patterns(path_str)
    return {
        "traversal_patterns": traversal_hits,
        "null_bytes": null_byte_hits,
        "dangerous_directories": dangerous_dir_hits,
        "suspicious_patterns": suspicious_hits,
    }
|
|
2877
|
+
|
|
2878
|
+
|
|
2879
|
+
# Self-check entry point: validates all patterns when run as a script.
if __name__ == "__main__":
    results = validate_all_patterns()
    if all(results.values()):
        print("✅ All regex patterns validated successfully!")
    else:
        failed = [name for name, success in results.items() if not success]
        print(f"❌ Pattern validation failed for: {failed}")
        # raise SystemExit instead of the site-module builtin exit(),
        # which is unavailable when Python runs with -S / isolated mode.
        raise SystemExit(1)
|