crackerjack 0.33.0__py3-none-any.whl → 0.33.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of crackerjack might be problematic. Click here for more details.
- crackerjack/__main__.py +1350 -34
- crackerjack/adapters/__init__.py +17 -0
- crackerjack/adapters/lsp_client.py +358 -0
- crackerjack/adapters/rust_tool_adapter.py +194 -0
- crackerjack/adapters/rust_tool_manager.py +193 -0
- crackerjack/adapters/skylos_adapter.py +231 -0
- crackerjack/adapters/zuban_adapter.py +560 -0
- crackerjack/agents/base.py +7 -3
- crackerjack/agents/coordinator.py +271 -33
- crackerjack/agents/documentation_agent.py +9 -15
- crackerjack/agents/dry_agent.py +3 -15
- crackerjack/agents/formatting_agent.py +1 -1
- crackerjack/agents/import_optimization_agent.py +36 -180
- crackerjack/agents/performance_agent.py +17 -98
- crackerjack/agents/performance_helpers.py +7 -31
- crackerjack/agents/proactive_agent.py +1 -3
- crackerjack/agents/refactoring_agent.py +16 -85
- crackerjack/agents/refactoring_helpers.py +7 -42
- crackerjack/agents/security_agent.py +9 -48
- crackerjack/agents/test_creation_agent.py +356 -513
- crackerjack/agents/test_specialist_agent.py +0 -4
- crackerjack/api.py +6 -25
- crackerjack/cli/cache_handlers.py +204 -0
- crackerjack/cli/cache_handlers_enhanced.py +683 -0
- crackerjack/cli/facade.py +100 -0
- crackerjack/cli/handlers.py +224 -9
- crackerjack/cli/interactive.py +6 -4
- crackerjack/cli/options.py +642 -55
- crackerjack/cli/utils.py +2 -1
- crackerjack/code_cleaner.py +58 -117
- crackerjack/config/global_lock_config.py +8 -48
- crackerjack/config/hooks.py +53 -62
- crackerjack/core/async_workflow_orchestrator.py +24 -34
- crackerjack/core/autofix_coordinator.py +3 -17
- crackerjack/core/enhanced_container.py +4 -13
- crackerjack/core/file_lifecycle.py +12 -89
- crackerjack/core/performance.py +2 -2
- crackerjack/core/performance_monitor.py +15 -55
- crackerjack/core/phase_coordinator.py +104 -204
- crackerjack/core/resource_manager.py +14 -90
- crackerjack/core/service_watchdog.py +62 -95
- crackerjack/core/session_coordinator.py +149 -0
- crackerjack/core/timeout_manager.py +14 -72
- crackerjack/core/websocket_lifecycle.py +13 -78
- crackerjack/core/workflow_orchestrator.py +171 -174
- crackerjack/docs/INDEX.md +11 -0
- crackerjack/docs/generated/api/API_REFERENCE.md +10895 -0
- crackerjack/docs/generated/api/CLI_REFERENCE.md +109 -0
- crackerjack/docs/generated/api/CROSS_REFERENCES.md +1755 -0
- crackerjack/docs/generated/api/PROTOCOLS.md +3 -0
- crackerjack/docs/generated/api/SERVICES.md +1252 -0
- crackerjack/documentation/__init__.py +31 -0
- crackerjack/documentation/ai_templates.py +756 -0
- crackerjack/documentation/dual_output_generator.py +765 -0
- crackerjack/documentation/mkdocs_integration.py +518 -0
- crackerjack/documentation/reference_generator.py +977 -0
- crackerjack/dynamic_config.py +55 -50
- crackerjack/executors/async_hook_executor.py +10 -15
- crackerjack/executors/cached_hook_executor.py +117 -43
- crackerjack/executors/hook_executor.py +8 -34
- crackerjack/executors/hook_lock_manager.py +26 -183
- crackerjack/executors/individual_hook_executor.py +13 -11
- crackerjack/executors/lsp_aware_hook_executor.py +270 -0
- crackerjack/executors/tool_proxy.py +417 -0
- crackerjack/hooks/lsp_hook.py +79 -0
- crackerjack/intelligence/adaptive_learning.py +25 -10
- crackerjack/intelligence/agent_orchestrator.py +2 -5
- crackerjack/intelligence/agent_registry.py +34 -24
- crackerjack/intelligence/agent_selector.py +5 -7
- crackerjack/interactive.py +17 -6
- crackerjack/managers/async_hook_manager.py +0 -1
- crackerjack/managers/hook_manager.py +79 -1
- crackerjack/managers/publish_manager.py +44 -8
- crackerjack/managers/test_command_builder.py +1 -15
- crackerjack/managers/test_executor.py +1 -3
- crackerjack/managers/test_manager.py +98 -7
- crackerjack/managers/test_manager_backup.py +10 -9
- crackerjack/mcp/cache.py +2 -2
- crackerjack/mcp/client_runner.py +1 -1
- crackerjack/mcp/context.py +191 -68
- crackerjack/mcp/dashboard.py +7 -5
- crackerjack/mcp/enhanced_progress_monitor.py +31 -28
- crackerjack/mcp/file_monitor.py +30 -23
- crackerjack/mcp/progress_components.py +31 -21
- crackerjack/mcp/progress_monitor.py +50 -53
- crackerjack/mcp/rate_limiter.py +6 -6
- crackerjack/mcp/server_core.py +17 -16
- crackerjack/mcp/service_watchdog.py +2 -1
- crackerjack/mcp/state.py +4 -7
- crackerjack/mcp/task_manager.py +11 -9
- crackerjack/mcp/tools/core_tools.py +173 -32
- crackerjack/mcp/tools/error_analyzer.py +3 -2
- crackerjack/mcp/tools/execution_tools.py +8 -10
- crackerjack/mcp/tools/execution_tools_backup.py +42 -30
- crackerjack/mcp/tools/intelligence_tool_registry.py +7 -5
- crackerjack/mcp/tools/intelligence_tools.py +5 -2
- crackerjack/mcp/tools/monitoring_tools.py +33 -70
- crackerjack/mcp/tools/proactive_tools.py +24 -11
- crackerjack/mcp/tools/progress_tools.py +5 -8
- crackerjack/mcp/tools/utility_tools.py +20 -14
- crackerjack/mcp/tools/workflow_executor.py +62 -40
- crackerjack/mcp/websocket/app.py +8 -0
- crackerjack/mcp/websocket/endpoints.py +352 -357
- crackerjack/mcp/websocket/jobs.py +40 -57
- crackerjack/mcp/websocket/monitoring_endpoints.py +2935 -0
- crackerjack/mcp/websocket/server.py +7 -25
- crackerjack/mcp/websocket/websocket_handler.py +6 -17
- crackerjack/mixins/__init__.py +0 -2
- crackerjack/mixins/error_handling.py +1 -70
- crackerjack/models/config.py +12 -1
- crackerjack/models/config_adapter.py +49 -1
- crackerjack/models/protocols.py +122 -122
- crackerjack/models/resource_protocols.py +55 -210
- crackerjack/monitoring/ai_agent_watchdog.py +13 -13
- crackerjack/monitoring/metrics_collector.py +426 -0
- crackerjack/monitoring/regression_prevention.py +8 -8
- crackerjack/monitoring/websocket_server.py +643 -0
- crackerjack/orchestration/advanced_orchestrator.py +11 -6
- crackerjack/orchestration/coverage_improvement.py +3 -3
- crackerjack/orchestration/execution_strategies.py +26 -6
- crackerjack/orchestration/test_progress_streamer.py +8 -5
- crackerjack/plugins/base.py +2 -2
- crackerjack/plugins/hooks.py +7 -0
- crackerjack/plugins/managers.py +11 -8
- crackerjack/security/__init__.py +0 -1
- crackerjack/security/audit.py +6 -35
- crackerjack/services/anomaly_detector.py +392 -0
- crackerjack/services/api_extractor.py +615 -0
- crackerjack/services/backup_service.py +2 -2
- crackerjack/services/bounded_status_operations.py +15 -152
- crackerjack/services/cache.py +127 -1
- crackerjack/services/changelog_automation.py +395 -0
- crackerjack/services/config.py +15 -9
- crackerjack/services/config_merge.py +19 -80
- crackerjack/services/config_template.py +506 -0
- crackerjack/services/contextual_ai_assistant.py +48 -22
- crackerjack/services/coverage_badge_service.py +171 -0
- crackerjack/services/coverage_ratchet.py +27 -25
- crackerjack/services/debug.py +3 -3
- crackerjack/services/dependency_analyzer.py +460 -0
- crackerjack/services/dependency_monitor.py +14 -11
- crackerjack/services/documentation_generator.py +491 -0
- crackerjack/services/documentation_service.py +675 -0
- crackerjack/services/enhanced_filesystem.py +6 -5
- crackerjack/services/enterprise_optimizer.py +865 -0
- crackerjack/services/error_pattern_analyzer.py +676 -0
- crackerjack/services/file_hasher.py +1 -1
- crackerjack/services/git.py +8 -25
- crackerjack/services/health_metrics.py +10 -8
- crackerjack/services/heatmap_generator.py +735 -0
- crackerjack/services/initialization.py +11 -30
- crackerjack/services/input_validator.py +5 -97
- crackerjack/services/intelligent_commit.py +327 -0
- crackerjack/services/log_manager.py +15 -12
- crackerjack/services/logging.py +4 -3
- crackerjack/services/lsp_client.py +628 -0
- crackerjack/services/memory_optimizer.py +19 -87
- crackerjack/services/metrics.py +42 -33
- crackerjack/services/parallel_executor.py +9 -67
- crackerjack/services/pattern_cache.py +1 -1
- crackerjack/services/pattern_detector.py +6 -6
- crackerjack/services/performance_benchmarks.py +18 -59
- crackerjack/services/performance_cache.py +20 -81
- crackerjack/services/performance_monitor.py +27 -95
- crackerjack/services/predictive_analytics.py +510 -0
- crackerjack/services/quality_baseline.py +234 -0
- crackerjack/services/quality_baseline_enhanced.py +646 -0
- crackerjack/services/quality_intelligence.py +785 -0
- crackerjack/services/regex_patterns.py +618 -524
- crackerjack/services/regex_utils.py +43 -123
- crackerjack/services/secure_path_utils.py +5 -164
- crackerjack/services/secure_status_formatter.py +30 -141
- crackerjack/services/secure_subprocess.py +11 -92
- crackerjack/services/security.py +9 -41
- crackerjack/services/security_logger.py +12 -24
- crackerjack/services/server_manager.py +124 -16
- crackerjack/services/status_authentication.py +16 -159
- crackerjack/services/status_security_manager.py +4 -131
- crackerjack/services/thread_safe_status_collector.py +19 -125
- crackerjack/services/unified_config.py +21 -13
- crackerjack/services/validation_rate_limiter.py +5 -54
- crackerjack/services/version_analyzer.py +459 -0
- crackerjack/services/version_checker.py +1 -1
- crackerjack/services/websocket_resource_limiter.py +10 -144
- crackerjack/services/zuban_lsp_service.py +390 -0
- crackerjack/slash_commands/__init__.py +2 -7
- crackerjack/slash_commands/run.md +2 -2
- crackerjack/tools/validate_input_validator_patterns.py +14 -40
- crackerjack/tools/validate_regex_patterns.py +19 -48
- {crackerjack-0.33.0.dist-info → crackerjack-0.33.2.dist-info}/METADATA +196 -25
- crackerjack-0.33.2.dist-info/RECORD +229 -0
- crackerjack/CLAUDE.md +0 -207
- crackerjack/RULES.md +0 -380
- crackerjack/py313.py +0 -234
- crackerjack-0.33.0.dist-info/RECORD +0 -187
- {crackerjack-0.33.0.dist-info → crackerjack-0.33.2.dist-info}/WHEEL +0 -0
- {crackerjack-0.33.0.dist-info → crackerjack-0.33.2.dist-info}/entry_points.txt +0 -0
- {crackerjack-0.33.0.dist-info → crackerjack-0.33.2.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,16 +1,3 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Centralized regex patterns with validation to prevent bad regex issues.
|
|
3
|
-
|
|
4
|
-
CRITICAL: All regex patterns in this codebase MUST be defined here with comprehensive
|
|
5
|
-
testing to prevent spacing and replacement syntax errors.
|
|
6
|
-
|
|
7
|
-
Optimized for performance, safety, and maintainability with:
|
|
8
|
-
- Thread-safe compiled pattern caching
|
|
9
|
-
- Iterative application for complex multi-word cases
|
|
10
|
-
- Safety limits to prevent catastrophic backtracking
|
|
11
|
-
- Performance monitoring capabilities
|
|
12
|
-
"""
|
|
13
|
-
|
|
14
1
|
import re
|
|
15
2
|
import threading
|
|
16
3
|
import time
|
|
@@ -18,43 +5,34 @@ import typing as t
|
|
|
18
5
|
from dataclasses import dataclass, field
|
|
19
6
|
from re import Pattern
|
|
20
7
|
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
PATTERN_CACHE_SIZE = 100 # Max cached compiled patterns
|
|
8
|
+
MAX_INPUT_SIZE = 10 * 1024 * 1024
|
|
9
|
+
MAX_ITERATIONS = 10
|
|
10
|
+
PATTERN_CACHE_SIZE = 100
|
|
25
11
|
|
|
26
12
|
|
|
27
13
|
class CompiledPatternCache:
|
|
28
|
-
"""Thread-safe cache for compiled regex patterns."""
|
|
29
|
-
|
|
30
14
|
_lock = threading.RLock()
|
|
31
15
|
_cache: dict[str, Pattern[str]] = {}
|
|
32
16
|
_max_size = PATTERN_CACHE_SIZE
|
|
33
17
|
|
|
34
18
|
@classmethod
|
|
35
19
|
def get_compiled_pattern(cls, pattern: str) -> Pattern[str]:
|
|
36
|
-
"""Get compiled pattern from cache, compiling if necessary."""
|
|
37
20
|
return cls.get_compiled_pattern_with_flags(pattern, pattern, 0)
|
|
38
21
|
|
|
39
22
|
@classmethod
|
|
40
23
|
def get_compiled_pattern_with_flags(
|
|
41
24
|
cls, cache_key: str, pattern: str, flags: int
|
|
42
25
|
) -> Pattern[str]:
|
|
43
|
-
"""Get compiled pattern with flags from cache, compiling if necessary."""
|
|
44
26
|
with cls._lock:
|
|
45
27
|
if cache_key in cls._cache:
|
|
46
28
|
return cls._cache[cache_key]
|
|
47
29
|
|
|
48
|
-
# Compile new pattern
|
|
49
30
|
try:
|
|
50
31
|
compiled = re.compile(pattern, flags)
|
|
51
32
|
except re.error as e:
|
|
52
|
-
# Maintain backward compatibility with existing error message format
|
|
53
33
|
raise ValueError(f"Invalid regex pattern '{pattern}': {e}")
|
|
54
34
|
|
|
55
|
-
# Add to cache with size limit
|
|
56
35
|
if len(cls._cache) >= cls._max_size:
|
|
57
|
-
# Remove oldest entry (simple FIFO eviction)
|
|
58
36
|
oldest_key = next(iter(cls._cache))
|
|
59
37
|
del cls._cache[oldest_key]
|
|
60
38
|
|
|
@@ -63,38 +41,32 @@ class CompiledPatternCache:
|
|
|
63
41
|
|
|
64
42
|
@classmethod
|
|
65
43
|
def clear_cache(cls) -> None:
|
|
66
|
-
"""Clear the pattern cache (useful for testing)."""
|
|
67
44
|
with cls._lock:
|
|
68
45
|
cls._cache.clear()
|
|
69
46
|
|
|
70
47
|
@classmethod
|
|
71
48
|
def get_cache_stats(cls) -> dict[str, int | list[str]]:
|
|
72
|
-
"""Get cache statistics for monitoring."""
|
|
73
49
|
with cls._lock:
|
|
74
50
|
return {
|
|
75
51
|
"size": len(cls._cache),
|
|
76
52
|
"max_size": cls._max_size,
|
|
77
|
-
"patterns": list(cls._cache.keys()),
|
|
53
|
+
"patterns": list[t.Any](cls._cache.keys()),
|
|
78
54
|
}
|
|
79
55
|
|
|
80
56
|
|
|
81
57
|
def validate_pattern_safety(pattern: str) -> list[str]:
|
|
82
|
-
"""Validate pattern for potential safety issues."""
|
|
83
58
|
warnings = []
|
|
84
59
|
|
|
85
|
-
# Check for potentially problematic constructs
|
|
86
60
|
if ".*.*" in pattern:
|
|
87
61
|
warnings.append("Multiple .* constructs may cause performance issues")
|
|
88
62
|
|
|
89
63
|
if ".+.+" in pattern:
|
|
90
64
|
warnings.append("Multiple .+ constructs may cause performance issues")
|
|
91
65
|
|
|
92
|
-
# Check for nested quantifiers
|
|
93
66
|
nested_quantifiers = re.findall(r"[+*?]\??[+*?]", pattern)
|
|
94
67
|
if nested_quantifiers:
|
|
95
68
|
warnings.append(f"Nested quantifiers detected: {nested_quantifiers}")
|
|
96
69
|
|
|
97
|
-
# Check for alternation with overlapping cases
|
|
98
70
|
if "|" in pattern and pattern.count("|") > 10:
|
|
99
71
|
warnings.append("Many alternations may cause performance issues")
|
|
100
72
|
|
|
@@ -103,48 +75,37 @@ def validate_pattern_safety(pattern: str) -> list[str]:
|
|
|
103
75
|
|
|
104
76
|
@dataclass
|
|
105
77
|
class ValidatedPattern:
|
|
106
|
-
"""A regex pattern that has been tested and validated."""
|
|
107
|
-
|
|
108
78
|
name: str
|
|
109
79
|
pattern: str
|
|
110
80
|
replacement: str
|
|
111
|
-
test_cases: list[tuple[str, str]]
|
|
81
|
+
test_cases: list[tuple[str, str]]
|
|
112
82
|
description: str = ""
|
|
113
|
-
global_replace: bool = False
|
|
114
|
-
flags: int = 0
|
|
83
|
+
global_replace: bool = False
|
|
84
|
+
flags: int = 0
|
|
115
85
|
_compiled_pattern: Pattern[str] | None = field(default=None, init=False)
|
|
116
86
|
|
|
117
|
-
def __post_init__(self):
|
|
118
|
-
"""Validate pattern on creation."""
|
|
87
|
+
def __post_init__(self) -> None:
|
|
119
88
|
self._validate()
|
|
120
89
|
|
|
121
90
|
def _validate(self) -> None:
|
|
122
|
-
"""Ensure pattern works with all test cases."""
|
|
123
91
|
try:
|
|
124
|
-
# Use cached compilation for validation
|
|
125
92
|
self._get_compiled_pattern()
|
|
126
93
|
except ValueError as e:
|
|
127
|
-
# Maintain backward compatibility with error message format
|
|
128
94
|
if "Invalid regex pattern" in str(e):
|
|
129
|
-
# Replace the pattern string with the name in the error message
|
|
130
95
|
error_msg = str(e).replace(f"'{self.pattern}'", f"'{self.name}'")
|
|
131
96
|
raise ValueError(error_msg) from e
|
|
132
|
-
raise
|
|
97
|
+
raise
|
|
133
98
|
|
|
134
|
-
# Check for forbidden replacement syntax
|
|
135
99
|
if r"\g < " in self.replacement or r" >" in self.replacement:
|
|
136
100
|
raise ValueError(
|
|
137
101
|
f"Bad replacement syntax in '{self.name}': {self.replacement}. "
|
|
138
|
-
"Use \\g<1> not \\g
|
|
102
|
+
"Use \\g<1> not \\g<1>" # REGEX OK: educational example
|
|
139
103
|
)
|
|
140
104
|
|
|
141
|
-
# Check for safety warnings
|
|
142
105
|
warnings = validate_pattern_safety(self.pattern)
|
|
143
106
|
if warnings:
|
|
144
|
-
# For now, just store warnings - could log them in the future
|
|
145
107
|
pass
|
|
146
108
|
|
|
147
|
-
# Validate all test cases
|
|
148
109
|
for input_text, expected in self.test_cases:
|
|
149
110
|
try:
|
|
150
111
|
count = 0 if self.global_replace else 1
|
|
@@ -158,15 +119,12 @@ class ValidatedPattern:
|
|
|
158
119
|
raise ValueError(f"Pattern '{self.name}' failed on '{input_text}': {e}")
|
|
159
120
|
|
|
160
121
|
def _get_compiled_pattern(self) -> Pattern[str]:
|
|
161
|
-
|
|
162
|
-
# Create cache key that includes flags
|
|
163
|
-
cache_key = f"{self.pattern}|flags:{self.flags}"
|
|
122
|
+
cache_key = f"{self.pattern}|flags: {self.flags}"
|
|
164
123
|
return CompiledPatternCache.get_compiled_pattern_with_flags(
|
|
165
124
|
cache_key, self.pattern, self.flags
|
|
166
125
|
)
|
|
167
126
|
|
|
168
127
|
def _apply_internal(self, text: str, count: int = 1) -> str:
|
|
169
|
-
"""Internal method for applying pattern with compiled regex."""
|
|
170
128
|
if len(text) > MAX_INPUT_SIZE:
|
|
171
129
|
raise ValueError(
|
|
172
130
|
f"Input text too large: {len(text)} bytes > {MAX_INPUT_SIZE}"
|
|
@@ -175,17 +133,10 @@ class ValidatedPattern:
|
|
|
175
133
|
return self._get_compiled_pattern().sub(self.replacement, text, count=count)
|
|
176
134
|
|
|
177
135
|
def apply(self, text: str) -> str:
|
|
178
|
-
"""Apply the validated pattern safely."""
|
|
179
136
|
count = 0 if self.global_replace else 1
|
|
180
137
|
return self._apply_internal(text, count)
|
|
181
138
|
|
|
182
139
|
def apply_iteratively(self, text: str, max_iterations: int = MAX_ITERATIONS) -> str:
|
|
183
|
-
"""
|
|
184
|
-
Apply pattern repeatedly until no more changes occur.
|
|
185
|
-
|
|
186
|
-
Useful for cases like 'pytest - hypothesis - specialist' -> 'pytest-hypothesis-specialist'
|
|
187
|
-
where multiple passes are needed.
|
|
188
|
-
"""
|
|
189
140
|
if max_iterations <= 0:
|
|
190
141
|
raise ValueError("max_iterations must be positive")
|
|
191
142
|
|
|
@@ -193,18 +144,14 @@ class ValidatedPattern:
|
|
|
193
144
|
for _ in range(max_iterations):
|
|
194
145
|
new_result = self.apply(result)
|
|
195
146
|
if new_result == result:
|
|
196
|
-
# No more changes, done
|
|
197
147
|
break
|
|
198
148
|
result = new_result
|
|
199
149
|
else:
|
|
200
|
-
# Reached max iterations without convergence
|
|
201
|
-
# This might indicate a problematic pattern, but we return the current result
|
|
202
150
|
pass
|
|
203
151
|
|
|
204
152
|
return result
|
|
205
153
|
|
|
206
154
|
def apply_with_timeout(self, text: str, timeout_seconds: float = 1.0) -> str:
|
|
207
|
-
"""Apply pattern with timeout protection."""
|
|
208
155
|
import signal
|
|
209
156
|
|
|
210
157
|
def timeout_handler(signum: int, frame: t.Any) -> None:
|
|
@@ -212,8 +159,6 @@ class ValidatedPattern:
|
|
|
212
159
|
f"Pattern '{self.name}' timed out after {timeout_seconds}s"
|
|
213
160
|
)
|
|
214
161
|
|
|
215
|
-
# Note: signal-based timeout only works on Unix and in main thread
|
|
216
|
-
# For broader compatibility, we could use threading.Timer instead
|
|
217
162
|
old_handler = signal.signal(signal.SIGALRM, timeout_handler)
|
|
218
163
|
signal.alarm(int(timeout_seconds))
|
|
219
164
|
|
|
@@ -226,12 +171,10 @@ class ValidatedPattern:
|
|
|
226
171
|
return result
|
|
227
172
|
|
|
228
173
|
def test(self, text: str) -> bool:
|
|
229
|
-
"""Test if pattern matches text without applying replacement."""
|
|
230
174
|
compiled = self._get_compiled_pattern()
|
|
231
175
|
return bool(compiled.search(text))
|
|
232
176
|
|
|
233
177
|
def search(self, text: str) -> re.Match[str] | None:
|
|
234
|
-
"""Search for the first match and return a Match object or None."""
|
|
235
178
|
if len(text) > MAX_INPUT_SIZE:
|
|
236
179
|
raise ValueError(
|
|
237
180
|
f"Input text too large: {len(text)} bytes > {MAX_INPUT_SIZE}"
|
|
@@ -239,7 +182,6 @@ class ValidatedPattern:
|
|
|
239
182
|
return self._get_compiled_pattern().search(text)
|
|
240
183
|
|
|
241
184
|
def findall(self, text: str) -> list[str]:
|
|
242
|
-
"""Find all matches of the pattern in text safely."""
|
|
243
185
|
if len(text) > MAX_INPUT_SIZE:
|
|
244
186
|
raise ValueError(
|
|
245
187
|
f"Input text too large: {len(text)} bytes > {MAX_INPUT_SIZE}"
|
|
@@ -249,7 +191,6 @@ class ValidatedPattern:
|
|
|
249
191
|
def get_performance_stats(
|
|
250
192
|
self, text: str, iterations: int = 100
|
|
251
193
|
) -> dict[str, float]:
|
|
252
|
-
"""Get performance statistics for this pattern on given text."""
|
|
253
194
|
times = []
|
|
254
195
|
|
|
255
196
|
for _ in range(iterations):
|
|
@@ -266,7 +207,6 @@ class ValidatedPattern:
|
|
|
266
207
|
}
|
|
267
208
|
|
|
268
209
|
|
|
269
|
-
# All validated patterns - ADD NEW PATTERNS HERE WITH TESTS
|
|
270
210
|
SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
271
211
|
"fix_command_spacing": ValidatedPattern(
|
|
272
212
|
name="fix_command_spacing",
|
|
@@ -275,20 +215,20 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
275
215
|
description="Fix spacing in 'python -m command' patterns",
|
|
276
216
|
test_cases=[
|
|
277
217
|
("python - m crackerjack", "python -m crackerjack"),
|
|
278
|
-
("python -m crackerjack", "python -m crackerjack"),
|
|
279
|
-
("python
|
|
218
|
+
("python -m crackerjack", "python -m crackerjack"),
|
|
219
|
+
("python - m pytest", "python -m pytest"),
|
|
280
220
|
("other python - m stuff", "other python -m stuff"),
|
|
281
221
|
],
|
|
282
222
|
),
|
|
283
223
|
"fix_long_flag_spacing": ValidatedPattern(
|
|
284
224
|
name="fix_long_flag_spacing",
|
|
285
|
-
pattern=r"-\s*-\s*(\w+(
|
|
225
|
+
pattern=r"-\s*-\s*(\w+(?: -\w+)*)",
|
|
286
226
|
replacement=r"--\1",
|
|
287
227
|
description="Fix spacing in long flags like '--help'",
|
|
288
228
|
test_cases=[
|
|
289
229
|
("- - help", "--help"),
|
|
290
|
-
("- - ai-
|
|
291
|
-
("--help", "--help"),
|
|
230
|
+
("- - ai-fix", "--ai-fix"),
|
|
231
|
+
("--help", "--help"),
|
|
292
232
|
("- - start-websocket-server", "--start-websocket-server"),
|
|
293
233
|
],
|
|
294
234
|
),
|
|
@@ -300,7 +240,7 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
300
240
|
test_cases=[
|
|
301
241
|
("python -m crackerjack - t", "python -m crackerjack -t"),
|
|
302
242
|
("- q", "-q"),
|
|
303
|
-
("-t", "-t"),
|
|
243
|
+
("-t", "-t"),
|
|
304
244
|
("some - x flag", "some -x flag"),
|
|
305
245
|
],
|
|
306
246
|
),
|
|
@@ -314,10 +254,10 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
314
254
|
(
|
|
315
255
|
"pytest - hypothesis - specialist",
|
|
316
256
|
"pytest-hypothesis - specialist",
|
|
317
|
-
),
|
|
257
|
+
),
|
|
318
258
|
("backend - architect", "backend-architect"),
|
|
319
|
-
("python-pro", "python-pro"),
|
|
320
|
-
("end - of - file-fixer", "end-of - file-fixer"),
|
|
259
|
+
("python-pro", "python-pro"),
|
|
260
|
+
("end - of - file-fixer", "end-of - file-fixer"),
|
|
321
261
|
],
|
|
322
262
|
),
|
|
323
263
|
"fix_hyphenated_names_global": ValidatedPattern(
|
|
@@ -328,9 +268,9 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
328
268
|
global_replace=True,
|
|
329
269
|
test_cases=[
|
|
330
270
|
("python - pro", "python-pro"),
|
|
331
|
-
("end - of - file", "end-of - file"),
|
|
332
|
-
("already-hyphenated", "already-hyphenated"),
|
|
333
|
-
("start - middle - end", "start-middle - end"),
|
|
271
|
+
("end - of - file", "end-of - file"),
|
|
272
|
+
("already-hyphenated", "already-hyphenated"),
|
|
273
|
+
("start - middle - end", "start-middle - end"),
|
|
334
274
|
],
|
|
335
275
|
),
|
|
336
276
|
"fix_spaced_hyphens": ValidatedPattern(
|
|
@@ -338,19 +278,19 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
338
278
|
pattern=r"(\w+)\s+-\s+(\w+)",
|
|
339
279
|
replacement=r"\1-\2",
|
|
340
280
|
description="Fix spaced hyphens with spaces around dashes (use apply_iteratively for multi-word)",
|
|
341
|
-
global_replace=True,
|
|
281
|
+
global_replace=True,
|
|
342
282
|
test_cases=[
|
|
343
283
|
("python - pro", "python-pro"),
|
|
344
284
|
(
|
|
345
285
|
"pytest - hypothesis - specialist",
|
|
346
286
|
"pytest-hypothesis - specialist",
|
|
347
|
-
),
|
|
287
|
+
),
|
|
348
288
|
(
|
|
349
289
|
"end - of - file - fixer",
|
|
350
290
|
"end-of - file-fixer",
|
|
351
|
-
),
|
|
352
|
-
("already-hyphenated", "already-hyphenated"),
|
|
353
|
-
("mixed-case with - spaces", "mixed-case with-spaces"),
|
|
291
|
+
),
|
|
292
|
+
("already-hyphenated", "already-hyphenated"),
|
|
293
|
+
("mixed-case with - spaces", "mixed-case with-spaces"),
|
|
354
294
|
],
|
|
355
295
|
),
|
|
356
296
|
"fix_debug_log_pattern": ValidatedPattern(
|
|
@@ -360,7 +300,7 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
360
300
|
description="Fix spacing in debug log patterns",
|
|
361
301
|
test_cases=[
|
|
362
302
|
("crackerjack - debug-12345.log", "crackerjack-debug-12345.log"),
|
|
363
|
-
("crackerjack-debug.log", "crackerjack-debug.log"),
|
|
303
|
+
("crackerjack-debug.log", "crackerjack-debug.log"),
|
|
364
304
|
("old crackerjack - debug files", "old crackerjack-debug files"),
|
|
365
305
|
],
|
|
366
306
|
),
|
|
@@ -372,7 +312,7 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
372
312
|
test_cases=[
|
|
373
313
|
("job - {self.web_job_id}.json", "job-{self.web_job_id}.json"),
|
|
374
314
|
("job - abc123.json", "job-abc123.json"),
|
|
375
|
-
("job-existing.json", "job-existing.json"),
|
|
315
|
+
("job-existing.json", "job-existing.json"),
|
|
376
316
|
],
|
|
377
317
|
),
|
|
378
318
|
"fix_markdown_bold": ValidatedPattern(
|
|
@@ -383,10 +323,9 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
383
323
|
test_cases=[
|
|
384
324
|
("* *Bold Text * *", "**Bold Text**"),
|
|
385
325
|
("* *🧪 pytest-specialist * *", "**🧪 pytest-specialist**"),
|
|
386
|
-
("**Already Bold**", "**Already Bold**"),
|
|
326
|
+
("**Already Bold**", "**Already Bold**"),
|
|
387
327
|
],
|
|
388
328
|
),
|
|
389
|
-
# Security token masking patterns
|
|
390
329
|
"mask_pypi_token": ValidatedPattern(
|
|
391
330
|
name="mask_pypi_token",
|
|
392
331
|
pattern=r"\bpypi-[a-zA-Z0-9_-]{12,}\b",
|
|
@@ -400,11 +339,11 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
400
339
|
"Using token: pypi-AgEIcHlwaS5vcmcCJGE4M2Y3ZjI for upload",
|
|
401
340
|
"Using token: pypi-**** for upload",
|
|
402
341
|
),
|
|
403
|
-
("pypi-short", "pypi-short"),
|
|
342
|
+
("pypi-short", "pypi-short"),
|
|
404
343
|
(
|
|
405
344
|
"not pypi-AgEIcHlwaS5vcmcCJGE4M2Y3ZjI",
|
|
406
345
|
"not pypi-****",
|
|
407
|
-
),
|
|
346
|
+
),
|
|
408
347
|
(
|
|
409
348
|
"Multiple pypi-token1234567890 and pypi-anothertokenhere",
|
|
410
349
|
"Multiple pypi-**** and pypi-****",
|
|
@@ -424,11 +363,11 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
424
363
|
"GITHUB_TOKEN=ghp_1234567890abcdef1234567890abcdef1234",
|
|
425
364
|
"GITHUB_TOKEN=ghp_****",
|
|
426
365
|
),
|
|
427
|
-
("ghp_short", "ghp_short"),
|
|
366
|
+
("ghp_short", "ghp_short"),
|
|
428
367
|
(
|
|
429
368
|
"ghp_1234567890abcdef1234567890abcdef12345",
|
|
430
369
|
"ghp_1234567890abcdef1234567890abcdef12345",
|
|
431
|
-
),
|
|
370
|
+
),
|
|
432
371
|
(
|
|
433
372
|
"Multiple ghp_1234567890abcdef1234567890abcdef1234 and"
|
|
434
373
|
" ghp_abcdef1234567890abcdef12345678901234",
|
|
@@ -448,23 +387,23 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
448
387
|
(
|
|
449
388
|
"Short token abc123def456",
|
|
450
389
|
"Short token abc123def456",
|
|
451
|
-
),
|
|
390
|
+
),
|
|
452
391
|
(
|
|
453
392
|
"File path "
|
|
454
393
|
"/very/long/path/that/should/not/be/masked/even/though/its/long",
|
|
455
394
|
"File path "
|
|
456
395
|
"/very/long/path/that/should/not/be/masked/even/though/its/long",
|
|
457
|
-
),
|
|
396
|
+
),
|
|
458
397
|
("API_KEY=verylongapikeyhere1234567890123456", "API_KEY=****"),
|
|
459
398
|
(
|
|
460
399
|
"Long-token_with-underscores_123456789012345678",
|
|
461
400
|
"****",
|
|
462
|
-
),
|
|
401
|
+
),
|
|
463
402
|
],
|
|
464
403
|
),
|
|
465
404
|
"mask_token_assignment": ValidatedPattern(
|
|
466
405
|
name="mask_token_assignment",
|
|
467
|
-
pattern=r"(?i)\b(token\s*[=:]\s*)['\"]([^'\"]{8,})['\"]",
|
|
406
|
+
pattern=r"(?i)\b(token\s*[=: ]\s*)['\"]([^'\"]{8,})['\"]",
|
|
468
407
|
replacement=r"\1'****'",
|
|
469
408
|
description="Mask token assignments in various formats (case insensitive)",
|
|
470
409
|
global_replace=True,
|
|
@@ -473,17 +412,17 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
473
412
|
("token='long_secret_token_here'", "token='****'"),
|
|
474
413
|
('token: "another_secret_token"', "token: '****'"),
|
|
475
414
|
("token = 'spaced_assignment_token'", "token = '****'"),
|
|
476
|
-
('token="short"', 'token="short"'),
|
|
415
|
+
('token="short"', 'token="short"'),
|
|
477
416
|
(
|
|
478
417
|
"not_token='should_not_be_masked'",
|
|
479
418
|
"not_token='should_not_be_masked'",
|
|
480
|
-
),
|
|
481
|
-
('TOKEN="UPPERCASE_TOKEN_HERE"', "TOKEN='****'"),
|
|
419
|
+
),
|
|
420
|
+
('TOKEN="UPPERCASE_TOKEN_HERE"', "TOKEN='****'"),
|
|
482
421
|
],
|
|
483
422
|
),
|
|
484
423
|
"mask_password_assignment": ValidatedPattern(
|
|
485
424
|
name="mask_password_assignment",
|
|
486
|
-
pattern=r"(?i)\b(password\s*[=:]\s*)['\"]([^'\"]{8,})['\"]",
|
|
425
|
+
pattern=r"(?i)\b(password\s*[=: ]\s*)['\"]([^'\"]{8,})['\"]",
|
|
487
426
|
replacement=r"\1'****'",
|
|
488
427
|
description="Mask password assignments in various formats (case insensitive)",
|
|
489
428
|
global_replace=True,
|
|
@@ -492,15 +431,14 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
492
431
|
("password='my_long_password'", "password='****'"),
|
|
493
432
|
('password: "another_secret_password"', "password: '****'"),
|
|
494
433
|
("password = 'spaced_password_assignment'", "password = '****'"),
|
|
495
|
-
('password="short"', 'password="short"'),
|
|
434
|
+
('password="short"', 'password="short"'),
|
|
496
435
|
(
|
|
497
436
|
"not_password='should_not_be_masked'",
|
|
498
437
|
"not_password='should_not_be_masked'",
|
|
499
|
-
),
|
|
500
|
-
('PASSWORD="UPPERCASE_PASSWORD"', "PASSWORD='****'"),
|
|
438
|
+
),
|
|
439
|
+
('PASSWORD="UPPERCASE_PASSWORD"', "PASSWORD='****'"),
|
|
501
440
|
],
|
|
502
441
|
),
|
|
503
|
-
# Version management patterns
|
|
504
442
|
"update_pyproject_version": ValidatedPattern(
|
|
505
443
|
name="update_pyproject_version",
|
|
506
444
|
pattern=r'^(version\s*=\s*["\'])([^"\']+)(["\'])$',
|
|
@@ -513,11 +451,9 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
513
451
|
('version="1.0.0-beta"', 'version="NEW_VERSION"'),
|
|
514
452
|
("version = '2.1.0'", "version = 'NEW_VERSION'"),
|
|
515
453
|
("version='10.20.30'", "version='NEW_VERSION'"),
|
|
516
|
-
|
|
517
|
-
('name = "my-package"', 'name = "my-package"'), # No change
|
|
454
|
+
('name = "my-package"', 'name = "my-package"'),
|
|
518
455
|
],
|
|
519
456
|
),
|
|
520
|
-
# Formatting agent patterns
|
|
521
457
|
"remove_trailing_whitespace": ValidatedPattern(
|
|
522
458
|
name="remove_trailing_whitespace",
|
|
523
459
|
pattern=r"[ \t]+$",
|
|
@@ -525,11 +461,11 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
525
461
|
description="Remove trailing whitespace from lines",
|
|
526
462
|
global_replace=True,
|
|
527
463
|
test_cases=[
|
|
528
|
-
("line with spaces
|
|
464
|
+
("line with spaces ", "line with spaces"),
|
|
529
465
|
("line with tabs\t\t", "line with tabs"),
|
|
530
|
-
("normal line", "normal line"),
|
|
531
|
-
("mixed
|
|
532
|
-
("", ""),
|
|
466
|
+
("normal line", "normal line"),
|
|
467
|
+
("mixed \t ", "mixed"),
|
|
468
|
+
("", ""),
|
|
533
469
|
],
|
|
534
470
|
),
|
|
535
471
|
"normalize_multiple_newlines": ValidatedPattern(
|
|
@@ -541,11 +477,10 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
541
477
|
test_cases=[
|
|
542
478
|
("line1\n\n\nline2", "line1\n\nline2"),
|
|
543
479
|
("line1\n\n\n\n\nline2", "line1\n\nline2"),
|
|
544
|
-
("line1\n\nline2", "line1\n\nline2"),
|
|
545
|
-
("line1\nline2", "line1\nline2"),
|
|
480
|
+
("line1\n\nline2", "line1\n\nline2"),
|
|
481
|
+
("line1\nline2", "line1\nline2"),
|
|
546
482
|
],
|
|
547
483
|
),
|
|
548
|
-
# Security agent patterns - subprocess fixes
|
|
549
484
|
"fix_subprocess_run_shell": ValidatedPattern(
|
|
550
485
|
name="fix_subprocess_run_shell",
|
|
551
486
|
pattern=r"subprocess\.run\(([^,]+),\s*shell=True\)",
|
|
@@ -561,7 +496,7 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
561
496
|
(
|
|
562
497
|
"subprocess.run(command, shell=False)",
|
|
563
498
|
"subprocess.run(command, shell=False)",
|
|
564
|
-
),
|
|
499
|
+
),
|
|
565
500
|
],
|
|
566
501
|
),
|
|
567
502
|
"fix_subprocess_call_shell": ValidatedPattern(
|
|
@@ -579,12 +514,12 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
579
514
|
(
|
|
580
515
|
"subprocess.call(command, shell=False)",
|
|
581
516
|
"subprocess.call(command, shell=False)",
|
|
582
|
-
),
|
|
517
|
+
),
|
|
583
518
|
],
|
|
584
519
|
),
|
|
585
520
|
"fix_subprocess_popen_shell": ValidatedPattern(
|
|
586
521
|
name="fix_subprocess_popen_shell",
|
|
587
|
-
pattern=r"subprocess\.Popen\(([^,]+)
|
|
522
|
+
pattern=r"subprocess\.Popen\(([^,]+), \s*shell=True\)",
|
|
588
523
|
replacement=r"subprocess.Popen(\1.split())",
|
|
589
524
|
description="Remove shell=True from subprocess.Popen calls",
|
|
590
525
|
global_replace=True,
|
|
@@ -597,10 +532,9 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
597
532
|
(
|
|
598
533
|
"subprocess.Popen(command, shell=False)",
|
|
599
534
|
"subprocess.Popen(command, shell=False)",
|
|
600
|
-
),
|
|
535
|
+
),
|
|
601
536
|
],
|
|
602
537
|
),
|
|
603
|
-
# Security agent patterns - unsafe library usage
|
|
604
538
|
"fix_unsafe_yaml_load": ValidatedPattern(
|
|
605
539
|
name="fix_unsafe_yaml_load",
|
|
606
540
|
pattern=r"\byaml\.load\(",
|
|
@@ -610,11 +544,11 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
610
544
|
test_cases=[
|
|
611
545
|
("yaml.load(file)", "yaml.safe_load(file)"),
|
|
612
546
|
("data = yaml.load(content)", "data = yaml.safe_load(content)"),
|
|
613
|
-
("yaml.safe_load(content)", "yaml.safe_load(content)"),
|
|
547
|
+
("yaml.safe_load(content)", "yaml.safe_load(content)"),
|
|
614
548
|
(
|
|
615
549
|
"my_yaml.load(content)",
|
|
616
550
|
"my_yaml.load(content)",
|
|
617
|
-
),
|
|
551
|
+
),
|
|
618
552
|
],
|
|
619
553
|
),
|
|
620
554
|
"fix_weak_md5_hash": ValidatedPattern(
|
|
@@ -626,7 +560,7 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
626
560
|
test_cases=[
|
|
627
561
|
("hashlib.md5(data)", "hashlib.sha256(data)"),
|
|
628
562
|
("hash = hashlib.md5(content)", "hash = hashlib.sha256(content)"),
|
|
629
|
-
("hashlib.sha256(data)", "hashlib.sha256(data)"),
|
|
563
|
+
("hashlib.sha256(data)", "hashlib.sha256(data)"),
|
|
630
564
|
],
|
|
631
565
|
),
|
|
632
566
|
"fix_weak_sha1_hash": ValidatedPattern(
|
|
@@ -638,7 +572,7 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
638
572
|
test_cases=[
|
|
639
573
|
("hashlib.sha1(data)", "hashlib.sha256(data)"),
|
|
640
574
|
("hash = hashlib.sha1(content)", "hash = hashlib.sha256(content)"),
|
|
641
|
-
("hashlib.sha256(data)", "hashlib.sha256(data)"),
|
|
575
|
+
("hashlib.sha256(data)", "hashlib.sha256(data)"),
|
|
642
576
|
],
|
|
643
577
|
),
|
|
644
578
|
"fix_insecure_random_choice": ValidatedPattern(
|
|
@@ -650,27 +584,26 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
650
584
|
test_cases=[
|
|
651
585
|
("random.choice(options)", "secrets.choice(options)"),
|
|
652
586
|
("item = random.choice(items)", "item = secrets.choice(items)"),
|
|
653
|
-
("secrets.choice(options)", "secrets.choice(options)"),
|
|
587
|
+
("secrets.choice(options)", "secrets.choice(options)"),
|
|
654
588
|
],
|
|
655
589
|
),
|
|
656
590
|
"remove_debug_prints_with_secrets": ValidatedPattern(
|
|
657
591
|
name="remove_debug_prints_with_secrets",
|
|
658
|
-
pattern=r"print\s*\([^)]*(?:password|secret|key|token)[^)]*\)",
|
|
592
|
+
pattern=r"print\s*\([^)]*(?: password|secret|key|token)[^)]*\)",
|
|
659
593
|
replacement="",
|
|
660
594
|
description="Remove debug print statements that contain sensitive information",
|
|
661
595
|
global_replace=True,
|
|
662
596
|
test_cases=[
|
|
663
|
-
('print("password:", password)', ""),
|
|
597
|
+
('print("password: ", password)', ""),
|
|
664
598
|
("print(f'Token: {token}')", ""),
|
|
665
599
|
("print('Debug secret value')", ""),
|
|
666
600
|
(
|
|
667
601
|
"print('Normal debug message')",
|
|
668
602
|
"print('Normal debug message')",
|
|
669
|
-
),
|
|
603
|
+
),
|
|
670
604
|
('print("API key is", key)', ""),
|
|
671
605
|
],
|
|
672
606
|
),
|
|
673
|
-
# Test specialist agent patterns
|
|
674
607
|
"normalize_assert_statements": ValidatedPattern(
|
|
675
608
|
name="normalize_assert_statements",
|
|
676
609
|
pattern=r"assert (.+?)\s*==\s*(.+)",
|
|
@@ -679,30 +612,27 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
679
612
|
global_replace=True,
|
|
680
613
|
test_cases=[
|
|
681
614
|
("assert result==expected", "assert result == expected"),
|
|
682
|
-
("assert value
|
|
615
|
+
("assert value == other", "assert value == other"),
|
|
683
616
|
("assert result== expected", "assert result == expected"),
|
|
684
617
|
("assert result ==expected", "assert result == expected"),
|
|
685
618
|
(
|
|
686
619
|
"assert result == expected",
|
|
687
620
|
"assert result == expected",
|
|
688
|
-
),
|
|
621
|
+
),
|
|
689
622
|
],
|
|
690
623
|
),
|
|
691
|
-
# Job ID validation patterns
|
|
692
624
|
"validate_job_id_alphanumeric": ValidatedPattern(
|
|
693
625
|
name="validate_job_id_alphanumeric",
|
|
694
626
|
pattern=r"^[a-zA-Z0-9_-]+$",
|
|
695
|
-
replacement="VALID",
|
|
627
|
+
replacement="VALID",
|
|
696
628
|
description="Validate job ID contains only alphanumeric characters, "
|
|
697
629
|
"underscores, and hyphens",
|
|
698
630
|
test_cases=[
|
|
699
|
-
|
|
700
|
-
("
|
|
701
|
-
("
|
|
702
|
-
("job_123", "VALID"), # Valid ID
|
|
631
|
+
("valid_job-123", "VALID"),
|
|
632
|
+
("another_valid-job_456", "VALID"),
|
|
633
|
+
("job_123", "VALID"),
|
|
703
634
|
],
|
|
704
635
|
),
|
|
705
|
-
# Service configuration patterns
|
|
706
636
|
"remove_coverage_fail_under": ValidatedPattern(
|
|
707
637
|
name="remove_coverage_fail_under",
|
|
708
638
|
pattern=r"--cov-fail-under=\d+\.?\d*\s*",
|
|
@@ -713,7 +643,7 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
713
643
|
("--cov-fail-under=85 --verbose", "--verbose"),
|
|
714
644
|
("--cov-fail-under=90.5 -x", "-x"),
|
|
715
645
|
("--verbose --cov-fail-under=80 ", "--verbose "),
|
|
716
|
-
("--no-cov", "--no-cov"),
|
|
646
|
+
("--no-cov", "--no-cov"),
|
|
717
647
|
],
|
|
718
648
|
),
|
|
719
649
|
"update_coverage_requirement": ValidatedPattern(
|
|
@@ -725,11 +655,9 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
725
655
|
test_cases=[
|
|
726
656
|
("--cov-fail-under=85", "--cov-fail-under=NEW_COVERAGE"),
|
|
727
657
|
("--cov-fail-under=90.5", "--cov-fail-under=NEW_COVERAGE"),
|
|
728
|
-
("--verbose", "--verbose"),
|
|
658
|
+
("--verbose", "--verbose"),
|
|
729
659
|
],
|
|
730
660
|
),
|
|
731
|
-
# Path security validation patterns - designed for testing existence, not
|
|
732
|
-
# replacement
|
|
733
661
|
"detect_directory_traversal_basic": ValidatedPattern(
|
|
734
662
|
name="detect_directory_traversal_basic",
|
|
735
663
|
pattern=r"\.\./",
|
|
@@ -738,7 +666,7 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
738
666
|
global_replace=True,
|
|
739
667
|
test_cases=[
|
|
740
668
|
("../config.txt", "[TRAVERSAL]config.txt"),
|
|
741
|
-
("normal/path", "normal/path"),
|
|
669
|
+
("normal/path", "normal/path"),
|
|
742
670
|
("../../etc/passwd", "[TRAVERSAL][TRAVERSAL]etc/passwd"),
|
|
743
671
|
],
|
|
744
672
|
),
|
|
@@ -751,7 +679,7 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
751
679
|
test_cases=[
|
|
752
680
|
("..\\config.txt", "[TRAVERSAL]config.txt"),
|
|
753
681
|
("../config.txt", "[TRAVERSAL]config.txt"),
|
|
754
|
-
("normal/path", "normal/path"),
|
|
682
|
+
("normal/path", "normal/path"),
|
|
755
683
|
],
|
|
756
684
|
),
|
|
757
685
|
"detect_url_encoded_traversal": ValidatedPattern(
|
|
@@ -762,7 +690,7 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
762
690
|
global_replace=True,
|
|
763
691
|
test_cases=[
|
|
764
692
|
("path/%2e%2e%2f/config", "path/[TRAVERSAL]/config"),
|
|
765
|
-
("normal/path", "normal/path"),
|
|
693
|
+
("normal/path", "normal/path"),
|
|
766
694
|
("%2e%2e%2fpasswd", "[TRAVERSAL]passwd"),
|
|
767
695
|
],
|
|
768
696
|
),
|
|
@@ -774,7 +702,7 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
774
702
|
global_replace=True,
|
|
775
703
|
test_cases=[
|
|
776
704
|
("path/%252e%252e%252f/config", "path/[TRAVERSAL]/config"),
|
|
777
|
-
("normal/path", "normal/path"),
|
|
705
|
+
("normal/path", "normal/path"),
|
|
778
706
|
],
|
|
779
707
|
),
|
|
780
708
|
"detect_null_bytes_url": ValidatedPattern(
|
|
@@ -785,7 +713,7 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
785
713
|
global_replace=True,
|
|
786
714
|
test_cases=[
|
|
787
715
|
("file.txt%00.jpg", "file.txt[NULL].jpg"),
|
|
788
|
-
("normal.txt", "normal.txt"),
|
|
716
|
+
("normal.txt", "normal.txt"),
|
|
789
717
|
],
|
|
790
718
|
),
|
|
791
719
|
"detect_null_bytes_literal": ValidatedPattern(
|
|
@@ -796,7 +724,7 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
796
724
|
global_replace=True,
|
|
797
725
|
test_cases=[
|
|
798
726
|
("file.txt\\x00", "file.txt[NULL]"),
|
|
799
|
-
("normal.txt", "normal.txt"),
|
|
727
|
+
("normal.txt", "normal.txt"),
|
|
800
728
|
],
|
|
801
729
|
),
|
|
802
730
|
"detect_utf8_overlong_null": ValidatedPattern(
|
|
@@ -807,7 +735,7 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
807
735
|
global_replace=True,
|
|
808
736
|
test_cases=[
|
|
809
737
|
("file.txt%c0%80", "file.txt[NULL]"),
|
|
810
|
-
("normal.txt", "normal.txt"),
|
|
738
|
+
("normal.txt", "normal.txt"),
|
|
811
739
|
],
|
|
812
740
|
),
|
|
813
741
|
"detect_sys_directory_pattern": ValidatedPattern(
|
|
@@ -818,7 +746,7 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
818
746
|
test_cases=[
|
|
819
747
|
("/sys/", "[DANGER]"),
|
|
820
748
|
("/sys/devices", "[DANGER]"),
|
|
821
|
-
("/usr/sys", "/usr/sys"),
|
|
749
|
+
("/usr/sys", "/usr/sys"),
|
|
822
750
|
],
|
|
823
751
|
),
|
|
824
752
|
"detect_proc_directory_pattern": ValidatedPattern(
|
|
@@ -829,7 +757,7 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
829
757
|
test_cases=[
|
|
830
758
|
("/proc/", "[DANGER]"),
|
|
831
759
|
("/proc/self", "[DANGER]"),
|
|
832
|
-
("/usr/proc", "/usr/proc"),
|
|
760
|
+
("/usr/proc", "/usr/proc"),
|
|
833
761
|
],
|
|
834
762
|
),
|
|
835
763
|
"detect_etc_directory_pattern": ValidatedPattern(
|
|
@@ -840,7 +768,7 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
840
768
|
test_cases=[
|
|
841
769
|
("/etc/", "[DANGER]"),
|
|
842
770
|
("/etc/passwd", "[DANGER]"),
|
|
843
|
-
("/usr/etc", "/usr/etc"),
|
|
771
|
+
("/usr/etc", "/usr/etc"),
|
|
844
772
|
],
|
|
845
773
|
),
|
|
846
774
|
"detect_boot_directory_pattern": ValidatedPattern(
|
|
@@ -851,7 +779,7 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
851
779
|
test_cases=[
|
|
852
780
|
("/boot/", "[DANGER]"),
|
|
853
781
|
("/boot/grub", "[DANGER]"),
|
|
854
|
-
("/usr/boot", "/usr/boot"),
|
|
782
|
+
("/usr/boot", "/usr/boot"),
|
|
855
783
|
],
|
|
856
784
|
),
|
|
857
785
|
"detect_dev_directory_pattern": ValidatedPattern(
|
|
@@ -862,7 +790,7 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
862
790
|
test_cases=[
|
|
863
791
|
("/dev/", "[DANGER]"),
|
|
864
792
|
("/dev/null", "[DANGER]"),
|
|
865
|
-
("/usr/dev", "/usr/dev"),
|
|
793
|
+
("/usr/dev", "/usr/dev"),
|
|
866
794
|
],
|
|
867
795
|
),
|
|
868
796
|
"detect_root_directory_pattern": ValidatedPattern(
|
|
@@ -873,7 +801,7 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
873
801
|
test_cases=[
|
|
874
802
|
("/root/", "[DANGER]"),
|
|
875
803
|
("/root/.ssh", "[DANGER]"),
|
|
876
|
-
("/usr/root", "/usr/root"),
|
|
804
|
+
("/usr/root", "/usr/root"),
|
|
877
805
|
],
|
|
878
806
|
),
|
|
879
807
|
"detect_var_log_directory_pattern": ValidatedPattern(
|
|
@@ -884,7 +812,7 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
884
812
|
test_cases=[
|
|
885
813
|
("/var/log/", "[DANGER]"),
|
|
886
814
|
("/var/log/messages", "[DANGER]"),
|
|
887
|
-
("/usr/var/log", "/usr/var/log"),
|
|
815
|
+
("/usr/var/log", "/usr/var/log"),
|
|
888
816
|
],
|
|
889
817
|
),
|
|
890
818
|
"detect_bin_directory_pattern": ValidatedPattern(
|
|
@@ -895,7 +823,7 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
895
823
|
test_cases=[
|
|
896
824
|
("/bin/", "[DANGER]"),
|
|
897
825
|
("/usr/bin/", "[DANGER]"),
|
|
898
|
-
("/usr/local/bin", "/usr/local/bin"),
|
|
826
|
+
("/usr/local/bin", "/usr/local/bin"),
|
|
899
827
|
],
|
|
900
828
|
),
|
|
901
829
|
"detect_sbin_directory_pattern": ValidatedPattern(
|
|
@@ -906,7 +834,7 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
906
834
|
test_cases=[
|
|
907
835
|
("/sbin/", "[DANGER]"),
|
|
908
836
|
("/usr/sbin/", "[DANGER]"),
|
|
909
|
-
("/usr/local/sbin", "/usr/local/sbin"),
|
|
837
|
+
("/usr/local/sbin", "/usr/local/sbin"),
|
|
910
838
|
],
|
|
911
839
|
),
|
|
912
840
|
"detect_parent_directory_in_path": ValidatedPattern(
|
|
@@ -917,7 +845,7 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
917
845
|
global_replace=True,
|
|
918
846
|
test_cases=[
|
|
919
847
|
("../config", "[PARENT]/config"),
|
|
920
|
-
("safe/path", "safe/path"),
|
|
848
|
+
("safe/path", "safe/path"),
|
|
921
849
|
("path/../other", "path/[PARENT]/other"),
|
|
922
850
|
],
|
|
923
851
|
),
|
|
@@ -928,7 +856,7 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
928
856
|
description="Detect traversal attempts in temp directories",
|
|
929
857
|
test_cases=[
|
|
930
858
|
("/tmp/safe/../etc/passwd", "[SUSPICIOUS]etc/passwd"), # nosec B108
|
|
931
|
-
("/tmp/normal/file.txt", "/tmp/normal/file.txt"), #
|
|
859
|
+
("/tmp/normal/file.txt", "/tmp/normal/file.txt"), # nosec B108
|
|
932
860
|
],
|
|
933
861
|
),
|
|
934
862
|
"detect_suspicious_var_traversal": ValidatedPattern(
|
|
@@ -938,15 +866,14 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
938
866
|
description="Detect traversal attempts in var directories",
|
|
939
867
|
test_cases=[
|
|
940
868
|
("/var/lib/../etc/passwd", "[SUSPICIOUS]etc/passwd"),
|
|
941
|
-
("/var/lib/normal.txt", "/var/lib/normal.txt"),
|
|
869
|
+
("/var/lib/normal.txt", "/var/lib/normal.txt"),
|
|
942
870
|
],
|
|
943
871
|
),
|
|
944
|
-
# Tool output parsing patterns - for development tool output processing
|
|
945
872
|
"ruff_check_error": ValidatedPattern(
|
|
946
873
|
name="ruff_check_error",
|
|
947
874
|
pattern=r"^(.+?): (\d+): (\d+): ([A-Z]\d+) (.+)$",
|
|
948
875
|
replacement=r"File: \1, Line: \2, Col: \3, Code: \4, Message: \5",
|
|
949
|
-
description="Parse ruff-check error output: file:line:col:code message",
|
|
876
|
+
description="Parse ruff-check error output: file: line: col: code message",
|
|
950
877
|
test_cases=[
|
|
951
878
|
(
|
|
952
879
|
"crackerjack/core.py: 123: 45: E501 line too long",
|
|
@@ -979,7 +906,7 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
979
906
|
name="pyright_error",
|
|
980
907
|
pattern=r"^(.+?): (\d+): (\d+) - error: (.+)$",
|
|
981
908
|
replacement=r"File: \1, Line: \2, Col: \3, Error: \4",
|
|
982
|
-
description="Parse pyright error output: file:line:col - error: message",
|
|
909
|
+
description="Parse pyright error output: file: line: col - error: message",
|
|
983
910
|
test_cases=[
|
|
984
911
|
(
|
|
985
912
|
"src/app.py: 45: 12 - error: Undefined variable",
|
|
@@ -999,7 +926,7 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
999
926
|
name="pyright_warning",
|
|
1000
927
|
pattern=r"^(.+?): (\d+): (\d+) - warning: (.+)$",
|
|
1001
928
|
replacement=r"File: \1, Line: \2, Col: \3, Warning: \4",
|
|
1002
|
-
description="Parse pyright warning output: file:line:col - warning: message",
|
|
929
|
+
description="Parse pyright warning output: file: line: col - warning: message",
|
|
1003
930
|
test_cases=[
|
|
1004
931
|
(
|
|
1005
932
|
"src/app.py: 45: 12 - warning: Unused variable",
|
|
@@ -1091,7 +1018,7 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
1091
1018
|
name="mypy_error",
|
|
1092
1019
|
pattern=r"^(.+?): (\d+): error: (.+)$",
|
|
1093
1020
|
replacement=r"File: \1, Line: \2, Error: \3",
|
|
1094
|
-
description="Parse mypy error output: file:line: error: message",
|
|
1021
|
+
description="Parse mypy error output: file: line: error: message",
|
|
1095
1022
|
test_cases=[
|
|
1096
1023
|
(
|
|
1097
1024
|
"src/app.py: 45: error: Name 'undefined_var' is not defined",
|
|
@@ -1112,15 +1039,15 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
1112
1039
|
name="mypy_note",
|
|
1113
1040
|
pattern=r"^(.+?): (\d+): note: (.+)$",
|
|
1114
1041
|
replacement=r"File: \1, Line: \2, Note: \3",
|
|
1115
|
-
description="Parse mypy note output: file:line: note: message",
|
|
1042
|
+
description="Parse mypy note output: file: line: note: message",
|
|
1116
1043
|
test_cases=[
|
|
1117
1044
|
(
|
|
1118
1045
|
"src/app.py: 45: note: Expected type Union[int, str]",
|
|
1119
1046
|
"File: src/app.py, Line: 45, Note: Expected type Union[int, str]",
|
|
1120
1047
|
),
|
|
1121
1048
|
(
|
|
1122
|
-
"test.py: 1: note: See https
|
|
1123
|
-
"File: test.py, Line: 1, Note: See https
|
|
1049
|
+
"test.py: 1: note: See https: //mypy.readthedocs.io/",
|
|
1050
|
+
"File: test.py, Line: 1, Note: See https: //mypy.readthedocs.io/",
|
|
1124
1051
|
),
|
|
1125
1052
|
(
|
|
1126
1053
|
"./main.py: 999: note: Consider using Optional[...]",
|
|
@@ -1132,7 +1059,7 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
1132
1059
|
name="vulture_unused",
|
|
1133
1060
|
pattern=r"^(.+?): (\d+): unused (.+) '(.+)'",
|
|
1134
1061
|
replacement=r"File: \1, Line: \2, Unused \3: '\4'",
|
|
1135
|
-
description="Parse vulture unused code detection: file:line: unused type"
|
|
1062
|
+
description="Parse vulture unused code detection: file: line: unused type"
|
|
1136
1063
|
" 'name'",
|
|
1137
1064
|
test_cases=[
|
|
1138
1065
|
(
|
|
@@ -1153,13 +1080,13 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
1153
1080
|
name="complexipy_complex",
|
|
1154
1081
|
pattern=r"^(.+?): (\d+): (\d+) - (.+) is too complex \((\d+)\)",
|
|
1155
1082
|
replacement=r"File: \1, Line: \2, Col: \3, Function: \4, Complexity: \5",
|
|
1156
|
-
description="Parse complexipy complexity detection: file:line:col - function "
|
|
1083
|
+
description="Parse complexipy complexity detection: file: line: col - function "
|
|
1157
1084
|
"is too complex (score)",
|
|
1158
1085
|
test_cases=[
|
|
1159
1086
|
(
|
|
1160
1087
|
"src/app.py: 45: 1 - complex_function is too complex (15)",
|
|
1161
|
-
"File: src/app.py, Line: 45, Col: 1, Function: complex_function,"
|
|
1162
|
-
"
|
|
1088
|
+
"File: src/app.py, Line: 45, Col: 1, Function: complex_function, "
|
|
1089
|
+
"Complexity: 15",
|
|
1163
1090
|
),
|
|
1164
1091
|
(
|
|
1165
1092
|
"test.py: 1: 1 - nested_loops is too complex (20)",
|
|
@@ -1173,12 +1100,10 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
1173
1100
|
),
|
|
1174
1101
|
],
|
|
1175
1102
|
),
|
|
1176
|
-
# Test output parsing patterns for test_progress_streamer.py
|
|
1177
|
-
# These patterns are used for matching/extraction, not replacement
|
|
1178
1103
|
"pytest_test_start": ValidatedPattern(
|
|
1179
1104
|
name="pytest_test_start",
|
|
1180
1105
|
pattern=r"^(.+?):: ?(.+?):: ?(.+?) (PASSED|FAILED|SKIPPED|ERROR)$",
|
|
1181
|
-
replacement=r"\1::\2::\3",
|
|
1106
|
+
replacement=r"\1::\2::\3",
|
|
1182
1107
|
description="Parse pytest test start line with file, class, and method "
|
|
1183
1108
|
"(3-part format)",
|
|
1184
1109
|
test_cases=[
|
|
@@ -1199,7 +1124,7 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
1199
1124
|
"pytest_test_result": ValidatedPattern(
|
|
1200
1125
|
name="pytest_test_result",
|
|
1201
1126
|
pattern=r"^(.+?) (PASSED|FAILED|SKIPPED|ERROR)(?: \[.*?\])?\s*$",
|
|
1202
|
-
replacement=r"\1",
|
|
1127
|
+
replacement=r"\1",
|
|
1203
1128
|
description="Parse pytest test result line with test identifier",
|
|
1204
1129
|
test_cases=[
|
|
1205
1130
|
("test_file.py::test_method PASSED", "test_file.py::test_method"),
|
|
@@ -1213,7 +1138,7 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
1213
1138
|
"pytest_collection_count": ValidatedPattern(
|
|
1214
1139
|
name="pytest_collection_count",
|
|
1215
1140
|
pattern=r"collected (\d+) items?",
|
|
1216
|
-
replacement=r"\1",
|
|
1141
|
+
replacement=r"\1",
|
|
1217
1142
|
description="Parse pytest test collection count",
|
|
1218
1143
|
test_cases=[
|
|
1219
1144
|
("collected 5 items", "5"),
|
|
@@ -1221,13 +1146,13 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
1221
1146
|
(
|
|
1222
1147
|
"collected 42 items for execution",
|
|
1223
1148
|
"42 for execution",
|
|
1224
|
-
),
|
|
1149
|
+
),
|
|
1225
1150
|
],
|
|
1226
1151
|
),
|
|
1227
1152
|
"pytest_session_start": ValidatedPattern(
|
|
1228
1153
|
name="pytest_session_start",
|
|
1229
1154
|
pattern=r"test session starts",
|
|
1230
|
-
replacement=r"test session starts",
|
|
1155
|
+
replacement=r"test session starts",
|
|
1231
1156
|
description="Match pytest session start indicator",
|
|
1232
1157
|
test_cases=[
|
|
1233
1158
|
("test session starts", "test session starts"),
|
|
@@ -1237,24 +1162,24 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
1237
1162
|
"pytest_coverage_total": ValidatedPattern(
|
|
1238
1163
|
name="pytest_coverage_total",
|
|
1239
1164
|
pattern=r"TOTAL\s+\d+\s+\d+\s+(\d+)%",
|
|
1240
|
-
replacement=r"\1",
|
|
1165
|
+
replacement=r"\1",
|
|
1241
1166
|
description="Parse pytest coverage total percentage",
|
|
1242
1167
|
test_cases=[
|
|
1243
|
-
("TOTAL
|
|
1244
|
-
("TOTAL
|
|
1245
|
-
("TOTAL
|
|
1168
|
+
("TOTAL 123 45 85%", "85"),
|
|
1169
|
+
("TOTAL 1000 250 75%", "75"),
|
|
1170
|
+
("TOTAL 50 0 100%", "100"),
|
|
1246
1171
|
],
|
|
1247
1172
|
),
|
|
1248
1173
|
"pytest_detailed_test": ValidatedPattern(
|
|
1249
1174
|
name="pytest_detailed_test",
|
|
1250
1175
|
pattern=r"^(.+\.py)::(.+) (PASSED|FAILED|SKIPPED|ERROR)",
|
|
1251
|
-
replacement=r"\1::\2",
|
|
1176
|
+
replacement=r"\1::\2",
|
|
1252
1177
|
description="Parse detailed pytest test output with file, test name, and "
|
|
1253
1178
|
"status",
|
|
1254
1179
|
test_cases=[
|
|
1255
1180
|
(
|
|
1256
1181
|
"test_file.py::test_method PASSED [50%]",
|
|
1257
|
-
"test_file.py::test_method [50%]",
|
|
1182
|
+
"test_file.py::test_method [50%]",
|
|
1258
1183
|
),
|
|
1259
1184
|
(
|
|
1260
1185
|
"tests/core.py::TestClass::test_func FAILED [75%] [0.1s]",
|
|
@@ -1266,7 +1191,6 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
1266
1191
|
),
|
|
1267
1192
|
],
|
|
1268
1193
|
),
|
|
1269
|
-
# Code cleaning patterns (from code_cleaner.py)
|
|
1270
1194
|
"docstring_triple_double": ValidatedPattern(
|
|
1271
1195
|
name="docstring_triple_double",
|
|
1272
1196
|
pattern=r'^\s*""".*?"""\s*$',
|
|
@@ -1274,13 +1198,13 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
1274
1198
|
flags=re.MULTILINE | re.DOTALL,
|
|
1275
1199
|
description="Remove triple-quoted docstrings with double quotes",
|
|
1276
1200
|
test_cases=[
|
|
1277
|
-
('
|
|
1201
|
+
(' """This is a docstring""" ', ""),
|
|
1278
1202
|
('"""Module docstring"""', ""),
|
|
1279
|
-
('
|
|
1203
|
+
(' """\n Multi-line\n docstring\n """', ""),
|
|
1280
1204
|
(
|
|
1281
1205
|
'regular_code = "not a docstring"',
|
|
1282
1206
|
'regular_code = "not a docstring"',
|
|
1283
|
-
),
|
|
1207
|
+
),
|
|
1284
1208
|
],
|
|
1285
1209
|
),
|
|
1286
1210
|
"docstring_triple_single": ValidatedPattern(
|
|
@@ -1290,39 +1214,43 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
1290
1214
|
flags=re.MULTILINE | re.DOTALL,
|
|
1291
1215
|
description="Remove triple-quoted docstrings with single quotes",
|
|
1292
1216
|
test_cases=[
|
|
1293
|
-
("
|
|
1217
|
+
(" '''This is a docstring''' ", ""),
|
|
1294
1218
|
("'''Module docstring'''", ""),
|
|
1295
|
-
("
|
|
1219
|
+
(" '''\n Multi-line\n docstring\n '''", ""),
|
|
1296
1220
|
(
|
|
1297
1221
|
"regular_code = 'not a docstring'",
|
|
1298
1222
|
"regular_code = 'not a docstring'",
|
|
1299
|
-
),
|
|
1223
|
+
),
|
|
1300
1224
|
],
|
|
1301
1225
|
),
|
|
1302
1226
|
"spacing_after_comma": ValidatedPattern(
|
|
1303
1227
|
name="spacing_after_comma",
|
|
1304
|
-
pattern=r",([^ \n])",
|
|
1228
|
+
pattern=r", ([^ \n])",
|
|
1305
1229
|
replacement=r", \1",
|
|
1306
1230
|
global_replace=True,
|
|
1307
1231
|
description="Add space after comma if missing",
|
|
1308
1232
|
test_cases=[
|
|
1309
|
-
("def func(a,b,c):", "def func(a, b, c):"),
|
|
1310
|
-
("items = [1,2,3,4]", "items = [1, 2, 3, 4]"),
|
|
1311
|
-
("already, spaced, properly", "already, spaced, properly"),
|
|
1312
|
-
("mixed,spacing, here", "mixed, spacing, here"),
|
|
1233
|
+
("def func(a, b, c): ", "def func(a, b, c): "),
|
|
1234
|
+
("items = [1, 2, 3, 4]", "items = [1, 2, 3, 4]"),
|
|
1235
|
+
("already, spaced, properly", "already, spaced, properly"),
|
|
1236
|
+
("mixed, spacing, here", "mixed, spacing, here"),
|
|
1313
1237
|
],
|
|
1314
1238
|
),
|
|
1315
1239
|
"spacing_after_colon": ValidatedPattern(
|
|
1316
1240
|
name="spacing_after_colon",
|
|
1317
|
-
pattern=r"(?<!:):([^ \n:])",
|
|
1318
|
-
replacement=r": \
|
|
1241
|
+
pattern=r"(?<!https)(?<!http)(?<!ftp)(?<!file)(?<!: )(\b[a-zA-Z_][a-zA-Z0-9_]*):([a-zA-Z0-9_][^ \n:]*)",
|
|
1242
|
+
replacement=r"\1: \2",
|
|
1319
1243
|
global_replace=True,
|
|
1320
|
-
description="Add space after colon if missing (avoid double colons)",
|
|
1244
|
+
description="Add space after colon if missing (avoid double colons, URLs, and protocols)",
|
|
1321
1245
|
test_cases=[
|
|
1322
|
-
("def func(x:int, y:str):", "def func(x: int, y: str):"),
|
|
1323
|
-
("dict_item = {'key':'value'}", "dict_item = {'key': 'value'}"),
|
|
1324
|
-
("already: spaced: properly", "already: spaced: properly"),
|
|
1325
|
-
("class::method", "class::method"),
|
|
1246
|
+
("def func(x: int, y: str): ", "def func(x: int, y: str): "),
|
|
1247
|
+
("dict_item = {'key': 'value'}", "dict_item = {'key': 'value'}"),
|
|
1248
|
+
("already: spaced: properly", "already: spaced: properly"),
|
|
1249
|
+
("class::method", "class::method"),
|
|
1250
|
+
("https://github.com", "https://github.com"),
|
|
1251
|
+
("http://example.com", "http://example.com"),
|
|
1252
|
+
("ftp://server.com", "ftp://server.com"),
|
|
1253
|
+
("repo:local", "repo: local"),
|
|
1326
1254
|
],
|
|
1327
1255
|
),
|
|
1328
1256
|
"multiple_spaces": ValidatedPattern(
|
|
@@ -1332,80 +1260,79 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
1332
1260
|
description="Replace multiple spaces with single space",
|
|
1333
1261
|
global_replace=True,
|
|
1334
1262
|
test_cases=[
|
|
1335
|
-
("def
|
|
1336
|
-
("single space only", "single space only"),
|
|
1337
|
-
("lots
|
|
1338
|
-
("\tkeep\ttabs\tbut
|
|
1263
|
+
("def func( x, y ): ", "def func( x, y ): "),
|
|
1264
|
+
("single space only", "single space only"),
|
|
1265
|
+
("lots of spaces", "lots of spaces"),
|
|
1266
|
+
("\tkeep\ttabs\tbut fix spaces", "\tkeep\ttabs\tbut fix spaces"),
|
|
1339
1267
|
],
|
|
1340
1268
|
),
|
|
1341
1269
|
"preserved_comments": ValidatedPattern(
|
|
1342
1270
|
name="preserved_comments",
|
|
1343
1271
|
pattern=r"(#.*?(?: coding: | encoding: | type: | noqa | pragma).*)",
|
|
1344
|
-
replacement=r"\1",
|
|
1272
|
+
replacement=r"\1",
|
|
1345
1273
|
description="Match preserved code comments (encoding, type hints, etc.)",
|
|
1346
1274
|
test_cases=[
|
|
1347
|
-
("# coding: utf-8", "# coding: utf-8"),
|
|
1275
|
+
("# coding: utf-8", "# coding: utf-8"),
|
|
1348
1276
|
(
|
|
1349
1277
|
"# encoding: latin-1",
|
|
1350
1278
|
"# encoding: latin-1",
|
|
1351
|
-
),
|
|
1352
|
-
("# type: ignore", "# type: ignore"),
|
|
1353
|
-
("# noqa: E501", "# noqa: E501"),
|
|
1279
|
+
),
|
|
1280
|
+
("# type: ignore", "# type: ignore"),
|
|
1281
|
+
("# noqa: E501", "# noqa: E501"),
|
|
1354
1282
|
(
|
|
1355
1283
|
"# pragma: no cover",
|
|
1356
1284
|
"# pragma: no cover",
|
|
1357
|
-
),
|
|
1358
|
-
("# regular comment", "# regular comment"),
|
|
1285
|
+
),
|
|
1286
|
+
("# regular comment", "# regular comment"),
|
|
1359
1287
|
],
|
|
1360
1288
|
),
|
|
1361
1289
|
"todo_pattern": ValidatedPattern(
|
|
1362
1290
|
name="todo_pattern",
|
|
1363
1291
|
pattern=r"(#.*?TODO.*)",
|
|
1364
|
-
replacement=r"\1",
|
|
1292
|
+
replacement=r"\1",
|
|
1365
1293
|
flags=re.IGNORECASE,
|
|
1366
1294
|
description="Match TODO comments for validation",
|
|
1367
1295
|
test_cases=[
|
|
1368
1296
|
(
|
|
1369
1297
|
"# TODO: Fix this bug",
|
|
1370
1298
|
"# TODO: Fix this bug",
|
|
1371
|
-
),
|
|
1299
|
+
),
|
|
1372
1300
|
(
|
|
1373
1301
|
"# todo: implement later",
|
|
1374
1302
|
"# todo: implement later",
|
|
1375
|
-
),
|
|
1303
|
+
),
|
|
1376
1304
|
(
|
|
1377
1305
|
"# TODO refactor this method",
|
|
1378
1306
|
"# TODO refactor this method",
|
|
1379
|
-
),
|
|
1307
|
+
),
|
|
1380
1308
|
(
|
|
1381
1309
|
"# FIXME: another issue",
|
|
1382
1310
|
"# FIXME: another issue",
|
|
1383
|
-
),
|
|
1384
|
-
("# regular comment", "# regular comment"),
|
|
1311
|
+
),
|
|
1312
|
+
("# regular comment", "# regular comment"),
|
|
1385
1313
|
],
|
|
1386
1314
|
),
|
|
1387
|
-
# DRY agent patterns - for code duplication detection
|
|
1388
1315
|
"detect_error_response_patterns": ValidatedPattern(
|
|
1389
1316
|
name="detect_error_response_patterns",
|
|
1390
1317
|
pattern=r'return\s+.*[\'\"]\{.*[\'\""]error[\'\""].*\}.*[\'\""]',
|
|
1391
|
-
replacement=r"MATCH",
|
|
1318
|
+
replacement=r"MATCH",
|
|
1392
1319
|
description="Detect error response patterns in Python code for DRY violations",
|
|
1393
1320
|
test_cases=[
|
|
1394
1321
|
('return \'{"error": "msg"}\'', "MATCH"),
|
|
1395
1322
|
('return f\'{"error": "msg"}\'', "MATCH"),
|
|
1396
|
-
('return {"success": True}', 'return {"success": True}'),
|
|
1323
|
+
('return {"success": True}', 'return {"success": True}'),
|
|
1397
1324
|
('return \'{"error": "test message", "code": 500}\'', "MATCH"),
|
|
1398
1325
|
],
|
|
1399
1326
|
),
|
|
1400
1327
|
"detect_path_conversion_patterns": ValidatedPattern(
|
|
1401
1328
|
name="detect_path_conversion_patterns",
|
|
1402
|
-
pattern=r"Path\([^)]+\)\s+if\s+isinstance\([^)]
|
|
1403
|
-
replacement=r"MATCH",
|
|
1329
|
+
pattern=r"Path\([^)]+\)\s+if\s+isinstance\([^)]+, \s*str\)\s+else\s+[^)]+",
|
|
1330
|
+
replacement=r"MATCH",
|
|
1404
1331
|
description="Detect path conversion patterns in Python code for DRY violations",
|
|
1405
1332
|
test_cases=[
|
|
1406
1333
|
("Path(value) if isinstance(value, str) else value", "MATCH"),
|
|
1407
1334
|
("Path(path) if isinstance(path, str) else path", "MATCH"),
|
|
1408
|
-
("Path('/tmp/file')", "Path('/tmp/file')"),
|
|
1335
|
+
("Path('/tmp/file')", "Path('/tmp/file')"),
|
|
1409
1336
|
(
|
|
1410
1337
|
"Path(input_path) if isinstance(input_path, str) else input_path",
|
|
1411
1338
|
"MATCH",
|
|
@@ -1414,35 +1341,35 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
1414
1341
|
),
|
|
1415
1342
|
"detect_file_existence_patterns": ValidatedPattern(
|
|
1416
1343
|
name="detect_file_existence_patterns",
|
|
1417
|
-
pattern=r"if\s+not\s+\w+\.exists\(\):",
|
|
1418
|
-
replacement=r"MATCH",
|
|
1344
|
+
pattern=r"if\s+not\s+\w+\.exists\(\): ",
|
|
1345
|
+
replacement=r"MATCH",
|
|
1419
1346
|
description="Detect file existence check patterns in Python code for DRY"
|
|
1420
1347
|
" violations",
|
|
1421
1348
|
test_cases=[
|
|
1422
|
-
("if not file.exists():", "MATCH"),
|
|
1423
|
-
("if not path.exists():", "MATCH"),
|
|
1424
|
-
("if not file_path.exists():", "MATCH"),
|
|
1425
|
-
("if file.exists():", "if file.exists():"),
|
|
1349
|
+
("if not file.exists(): ", "MATCH"),
|
|
1350
|
+
("if not path.exists(): ", "MATCH"),
|
|
1351
|
+
("if not file_path.exists(): ", "MATCH"),
|
|
1352
|
+
("if file.exists(): ", "if file.exists(): "),
|
|
1426
1353
|
],
|
|
1427
1354
|
),
|
|
1428
1355
|
"detect_exception_patterns": ValidatedPattern(
|
|
1429
1356
|
name="detect_exception_patterns",
|
|
1430
|
-
pattern=r"except\s+\w*Exception\s+as\s+\w+:",
|
|
1431
|
-
replacement=r"MATCH",
|
|
1357
|
+
pattern=r"except\s+\w*Exception\s+as\s+\w+: ",
|
|
1358
|
+
replacement=r"MATCH",
|
|
1432
1359
|
description="Detect exception handling patterns for base Exception class in Python code for DRY violations",
|
|
1433
1360
|
test_cases=[
|
|
1434
|
-
("except Exception as e:", "MATCH"),
|
|
1435
|
-
("except BaseException as error:", "MATCH"),
|
|
1361
|
+
("except Exception as e: ", "MATCH"),
|
|
1362
|
+
("except BaseException as error: ", "MATCH"),
|
|
1436
1363
|
(
|
|
1437
|
-
"except ValueError as error:",
|
|
1438
|
-
"except ValueError as error:",
|
|
1439
|
-
),
|
|
1440
|
-
("try:", "try:"),
|
|
1364
|
+
"except ValueError as error: ",
|
|
1365
|
+
"except ValueError as error: ",
|
|
1366
|
+
),
|
|
1367
|
+
("try: ", "try: "),
|
|
1441
1368
|
],
|
|
1442
1369
|
),
|
|
1443
1370
|
"fix_path_conversion_with_ensure_path": ValidatedPattern(
|
|
1444
1371
|
name="fix_path_conversion_with_ensure_path",
|
|
1445
|
-
pattern=r"Path\([^)]+\)\s+if\s+isinstance\([^)]
|
|
1372
|
+
pattern=r"Path\([^)]+\)\s+if\s+isinstance\([^)]+, \s*str\)\s+else\s+([^)]+)",
|
|
1446
1373
|
replacement=r"_ensure_path(\1)",
|
|
1447
1374
|
description="Replace path conversion patterns with _ensure_path utility "
|
|
1448
1375
|
"function",
|
|
@@ -1457,7 +1384,7 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
1457
1384
|
),
|
|
1458
1385
|
"fix_path_conversion_simple": ValidatedPattern(
|
|
1459
1386
|
name="fix_path_conversion_simple",
|
|
1460
|
-
pattern=r"Path\(([^)]+)\)\s+if\s+isinstance\(\1
|
|
1387
|
+
pattern=r"Path\(([^)]+)\)\s+if\s+isinstance\(\1, \s*str\)\s+else\s+\1",
|
|
1461
1388
|
replacement=r"_ensure_path(\1)",
|
|
1462
1389
|
description="Replace simple path conversion patterns with _ensure_path utility "
|
|
1463
1390
|
"function",
|
|
@@ -1470,12 +1397,11 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
1470
1397
|
),
|
|
1471
1398
|
],
|
|
1472
1399
|
),
|
|
1473
|
-
# Security agent patterns - NEW PATTERNS FOR SECURITY_AGENT.PY
|
|
1474
1400
|
"detect_security_keywords": ValidatedPattern(
|
|
1475
1401
|
name="detect_security_keywords",
|
|
1476
1402
|
pattern=r"(?i)(bandit|security|vulnerability|hardcoded|"
|
|
1477
1403
|
r"shell=true|b108|b602|b301|b506|unsafe|injection)",
|
|
1478
|
-
replacement=r"MATCH",
|
|
1404
|
+
replacement=r"MATCH",
|
|
1479
1405
|
description="Detect security-related keywords in issue messages "
|
|
1480
1406
|
"(case insensitive)",
|
|
1481
1407
|
flags=re.IGNORECASE,
|
|
@@ -1485,7 +1411,7 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
1485
1411
|
("hardcoded path found", "MATCH path found"),
|
|
1486
1412
|
("shell=True usage", "MATCH usage"),
|
|
1487
1413
|
("B108 violation", "MATCH violation"),
|
|
1488
|
-
("normal message", "normal message"),
|
|
1414
|
+
("normal message", "normal message"),
|
|
1489
1415
|
],
|
|
1490
1416
|
),
|
|
1491
1417
|
"detect_hardcoded_temp_paths_basic": ValidatedPattern(
|
|
@@ -1499,7 +1425,7 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
1499
1425
|
(r"C:\tmp\data.log", "[TEMP_PATH]/data.log"),
|
|
1500
1426
|
("/temp/cache", "[TEMP_PATH]/cache"),
|
|
1501
1427
|
(r"C:\temp\work", "[TEMP_PATH]/work"),
|
|
1502
|
-
("/regular/path", "/regular/path"),
|
|
1428
|
+
("/regular/path", "/regular/path"),
|
|
1503
1429
|
],
|
|
1504
1430
|
),
|
|
1505
1431
|
"replace_hardcoded_temp_paths": ValidatedPattern(
|
|
@@ -1511,7 +1437,7 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
1511
1437
|
test_cases=[
|
|
1512
1438
|
('Path("/tmp/myfile.txt")', 'Path(tempfile.gettempdir()) / "myfile.txt"'),
|
|
1513
1439
|
('Path("/tmp/data.log")', 'Path(tempfile.gettempdir()) / "data.log"'),
|
|
1514
|
-
('Path("/regular/path")', 'Path("/regular/path")'),
|
|
1440
|
+
('Path("/regular/path")', 'Path("/regular/path")'),
|
|
1515
1441
|
],
|
|
1516
1442
|
),
|
|
1517
1443
|
"replace_hardcoded_temp_strings": ValidatedPattern(
|
|
@@ -1523,7 +1449,7 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
1523
1449
|
test_cases=[
|
|
1524
1450
|
('"/tmp/myfile.txt"', 'str(Path(tempfile.gettempdir()) / "myfile.txt")'),
|
|
1525
1451
|
('"/tmp/data.log"', 'str(Path(tempfile.gettempdir()) / "data.log")'),
|
|
1526
|
-
('"/regular/path"', '"/regular/path"'),
|
|
1452
|
+
('"/regular/path"', '"/regular/path"'),
|
|
1527
1453
|
],
|
|
1528
1454
|
),
|
|
1529
1455
|
"replace_hardcoded_temp_single_quotes": ValidatedPattern(
|
|
@@ -1536,7 +1462,7 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
1536
1462
|
test_cases=[
|
|
1537
1463
|
("'/tmp/myfile.txt'", "str(Path(tempfile.gettempdir()) / 'myfile.txt')"),
|
|
1538
1464
|
("'/tmp/data.log'", "str(Path(tempfile.gettempdir()) / 'data.log')"),
|
|
1539
|
-
("'/regular/path'", "'/regular/path'"),
|
|
1465
|
+
("'/regular/path'", "'/regular/path'"),
|
|
1540
1466
|
],
|
|
1541
1467
|
),
|
|
1542
1468
|
"replace_test_path_patterns": ValidatedPattern(
|
|
@@ -1546,7 +1472,7 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
1546
1472
|
description="Replace hardcoded /test/path patterns with tempfile equivalent",
|
|
1547
1473
|
test_cases=[
|
|
1548
1474
|
('Path("/test/path")', "Path(tempfile.gettempdir()) / 'test-path'"),
|
|
1549
|
-
('Path("/other/path")', 'Path("/other/path")'),
|
|
1475
|
+
('Path("/other/path")', 'Path("/other/path")'),
|
|
1550
1476
|
],
|
|
1551
1477
|
),
|
|
1552
1478
|
"detect_hardcoded_secrets": ValidatedPattern(
|
|
@@ -1560,7 +1486,7 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
1560
1486
|
('password = "secret123"', "[SECRET_DETECTED]"),
|
|
1561
1487
|
("api_key = 'abc123def'", "[SECRET_DETECTED]"),
|
|
1562
1488
|
('TOKEN = "my-token-here"', "[SECRET_DETECTED]"),
|
|
1563
|
-
("username = 'user123'", "username = 'user123'"),
|
|
1489
|
+
("username = 'user123'", "username = 'user123'"),
|
|
1564
1490
|
],
|
|
1565
1491
|
),
|
|
1566
1492
|
"extract_variable_name_from_assignment": ValidatedPattern(
|
|
@@ -1571,7 +1497,7 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
1571
1497
|
test_cases=[
|
|
1572
1498
|
("password = 'secret'", "password"),
|
|
1573
1499
|
("api_key = 'value'", "api_key"),
|
|
1574
|
-
("
|
|
1500
|
+
(" token =", "token"),
|
|
1575
1501
|
("complex_variable_name = value", "complex_variable_name"),
|
|
1576
1502
|
],
|
|
1577
1503
|
),
|
|
@@ -1584,11 +1510,10 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
1584
1510
|
test_cases=[
|
|
1585
1511
|
("random.random()", "[INSECURE_RANDOM]()"),
|
|
1586
1512
|
("random.choice(options)", "[INSECURE_RANDOM]()"),
|
|
1587
|
-
("secrets.choice(options)", "secrets.choice(options)"),
|
|
1588
|
-
("my_random.choice()", "my_random.choice()"),
|
|
1513
|
+
("secrets.choice(options)", "secrets.choice(options)"),
|
|
1514
|
+
("my_random.choice()", "my_random.choice()"),
|
|
1589
1515
|
],
|
|
1590
1516
|
),
|
|
1591
|
-
# Input validation patterns for security-critical validation
|
|
1592
1517
|
"validate_sql_injection_patterns": ValidatedPattern(
|
|
1593
1518
|
name="validate_sql_injection_patterns",
|
|
1594
1519
|
pattern=r"\b(union|select|insert|update|delete|drop|create|alter|"
|
|
@@ -1601,7 +1526,7 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
1601
1526
|
test_cases=[
|
|
1602
1527
|
("UNION SELECT", "[SQL_INJECTION] [SQL_INJECTION]"),
|
|
1603
1528
|
("drop table", "[SQL_INJECTION] table"),
|
|
1604
|
-
("normal text", "normal text"),
|
|
1529
|
+
("normal text", "normal text"),
|
|
1605
1530
|
("exec command", "[SQL_INJECTION] command"),
|
|
1606
1531
|
("execute procedure", "[SQL_INJECTION] procedure"),
|
|
1607
1532
|
],
|
|
@@ -1615,7 +1540,7 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
1615
1540
|
test_cases=[
|
|
1616
1541
|
("--comment", "[SQL_COMMENT]comment"),
|
|
1617
1542
|
("/* comment */", "[SQL_COMMENT] comment [SQL_COMMENT]"),
|
|
1618
|
-
("normal-text", "normal-text"),
|
|
1543
|
+
("normal-text", "normal-text"),
|
|
1619
1544
|
("---triple", "[SQL_COMMENT]triple"),
|
|
1620
1545
|
],
|
|
1621
1546
|
),
|
|
@@ -1629,8 +1554,8 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
1629
1554
|
test_cases=[
|
|
1630
1555
|
("or 1=1", "[BOOLEAN_INJECTION]1"),
|
|
1631
1556
|
("AND password=", "[BOOLEAN_INJECTION]"),
|
|
1632
|
-
("normal or text", "normal or text"),
|
|
1633
|
-
("value=test", "value=test"),
|
|
1557
|
+
("normal or text", "normal or text"),
|
|
1558
|
+
("value=test", "value=test"),
|
|
1634
1559
|
],
|
|
1635
1560
|
),
|
|
1636
1561
|
"validate_sql_server_specific": ValidatedPattern(
|
|
@@ -1643,7 +1568,7 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
1643
1568
|
test_cases=[
|
|
1644
1569
|
("xp_cmdshell", "[SQLSERVER_EXPLOIT]"),
|
|
1645
1570
|
("SP_EXECUTESQL", "[SQLSERVER_EXPLOIT]"),
|
|
1646
|
-
("normal text", "normal text"),
|
|
1571
|
+
("normal text", "normal text"),
|
|
1647
1572
|
],
|
|
1648
1573
|
),
|
|
1649
1574
|
"validate_code_eval_injection": ValidatedPattern(
|
|
@@ -1656,7 +1581,7 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
1656
1581
|
("eval(code)", "[CODE_EVAL](code)"),
|
|
1657
1582
|
("exec(command)", "[CODE_EVAL](command)"),
|
|
1658
1583
|
("execfile(script)", "[CODE_EVAL](script)"),
|
|
1659
|
-
("evaluate()", "evaluate()"),
|
|
1584
|
+
("evaluate()", "evaluate()"),
|
|
1660
1585
|
],
|
|
1661
1586
|
),
|
|
1662
1587
|
"validate_code_dynamic_access": ValidatedPattern(
|
|
@@ -1670,7 +1595,7 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
1670
1595
|
("getattr(obj, name)", "[DYNAMIC_ACCESS](obj, name)"),
|
|
1671
1596
|
("setattr(obj, name)", "[DYNAMIC_ACCESS](obj, name)"),
|
|
1672
1597
|
("delattr(obj, name)", "[DYNAMIC_ACCESS](obj, name)"),
|
|
1673
|
-
("mygetattr", "mygetattr"),
|
|
1598
|
+
("mygetattr", "mygetattr"),
|
|
1674
1599
|
],
|
|
1675
1600
|
),
|
|
1676
1601
|
"validate_code_system_commands": ValidatedPattern(
|
|
@@ -1684,7 +1609,7 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
1684
1609
|
("os.system(cmd)", "[SYSTEM_COMMAND](cmd)"),
|
|
1685
1610
|
("os.popen(cmd)", "[SYSTEM_COMMAND](cmd)"),
|
|
1686
1611
|
("commands.getoutput", "[SYSTEM_COMMAND]getoutput"),
|
|
1687
|
-
("mysubprocess", "mysubprocess"),
|
|
1612
|
+
("mysubprocess", "mysubprocess"),
|
|
1688
1613
|
],
|
|
1689
1614
|
),
|
|
1690
1615
|
"validate_code_compilation": ValidatedPattern(
|
|
@@ -1696,7 +1621,7 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
1696
1621
|
test_cases=[
|
|
1697
1622
|
("compile(source)", "[CODE_COMPILE]source)"),
|
|
1698
1623
|
("code.compile(source)", "[CODE_COMPILE](source)"),
|
|
1699
|
-
("compiled", "compiled"),
|
|
1624
|
+
("compiled", "compiled"),
|
|
1700
1625
|
],
|
|
1701
1626
|
),
|
|
1702
1627
|
"validate_job_id_format": ValidatedPattern(
|
|
@@ -1717,7 +1642,7 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
1717
1642
|
name="validate_env_var_name_format",
|
|
1718
1643
|
pattern=r"^[A-Z_][A-Z0-9_]*$",
|
|
1719
1644
|
replacement="VALID_ENV_VAR_NAME",
|
|
1720
|
-
description="Validate environment variable name format - uppercase letters,"
|
|
1645
|
+
description="Validate environment variable name format - uppercase letters, "
|
|
1721
1646
|
" numbers, underscores only, must start with letter or underscore",
|
|
1722
1647
|
test_cases=[
|
|
1723
1648
|
("VALID_VAR", "VALID_ENV_VAR_NAME"),
|
|
@@ -1727,7 +1652,6 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
1727
1652
|
("_PRIVATE_VAR", "VALID_ENV_VAR_NAME"),
|
|
1728
1653
|
],
|
|
1729
1654
|
),
|
|
1730
|
-
# Config file update patterns
|
|
1731
1655
|
"update_repo_revision": ValidatedPattern(
|
|
1732
1656
|
name="update_repo_revision",
|
|
1733
1657
|
pattern=r'("repo": "[^"]+?".*?"rev": )"([^"]+)"',
|
|
@@ -1737,137 +1661,135 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
1737
1661
|
flags=re.DOTALL,
|
|
1738
1662
|
test_cases=[
|
|
1739
1663
|
(
|
|
1740
|
-
'"repo": "https
|
|
1741
|
-
'"repo": "https
|
|
1664
|
+
'"repo": "https: //github.com/user/repo".*"rev": "old_rev"',
|
|
1665
|
+
'"repo": "https: //github.com/user/repo".*"rev": "NEW_REVISION"',
|
|
1742
1666
|
),
|
|
1743
1667
|
(
|
|
1744
|
-
'"repo": "git@github.com:user/repo.git", "branch": "main", "rev": '
|
|
1668
|
+
'"repo": "git@github.com: user/repo.git", "branch": "main", "rev": '
|
|
1745
1669
|
'"abc123"',
|
|
1746
|
-
'"repo": "git@github.com:user/repo.git", "branch": "main", "rev":'
|
|
1747
|
-
'
|
|
1670
|
+
'"repo": "git@github.com: user/repo.git", "branch": "main", "rev": '
|
|
1671
|
+
'"NEW_REVISION"',
|
|
1748
1672
|
),
|
|
1749
1673
|
(
|
|
1750
|
-
'{"repo": "https
|
|
1674
|
+
'{"repo": "https: //example.com/repo", "description": "test", "rev": '
|
|
1751
1675
|
'"456def"}',
|
|
1752
|
-
'{"repo": "https
|
|
1753
|
-
'
|
|
1676
|
+
'{"repo": "https: //example.com/repo", "description": "test", "rev": '
|
|
1677
|
+
'"NEW_REVISION"}',
|
|
1754
1678
|
),
|
|
1755
1679
|
],
|
|
1756
1680
|
),
|
|
1757
|
-
# URL sanitization patterns for security
|
|
1758
1681
|
"sanitize_localhost_urls": ValidatedPattern(
|
|
1759
1682
|
name="sanitize_localhost_urls",
|
|
1760
|
-
pattern=r"https
|
|
1683
|
+
pattern=r"https?: //localhost: \d+[^\s]*",
|
|
1761
1684
|
replacement="[INTERNAL_URL]",
|
|
1762
1685
|
description="Sanitize localhost URLs with ports for security",
|
|
1763
1686
|
global_replace=True,
|
|
1764
1687
|
test_cases=[
|
|
1765
|
-
("http
|
|
1766
|
-
("https
|
|
1688
|
+
("http: //localhost: 8000/api/test", "[INTERNAL_URL]"),
|
|
1689
|
+
("https: //localhost: 3000/dashboard", "[INTERNAL_URL]"),
|
|
1767
1690
|
(
|
|
1768
|
-
"Visit http
|
|
1691
|
+
"Visit http: //localhost: 8080/admin for details",
|
|
1769
1692
|
"Visit [INTERNAL_URL] for details",
|
|
1770
1693
|
),
|
|
1771
|
-
("https
|
|
1694
|
+
("https: //example.com/test", "https: //example.com/test"),
|
|
1772
1695
|
],
|
|
1773
1696
|
),
|
|
1774
1697
|
"sanitize_127_urls": ValidatedPattern(
|
|
1775
1698
|
name="sanitize_127_urls",
|
|
1776
|
-
pattern=r"https
|
|
1699
|
+
pattern=r"https?: //127\.0\.0\.1: \d+[^\s]*",
|
|
1777
1700
|
replacement="[INTERNAL_URL]",
|
|
1778
1701
|
description="Sanitize 127.0.0.1 URLs with ports for security",
|
|
1779
1702
|
global_replace=True,
|
|
1780
1703
|
test_cases=[
|
|
1781
|
-
("http
|
|
1782
|
-
("https
|
|
1783
|
-
("Connect to http
|
|
1704
|
+
("http: //127.0.0.1: 8000/api", "[INTERNAL_URL]"),
|
|
1705
|
+
("https: //127.0.0.1: 3000/test", "[INTERNAL_URL]"),
|
|
1706
|
+
("Connect to http: //127.0.0.1: 5000/status", "Connect to [INTERNAL_URL]"),
|
|
1784
1707
|
(
|
|
1785
|
-
"https
|
|
1786
|
-
"https
|
|
1787
|
-
),
|
|
1708
|
+
"https: //192.168.1.1: 8080/test",
|
|
1709
|
+
"https: //192.168.1.1: 8080/test",
|
|
1710
|
+
),
|
|
1788
1711
|
],
|
|
1789
1712
|
),
|
|
1790
1713
|
"sanitize_any_localhost_urls": ValidatedPattern(
|
|
1791
1714
|
name="sanitize_any_localhost_urls",
|
|
1792
|
-
pattern=r"https
|
|
1715
|
+
pattern=r"https?: //0\.0\.0\.0: \d+[^\s]*",
|
|
1793
1716
|
replacement="[INTERNAL_URL]",
|
|
1794
1717
|
description="Sanitize 0.0.0.0 URLs with ports for security",
|
|
1795
1718
|
global_replace=True,
|
|
1796
1719
|
test_cases=[
|
|
1797
|
-
("http
|
|
1798
|
-
("https
|
|
1799
|
-
("https
|
|
1720
|
+
("http: //0.0.0.0: 8000/api", "[INTERNAL_URL]"),
|
|
1721
|
+
("https: //0.0.0.0: 3000/test", "[INTERNAL_URL]"),
|
|
1722
|
+
("https: //1.1.1.1: 8080/test", "https: //1.1.1.1: 8080/test"),
|
|
1800
1723
|
],
|
|
1801
1724
|
),
|
|
1802
1725
|
"sanitize_ws_localhost_urls": ValidatedPattern(
|
|
1803
1726
|
name="sanitize_ws_localhost_urls",
|
|
1804
|
-
pattern=r"ws
|
|
1727
|
+
pattern=r"ws: //localhost: \d+[^\s]*",
|
|
1805
1728
|
replacement="[INTERNAL_URL]",
|
|
1806
1729
|
description="Sanitize WebSocket localhost URLs with ports for security",
|
|
1807
1730
|
global_replace=True,
|
|
1808
1731
|
test_cases=[
|
|
1809
|
-
("ws
|
|
1810
|
-
("ws
|
|
1811
|
-
("Connect to ws
|
|
1732
|
+
("ws: //localhost: 8675/websocket", "[INTERNAL_URL]"),
|
|
1733
|
+
("ws: //localhost: 3000/socket", "[INTERNAL_URL]"),
|
|
1734
|
+
("Connect to ws: //localhost: 8000/ws", "Connect to [INTERNAL_URL]"),
|
|
1812
1735
|
(
|
|
1813
|
-
"wss
|
|
1814
|
-
"wss
|
|
1815
|
-
),
|
|
1736
|
+
"wss: //example.com: 443/socket",
|
|
1737
|
+
"wss: //example.com: 443/socket",
|
|
1738
|
+
),
|
|
1816
1739
|
],
|
|
1817
1740
|
),
|
|
1818
1741
|
"sanitize_ws_127_urls": ValidatedPattern(
|
|
1819
1742
|
name="sanitize_ws_127_urls",
|
|
1820
|
-
pattern=r"ws
|
|
1743
|
+
pattern=r"ws: //127\.0\.0\.1: \d+[^\s]*",
|
|
1821
1744
|
replacement="[INTERNAL_URL]",
|
|
1822
1745
|
description="Sanitize WebSocket 127.0.0.1 URLs with ports for security",
|
|
1823
1746
|
global_replace=True,
|
|
1824
1747
|
test_cases=[
|
|
1825
|
-
("ws
|
|
1826
|
-
("ws
|
|
1748
|
+
("ws: //127.0.0.1: 8675/websocket", "[INTERNAL_URL]"),
|
|
1749
|
+
("ws: //127.0.0.1: 3000/socket", "[INTERNAL_URL]"),
|
|
1827
1750
|
(
|
|
1828
|
-
"ws
|
|
1829
|
-
"ws
|
|
1830
|
-
),
|
|
1751
|
+
"ws: //192.168.1.1: 8080/socket",
|
|
1752
|
+
"ws: //192.168.1.1: 8080/socket",
|
|
1753
|
+
),
|
|
1831
1754
|
],
|
|
1832
1755
|
),
|
|
1833
1756
|
"sanitize_simple_localhost_urls": ValidatedPattern(
|
|
1834
1757
|
name="sanitize_simple_localhost_urls",
|
|
1835
|
-
pattern=r"http
|
|
1758
|
+
pattern=r"http: //localhost[^\s]*",
|
|
1836
1759
|
replacement="[INTERNAL_URL]",
|
|
1837
1760
|
description="Sanitize simple localhost URLs without explicit ports for security",
|
|
1838
1761
|
global_replace=True,
|
|
1839
1762
|
test_cases=[
|
|
1840
|
-
("http
|
|
1841
|
-
("http
|
|
1842
|
-
("Visit http
|
|
1763
|
+
("http: //localhost/api/test", "[INTERNAL_URL]"),
|
|
1764
|
+
("http: //localhost/dashboard", "[INTERNAL_URL]"),
|
|
1765
|
+
("Visit http: //localhost/admin", "Visit [INTERNAL_URL]"),
|
|
1843
1766
|
(
|
|
1844
|
-
"https
|
|
1845
|
-
"https
|
|
1846
|
-
),
|
|
1767
|
+
"https: //localhost: 443/test",
|
|
1768
|
+
"https: //localhost: 443/test",
|
|
1769
|
+
),
|
|
1847
1770
|
],
|
|
1848
1771
|
),
|
|
1849
1772
|
"sanitize_simple_ws_localhost_urls": ValidatedPattern(
|
|
1850
1773
|
name="sanitize_simple_ws_localhost_urls",
|
|
1851
|
-
pattern=r"ws
|
|
1774
|
+
pattern=r"ws: //localhost[^\s]*",
|
|
1852
1775
|
replacement="[INTERNAL_URL]",
|
|
1853
1776
|
description="Sanitize simple WebSocket localhost URLs without explicit ports"
|
|
1854
1777
|
" for security",
|
|
1855
1778
|
global_replace=True,
|
|
1856
1779
|
test_cases=[
|
|
1857
|
-
("ws
|
|
1858
|
-
("ws
|
|
1859
|
-
("Connect to ws
|
|
1780
|
+
("ws: //localhost/websocket", "[INTERNAL_URL]"),
|
|
1781
|
+
("ws: //localhost/socket", "[INTERNAL_URL]"),
|
|
1782
|
+
("Connect to ws: //localhost/ws", "Connect to [INTERNAL_URL]"),
|
|
1860
1783
|
(
|
|
1861
|
-
"wss
|
|
1862
|
-
"wss
|
|
1863
|
-
),
|
|
1784
|
+
"wss: //localhost: 443/socket",
|
|
1785
|
+
"wss: //localhost: 443/socket",
|
|
1786
|
+
),
|
|
1864
1787
|
],
|
|
1865
1788
|
),
|
|
1866
|
-
# Integration script patterns for resource management
|
|
1867
1789
|
"detect_tempfile_usage": ValidatedPattern(
|
|
1868
1790
|
name="detect_tempfile_usage",
|
|
1869
|
-
pattern=r"tempfile\.(
|
|
1870
|
-
replacement="MATCH",
|
|
1791
|
+
pattern=r"tempfile\.(mkdtemp|NamedTemporaryFile|TemporaryDirectory)",
|
|
1792
|
+
replacement="MATCH",
|
|
1871
1793
|
test_cases=[
|
|
1872
1794
|
("tempfile.mkdtemp()", "MATCH()"),
|
|
1873
1795
|
("tempfile.NamedTemporaryFile()", "MATCH()"),
|
|
@@ -1875,14 +1797,14 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
1875
1797
|
(
|
|
1876
1798
|
"not_tempfile.other()",
|
|
1877
1799
|
"not_tempfile.other()",
|
|
1878
|
-
),
|
|
1800
|
+
),
|
|
1879
1801
|
],
|
|
1880
1802
|
description="Detect tempfile module usage for resource management integration",
|
|
1881
1803
|
),
|
|
1882
1804
|
"detect_subprocess_usage": ValidatedPattern(
|
|
1883
1805
|
name="detect_subprocess_usage",
|
|
1884
|
-
pattern=r"subprocess\.(
|
|
1885
|
-
replacement="MATCH",
|
|
1806
|
+
pattern=r"subprocess\.(Popen|run)",
|
|
1807
|
+
replacement="MATCH",
|
|
1886
1808
|
test_cases=[
|
|
1887
1809
|
("subprocess.Popen(cmd)", "MATCH(cmd)"),
|
|
1888
1810
|
("subprocess.run(['cmd'])", "MATCH(['cmd'])"),
|
|
@@ -1893,7 +1815,7 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
1893
1815
|
"detect_asyncio_create_task": ValidatedPattern(
|
|
1894
1816
|
name="detect_asyncio_create_task",
|
|
1895
1817
|
pattern=r"asyncio\.create_task",
|
|
1896
|
-
replacement="MATCH",
|
|
1818
|
+
replacement="MATCH",
|
|
1897
1819
|
test_cases=[
|
|
1898
1820
|
("asyncio.create_task(coro)", "MATCH(coro)"),
|
|
1899
1821
|
("not_asyncio.other()", "not_asyncio.other()"),
|
|
@@ -1904,35 +1826,35 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
1904
1826
|
"detect_file_open_operations": ValidatedPattern(
|
|
1905
1827
|
name="detect_file_open_operations",
|
|
1906
1828
|
pattern=r"(\.open\(|with open\()",
|
|
1907
|
-
replacement=r"MATCH",
|
|
1829
|
+
replacement=r"MATCH",
|
|
1908
1830
|
test_cases=[
|
|
1909
1831
|
("file.open()", "fileMATCH)"),
|
|
1910
|
-
("with open('file.txt'):", "MATCH'file.txt'):"),
|
|
1911
|
-
("other_method()", "other_method()"),
|
|
1832
|
+
("with open('file.txt'): ", "MATCH'file.txt'): "),
|
|
1833
|
+
("other_method()", "other_method()"),
|
|
1912
1834
|
],
|
|
1913
1835
|
description="Detect file open operations for resource management integration",
|
|
1914
1836
|
),
|
|
1915
1837
|
"match_async_function_definition": ValidatedPattern(
|
|
1916
1838
|
name="match_async_function_definition",
|
|
1917
|
-
pattern=r"(async def \w+\([^)]*\)[^:]*:)",
|
|
1839
|
+
pattern=r"(async def \w+\([^)]*\)[^: ]*: )",
|
|
1918
1840
|
replacement=r"\1",
|
|
1919
1841
|
test_cases=[
|
|
1920
|
-
("async def foo():", "async def foo():"),
|
|
1921
|
-
("async def bar(a, b) -> None:", "async def bar(a, b) -> None:"),
|
|
1922
|
-
("def sync_func():", "def sync_func():"),
|
|
1842
|
+
("async def foo(): ", "async def foo(): "),
|
|
1843
|
+
("async def bar(a, b) -> None: ", "async def bar(a, b) -> None: "),
|
|
1844
|
+
("def sync_func(): ", "def sync_func(): "),
|
|
1923
1845
|
],
|
|
1924
1846
|
description="Match async function definitions for resource management"
|
|
1925
1847
|
" integration",
|
|
1926
1848
|
),
|
|
1927
1849
|
"match_class_definition": ValidatedPattern(
|
|
1928
1850
|
name="match_class_definition",
|
|
1929
|
-
pattern=r"class (\w+).*:",
|
|
1851
|
+
pattern=r"class (\w+).*: ",
|
|
1930
1852
|
replacement=r"\1",
|
|
1931
1853
|
test_cases=[
|
|
1932
|
-
("class MyClass:", "MyClass"),
|
|
1933
|
-
("class MyClass(BaseClass):", "MyClass"),
|
|
1934
|
-
("class MyClass(Base, Mixin):", "MyClass"),
|
|
1935
|
-
("def not_class():", "def not_class():"),
|
|
1854
|
+
("class MyClass: ", "MyClass"),
|
|
1855
|
+
("class MyClass(BaseClass): ", "MyClass"),
|
|
1856
|
+
("class MyClass(Base, Mixin): ", "MyClass"),
|
|
1857
|
+
("def not_class(): ", "def not_class(): "),
|
|
1936
1858
|
],
|
|
1937
1859
|
description="Match class definitions for resource management integration",
|
|
1938
1860
|
),
|
|
@@ -1987,21 +1909,20 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
1987
1909
|
),
|
|
1988
1910
|
(
|
|
1989
1911
|
"file.write_text(data, encoding='utf-8')",
|
|
1990
|
-
"await SafeFileOperations.safe_write_text(file, data, encoding='utf-8',"
|
|
1991
|
-
"
|
|
1912
|
+
"await SafeFileOperations.safe_write_text(file, data, encoding='utf-8', "
|
|
1913
|
+
"atomic=True)",
|
|
1992
1914
|
),
|
|
1993
1915
|
],
|
|
1994
1916
|
description="Replace path.write_text with SafeFileOperations.safe_write_text",
|
|
1995
1917
|
),
|
|
1996
|
-
# Agent-specific patterns - DocumentationAgent
|
|
1997
1918
|
"agent_count_pattern": ValidatedPattern(
|
|
1998
1919
|
name="agent_count_pattern",
|
|
1999
1920
|
pattern=r"(\d+)\s+agents",
|
|
2000
1921
|
replacement=r"\1 agents",
|
|
2001
1922
|
test_cases=[
|
|
2002
1923
|
("9 agents", "9 agents"),
|
|
2003
|
-
("12
|
|
2004
|
-
("5
|
|
1924
|
+
("12 agents", "12 agents"),
|
|
1925
|
+
("5 agents", "5 agents"),
|
|
2005
1926
|
],
|
|
2006
1927
|
description="Match agent count patterns for documentation consistency",
|
|
2007
1928
|
flags=re.IGNORECASE,
|
|
@@ -2012,8 +1933,8 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
2012
1933
|
replacement=r"\1 specialized agents",
|
|
2013
1934
|
test_cases=[
|
|
2014
1935
|
("9 specialized agents", "9 specialized agents"),
|
|
2015
|
-
("12
|
|
2016
|
-
("5
|
|
1936
|
+
("12 specialized agents", "12 specialized agents"),
|
|
1937
|
+
("5 specialized agents", "5 specialized agents"),
|
|
2017
1938
|
],
|
|
2018
1939
|
description="Match specialized agent count patterns for documentation "
|
|
2019
1940
|
"consistency",
|
|
@@ -2021,7 +1942,7 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
2021
1942
|
),
|
|
2022
1943
|
"total_agents_config_pattern": ValidatedPattern(
|
|
2023
1944
|
name="total_agents_config_pattern",
|
|
2024
|
-
pattern=r'total_agents["\'][\s]
|
|
1945
|
+
pattern=r'total_agents["\'][\s]*: \s*(\d+)',
|
|
2025
1946
|
replacement=r'total_agents": \1',
|
|
2026
1947
|
test_cases=[
|
|
2027
1948
|
('total_agents": 9', 'total_agents": 9'),
|
|
@@ -2037,8 +1958,8 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
2037
1958
|
replacement=r"\1 sub-agents",
|
|
2038
1959
|
test_cases=[
|
|
2039
1960
|
("9 sub-agents", "9 sub-agents"),
|
|
2040
|
-
("12
|
|
2041
|
-
("5
|
|
1961
|
+
("12 sub-agents", "12 sub-agents"),
|
|
1962
|
+
("5 sub-agents", "5 sub-agents"),
|
|
2042
1963
|
],
|
|
2043
1964
|
description="Match sub-agent count patterns for documentation consistency",
|
|
2044
1965
|
flags=re.IGNORECASE,
|
|
@@ -2071,7 +1992,7 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
2071
1992
|
),
|
|
2072
1993
|
"update_total_agents_config": ValidatedPattern(
|
|
2073
1994
|
name="update_total_agents_config",
|
|
2074
|
-
pattern=r'total_agents["\'][\s]
|
|
1995
|
+
pattern=r'total_agents["\'][\s]*: \s*\d+',
|
|
2075
1996
|
replacement=r'total_agents": NEW_COUNT',
|
|
2076
1997
|
test_cases=[
|
|
2077
1998
|
('total_agents": 9', 'total_agents": NEW_COUNT'),
|
|
@@ -2093,7 +2014,6 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
2093
2014
|
description="Update sub-agent count references (NEW_COUNT replaced"
|
|
2094
2015
|
" dynamically)",
|
|
2095
2016
|
),
|
|
2096
|
-
# Agent-specific patterns - TestSpecialistAgent
|
|
2097
2017
|
"fixture_not_found_pattern": ValidatedPattern(
|
|
2098
2018
|
name="fixture_not_found_pattern",
|
|
2099
2019
|
pattern=r"fixture '(\w+)' not found",
|
|
@@ -2112,7 +2032,7 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
2112
2032
|
test_cases=[
|
|
2113
2033
|
("ImportError: No module named", "ImportError: No module named"),
|
|
2114
2034
|
("ModuleNotFoundError: No module", "ImportError: No module"),
|
|
2115
|
-
("Other error types", "Other error types"),
|
|
2035
|
+
("Other error types", "Other error types"),
|
|
2116
2036
|
],
|
|
2117
2037
|
description="Match import error patterns in test failures",
|
|
2118
2038
|
),
|
|
@@ -2124,9 +2044,9 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
2124
2044
|
(
|
|
2125
2045
|
"AssertionError: Values differ",
|
|
2126
2046
|
"AssertionError: Values differ",
|
|
2127
|
-
),
|
|
2047
|
+
),
|
|
2128
2048
|
("assert result == expected", "AssertionError expected"),
|
|
2129
|
-
("Normal code", "Normal code"),
|
|
2049
|
+
("Normal code", "Normal code"),
|
|
2130
2050
|
],
|
|
2131
2051
|
description="Match assertion error patterns in test failures",
|
|
2132
2052
|
),
|
|
@@ -2143,7 +2063,7 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
2143
2063
|
"AttributeError: 'NoneType' has no attribute 'value'",
|
|
2144
2064
|
"AttributeError: has no attribute 'value'",
|
|
2145
2065
|
),
|
|
2146
|
-
("Normal error", "Normal error"),
|
|
2066
|
+
("Normal error", "Normal error"),
|
|
2147
2067
|
],
|
|
2148
2068
|
description="Match attribute error patterns in test failures",
|
|
2149
2069
|
),
|
|
@@ -2154,7 +2074,7 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
2154
2074
|
test_cases=[
|
|
2155
2075
|
("MockSpec error occurred", "MockSpec error occurred"),
|
|
2156
2076
|
("spec for Mock failed", "MockSpec failed"),
|
|
2157
|
-
("Normal mock usage", "Normal mock usage"),
|
|
2077
|
+
("Normal mock usage", "Normal mock usage"),
|
|
2158
2078
|
],
|
|
2159
2079
|
description="Match mock specification error patterns in test failures",
|
|
2160
2080
|
),
|
|
@@ -2165,7 +2085,7 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
2165
2085
|
test_cases=[
|
|
2166
2086
|
("'/test/path'", "str(tmp_path)"),
|
|
2167
2087
|
("/test/path", "str(tmp_path)"),
|
|
2168
|
-
("'/other/path'", "'/other/path'"),
|
|
2088
|
+
("'/other/path'", "'/other/path'"),
|
|
2169
2089
|
],
|
|
2170
2090
|
description="Match hardcoded test path patterns that should use tmp_path",
|
|
2171
2091
|
),
|
|
@@ -2187,21 +2107,20 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
2187
2107
|
test_cases=[
|
|
2188
2108
|
("ValidationError: field required", "ValidationError: field required"),
|
|
2189
2109
|
("validation error in field", "ValidationError in field"),
|
|
2190
|
-
("Normal validation", "Normal validation"),
|
|
2110
|
+
("Normal validation", "Normal validation"),
|
|
2191
2111
|
],
|
|
2192
2112
|
description="Match Pydantic validation error patterns in test failures",
|
|
2193
2113
|
),
|
|
2194
|
-
# Agent-specific patterns - PerformanceAgent
|
|
2195
2114
|
"list_append_inefficiency_pattern": ValidatedPattern(
|
|
2196
2115
|
name="list_append_inefficiency_pattern",
|
|
2197
2116
|
pattern=r"(\s*)(\w+)\s*\+=\s*\[([^]]+)\]",
|
|
2198
2117
|
replacement=r"\1\2.append(\3)",
|
|
2199
2118
|
test_cases=[
|
|
2200
|
-
("
|
|
2119
|
+
(" items += [new_item]", " items.append(new_item)"),
|
|
2201
2120
|
("results += [result]", "results.append(result)"),
|
|
2202
|
-
("
|
|
2121
|
+
(" data += [value, other]", " data.append(value, other)"),
|
|
2203
2122
|
],
|
|
2204
|
-
description="Replace inefficient list concatenation with append for"
|
|
2123
|
+
description="Replace inefficient list[t.Any] concatenation with append for"
|
|
2205
2124
|
" performance",
|
|
2206
2125
|
),
|
|
2207
2126
|
"string_concatenation_pattern": ValidatedPattern(
|
|
@@ -2209,28 +2128,27 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
2209
2128
|
pattern=r"(\s*)(\w+)\s*\+=\s*(.+)",
|
|
2210
2129
|
replacement=r"\1\2_parts.append(\3)",
|
|
2211
2130
|
test_cases=[
|
|
2212
|
-
("
|
|
2131
|
+
(" text += new_text", " text_parts.append(new_text)"),
|
|
2213
2132
|
("result += line", "result_parts.append(line)"),
|
|
2214
|
-
("
|
|
2133
|
+
(" output += data", " output_parts.append(data)"),
|
|
2215
2134
|
],
|
|
2216
|
-
description="Replace string concatenation with list append for performance "
|
|
2135
|
+
description="Replace string concatenation with list[t.Any] append for performance "
|
|
2217
2136
|
"optimization",
|
|
2218
2137
|
),
|
|
2219
|
-
# Enhanced performance patterns for PerformanceAgent optimization
|
|
2220
2138
|
"nested_loop_detection_pattern": ValidatedPattern(
|
|
2221
2139
|
name="nested_loop_detection_pattern",
|
|
2222
|
-
pattern=r"(\s*)(for\s+\w+\s+in\s+.*:)",
|
|
2140
|
+
pattern=r"(\s*)(for\s+\w+\s+in\s+.*: )",
|
|
2223
2141
|
replacement=r"\1# Performance: Potential nested loop - check complexity\n\1\2",
|
|
2224
2142
|
test_cases=[
|
|
2225
2143
|
(
|
|
2226
|
-
"
|
|
2227
|
-
"
|
|
2228
|
-
"
|
|
2144
|
+
" for j in other: ",
|
|
2145
|
+
" # Performance: Potential nested loop - check complexity\n "
|
|
2146
|
+
"for j in other: ",
|
|
2229
2147
|
),
|
|
2230
2148
|
(
|
|
2231
|
-
"for i in items:",
|
|
2149
|
+
"for i in items: ",
|
|
2232
2150
|
"# Performance: Potential nested loop - check complexity\nfor i"
|
|
2233
|
-
" in items:",
|
|
2151
|
+
" in items: ",
|
|
2234
2152
|
),
|
|
2235
2153
|
],
|
|
2236
2154
|
description="Detect loop patterns that might be nested creating O(n²)"
|
|
@@ -2239,32 +2157,31 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
2239
2157
|
),
|
|
2240
2158
|
"list_extend_optimization_pattern": ValidatedPattern(
|
|
2241
2159
|
name="list_extend_optimization_pattern",
|
|
2242
|
-
pattern=r"(\s*)(\w+)\s*\+=\s*\[([^]]+(
|
|
2160
|
+
pattern=r"(\s*)(\w+)\s*\+=\s*\[([^]]+(?: , \s*[^]]+)*)\]",
|
|
2243
2161
|
replacement=r"\1\2.extend([\3])",
|
|
2244
2162
|
test_cases=[
|
|
2245
|
-
("
|
|
2163
|
+
(" items += [a, b, c]", " items.extend([a, b, c])"),
|
|
2246
2164
|
("results += [x, y]", "results.extend([x, y])"),
|
|
2247
|
-
("
|
|
2165
|
+
(" data += [single_item]", " data.extend([single_item])"),
|
|
2248
2166
|
],
|
|
2249
|
-
description="Replace list concatenation with extend for better performance with multiple items",
|
|
2167
|
+
description="Replace list[t.Any] concatenation with extend for better performance with multiple items",
|
|
2250
2168
|
),
|
|
2251
2169
|
"inefficient_string_join_pattern": ValidatedPattern(
|
|
2252
2170
|
name="inefficient_string_join_pattern",
|
|
2253
2171
|
pattern=r"(\s*)(\w+)\s*=\s*([\"'])([\"'])\s*\.\s*join\(\s*\[\s*\]\s*\)",
|
|
2254
|
-
replacement=r"\1\2 = \3\4
|
|
2172
|
+
replacement=r"\1\2 = \3\4 # Performance: Use empty string directly instead"
|
|
2255
2173
|
r" of join",
|
|
2256
2174
|
test_cases=[
|
|
2257
2175
|
(
|
|
2258
|
-
'
|
|
2259
|
-
'
|
|
2260
|
-
" join",
|
|
2176
|
+
' text = "".join([])',
|
|
2177
|
+
' text = "" # Performance: Use empty string directly instead of join',
|
|
2261
2178
|
),
|
|
2262
2179
|
(
|
|
2263
2180
|
"result = ''.join([])",
|
|
2264
|
-
"result = ''
|
|
2181
|
+
"result = '' # Performance: Use empty string directly instead of join",
|
|
2265
2182
|
),
|
|
2266
2183
|
],
|
|
2267
|
-
description="Replace inefficient empty list join with direct empty string"
|
|
2184
|
+
description="Replace inefficient empty list[t.Any] join with direct empty string"
|
|
2268
2185
|
" assignment",
|
|
2269
2186
|
),
|
|
2270
2187
|
"repeated_len_in_loop_pattern": ValidatedPattern(
|
|
@@ -2274,9 +2191,9 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
2274
2191
|
r"repeatedly\n\1\2",
|
|
2275
2192
|
test_cases=[
|
|
2276
2193
|
(
|
|
2277
|
-
"
|
|
2278
|
-
"
|
|
2279
|
-
"
|
|
2194
|
+
" len(items)",
|
|
2195
|
+
" # Performance: Consider caching len(items) if used repeatedly\n"
|
|
2196
|
+
" len(items)",
|
|
2280
2197
|
),
|
|
2281
2198
|
(
|
|
2282
2199
|
"len(data)",
|
|
@@ -2289,23 +2206,22 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
2289
2206
|
"list_comprehension_optimization_pattern": ValidatedPattern(
|
|
2290
2207
|
name="list_comprehension_optimization_pattern",
|
|
2291
2208
|
pattern=r"(\s*)(\w+)\.append\(([^)]+)\)",
|
|
2292
|
-
replacement=r"\1# Performance: Consider list comprehension if this is in a "
|
|
2209
|
+
replacement=r"\1# Performance: Consider list[t.Any] comprehension if this is in a "
|
|
2293
2210
|
r"simple loop\n\1\2.append(\3)",
|
|
2294
2211
|
test_cases=[
|
|
2295
2212
|
(
|
|
2296
|
-
"
|
|
2297
|
-
"
|
|
2298
|
-
"simple loop\n
|
|
2213
|
+
" results.append(item * 2)",
|
|
2214
|
+
" # Performance: Consider list[t.Any] comprehension if this is in a "
|
|
2215
|
+
"simple loop\n results.append(item * 2)",
|
|
2299
2216
|
),
|
|
2300
2217
|
(
|
|
2301
2218
|
"data.append(value)",
|
|
2302
|
-
"# Performance: Consider list comprehension if this is in a simple"
|
|
2219
|
+
"# Performance: Consider list[t.Any] comprehension if this is in a simple"
|
|
2303
2220
|
" loop\ndata.append(value)",
|
|
2304
2221
|
),
|
|
2305
2222
|
],
|
|
2306
|
-
description="Suggest list comprehensions for simple append patterns",
|
|
2223
|
+
description="Suggest list[t.Any] comprehensions for simple append patterns",
|
|
2307
2224
|
),
|
|
2308
|
-
# Enhanced security patterns for improved SecurityAgent capabilities
|
|
2309
2225
|
"detect_crypto_weak_algorithms": ValidatedPattern(
|
|
2310
2226
|
name="detect_crypto_weak_algorithms",
|
|
2311
2227
|
pattern=r"\b(?:md4|md5|sha1|des|3des|rc4)\b",
|
|
@@ -2316,7 +2232,7 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
2316
2232
|
test_cases=[
|
|
2317
2233
|
("hashlib.md5()", "hashlib.[WEAK_CRYPTO_ALGORITHM]()"),
|
|
2318
2234
|
("using DES encryption", "using [WEAK_CRYPTO_ALGORITHM] encryption"),
|
|
2319
|
-
("SHA256 is good", "SHA256 is good"),
|
|
2235
|
+
("SHA256 is good", "SHA256 is good"),
|
|
2320
2236
|
("MD4 hashing", "[WEAK_CRYPTO_ALGORITHM] hashing"),
|
|
2321
2237
|
],
|
|
2322
2238
|
),
|
|
@@ -2336,7 +2252,7 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
2336
2252
|
(
|
|
2337
2253
|
'username = "user"',
|
|
2338
2254
|
'username = "user"',
|
|
2339
|
-
),
|
|
2255
|
+
),
|
|
2340
2256
|
],
|
|
2341
2257
|
),
|
|
2342
2258
|
"detect_subprocess_shell_injection": ValidatedPattern(
|
|
@@ -2351,7 +2267,7 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
2351
2267
|
(
|
|
2352
2268
|
"subprocess.run(cmd, shell=False)",
|
|
2353
2269
|
"subprocess.run(cmd, shell=False)",
|
|
2354
|
-
),
|
|
2270
|
+
),
|
|
2355
2271
|
],
|
|
2356
2272
|
),
|
|
2357
2273
|
"detect_regex_redos_vulnerable": ValidatedPattern(
|
|
@@ -2365,7 +2281,7 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
2365
2281
|
("(a+)*", "[REDOS_VULNERABLE_PATTERN]"),
|
|
2366
2282
|
("(a*)+", "[REDOS_VULNERABLE_PATTERN]"),
|
|
2367
2283
|
("(abc)+", "[REDOS_VULNERABLE_PATTERN]"),
|
|
2368
|
-
("simple+", "simple+"),
|
|
2284
|
+
("simple+", "simple+"),
|
|
2369
2285
|
],
|
|
2370
2286
|
),
|
|
2371
2287
|
"fix_hardcoded_jwt_secret": ValidatedPattern(
|
|
@@ -2380,7 +2296,7 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
2380
2296
|
'JWT_SECRET = os.getenv("JWT_SECRET", "")',
|
|
2381
2297
|
),
|
|
2382
2298
|
('jwt_secret = "my-secret"', 'jwt_secret = os.getenv("JWT_SECRET", "")'),
|
|
2383
|
-
('other_var = "value"', 'other_var = "value"'),
|
|
2299
|
+
('other_var = "value"', 'other_var = "value"'),
|
|
2384
2300
|
],
|
|
2385
2301
|
),
|
|
2386
2302
|
"detect_unsafe_pickle_usage": ValidatedPattern(
|
|
@@ -2392,10 +2308,9 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
2392
2308
|
test_cases=[
|
|
2393
2309
|
("pickle.load(file)", "[UNSAFE_PICKLE_USAGE].load(file)"),
|
|
2394
2310
|
("pickle.loads(data)", "[UNSAFE_PICKLE_USAGE].loads(data)"),
|
|
2395
|
-
("my_pickle.load(file)", "my_pickle.load(file)"),
|
|
2311
|
+
("my_pickle.load(file)", "my_pickle.load(file)"),
|
|
2396
2312
|
],
|
|
2397
2313
|
),
|
|
2398
|
-
# Agent-specific patterns for validation and analysis
|
|
2399
2314
|
"extract_range_size": ValidatedPattern(
|
|
2400
2315
|
name="extract_range_size",
|
|
2401
2316
|
pattern=r"range\((\d+)\)",
|
|
@@ -2404,8 +2319,8 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
2404
2319
|
test_cases=[
|
|
2405
2320
|
("range(1000)", "1000"),
|
|
2406
2321
|
("range(50)", "50"),
|
|
2407
|
-
("for i in range(100):", "for i in 100:"),
|
|
2408
|
-
("other_func(10)", "other_func(10)"),
|
|
2322
|
+
("for i in range(100): ", "for i in 100: "),
|
|
2323
|
+
("other_func(10)", "other_func(10)"),
|
|
2409
2324
|
],
|
|
2410
2325
|
),
|
|
2411
2326
|
"match_error_code_patterns": ValidatedPattern(
|
|
@@ -2418,30 +2333,30 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
2418
2333
|
("I001", "I001"),
|
|
2419
2334
|
("E302", "E302"),
|
|
2420
2335
|
("W291", "W291"),
|
|
2421
|
-
("ABC123", "ABC123"),
|
|
2336
|
+
("ABC123", "ABC123"),
|
|
2422
2337
|
],
|
|
2423
2338
|
),
|
|
2424
2339
|
"match_validation_patterns": ValidatedPattern(
|
|
2425
2340
|
name="match_validation_patterns",
|
|
2426
|
-
pattern=r"if\s+not\s+\w+\s
|
|
2341
|
+
pattern=r"if\s+not\s+\w+\s*: |if\s+\w+\s+is\s+None\s*: |if\s+len\(\w+\)\s*[<>=]",
|
|
2427
2342
|
replacement=r"\g<0>",
|
|
2428
2343
|
description="Match common validation patterns for extraction",
|
|
2429
2344
|
test_cases=[
|
|
2430
|
-
("if not var:", "if not var:"),
|
|
2431
|
-
("if item is None:", "if item is None:"),
|
|
2345
|
+
("if not var: ", "if not var: "),
|
|
2346
|
+
("if item is None: ", "if item is None: "),
|
|
2432
2347
|
("if len(items) >", "if len(items) >"),
|
|
2433
|
-
("other code", "other code"),
|
|
2348
|
+
("other code", "other code"),
|
|
2434
2349
|
],
|
|
2435
2350
|
),
|
|
2436
2351
|
"match_loop_patterns": ValidatedPattern(
|
|
2437
2352
|
name="match_loop_patterns",
|
|
2438
|
-
pattern=r"\s*for\s
|
|
2353
|
+
pattern=r"\s*for\s+.*: \s*$|\s*while\s+.*: \s*$",
|
|
2439
2354
|
replacement=r"\g<0>",
|
|
2440
2355
|
description="Match for/while loop patterns",
|
|
2441
2356
|
test_cases=[
|
|
2442
|
-
("
|
|
2443
|
-
("
|
|
2444
|
-
("regular line", "regular line"),
|
|
2357
|
+
(" for i in items: ", " for i in items: "),
|
|
2358
|
+
(" while condition: ", " while condition: "),
|
|
2359
|
+
("regular line", "regular line"),
|
|
2445
2360
|
],
|
|
2446
2361
|
),
|
|
2447
2362
|
"match_star_import": ValidatedPattern(
|
|
@@ -2452,7 +2367,7 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
2452
2367
|
test_cases=[
|
|
2453
2368
|
("from module import *", "from module import *"),
|
|
2454
2369
|
("from my_pkg import *", "from my_pkg import *"),
|
|
2455
|
-
("from module import specific", "from module import specific"),
|
|
2370
|
+
("from module import specific", "from module import specific"),
|
|
2456
2371
|
],
|
|
2457
2372
|
),
|
|
2458
2373
|
"clean_unused_import": ValidatedPattern(
|
|
@@ -2461,11 +2376,11 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
2461
2376
|
replacement=r"",
|
|
2462
2377
|
description="Remove unused import statements (example with unused_module)",
|
|
2463
2378
|
test_cases=[
|
|
2464
|
-
("
|
|
2379
|
+
(" import unused_module", ""),
|
|
2465
2380
|
(
|
|
2466
2381
|
"import other_module",
|
|
2467
2382
|
"import other_module",
|
|
2468
|
-
),
|
|
2383
|
+
),
|
|
2469
2384
|
],
|
|
2470
2385
|
),
|
|
2471
2386
|
"clean_unused_from_import": ValidatedPattern(
|
|
@@ -2479,40 +2394,40 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
2479
2394
|
"from module import used, unused_item",
|
|
2480
2395
|
"from module import used, unused_item",
|
|
2481
2396
|
),
|
|
2482
|
-
("from other import needed", "from other import needed"),
|
|
2397
|
+
("from other import needed", "from other import needed"),
|
|
2483
2398
|
],
|
|
2484
2399
|
),
|
|
2485
2400
|
"clean_import_commas": ValidatedPattern(
|
|
2486
2401
|
name="clean_import_commas",
|
|
2487
|
-
pattern=r"
|
|
2488
|
-
replacement=r",",
|
|
2402
|
+
pattern=r", \s*, ",
|
|
2403
|
+
replacement=r", ",
|
|
2489
2404
|
description="Clean double commas in import statements",
|
|
2490
2405
|
test_cases=[
|
|
2491
2406
|
("from module import a, , b", "from module import a, b"),
|
|
2492
2407
|
("items = [a, , b]", "items = [a, b]"),
|
|
2493
|
-
("normal, list", "normal, list"),
|
|
2408
|
+
("normal, list[t.Any]", "normal, list[t.Any]"),
|
|
2494
2409
|
],
|
|
2495
2410
|
),
|
|
2496
2411
|
"clean_trailing_import_comma": ValidatedPattern(
|
|
2497
2412
|
name="clean_trailing_import_comma",
|
|
2498
|
-
pattern=r"
|
|
2413
|
+
pattern=r", \s*$",
|
|
2499
2414
|
replacement=r"",
|
|
2500
2415
|
description="Remove trailing commas from lines",
|
|
2501
2416
|
test_cases=[
|
|
2502
|
-
("from module import a, b,", "from module import a, b"),
|
|
2503
|
-
("import item,", "import item"),
|
|
2504
|
-
("normal line", "normal line"),
|
|
2417
|
+
("from module import a, b, ", "from module import a, b"),
|
|
2418
|
+
("import item, ", "import item"),
|
|
2419
|
+
("normal line", "normal line"),
|
|
2505
2420
|
],
|
|
2506
2421
|
),
|
|
2507
2422
|
"clean_import_prefix": ValidatedPattern(
|
|
2508
2423
|
name="clean_import_prefix",
|
|
2509
|
-
pattern=r"import\s
|
|
2424
|
+
pattern=r"import\s*, \s*",
|
|
2510
2425
|
replacement=r"import ",
|
|
2511
2426
|
description="Clean malformed import statements with leading comma",
|
|
2512
2427
|
test_cases=[
|
|
2513
|
-
("import ,module", "import module"),
|
|
2514
|
-
("from pkg import ,item", "from pkg import item"),
|
|
2515
|
-
("import normal", "import normal"),
|
|
2428
|
+
("import , module", "import module"),
|
|
2429
|
+
("from pkg import , item", "from pkg import item"),
|
|
2430
|
+
("import normal", "import normal"),
|
|
2516
2431
|
],
|
|
2517
2432
|
),
|
|
2518
2433
|
"extract_unused_import_name": ValidatedPattern(
|
|
@@ -2523,7 +2438,7 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
2523
2438
|
test_cases=[
|
|
2524
2439
|
("unused import 'module_name'", "module_name"),
|
|
2525
2440
|
('unused import "other_module"', "other_module"),
|
|
2526
|
-
("some other text", "some other text"),
|
|
2441
|
+
("some other text", "some other text"),
|
|
2527
2442
|
],
|
|
2528
2443
|
),
|
|
2529
2444
|
"normalize_whitespace": ValidatedPattern(
|
|
@@ -2533,17 +2448,280 @@ SAFE_PATTERNS: dict[str, ValidatedPattern] = {
|
|
|
2533
2448
|
description="Normalize multiple whitespace to single space",
|
|
2534
2449
|
global_replace=True,
|
|
2535
2450
|
test_cases=[
|
|
2536
|
-
("import
|
|
2537
|
-
("from
|
|
2538
|
-
("normal text", "normal text"),
|
|
2451
|
+
("import module", "import module"),
|
|
2452
|
+
("from pkg import item", "from pkg import item"),
|
|
2453
|
+
("normal text", "normal text"),
|
|
2454
|
+
],
|
|
2455
|
+
),
|
|
2456
|
+
# Template processing patterns
|
|
2457
|
+
"extract_template_variables": ValidatedPattern(
|
|
2458
|
+
name="extract_template_variables",
|
|
2459
|
+
pattern=r"\{\{\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*\}\}",
|
|
2460
|
+
replacement=r"\1",
|
|
2461
|
+
description="Extract template variables from {{variable}} patterns",
|
|
2462
|
+
test_cases=[
|
|
2463
|
+
("Hello {{name}}", "Hello name"),
|
|
2464
|
+
("{{user_name}}", "user_name"),
|
|
2465
|
+
("{{ spaced_var }}", "spaced_var"),
|
|
2466
|
+
("text {{var1}} and {{var2}}", "text var1 and {{var2}}"),
|
|
2467
|
+
],
|
|
2468
|
+
),
|
|
2469
|
+
"extract_template_sections": ValidatedPattern(
|
|
2470
|
+
name="extract_template_sections",
|
|
2471
|
+
pattern=r"\{\%\s*section\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*\%\}",
|
|
2472
|
+
replacement=r"\1",
|
|
2473
|
+
description="Extract section names from {% section name %} patterns",
|
|
2474
|
+
test_cases=[
|
|
2475
|
+
("{% section intro %}", "intro"),
|
|
2476
|
+
("{% section main_content %}", "main_content"),
|
|
2477
|
+
("text {% section footer %} more", "text footer more"),
|
|
2478
|
+
("{% section header_1 %}", "header_1"),
|
|
2479
|
+
],
|
|
2480
|
+
),
|
|
2481
|
+
"extract_template_blocks": ValidatedPattern(
|
|
2482
|
+
name="extract_template_blocks",
|
|
2483
|
+
pattern=r"\{\%\s*block\s+(\w+)\s*\%\}(.*?)\{\%\s*endblock\s*\%\}",
|
|
2484
|
+
replacement=r"\1",
|
|
2485
|
+
description="Extract block names and content from template blocks",
|
|
2486
|
+
flags=re.DOTALL,
|
|
2487
|
+
test_cases=[
|
|
2488
|
+
("{% block title %}Hello{% endblock %}", "title"),
|
|
2489
|
+
("{% block content %}Text content{% endblock %}", "content"),
|
|
2490
|
+
("{% block main %}Multi\nline{% endblock %}", "main"),
|
|
2491
|
+
(
|
|
2492
|
+
"prefix {% block nav %}nav content{% endblock %} suffix",
|
|
2493
|
+
"prefix nav suffix",
|
|
2494
|
+
),
|
|
2495
|
+
],
|
|
2496
|
+
),
|
|
2497
|
+
"replace_template_block": ValidatedPattern(
|
|
2498
|
+
name="replace_template_block",
|
|
2499
|
+
pattern=r"\{\%\s*block\s+BLOCK_NAME\s*\%\}.*?\{\%\s*endblock\s*\%\}",
|
|
2500
|
+
replacement="REPLACEMENT_CONTENT",
|
|
2501
|
+
description="Replace a specific template block (use with dynamic pattern substitution)",
|
|
2502
|
+
flags=re.DOTALL,
|
|
2503
|
+
test_cases=[
|
|
2504
|
+
("{% block BLOCK_NAME %}old{% endblock %}", "REPLACEMENT_CONTENT"),
|
|
2505
|
+
(
|
|
2506
|
+
"{% block BLOCK_NAME %}old content{% endblock %}",
|
|
2507
|
+
"REPLACEMENT_CONTENT",
|
|
2508
|
+
),
|
|
2509
|
+
],
|
|
2510
|
+
),
|
|
2511
|
+
# Documentation parsing patterns
|
|
2512
|
+
"extract_markdown_links": ValidatedPattern(
|
|
2513
|
+
name="extract_markdown_links",
|
|
2514
|
+
pattern=r"\[([^\]]+)\]\(([^)]+)\)",
|
|
2515
|
+
replacement=r"\1 -> \2",
|
|
2516
|
+
description="Extract markdown link text and URLs from [text](url) patterns",
|
|
2517
|
+
test_cases=[
|
|
2518
|
+
("[Click here](http://example.com)", "Click here -> http://example.com"),
|
|
2519
|
+
("[Local file](./docs/readme.md)", "Local file -> ./docs/readme.md"),
|
|
2520
|
+
(
|
|
2521
|
+
"See [the docs](../reference.md) for more",
|
|
2522
|
+
"See the docs -> ../reference.md for more",
|
|
2523
|
+
),
|
|
2524
|
+
("[Multi word link](path/to/file)", "Multi word link -> path/to/file"),
|
|
2525
|
+
],
|
|
2526
|
+
),
|
|
2527
|
+
"extract_version_numbers": ValidatedPattern(
|
|
2528
|
+
name="extract_version_numbers",
|
|
2529
|
+
pattern=r"version\s+(\d+\.\d+\.\d+)",
|
|
2530
|
+
replacement=r"\1",
|
|
2531
|
+
description="Extract semantic version numbers from 'version X.Y.Z' patterns",
|
|
2532
|
+
flags=re.IGNORECASE,
|
|
2533
|
+
test_cases=[
|
|
2534
|
+
("version 1.2.3", "1.2.3"),
|
|
2535
|
+
("Version 10.0.1", "10.0.1"),
|
|
2536
|
+
("current version 0.5.0", "current 0.5.0"),
|
|
2537
|
+
("VERSION 2.11.4", "2.11.4"),
|
|
2538
|
+
],
|
|
2539
|
+
),
|
|
2540
|
+
# Docstring parsing patterns
|
|
2541
|
+
"extract_google_docstring_params": ValidatedPattern(
|
|
2542
|
+
name="extract_google_docstring_params",
|
|
2543
|
+
pattern=r"^\s*(\w+)(?:\s*\([^)]+\))?\s*:\s*(.+)$",
|
|
2544
|
+
replacement=r"\1: \2",
|
|
2545
|
+
description="Extract parameter names and descriptions from Google-style docstrings",
|
|
2546
|
+
flags=re.MULTILINE,
|
|
2547
|
+
test_cases=[
|
|
2548
|
+
(" param1: Description here", "param1: Description here"),
|
|
2549
|
+
("param2 (str): String parameter", "param2: String parameter"),
|
|
2550
|
+
(
|
|
2551
|
+
" complex_param (Optional[int]): Complex type",
|
|
2552
|
+
"complex_param: Complex type",
|
|
2553
|
+
),
|
|
2554
|
+
("simple: Simple desc", "simple: Simple desc"),
|
|
2555
|
+
],
|
|
2556
|
+
),
|
|
2557
|
+
"extract_sphinx_docstring_params": ValidatedPattern(
|
|
2558
|
+
name="extract_sphinx_docstring_params",
|
|
2559
|
+
pattern=r":param\s+(\w+)\s*:\s*(.+)$",
|
|
2560
|
+
replacement=r"\1: \2",
|
|
2561
|
+
description="Extract parameter names and descriptions from Sphinx-style docstrings",
|
|
2562
|
+
flags=re.MULTILINE,
|
|
2563
|
+
test_cases=[
|
|
2564
|
+
(":param name: The name parameter", "name: The name parameter"),
|
|
2565
|
+
(":param user_id: User identifier", "user_id: User identifier"),
|
|
2566
|
+
(
|
|
2567
|
+
":param spaced : Description with spaces",
|
|
2568
|
+
"spaced: Description with spaces",
|
|
2569
|
+
),
|
|
2570
|
+
(
|
|
2571
|
+
":param complex_var: Multi-word description here",
|
|
2572
|
+
"complex_var: Multi-word description here",
|
|
2573
|
+
),
|
|
2574
|
+
],
|
|
2575
|
+
),
|
|
2576
|
+
"extract_docstring_returns": ValidatedPattern(
|
|
2577
|
+
name="extract_docstring_returns",
|
|
2578
|
+
pattern=r"(?:Returns?|Return):\s*(.+?)(?=\n\n|\n\w+:|\Z)",
|
|
2579
|
+
replacement=r"\1",
|
|
2580
|
+
description="Extract return descriptions from docstrings",
|
|
2581
|
+
flags=re.MULTILINE | re.DOTALL,
|
|
2582
|
+
test_cases=[
|
|
2583
|
+
("Returns: A string value", "A string value"),
|
|
2584
|
+
("Return: Boolean indicating success", "Boolean indicating success"),
|
|
2585
|
+
("Returns: Multi-line\n description", "Multi-line\n description"),
|
|
2586
|
+
("Returns: Simple value\n\nArgs:", "Simple value\n\nArgs:"),
|
|
2587
|
+
],
|
|
2588
|
+
),
|
|
2589
|
+
# Command processing patterns
|
|
2590
|
+
"enhance_command_blocks": ValidatedPattern(
|
|
2591
|
+
name="enhance_command_blocks",
|
|
2592
|
+
pattern=r"```(?:bash|shell|sh)?\n([^`]+)\n```",
|
|
2593
|
+
replacement=r"```bash\n\1\n```",
|
|
2594
|
+
description="Enhance command blocks with proper bash syntax highlighting",
|
|
2595
|
+
test_cases=[
|
|
2596
|
+
("```\npython -m test\n```", "```bash\npython -m test\n```"),
|
|
2597
|
+
("```bash\necho hello\n```", "```bash\necho hello\n```"),
|
|
2598
|
+
("```sh\nls -la\n```", "```bash\nls -la\n```"),
|
|
2599
|
+
("```shell\ncd /tmp\n```", "```bash\ncd /tmp\n```"),
|
|
2600
|
+
],
|
|
2601
|
+
),
|
|
2602
|
+
"extract_step_numbers": ValidatedPattern(
|
|
2603
|
+
name="extract_step_numbers",
|
|
2604
|
+
pattern=r"^(\s*)(\d+)\.\s*(.+)$",
|
|
2605
|
+
replacement=r"\1**Step \2**: \3",
|
|
2606
|
+
description="Extract and enhance numbered steps in documentation",
|
|
2607
|
+
flags=re.MULTILINE,
|
|
2608
|
+
test_cases=[
|
|
2609
|
+
("1. First step", "**Step 1**: First step"),
|
|
2610
|
+
(" 2. Indented step", " **Step 2**: Indented step"),
|
|
2611
|
+
("10. Double digit step", "**Step 10**: Double digit step"),
|
|
2612
|
+
("normal text", "normal text"),
|
|
2613
|
+
],
|
|
2614
|
+
),
|
|
2615
|
+
"extract_bash_command_blocks": ValidatedPattern(
|
|
2616
|
+
name="extract_bash_command_blocks",
|
|
2617
|
+
pattern=r"```bash\n([^`]+)\n```",
|
|
2618
|
+
replacement=r"\1",
|
|
2619
|
+
description="Extract content from bash command blocks",
|
|
2620
|
+
test_cases=[
|
|
2621
|
+
("```bash\necho hello\n```", "echo hello"),
|
|
2622
|
+
("```bash\npython -m test\n```", "python -m test"),
|
|
2623
|
+
("text\n```bash\nls -la\n```\nmore", "text\nls -la\nmore"),
|
|
2624
|
+
("```bash\nmulti\nline\ncommand\n```", "multi\nline\ncommand"),
|
|
2625
|
+
],
|
|
2626
|
+
),
|
|
2627
|
+
"detect_coverage_badge": ValidatedPattern(
|
|
2628
|
+
name="detect_coverage_badge",
|
|
2629
|
+
pattern=r"!\[Coverage.*?\]\(.*?coverage.*?\)|!\[.*coverage.*?\]\(.*?shields\.io.*?coverage.*?\)|https://img\.shields\.io/badge/coverage-[\d\.]+%25-\w+",
|
|
2630
|
+
replacement="",
|
|
2631
|
+
description="Detect existing coverage badges in README content",
|
|
2632
|
+
flags=re.IGNORECASE,
|
|
2633
|
+
test_cases=[
|
|
2634
|
+
(
|
|
2635
|
+
"",
|
|
2636
|
+
"",
|
|
2637
|
+
),
|
|
2638
|
+
(
|
|
2639
|
+
"",
|
|
2640
|
+
"",
|
|
2641
|
+
),
|
|
2642
|
+
(
|
|
2643
|
+
"",
|
|
2644
|
+
"",
|
|
2645
|
+
),
|
|
2646
|
+
("Some text without badge", "Some text without badge"),
|
|
2647
|
+
],
|
|
2648
|
+
),
|
|
2649
|
+
"update_coverage_badge_url": ValidatedPattern(
|
|
2650
|
+
name="update_coverage_badge_url",
|
|
2651
|
+
pattern=r"(!\[Coverage.*?\]\()([^)]+)(\))",
|
|
2652
|
+
replacement=r"\1NEW_BADGE_URL\3",
|
|
2653
|
+
description="Update coverage badge URL in markdown links",
|
|
2654
|
+
test_cases=[
|
|
2655
|
+
("", ""),
|
|
2656
|
+
("", ""),
|
|
2657
|
+
("text  more", "text  more"),
|
|
2658
|
+
("no badge here", "no badge here"),
|
|
2659
|
+
],
|
|
2660
|
+
),
|
|
2661
|
+
"update_coverage_badge_any": ValidatedPattern(
|
|
2662
|
+
name="update_coverage_badge_any",
|
|
2663
|
+
pattern=r"(!\[.*coverage.*?\]\()([^)]+)(\))",
|
|
2664
|
+
replacement=r"\1NEW_BADGE_URL\3",
|
|
2665
|
+
description="Update any coverage-related badge URL",
|
|
2666
|
+
flags=re.IGNORECASE,
|
|
2667
|
+
test_cases=[
|
|
2668
|
+
("", ""),
|
|
2669
|
+
("", ""),
|
|
2670
|
+
("", ""),
|
|
2671
|
+
("", ""),
|
|
2672
|
+
],
|
|
2673
|
+
),
|
|
2674
|
+
"update_shields_coverage_url": ValidatedPattern(
|
|
2675
|
+
name="update_shields_coverage_url",
|
|
2676
|
+
pattern=r"(https://img\.shields\.io/badge/coverage-[\d\.]+%25-\w+)",
|
|
2677
|
+
replacement="NEW_BADGE_URL",
|
|
2678
|
+
description="Update shields.io coverage badge URLs directly",
|
|
2679
|
+
test_cases=[
|
|
2680
|
+
(
|
|
2681
|
+
"https://img.shields.io/badge/coverage-85.0%25-brightgreen",
|
|
2682
|
+
"NEW_BADGE_URL",
|
|
2683
|
+
),
|
|
2684
|
+
("https://img.shields.io/badge/coverage-75.5%25-yellow", "NEW_BADGE_URL"),
|
|
2685
|
+
(
|
|
2686
|
+
"https://img.shields.io/badge/coverage-90.1%25-brightgreen",
|
|
2687
|
+
"NEW_BADGE_URL",
|
|
2688
|
+
),
|
|
2689
|
+
(
|
|
2690
|
+
"https://img.shields.io/badge/build-passing-green",
|
|
2691
|
+
"https://img.shields.io/badge/build-passing-green",
|
|
2692
|
+
),
|
|
2693
|
+
],
|
|
2694
|
+
),
|
|
2695
|
+
"extract_coverage_percentage": ValidatedPattern(
|
|
2696
|
+
name="extract_coverage_percentage",
|
|
2697
|
+
pattern=r"coverage-([\d\.]+)%25",
|
|
2698
|
+
replacement="", # Not used for extraction, just validation
|
|
2699
|
+
description="Search for coverage percentage in badge URL",
|
|
2700
|
+
test_cases=[
|
|
2701
|
+
("coverage-85.0%25", ""), # Will use search() to get group(1)
|
|
2702
|
+
("coverage-75.5%25", ""),
|
|
2703
|
+
("coverage-100.0%25", ""),
|
|
2704
|
+
("no coverage here", "no coverage here"), # No match
|
|
2705
|
+
],
|
|
2706
|
+
),
|
|
2707
|
+
"detect_typing_usage": ValidatedPattern(
|
|
2708
|
+
name="detect_typing_usage",
|
|
2709
|
+
pattern=r"\bt\.[A-Z]",
|
|
2710
|
+
replacement="",
|
|
2711
|
+
description="Detect usage of typing module aliases like t.Any, t.Dict, etc.",
|
|
2712
|
+
global_replace=True,
|
|
2713
|
+
test_cases=[
|
|
2714
|
+
("def func(x: t.Any) -> t.Dict:", "def func(x: ny) -> ict:"), # Removes t.A and t.D
|
|
2715
|
+
("value: t.Optional[str] = None", "value: ptional[str] = None"), # Removes t.O
|
|
2716
|
+
("from typing import Dict", "from typing import Dict"), # No match
|
|
2717
|
+
("data = dict()", "data = dict()"), # No match
|
|
2539
2718
|
],
|
|
2540
2719
|
),
|
|
2541
2720
|
}
|
|
2542
2721
|
|
|
2543
2722
|
|
|
2544
2723
|
def validate_all_patterns() -> dict[str, bool]:
|
|
2545
|
-
|
|
2546
|
-
validate_results = {}
|
|
2724
|
+
validate_results: dict[str, bool] = {}
|
|
2547
2725
|
for name, pattern in SAFE_PATTERNS.items():
|
|
2548
2726
|
try:
|
|
2549
2727
|
pattern._validate()
|
|
@@ -2555,12 +2733,10 @@ def validate_all_patterns() -> dict[str, bool]:
|
|
|
2555
2733
|
|
|
2556
2734
|
|
|
2557
2735
|
def find_pattern_for_text(text: str) -> list[str]:
|
|
2558
|
-
"""Find which patterns match the given text."""
|
|
2559
2736
|
return [name for name, pattern in SAFE_PATTERNS.items() if pattern.test(text)]
|
|
2560
2737
|
|
|
2561
2738
|
|
|
2562
2739
|
def apply_safe_replacement(text: str, pattern_name: str) -> str:
|
|
2563
|
-
"""Apply a safe replacement pattern by name."""
|
|
2564
2740
|
if pattern_name not in SAFE_PATTERNS:
|
|
2565
2741
|
raise ValueError(f"Unknown pattern: {pattern_name}")
|
|
2566
2742
|
|
|
@@ -2568,7 +2744,6 @@ def apply_safe_replacement(text: str, pattern_name: str) -> str:
|
|
|
2568
2744
|
|
|
2569
2745
|
|
|
2570
2746
|
def get_pattern_description(pattern_name: str) -> str:
|
|
2571
|
-
"""Get description of a pattern."""
|
|
2572
2747
|
if pattern_name not in SAFE_PATTERNS:
|
|
2573
2748
|
return "Unknown pattern"
|
|
2574
2749
|
|
|
@@ -2576,29 +2751,14 @@ def get_pattern_description(pattern_name: str) -> str:
|
|
|
2576
2751
|
|
|
2577
2752
|
|
|
2578
2753
|
def fix_multi_word_hyphenation(text: str) -> str:
|
|
2579
|
-
"""
|
|
2580
|
-
Fix complex multi-word hyphenation cases like 'pytest - hypothesis - specialist'.
|
|
2581
|
-
|
|
2582
|
-
Uses iterative application of the spaced_hyphens pattern to handle multiple words.
|
|
2583
|
-
"""
|
|
2584
2754
|
return SAFE_PATTERNS["fix_spaced_hyphens"].apply_iteratively(text)
|
|
2585
2755
|
|
|
2586
2756
|
|
|
2587
2757
|
def update_pyproject_version(content: str, new_version: str) -> str:
|
|
2588
|
-
"""
|
|
2589
|
-
Update version in pyproject.toml content with safe regex.
|
|
2590
|
-
|
|
2591
|
-
Args:
|
|
2592
|
-
content: The pyproject.toml file content
|
|
2593
|
-
new_version: The new version to set
|
|
2594
|
-
|
|
2595
|
-
Returns:
|
|
2596
|
-
Updated content with new version
|
|
2597
|
-
"""
|
|
2598
2758
|
import re
|
|
2599
2759
|
|
|
2600
2760
|
pattern_obj = SAFE_PATTERNS["update_pyproject_version"]
|
|
2601
|
-
|
|
2761
|
+
|
|
2602
2762
|
temp_pattern = ValidatedPattern(
|
|
2603
2763
|
name="temp_version_update",
|
|
2604
2764
|
pattern=pattern_obj.pattern,
|
|
@@ -2609,15 +2769,12 @@ def update_pyproject_version(content: str, new_version: str) -> str:
|
|
|
2609
2769
|
],
|
|
2610
2770
|
)
|
|
2611
2771
|
|
|
2612
|
-
# Apply with MULTILINE flag for line-by-line matching
|
|
2613
2772
|
return re.compile(pattern_obj.pattern, re.MULTILINE).sub(
|
|
2614
2773
|
temp_pattern.replacement, content
|
|
2615
2774
|
)
|
|
2616
2775
|
|
|
2617
2776
|
|
|
2618
2777
|
def apply_formatting_fixes(content: str) -> str:
|
|
2619
|
-
"""Apply standard formatting fixes to content."""
|
|
2620
|
-
# Remove trailing whitespace using MULTILINE flag
|
|
2621
2778
|
import re
|
|
2622
2779
|
|
|
2623
2780
|
pattern = SAFE_PATTERNS["remove_trailing_whitespace"]
|
|
@@ -2625,59 +2782,50 @@ def apply_formatting_fixes(content: str) -> str:
|
|
|
2625
2782
|
pattern.replacement, content
|
|
2626
2783
|
)
|
|
2627
2784
|
|
|
2628
|
-
# Normalize multiple newlines
|
|
2629
2785
|
content = SAFE_PATTERNS["normalize_multiple_newlines"].apply(content)
|
|
2630
2786
|
|
|
2631
2787
|
return content
|
|
2632
2788
|
|
|
2633
2789
|
|
|
2634
2790
|
def apply_security_fixes(content: str) -> str:
|
|
2635
|
-
"""Apply all security-related fixes to content."""
|
|
2636
|
-
# Fix subprocess shell injections
|
|
2637
2791
|
content = SAFE_PATTERNS["fix_subprocess_run_shell"].apply(content)
|
|
2638
2792
|
content = SAFE_PATTERNS["fix_subprocess_call_shell"].apply(content)
|
|
2639
2793
|
content = SAFE_PATTERNS["fix_subprocess_popen_shell"].apply(content)
|
|
2640
2794
|
|
|
2641
|
-
# Fix unsafe library usage
|
|
2642
2795
|
content = SAFE_PATTERNS["fix_unsafe_yaml_load"].apply(content)
|
|
2643
2796
|
content = SAFE_PATTERNS["fix_weak_md5_hash"].apply(content)
|
|
2644
2797
|
content = SAFE_PATTERNS["fix_weak_sha1_hash"].apply(content)
|
|
2645
2798
|
content = SAFE_PATTERNS["fix_insecure_random_choice"].apply(content)
|
|
2646
2799
|
|
|
2647
|
-
# Remove debug prints with secrets
|
|
2648
2800
|
content = SAFE_PATTERNS["remove_debug_prints_with_secrets"].apply(content)
|
|
2649
2801
|
|
|
2650
2802
|
return content
|
|
2651
2803
|
|
|
2652
2804
|
|
|
2653
2805
|
def apply_test_fixes(content: str) -> str:
|
|
2654
|
-
"""Apply test-related fixes to content."""
|
|
2655
2806
|
return SAFE_PATTERNS["normalize_assert_statements"].apply(content)
|
|
2656
2807
|
|
|
2657
2808
|
|
|
2658
2809
|
def is_valid_job_id(job_id: str) -> bool:
|
|
2659
|
-
"""Validate job ID using safe regex patterns."""
|
|
2660
2810
|
return SAFE_PATTERNS["validate_job_id_alphanumeric"].test(job_id)
|
|
2661
2811
|
|
|
2662
2812
|
|
|
2663
2813
|
def remove_coverage_fail_under(addopts: str) -> str:
|
|
2664
|
-
"""Remove coverage fail-under flags from pytest addopts."""
|
|
2665
2814
|
return SAFE_PATTERNS["remove_coverage_fail_under"].apply(addopts)
|
|
2666
2815
|
|
|
2667
2816
|
|
|
2668
2817
|
def update_coverage_requirement(content: str, new_coverage: float) -> str:
|
|
2669
|
-
"""Update coverage requirement in content."""
|
|
2670
2818
|
import re
|
|
2671
2819
|
|
|
2672
2820
|
pattern_obj = SAFE_PATTERNS["update_coverage_requirement"]
|
|
2673
|
-
|
|
2821
|
+
|
|
2674
2822
|
temp_pattern = ValidatedPattern(
|
|
2675
2823
|
name="temp_coverage_update",
|
|
2676
2824
|
pattern=pattern_obj.pattern,
|
|
2677
|
-
replacement=f"\\1{new_coverage
|
|
2825
|
+
replacement=f"\\1{new_coverage: .0f}",
|
|
2678
2826
|
description=f"Update coverage to {new_coverage}",
|
|
2679
2827
|
test_cases=[
|
|
2680
|
-
("--cov-fail-under=85", f"--cov-fail-under={new_coverage
|
|
2828
|
+
("--cov-fail-under=85", f"--cov-fail-under={new_coverage: .0f}"),
|
|
2681
2829
|
],
|
|
2682
2830
|
)
|
|
2683
2831
|
|
|
@@ -2685,39 +2833,16 @@ def update_coverage_requirement(content: str, new_coverage: float) -> str:
|
|
|
2685
2833
|
|
|
2686
2834
|
|
|
2687
2835
|
def update_repo_revision(content: str, repo_url: str, new_revision: str) -> str:
|
|
2688
|
-
"""
|
|
2689
|
-
Update repository revision in config content with safe regex.
|
|
2690
|
-
|
|
2691
|
-
Args:
|
|
2692
|
-
content: The config file content (JSON-like format)
|
|
2693
|
-
repo_url: The repository URL to find and update
|
|
2694
|
-
new_revision: The new revision to set
|
|
2695
|
-
|
|
2696
|
-
Returns:
|
|
2697
|
-
Updated content with new revision
|
|
2698
|
-
"""
|
|
2699
2836
|
import re
|
|
2700
2837
|
|
|
2701
|
-
# Create a pattern specific to the repo URL (escaped for safety)
|
|
2702
2838
|
escaped_url = re.escape(repo_url)
|
|
2703
2839
|
pattern = rf'("repo": "{escaped_url}".*?"rev": )"([^"]+)"'
|
|
2704
2840
|
replacement = rf'\1"{new_revision}"'
|
|
2705
2841
|
|
|
2706
|
-
# Use DOTALL flag for multiline matching
|
|
2707
2842
|
return re.compile(pattern, re.DOTALL).sub(replacement, content)
|
|
2708
2843
|
|
|
2709
2844
|
|
|
2710
2845
|
def sanitize_internal_urls(text: str) -> str:
|
|
2711
|
-
"""
|
|
2712
|
-
Sanitize internal URLs using safe patterns for security.
|
|
2713
|
-
|
|
2714
|
-
Args:
|
|
2715
|
-
text: Text that may contain internal URLs
|
|
2716
|
-
|
|
2717
|
-
Returns:
|
|
2718
|
-
Text with internal URLs replaced with [INTERNAL_URL]
|
|
2719
|
-
"""
|
|
2720
|
-
# Apply all URL sanitization patterns
|
|
2721
2846
|
url_patterns = [
|
|
2722
2847
|
"sanitize_localhost_urls",
|
|
2723
2848
|
"sanitize_127_urls",
|
|
@@ -2738,17 +2863,15 @@ def sanitize_internal_urls(text: str) -> str:
|
|
|
2738
2863
|
def apply_pattern_iteratively(
|
|
2739
2864
|
text: str, pattern_name: str, max_iterations: int = MAX_ITERATIONS
|
|
2740
2865
|
) -> str:
|
|
2741
|
-
"""Apply a pattern iteratively until no more changes occur."""
|
|
2742
2866
|
if pattern_name not in SAFE_PATTERNS:
|
|
2743
2867
|
raise ValueError(f"Unknown pattern: {pattern_name}")
|
|
2744
2868
|
|
|
2745
2869
|
return SAFE_PATTERNS[pattern_name].apply_iteratively(text, max_iterations)
|
|
2746
2870
|
|
|
2747
2871
|
|
|
2748
|
-
def get_all_pattern_stats() -> dict[str, dict[str,
|
|
2749
|
-
"""Get performance statistics for all patterns."""
|
|
2872
|
+
def get_all_pattern_stats() -> dict[str, dict[str, float] | dict[str, str]]:
|
|
2750
2873
|
test_text = "python - m crackerjack - t with pytest - hypothesis - specialist"
|
|
2751
|
-
stats = {}
|
|
2874
|
+
stats: dict[str, dict[str, float] | dict[str, str]] = {}
|
|
2752
2875
|
|
|
2753
2876
|
for name, pattern in SAFE_PATTERNS.items():
|
|
2754
2877
|
try:
|
|
@@ -2761,22 +2884,14 @@ def get_all_pattern_stats() -> dict[str, dict[str, int | float]]:
|
|
|
2761
2884
|
|
|
2762
2885
|
|
|
2763
2886
|
def clear_all_caches() -> None:
|
|
2764
|
-
"""Clear all caches (useful for testing and memory management)."""
|
|
2765
2887
|
CompiledPatternCache.clear_cache()
|
|
2766
2888
|
|
|
2767
2889
|
|
|
2768
2890
|
def get_cache_info() -> dict[str, int | list[str]]:
|
|
2769
|
-
"""Get information about pattern cache usage."""
|
|
2770
2891
|
return CompiledPatternCache.get_cache_stats()
|
|
2771
2892
|
|
|
2772
2893
|
|
|
2773
|
-
# Security validation functions
|
|
2774
2894
|
def detect_path_traversal_patterns(path_str: str) -> list[str]:
|
|
2775
|
-
"""
|
|
2776
|
-
Detect directory traversal patterns in a path string.
|
|
2777
|
-
|
|
2778
|
-
Returns list of detected pattern names.
|
|
2779
|
-
"""
|
|
2780
2895
|
detected = []
|
|
2781
2896
|
traversal_patterns = [
|
|
2782
2897
|
"detect_directory_traversal_basic",
|
|
@@ -2794,11 +2909,6 @@ def detect_path_traversal_patterns(path_str: str) -> list[str]:
|
|
|
2794
2909
|
|
|
2795
2910
|
|
|
2796
2911
|
def detect_null_byte_patterns(path_str: str) -> list[str]:
|
|
2797
|
-
"""
|
|
2798
|
-
Detect null byte patterns in a path string.
|
|
2799
|
-
|
|
2800
|
-
Returns list of detected pattern names.
|
|
2801
|
-
"""
|
|
2802
2912
|
detected = []
|
|
2803
2913
|
null_patterns = [
|
|
2804
2914
|
"detect_null_bytes_url",
|
|
@@ -2815,11 +2925,6 @@ def detect_null_byte_patterns(path_str: str) -> list[str]:
|
|
|
2815
2925
|
|
|
2816
2926
|
|
|
2817
2927
|
def detect_dangerous_directory_patterns(path_str: str) -> list[str]:
|
|
2818
|
-
"""
|
|
2819
|
-
Detect dangerous directory access patterns.
|
|
2820
|
-
|
|
2821
|
-
Returns list of detected pattern names.
|
|
2822
|
-
"""
|
|
2823
2928
|
detected = []
|
|
2824
2929
|
dangerous_patterns = [
|
|
2825
2930
|
"detect_sys_directory_pattern",
|
|
@@ -2842,11 +2947,6 @@ def detect_dangerous_directory_patterns(path_str: str) -> list[str]:
|
|
|
2842
2947
|
|
|
2843
2948
|
|
|
2844
2949
|
def detect_suspicious_path_patterns(path_str: str) -> list[str]:
|
|
2845
|
-
"""
|
|
2846
|
-
Detect suspicious path patterns that might indicate attacks.
|
|
2847
|
-
|
|
2848
|
-
Returns list of detected pattern names.
|
|
2849
|
-
"""
|
|
2850
2950
|
detected = []
|
|
2851
2951
|
suspicious_patterns = [
|
|
2852
2952
|
"detect_parent_directory_in_path",
|
|
@@ -2863,11 +2963,6 @@ def detect_suspicious_path_patterns(path_str: str) -> list[str]:
|
|
|
2863
2963
|
|
|
2864
2964
|
|
|
2865
2965
|
def validate_path_security(path_str: str) -> dict[str, list[str]]:
|
|
2866
|
-
"""
|
|
2867
|
-
Comprehensive path security validation using safe patterns.
|
|
2868
|
-
|
|
2869
|
-
Returns dict with categories of detected issues.
|
|
2870
|
-
"""
|
|
2871
2966
|
return {
|
|
2872
2967
|
"traversal_patterns": detect_path_traversal_patterns(path_str),
|
|
2873
2968
|
"null_bytes": detect_null_byte_patterns(path_str),
|
|
@@ -2876,7 +2971,6 @@ def validate_path_security(path_str: str) -> dict[str, list[str]]:
|
|
|
2876
2971
|
}
|
|
2877
2972
|
|
|
2878
2973
|
|
|
2879
|
-
# Validation on module import
|
|
2880
2974
|
if __name__ == "__main__":
|
|
2881
2975
|
results = validate_all_patterns()
|
|
2882
2976
|
if all(results.values()):
|