crackerjack 0.31.10__py3-none-any.whl → 0.31.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of crackerjack might be problematic. Click here for more details.
- crackerjack/CLAUDE.md +288 -705
- crackerjack/__main__.py +22 -8
- crackerjack/agents/__init__.py +0 -3
- crackerjack/agents/architect_agent.py +0 -43
- crackerjack/agents/base.py +1 -9
- crackerjack/agents/coordinator.py +2 -148
- crackerjack/agents/documentation_agent.py +109 -81
- crackerjack/agents/dry_agent.py +122 -97
- crackerjack/agents/formatting_agent.py +3 -16
- crackerjack/agents/import_optimization_agent.py +1174 -130
- crackerjack/agents/performance_agent.py +956 -188
- crackerjack/agents/performance_helpers.py +229 -0
- crackerjack/agents/proactive_agent.py +1 -48
- crackerjack/agents/refactoring_agent.py +516 -246
- crackerjack/agents/refactoring_helpers.py +282 -0
- crackerjack/agents/security_agent.py +393 -90
- crackerjack/agents/test_creation_agent.py +1776 -120
- crackerjack/agents/test_specialist_agent.py +59 -15
- crackerjack/agents/tracker.py +0 -102
- crackerjack/api.py +145 -37
- crackerjack/cli/handlers.py +48 -30
- crackerjack/cli/interactive.py +11 -11
- crackerjack/cli/options.py +66 -4
- crackerjack/code_cleaner.py +808 -148
- crackerjack/config/global_lock_config.py +110 -0
- crackerjack/config/hooks.py +43 -64
- crackerjack/core/async_workflow_orchestrator.py +247 -97
- crackerjack/core/autofix_coordinator.py +192 -109
- crackerjack/core/enhanced_container.py +46 -63
- crackerjack/core/file_lifecycle.py +549 -0
- crackerjack/core/performance.py +9 -8
- crackerjack/core/performance_monitor.py +395 -0
- crackerjack/core/phase_coordinator.py +281 -94
- crackerjack/core/proactive_workflow.py +9 -58
- crackerjack/core/resource_manager.py +501 -0
- crackerjack/core/service_watchdog.py +490 -0
- crackerjack/core/session_coordinator.py +4 -8
- crackerjack/core/timeout_manager.py +504 -0
- crackerjack/core/websocket_lifecycle.py +475 -0
- crackerjack/core/workflow_orchestrator.py +343 -209
- crackerjack/dynamic_config.py +47 -6
- crackerjack/errors.py +3 -4
- crackerjack/executors/async_hook_executor.py +63 -13
- crackerjack/executors/cached_hook_executor.py +14 -14
- crackerjack/executors/hook_executor.py +100 -37
- crackerjack/executors/hook_lock_manager.py +856 -0
- crackerjack/executors/individual_hook_executor.py +120 -86
- crackerjack/intelligence/__init__.py +0 -7
- crackerjack/intelligence/adaptive_learning.py +13 -86
- crackerjack/intelligence/agent_orchestrator.py +15 -78
- crackerjack/intelligence/agent_registry.py +12 -59
- crackerjack/intelligence/agent_selector.py +31 -92
- crackerjack/intelligence/integration.py +1 -41
- crackerjack/interactive.py +9 -9
- crackerjack/managers/async_hook_manager.py +25 -8
- crackerjack/managers/hook_manager.py +9 -9
- crackerjack/managers/publish_manager.py +57 -59
- crackerjack/managers/test_command_builder.py +6 -36
- crackerjack/managers/test_executor.py +9 -61
- crackerjack/managers/test_manager.py +17 -63
- crackerjack/managers/test_manager_backup.py +77 -127
- crackerjack/managers/test_progress.py +4 -23
- crackerjack/mcp/cache.py +5 -12
- crackerjack/mcp/client_runner.py +10 -10
- crackerjack/mcp/context.py +64 -6
- crackerjack/mcp/dashboard.py +14 -11
- crackerjack/mcp/enhanced_progress_monitor.py +55 -55
- crackerjack/mcp/file_monitor.py +72 -42
- crackerjack/mcp/progress_components.py +103 -84
- crackerjack/mcp/progress_monitor.py +122 -49
- crackerjack/mcp/rate_limiter.py +12 -12
- crackerjack/mcp/server_core.py +16 -22
- crackerjack/mcp/service_watchdog.py +26 -26
- crackerjack/mcp/state.py +15 -0
- crackerjack/mcp/tools/core_tools.py +95 -39
- crackerjack/mcp/tools/error_analyzer.py +6 -32
- crackerjack/mcp/tools/execution_tools.py +1 -56
- crackerjack/mcp/tools/execution_tools_backup.py +35 -131
- crackerjack/mcp/tools/intelligence_tool_registry.py +0 -36
- crackerjack/mcp/tools/intelligence_tools.py +2 -55
- crackerjack/mcp/tools/monitoring_tools.py +308 -145
- crackerjack/mcp/tools/proactive_tools.py +12 -42
- crackerjack/mcp/tools/progress_tools.py +23 -15
- crackerjack/mcp/tools/utility_tools.py +3 -40
- crackerjack/mcp/tools/workflow_executor.py +40 -60
- crackerjack/mcp/websocket/app.py +0 -3
- crackerjack/mcp/websocket/endpoints.py +206 -268
- crackerjack/mcp/websocket/jobs.py +213 -66
- crackerjack/mcp/websocket/server.py +84 -6
- crackerjack/mcp/websocket/websocket_handler.py +137 -29
- crackerjack/models/config_adapter.py +3 -16
- crackerjack/models/protocols.py +162 -3
- crackerjack/models/resource_protocols.py +454 -0
- crackerjack/models/task.py +3 -3
- crackerjack/monitoring/__init__.py +0 -0
- crackerjack/monitoring/ai_agent_watchdog.py +25 -71
- crackerjack/monitoring/regression_prevention.py +28 -87
- crackerjack/orchestration/advanced_orchestrator.py +44 -78
- crackerjack/orchestration/coverage_improvement.py +10 -60
- crackerjack/orchestration/execution_strategies.py +16 -16
- crackerjack/orchestration/test_progress_streamer.py +61 -53
- crackerjack/plugins/base.py +1 -1
- crackerjack/plugins/managers.py +22 -20
- crackerjack/py313.py +65 -21
- crackerjack/services/backup_service.py +467 -0
- crackerjack/services/bounded_status_operations.py +627 -0
- crackerjack/services/cache.py +7 -9
- crackerjack/services/config.py +35 -52
- crackerjack/services/config_integrity.py +5 -16
- crackerjack/services/config_merge.py +542 -0
- crackerjack/services/contextual_ai_assistant.py +17 -19
- crackerjack/services/coverage_ratchet.py +44 -73
- crackerjack/services/debug.py +25 -39
- crackerjack/services/dependency_monitor.py +52 -50
- crackerjack/services/enhanced_filesystem.py +14 -11
- crackerjack/services/file_hasher.py +1 -1
- crackerjack/services/filesystem.py +1 -12
- crackerjack/services/git.py +71 -47
- crackerjack/services/health_metrics.py +31 -27
- crackerjack/services/initialization.py +276 -428
- crackerjack/services/input_validator.py +760 -0
- crackerjack/services/log_manager.py +16 -16
- crackerjack/services/logging.py +7 -6
- crackerjack/services/metrics.py +43 -43
- crackerjack/services/pattern_cache.py +2 -31
- crackerjack/services/pattern_detector.py +26 -63
- crackerjack/services/performance_benchmarks.py +20 -45
- crackerjack/services/regex_patterns.py +2887 -0
- crackerjack/services/regex_utils.py +537 -0
- crackerjack/services/secure_path_utils.py +683 -0
- crackerjack/services/secure_status_formatter.py +534 -0
- crackerjack/services/secure_subprocess.py +605 -0
- crackerjack/services/security.py +47 -10
- crackerjack/services/security_logger.py +492 -0
- crackerjack/services/server_manager.py +109 -50
- crackerjack/services/smart_scheduling.py +8 -25
- crackerjack/services/status_authentication.py +603 -0
- crackerjack/services/status_security_manager.py +442 -0
- crackerjack/services/thread_safe_status_collector.py +546 -0
- crackerjack/services/tool_version_service.py +1 -23
- crackerjack/services/unified_config.py +36 -58
- crackerjack/services/validation_rate_limiter.py +269 -0
- crackerjack/services/version_checker.py +9 -40
- crackerjack/services/websocket_resource_limiter.py +572 -0
- crackerjack/slash_commands/__init__.py +52 -2
- crackerjack/tools/__init__.py +0 -0
- crackerjack/tools/validate_input_validator_patterns.py +262 -0
- crackerjack/tools/validate_regex_patterns.py +198 -0
- {crackerjack-0.31.10.dist-info → crackerjack-0.31.12.dist-info}/METADATA +197 -12
- crackerjack-0.31.12.dist-info/RECORD +178 -0
- crackerjack/cli/facade.py +0 -104
- crackerjack-0.31.10.dist-info/RECORD +0 -149
- {crackerjack-0.31.10.dist-info → crackerjack-0.31.12.dist-info}/WHEEL +0 -0
- {crackerjack-0.31.10.dist-info → crackerjack-0.31.12.dist-info}/entry_points.txt +0 -0
- {crackerjack-0.31.10.dist-info → crackerjack-0.31.12.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,262 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Test script to validate that all new input validator SAFE_PATTERNS work correctly.
|
|
4
|
+
|
|
5
|
+
This script validates the security-critical input validation patterns and ensures
|
|
6
|
+
they provide proper protection against injection attacks and malicious input.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import sys
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
|
|
12
|
+
# Add the crackerjack package to the path
|
|
13
|
+
sys.path.insert(0, str(Path(__file__).parent.parent))
|
|
14
|
+
|
|
15
|
+
from crackerjack.services.input_validator import SecureInputValidator
|
|
16
|
+
from crackerjack.services.regex_patterns import SAFE_PATTERNS
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def test_sql_injection_patterns():
    """Run sample strings through the SQL-related SAFE_PATTERNS entries.

    A sample is reported as BLOCKED when the ``test()`` call of any listed
    pattern returns a truthy value, otherwise as ALLOWED. One status line is
    printed per sample, and the run stops at the first sample whose outcome
    differs from the expectation.

    Returns:
        True when every sample was classified as expected, False as soon as
        one sample was misclassified.
    """
    print("Testing SQL injection patterns...")

    # Names of the SAFE_PATTERNS entries consulted for every sample.
    detector_names = (
        "validate_sql_injection_patterns",
        "validate_sql_comment_patterns",
        "validate_sql_boolean_injection",
        "validate_sql_server_specific",
    )

    # (sample text, expected to be blocked?, human-readable label)
    samples = (
        # Expected to be blocked
        ("SELECT * FROM users", True, "Basic SELECT"),
        ("UNION SELECT password FROM admin", True, "UNION injection"),
        ("'; DROP TABLE users; --", True, "SQL comment injection"),
        ("' OR 1=1--", True, "Boolean injection"),
        ("xp_cmdshell('dir')", True, "SQL Server specific"),
        ("sp_executesql @sql", True, "SQL Server procedure"),
        # Expected to pass through untouched
        ("user selected item", False, "Legitimate text with 'select'"),
        ("button execution", False, "Legitimate text with 'execution'"),
        ("team membership", False, "Legitimate text without SQL keywords"),
        ("normal text", False, "Normal text"),
    )

    for payload, expect_blocked, label in samples:
        # any() stops at the first pattern that reports a hit.
        blocked = any(SAFE_PATTERNS[name].test(payload) for name in detector_names)
        marker = "❌" if blocked != expect_blocked else "✅"
        print(
            f" {marker} {label}: '{payload}' -> {'BLOCKED' if blocked else 'ALLOWED'}"
        )

        if blocked == expect_blocked:
            continue

        print(f" Expected: {'BLOCKED' if expect_blocked else 'ALLOWED'}")
        return False

    print("✅ All SQL injection pattern tests passed!")
    return True
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def test_code_injection_patterns():
    """Run sample strings through the code-injection SAFE_PATTERNS entries.

    A sample is reported as BLOCKED when the ``test()`` call of any listed
    pattern returns a truthy value, otherwise as ALLOWED. One status line is
    printed per sample, and the run stops at the first sample whose outcome
    differs from the expectation.

    Returns:
        True when every sample was classified as expected, False as soon as
        one sample was misclassified.
    """
    print("\nTesting code injection patterns...")

    # Names of the SAFE_PATTERNS entries consulted for every sample.
    detector_names = (
        "validate_code_eval_injection",
        "validate_code_dynamic_access",
        "validate_code_system_commands",
        "validate_code_compilation",
    )

    # (sample text, expected to be blocked?, human-readable label)
    samples = (
        # Expected to be blocked
        ("eval(user_input)", True, "eval() execution"),
        ("exec(malicious_code)", True, "exec() execution"),
        ("__import__('os')", True, "Dynamic import"),
        ("getattr(obj, 'dangerous')", True, "Dynamic attribute access"),
        ("subprocess.run(cmd)", True, "System command"),
        ("os.system('rm -rf')", True, "OS system call"),
        ("compile(code, 'string', 'exec')", True, "Code compilation"),
        # Expected to pass through untouched
        ("evaluate the results", False, "Legitimate text with 'eval'"),
        ("execute the plan", False, "Legitimate text with 'execute'"),
        ("import statement", False, "Normal import discussion"),
        ("compiled successfully", False, "Normal compilation discussion"),
    )

    for payload, expect_blocked, label in samples:
        # any() stops at the first pattern that reports a hit.
        blocked = any(SAFE_PATTERNS[name].test(payload) for name in detector_names)
        marker = "❌" if blocked != expect_blocked else "✅"
        print(
            f" {marker} {label}: '{payload}' -> {'BLOCKED' if blocked else 'ALLOWED'}"
        )

        if blocked == expect_blocked:
            continue

        print(f" Expected: {'BLOCKED' if expect_blocked else 'ALLOWED'}")
        return False

    print("✅ All code injection pattern tests passed!")
    return True
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def test_job_id_validation():
    """Check the ``validate_job_id_format`` pattern against sample job IDs.

    Each sample is passed to the pattern's ``test()`` method and the result is
    compared with the expected validity. One status line is printed per
    sample, and the run stops at the first mismatch.

    Returns:
        True when every sample produced the expected result, False as soon as
        one did not.
    """
    print("\nTesting job ID validation...")

    # (candidate job ID, expected validity, human-readable label)
    samples = (
        # Expected to be accepted
        ("valid_job-123", True, "Standard job ID"),
        ("another-valid_job", True, "Hyphen and underscore"),
        ("JOB123", True, "Uppercase"),
        ("job_456", True, "Underscore only"),
        ("job-789", True, "Hyphen only"),
        ("complex_job-id_123", True, "Complex valid ID"),
        # Expected to be rejected
        ("job with spaces", False, "Contains spaces"),
        ("job@invalid", False, "Contains @ symbol"),
        ("job.invalid", False, "Contains dot"),
        ("job/invalid", False, "Contains slash"),
        ("job=invalid", False, "Contains equals"),
        ("job$invalid", False, "Contains dollar"),
        ("", False, "Empty string"),
    )

    job_id_pattern = SAFE_PATTERNS["validate_job_id_format"]

    for candidate, expect_valid, label in samples:
        accepted = job_id_pattern.test(candidate)
        marker = "❌" if accepted != expect_valid else "✅"
        print(
            f" {marker} {label}: '{candidate}' -> {'VALID' if accepted else 'INVALID'}"
        )

        if accepted == expect_valid:
            continue

        print(f" Expected: {'VALID' if expect_valid else 'INVALID'}")
        return False

    print("✅ All job ID validation tests passed!")
    return True
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def test_env_var_validation():
    """Check the ``validate_env_var_name_format`` pattern against sample names.

    Each sample is passed to the pattern's ``test()`` method and the result is
    compared with the expected validity. One status line is printed per
    sample, and the run stops at the first mismatch.

    Returns:
        True when every sample produced the expected result, False as soon as
        one did not.
    """
    print("\nTesting environment variable name validation...")

    # (candidate variable name, expected validity, human-readable label)
    samples = (
        # Expected to be accepted
        ("VALID_VAR", True, "Standard env var"),
        ("_PRIVATE_VAR", True, "Starting with underscore"),
        ("API_KEY_123", True, "With numbers"),
        ("DATABASE_URL", True, "Typical env var"),
        ("MAX_RETRIES", True, "Another typical var"),
        # Expected to be rejected
        ("lowercase_var", False, "Contains lowercase"),
        ("123_INVALID", False, "Starts with number"),
        ("INVALID-VAR", False, "Contains hyphen"),
        ("INVALID.VAR", False, "Contains dot"),
        ("INVALID VAR", False, "Contains space"),
        ("INVALID@VAR", False, "Contains @ symbol"),
        ("", False, "Empty string"),
    )

    name_pattern = SAFE_PATTERNS["validate_env_var_name_format"]

    for candidate, expect_valid, label in samples:
        accepted = name_pattern.test(candidate)
        marker = "❌" if accepted != expect_valid else "✅"
        print(
            f" {marker} {label}: '{candidate}' -> {'VALID' if accepted else 'INVALID'}"
        )

        if accepted == expect_valid:
            continue

        print(f" Expected: {'VALID' if expect_valid else 'INVALID'}")
        return False

    print("✅ All environment variable validation tests passed!")
    return True
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def test_integration_with_validator():
    """Exercise a SecureInputValidator instance with accept/reject scenarios.

    Five calls are made in order: one string sanitization, two job-ID
    validations and two environment-variable validations. After each call the
    ``valid`` attribute of the returned result is compared with the expected
    outcome. A success or failure line is printed per scenario, and the run
    stops at the first failure.

    Returns:
        True when all scenarios behaved as expected, False at the first one
        that did not.
    """
    print("\nTesting integration with SecureInputValidator...")

    validator = SecureInputValidator()

    # Each scenario: (deferred call, expected truthiness of result.valid,
    # message printed on failure, message printed on success).
    # The calls are wrapped in lambdas so they run one at a time, in order.
    scenarios = (
        (
            lambda: validator.sanitizer.sanitize_string("'; DROP TABLE users; --"),
            False,
            "❌ SQL injection should have been detected",
            "✅ SQL injection properly detected and blocked",
        ),
        (
            lambda: validator.validate_job_id("valid_job-123"),
            True,
            "❌ Valid job ID should have been accepted",
            "✅ Valid job ID properly accepted",
        ),
        (
            lambda: validator.validate_job_id("invalid job with spaces"),
            False,
            "❌ Invalid job ID should have been rejected",
            "✅ Invalid job ID properly rejected",
        ),
        (
            lambda: validator.validate_environment_var("VALID_VAR", "some_value"),
            True,
            "❌ Valid env var should have been accepted",
            "✅ Valid environment variable properly accepted",
        ),
        (
            lambda: validator.validate_environment_var("invalid_var", "some_value"),
            False,
            "❌ Invalid env var should have been rejected",
            "✅ Invalid environment variable properly rejected",
        ),
    )

    for invoke, expect_valid, failure_text, success_text in scenarios:
        outcome = invoke()
        if bool(outcome.valid) is not expect_valid:
            print(failure_text)
            return False
        print(success_text)

    print("✅ All integration tests passed!")
    return True
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
def main():
    """Run every validation routine and summarize the overall outcome.

    All routines are executed even when an earlier one fails, so the output
    contains the report of each of them.

    Returns:
        0 when every routine returned a truthy value, 1 otherwise. The value
        is passed to ``sys.exit`` by the script entry point.
    """
    separator = "=" * 50

    print("🔒 Validating Input Validator Security Patterns")
    print(separator)

    checks = (
        test_sql_injection_patterns,
        test_code_injection_patterns,
        test_job_id_validation,
        test_env_var_validation,
        test_integration_with_validator,
    )

    # A list (not a generator) so that no routine is skipped after a failure.
    outcomes = [bool(check()) for check in checks]

    print("\n" + separator)

    if not all(outcomes):
        print("❌ SOME TESTS FAILED!")
        print("🚨 Security issues detected - review failed tests")
        return 1

    print("✅ ALL SECURITY VALIDATION TESTS PASSED!")
    print("🔒 Input validation is properly secured with SAFE_PATTERNS")
    return 0
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
if __name__ == "__main__":
|
|
262
|
+
sys.exit(main())
|
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Pre-commit hook to validate regex pattern usage.
|
|
4
|
+
|
|
5
|
+
This script ensures all regex patterns use validated patterns from
|
|
6
|
+
crackerjack.services.regex_patterns instead of raw re.sub() calls.
|
|
7
|
+
|
|
8
|
+
CRITICAL: Prevents spacing issues by catching bad regex patterns before they
|
|
9
|
+
enter the codebase.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
import ast
|
|
13
|
+
import re
|
|
14
|
+
import sys
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
|
|
17
|
+
# Patterns that indicate regex usage
|
|
18
|
+
REGEX_IMPORTS = {
    "re",
    "regex",
}

REGEX_FUNCTIONS = {
    "re.sub",
    "re.search",
    "re.match",
    "re.findall",
    "re.split",
    "re.compile",
    "regex.sub",
    "regex.search",
    "regex.match",
    "regex.findall",
    "regex.split",
    "regex.compile",
}

# Allowed regex usage patterns (whitelisted).
# NOTE: RegexVisitor only ever matches these against the *file path*.
ALLOWED_PATTERNS = {
    # Simple string operations are OK
    r"re\.escape\(",
    r"re\.compile\(r?['\"]\\\\[wd]",  # Simple character classes
    # Test files can use regex for testing
    r"# REGEX OK:",  # Comment-based exemption
    # Validation in regex_patterns.py itself
    r"crackerjack/services/regex_patterns\.py$",
}

# Malformed ``\g<N>`` group references. Every pattern requires at least one
# whitespace character in a specific position, so the correct ``\g<1>`` never
# matches and a malformed reference matches exactly one pattern.
FORBIDDEN_REPLACEMENT_PATTERNS = [
    r"\\g\s+<\s*\d+\s*>",  # \g <1> or \g < 1 >: whitespace before <
    r"\\g<\s+\d+\s*>",  # \g< 1> or \g< 1 >: whitespace after <
    r"\\g<\d+\s+>",  # \g<1 >: whitespace before >
]


class RegexVisitor(ast.NodeVisitor):
    """AST visitor that records raw regex calls and bad ``\\g<N>`` syntax.

    Findings are collected in ``issues`` as ``(line_number, message)`` tuples
    in the order they are encountered.
    """

    def __init__(self, file_path: Path):
        """Prepare a visitor for the module stored at *file_path*.

        Args:
            file_path: Location of the file being checked. It is used for the
                whitelist check and to read source lines when looking for
                exemption comments.
        """
        self.file_path = file_path
        self.issues: list[tuple[int, str]] = []
        self.has_regex_import = False
        # Match against the forward-slash form so the whitelist entry for
        # regex_patterns.py also applies to Windows-style paths.
        normalized_path = file_path.as_posix()
        self.allowed_file = any(
            re.search(pattern, normalized_path) for pattern in ALLOWED_PATTERNS
        )
        # Source lines are loaded on first use and reused afterwards, so the
        # file is read at most once per visitor.
        self._source_lines: list[str] | None = None

    def visit_Import(self, node: ast.Import) -> None:
        """Record ``import re`` / ``import regex`` statements."""
        if any(alias.name in REGEX_IMPORTS for alias in node.names):
            self.has_regex_import = True
        self.generic_visit(node)

    def visit_ImportFrom(self, node: ast.ImportFrom) -> None:
        """Record ``from re import ...`` / ``from regex import ...``."""
        if node.module in REGEX_IMPORTS:
            self.has_regex_import = True
        self.generic_visit(node)

    def visit_Call(self, node: ast.Call) -> None:
        """Inspect a call and record issues when it is a raw regex call.

        Whitelisted files are traversed but never reported. For other files,
        string-literal arguments (positional and keyword, so ``repl=...`` is
        covered) are checked for malformed group references, and the call
        itself is reported unless its first line carries an exemption comment.
        """
        if not self.allowed_file:
            func_name = self._get_function_name(node.func)

            if func_name in REGEX_FUNCTIONS:
                call_values = [*node.args, *(kw.value for kw in node.keywords)]
                for value in call_values:
                    if isinstance(value, ast.Constant) and isinstance(
                        value.value, str
                    ):
                        self._check_replacement_syntax(value.value, node.lineno)

                if not self._is_exempted_line(node.lineno):
                    self.issues.append(
                        (
                            node.lineno,
                            f"Raw regex usage detected: {func_name}(). "
                            f"Use validated patterns from crackerjack.services.regex_patterns instead.",
                        )
                    )

        self.generic_visit(node)

    def _get_function_name(self, func_node: ast.AST) -> str:
        """Return a dotted name for the called object, or ``""`` if unknown.

        ``name(...)`` yields ``"name"``, ``mod.attr(...)`` yields
        ``"mod.attr"``, and deeper attribute chains yield only the final
        attribute name.
        """
        if isinstance(func_node, ast.Name):
            return func_node.id
        if isinstance(func_node, ast.Attribute):
            if isinstance(func_node.value, ast.Name):
                return f"{func_node.value.id}.{func_node.attr}"
            return func_node.attr
        return ""

    def _check_replacement_syntax(self, replacement: str, line_no: int) -> None:
        """Record one issue when *replacement* has a malformed group reference.

        At most one issue is added per string, regardless of how many
        forbidden patterns it matches.
        """
        if any(
            re.search(pattern, replacement)
            for pattern in FORBIDDEN_REPLACEMENT_PATTERNS
        ):
            self.issues.append(
                (
                    line_no,
                    f"CRITICAL: Bad replacement syntax detected: '{replacement}'. "
                    f"Use \\g<1> not \\g < 1 >",
                )
            )

    def _is_exempted_line(self, line_no: int) -> bool:
        """Return True when source line *line_no* has a ``# REGEX OK:`` comment.

        The comparison is case-insensitive. An unreadable file or an
        out-of-range line number yields False.
        """
        if self._source_lines is None:
            try:
                # readlines() keeps the numbering aligned with ast's lineno;
                # str.splitlines() would also split on form feeds and similar.
                with self.file_path.open(encoding="utf-8") as f:
                    self._source_lines = f.readlines()
            except (OSError, UnicodeDecodeError):
                self._source_lines = []

        if 1 <= line_no <= len(self._source_lines):
            return "# regex ok:" in self._source_lines[line_no - 1].lower()
        return False
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def validate_file(file_path: Path) -> list[tuple[int, str]]:
    """Collect regex-usage issues for one Python source file.

    Args:
        file_path: File to read (as UTF-8) and parse.

    Returns:
        A list of ``(line_number, message)`` tuples. A file that cannot be
        read yields a single entry on line 1; a file that does not parse
        yields a single entry on the reported error line (line 1 when the
        parser gives none). Otherwise the list holds whatever the
        RegexVisitor recorded while walking the syntax tree.
    """
    try:
        source_text = file_path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as read_error:
        return [(1, f"Error reading file: {read_error}")]

    try:
        module_ast = ast.parse(source_text, filename=str(file_path))
    except SyntaxError as parse_error:
        error_line = parse_error.lineno if parse_error.lineno else 1
        return [(error_line, f"Syntax error: {parse_error}")]
    else:
        checker = RegexVisitor(file_path)
        checker.visit(module_ast)
        return checker.issues
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def main(file_paths: list[str]) -> int:
    """Validate the given files and print a report.

    Paths that do not end in ``.py`` or that do not exist are skipped
    silently. Every remaining file is passed to ``validate_file`` and its
    issues, if any, are printed under the file name.

    Args:
        file_paths: Paths to check.

    Returns:
        0 when no file produced issues, 1 otherwise.
    """
    found_problems = False

    for candidate in map(Path, file_paths):
        # Only existing Python sources are examined.
        if candidate.suffix != ".py" or not candidate.exists():
            continue

        findings = validate_file(candidate)
        if not findings:
            continue

        found_problems = True
        print(f"\n❌ {candidate}:")
        for line_no, message in findings:
            print(f" Line {line_no}: {message}")

    if not found_problems:
        print("✅ All regex patterns validated successfully!")
        return 0

    banner = "=" * 80
    print("\n" + banner)
    print("REGEX VALIDATION FAILED")
    print(banner)
    print("To fix these issues:")
    print("1. Use patterns from crackerjack.services.regex_patterns")
    print("2. Add new patterns to SAFE_PATTERNS with comprehensive tests")
    print("3. Use '# REGEX OK: reason' comment for legitimate exceptions")
    print("4. Fix \\g<1> replacement syntax (no spaces)")
    print(banner)
    return 1
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
if __name__ == "__main__":
|
|
198
|
+
sys.exit(main(sys.argv[1:]))
|