crackerjack-0.31.10-py3-none-any.whl → crackerjack-0.31.13-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of crackerjack might be problematic.
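For readers who want to reproduce a diff like this locally, here is a minimal sketch. It assumes `pip` is on PATH; the temporary-directory layout and the use of `difflib` are illustrative only, not how the registry generates this page.

import difflib
import subprocess
import tempfile
import zipfile
from pathlib import Path


def fetch_wheel(version: str, dest: Path) -> Path:
    # Download one specific wheel without resolving dependencies.
    subprocess.run(
        ["pip", "download", f"crackerjack=={version}", "--no-deps", "--dest", str(dest)],
        check=True,
    )
    return next(dest.glob(f"crackerjack-{version}-*.whl"))


def extract(wheel: Path, dest: Path) -> Path:
    # A wheel is a zip archive; extractall creates dest if needed.
    with zipfile.ZipFile(wheel) as zf:
        zf.extractall(dest)
    return dest


with tempfile.TemporaryDirectory() as tmp:
    tmp_path = Path(tmp)
    old = extract(fetch_wheel("0.31.10", tmp_path), tmp_path / "old")
    new = extract(fetch_wheel("0.31.13", tmp_path), tmp_path / "new")
    target = "crackerjack/code_cleaner.py"
    diff = difflib.unified_diff(
        (old / target).read_text().splitlines(keepends=True),
        (new / target).read_text().splitlines(keepends=True),
        fromfile=f"0.31.10/{target}",
        tofile=f"0.31.13/{target}",
    )
    print("".join(diff))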
- crackerjack/CLAUDE.md +288 -705
- crackerjack/__main__.py +22 -8
- crackerjack/agents/__init__.py +0 -3
- crackerjack/agents/architect_agent.py +0 -43
- crackerjack/agents/base.py +1 -9
- crackerjack/agents/coordinator.py +2 -148
- crackerjack/agents/documentation_agent.py +109 -81
- crackerjack/agents/dry_agent.py +122 -97
- crackerjack/agents/formatting_agent.py +3 -16
- crackerjack/agents/import_optimization_agent.py +1174 -130
- crackerjack/agents/performance_agent.py +956 -188
- crackerjack/agents/performance_helpers.py +229 -0
- crackerjack/agents/proactive_agent.py +1 -48
- crackerjack/agents/refactoring_agent.py +516 -246
- crackerjack/agents/refactoring_helpers.py +282 -0
- crackerjack/agents/security_agent.py +393 -90
- crackerjack/agents/test_creation_agent.py +1776 -120
- crackerjack/agents/test_specialist_agent.py +59 -15
- crackerjack/agents/tracker.py +0 -102
- crackerjack/api.py +145 -37
- crackerjack/cli/handlers.py +48 -30
- crackerjack/cli/interactive.py +11 -11
- crackerjack/cli/options.py +66 -4
- crackerjack/code_cleaner.py +808 -148
- crackerjack/config/global_lock_config.py +110 -0
- crackerjack/config/hooks.py +43 -64
- crackerjack/core/async_workflow_orchestrator.py +247 -97
- crackerjack/core/autofix_coordinator.py +192 -109
- crackerjack/core/enhanced_container.py +46 -63
- crackerjack/core/file_lifecycle.py +549 -0
- crackerjack/core/performance.py +9 -8
- crackerjack/core/performance_monitor.py +395 -0
- crackerjack/core/phase_coordinator.py +281 -94
- crackerjack/core/proactive_workflow.py +9 -58
- crackerjack/core/resource_manager.py +501 -0
- crackerjack/core/service_watchdog.py +490 -0
- crackerjack/core/session_coordinator.py +4 -8
- crackerjack/core/timeout_manager.py +504 -0
- crackerjack/core/websocket_lifecycle.py +475 -0
- crackerjack/core/workflow_orchestrator.py +343 -209
- crackerjack/dynamic_config.py +50 -9
- crackerjack/errors.py +3 -4
- crackerjack/executors/async_hook_executor.py +63 -13
- crackerjack/executors/cached_hook_executor.py +14 -14
- crackerjack/executors/hook_executor.py +100 -37
- crackerjack/executors/hook_lock_manager.py +856 -0
- crackerjack/executors/individual_hook_executor.py +120 -86
- crackerjack/intelligence/__init__.py +0 -7
- crackerjack/intelligence/adaptive_learning.py +13 -86
- crackerjack/intelligence/agent_orchestrator.py +15 -78
- crackerjack/intelligence/agent_registry.py +12 -59
- crackerjack/intelligence/agent_selector.py +31 -92
- crackerjack/intelligence/integration.py +1 -41
- crackerjack/interactive.py +9 -9
- crackerjack/managers/async_hook_manager.py +25 -8
- crackerjack/managers/hook_manager.py +9 -9
- crackerjack/managers/publish_manager.py +57 -59
- crackerjack/managers/test_command_builder.py +6 -36
- crackerjack/managers/test_executor.py +9 -61
- crackerjack/managers/test_manager.py +17 -63
- crackerjack/managers/test_manager_backup.py +77 -127
- crackerjack/managers/test_progress.py +4 -23
- crackerjack/mcp/cache.py +5 -12
- crackerjack/mcp/client_runner.py +10 -10
- crackerjack/mcp/context.py +64 -6
- crackerjack/mcp/dashboard.py +14 -11
- crackerjack/mcp/enhanced_progress_monitor.py +55 -55
- crackerjack/mcp/file_monitor.py +72 -42
- crackerjack/mcp/progress_components.py +103 -84
- crackerjack/mcp/progress_monitor.py +122 -49
- crackerjack/mcp/rate_limiter.py +12 -12
- crackerjack/mcp/server_core.py +16 -22
- crackerjack/mcp/service_watchdog.py +26 -26
- crackerjack/mcp/state.py +15 -0
- crackerjack/mcp/tools/core_tools.py +95 -39
- crackerjack/mcp/tools/error_analyzer.py +6 -32
- crackerjack/mcp/tools/execution_tools.py +1 -56
- crackerjack/mcp/tools/execution_tools_backup.py +35 -131
- crackerjack/mcp/tools/intelligence_tool_registry.py +0 -36
- crackerjack/mcp/tools/intelligence_tools.py +2 -55
- crackerjack/mcp/tools/monitoring_tools.py +308 -145
- crackerjack/mcp/tools/proactive_tools.py +12 -42
- crackerjack/mcp/tools/progress_tools.py +23 -15
- crackerjack/mcp/tools/utility_tools.py +3 -40
- crackerjack/mcp/tools/workflow_executor.py +40 -60
- crackerjack/mcp/websocket/app.py +0 -3
- crackerjack/mcp/websocket/endpoints.py +206 -268
- crackerjack/mcp/websocket/jobs.py +213 -66
- crackerjack/mcp/websocket/server.py +84 -6
- crackerjack/mcp/websocket/websocket_handler.py +137 -29
- crackerjack/models/config_adapter.py +3 -16
- crackerjack/models/protocols.py +162 -3
- crackerjack/models/resource_protocols.py +454 -0
- crackerjack/models/task.py +3 -3
- crackerjack/monitoring/__init__.py +0 -0
- crackerjack/monitoring/ai_agent_watchdog.py +25 -71
- crackerjack/monitoring/regression_prevention.py +28 -87
- crackerjack/orchestration/advanced_orchestrator.py +44 -78
- crackerjack/orchestration/coverage_improvement.py +10 -60
- crackerjack/orchestration/execution_strategies.py +16 -16
- crackerjack/orchestration/test_progress_streamer.py +61 -53
- crackerjack/plugins/base.py +1 -1
- crackerjack/plugins/managers.py +22 -20
- crackerjack/py313.py +65 -21
- crackerjack/services/backup_service.py +467 -0
- crackerjack/services/bounded_status_operations.py +627 -0
- crackerjack/services/cache.py +7 -9
- crackerjack/services/config.py +35 -52
- crackerjack/services/config_integrity.py +5 -16
- crackerjack/services/config_merge.py +542 -0
- crackerjack/services/contextual_ai_assistant.py +17 -19
- crackerjack/services/coverage_ratchet.py +44 -73
- crackerjack/services/debug.py +25 -39
- crackerjack/services/dependency_monitor.py +52 -50
- crackerjack/services/enhanced_filesystem.py +14 -11
- crackerjack/services/file_hasher.py +1 -1
- crackerjack/services/filesystem.py +1 -12
- crackerjack/services/git.py +71 -47
- crackerjack/services/health_metrics.py +31 -27
- crackerjack/services/initialization.py +276 -428
- crackerjack/services/input_validator.py +760 -0
- crackerjack/services/log_manager.py +16 -16
- crackerjack/services/logging.py +7 -6
- crackerjack/services/metrics.py +43 -43
- crackerjack/services/pattern_cache.py +2 -31
- crackerjack/services/pattern_detector.py +26 -63
- crackerjack/services/performance_benchmarks.py +20 -45
- crackerjack/services/regex_patterns.py +2887 -0
- crackerjack/services/regex_utils.py +537 -0
- crackerjack/services/secure_path_utils.py +683 -0
- crackerjack/services/secure_status_formatter.py +534 -0
- crackerjack/services/secure_subprocess.py +605 -0
- crackerjack/services/security.py +47 -10
- crackerjack/services/security_logger.py +492 -0
- crackerjack/services/server_manager.py +109 -50
- crackerjack/services/smart_scheduling.py +8 -25
- crackerjack/services/status_authentication.py +603 -0
- crackerjack/services/status_security_manager.py +442 -0
- crackerjack/services/thread_safe_status_collector.py +546 -0
- crackerjack/services/tool_version_service.py +1 -23
- crackerjack/services/unified_config.py +36 -58
- crackerjack/services/validation_rate_limiter.py +269 -0
- crackerjack/services/version_checker.py +9 -40
- crackerjack/services/websocket_resource_limiter.py +572 -0
- crackerjack/slash_commands/__init__.py +52 -2
- crackerjack/tools/__init__.py +0 -0
- crackerjack/tools/validate_input_validator_patterns.py +262 -0
- crackerjack/tools/validate_regex_patterns.py +198 -0
- {crackerjack-0.31.10.dist-info → crackerjack-0.31.13.dist-info}/METADATA +197 -12
- crackerjack-0.31.13.dist-info/RECORD +178 -0
- crackerjack/cli/facade.py +0 -104
- crackerjack-0.31.10.dist-info/RECORD +0 -149
- {crackerjack-0.31.10.dist-info → crackerjack-0.31.13.dist-info}/WHEEL +0 -0
- {crackerjack-0.31.10.dist-info → crackerjack-0.31.13.dist-info}/entry_points.txt +0 -0
- {crackerjack-0.31.10.dist-info → crackerjack-0.31.13.dist-info}/licenses/LICENSE +0 -0
crackerjack/code_cleaner.py
CHANGED
@@ -1,7 +1,6 @@
 import ast
 import typing as t
 from dataclasses import dataclass
-from enum import Enum
 from pathlib import Path
 from typing import Protocol
 
@@ -9,12 +8,49 @@ from pydantic import BaseModel, ConfigDict
 from rich.console import Console
 
 from .errors import ErrorCode, ExecutionError
-
-
-
-
-
-
+from .services.backup_service import BackupMetadata, PackageBackupService
+from .services.regex_patterns import SAFE_PATTERNS
+from .services.secure_path_utils import (
+    AtomicFileOperations,
+    SecurePathValidator,
+)
+from .services.security_logger import (
+    SecurityEventLevel,
+    SecurityEventType,
+    get_security_logger,
+)
+
+
+class SafePatternApplicator:
+    """Safe pattern applicator using centralized SAFE_PATTERNS."""
+
+    def apply_docstring_patterns(self, code: str) -> str:
+        """Apply docstring removal patterns safely."""
+        result = code
+        result = SAFE_PATTERNS["docstring_triple_double"].apply(result)
+        result = SAFE_PATTERNS["docstring_triple_single"].apply(result)
+        return result
+
+    def apply_formatting_patterns(self, content: str) -> str:
+        """Apply formatting patterns safely."""
+        # Apply spacing patterns
+        content = SAFE_PATTERNS["spacing_after_comma"].apply(content)
+        content = SAFE_PATTERNS["spacing_after_colon"].apply(content)
+        content = SAFE_PATTERNS["multiple_spaces"].apply(content)
+        return content
+
+    def has_preserved_comment(self, line: str) -> bool:
+        """Check if a line contains preserved comments."""
+        if line.strip().startswith("#! /"):
+            return True
+
+        # Check for preserved comment keywords
+        line_lower = line.lower()
+        preserved_keywords = ["coding:", "encoding:", "type:", "noqa", "pragma"]
+        return any(keyword in line_lower for keyword in preserved_keywords)
+
+
+_safe_applicator = SafePatternApplicator()
 
 
 @dataclass
@@ -26,12 +62,18 @@ class CleaningResult:
     warnings: list[str]
     original_size: int
     cleaned_size: int
+    backup_metadata: BackupMetadata | None = None
 
 
-
-
-
-
+@dataclass
+class PackageCleaningResult:
+    total_files: int
+    successful_files: int
+    failed_files: int
+    file_results: list[CleaningResult]
+    backup_metadata: BackupMetadata | None
+    backup_restored: bool = False
+    overall_success: bool = False
 
 
 class CleaningStepProtocol(Protocol):
@@ -41,21 +83,13 @@ class CleaningStepProtocol(Protocol):
     def name(self) -> str: ...
 
 
-class ErrorHandlerProtocol(Protocol):
-    def handle_file_error(
-        self,
-        file_path: Path,
-        error: Exception,
-        step: str,
-    ) -> None: ...
-    def log_cleaning_result(self, result: CleaningResult) -> None: ...
-
-
 class FileProcessor(BaseModel):
-    model_config = ConfigDict(arbitrary_types_allowed=True)
+    model_config = ConfigDict(arbitrary_types_allowed=True, extra="allow")
 
     console: Console
     logger: t.Any = None
+    base_directory: Path | None = None
+    security_logger: t.Any = None
 
     def model_post_init(self, _: t.Any) -> None:
         if self.logger is None:
@@ -63,24 +97,55 @@ class FileProcessor(BaseModel):
 
         self.logger = logging.getLogger("crackerjack.code_cleaner.file_processor")
 
+        if self.security_logger is None:
+            self.security_logger = get_security_logger()
+
     def read_file_safely(self, file_path: Path) -> str:
+        validated_path = SecurePathValidator.validate_file_path(
+            file_path, self.base_directory
+        )
+        SecurePathValidator.validate_file_size(validated_path)
+
+        self.security_logger.log_security_event(
+            SecurityEventType.FILE_CLEANED,
+            SecurityEventLevel.LOW,
+            f"Reading file for cleaning: {validated_path}",
+            file_path=validated_path,
+        )
+
         try:
-            return
+            return validated_path.read_text(encoding="utf-8")
+
         except UnicodeDecodeError:
             for encoding in ("latin1", "cp1252"):
                 try:
-                    content =
+                    content = validated_path.read_text(encoding=encoding)
                     self.logger.warning(
-                        f"File {
+                        f"File {validated_path} read with {encoding} encoding",
                     )
                     return content
                 except UnicodeDecodeError:
                     continue
+
+            self.security_logger.log_validation_failed(
+                "encoding",
+                file_path,
+                "Could not decode file with any supported encoding",
+            )
+
             raise ExecutionError(
                 message=f"Could not decode file {file_path}",
                 error_code=ErrorCode.FILE_READ_ERROR,
             )
+
+        except ExecutionError:
+            raise
+
         except Exception as e:
+            self.security_logger.log_validation_failed(
+                "file_read", file_path, f"Unexpected error during file read: {e}"
+            )
+
             raise ExecutionError(
                 message=f"Failed to read file {file_path}: {e}",
                 error_code=ErrorCode.FILE_READ_ERROR,
@@ -88,19 +153,42 @@ class FileProcessor(BaseModel):
 
     def write_file_safely(self, file_path: Path, content: str) -> None:
         try:
-
+            AtomicFileOperations.atomic_write(file_path, content, self.base_directory)
+
+            self.security_logger.log_atomic_operation("write", file_path, True)
+
+        except ExecutionError:
+            self.security_logger.log_atomic_operation("write", file_path, False)
+            raise
+
         except Exception as e:
+            self.security_logger.log_atomic_operation(
+                "write", file_path, False, error=str(e)
+            )
+
             raise ExecutionError(
                 message=f"Failed to write file {file_path}: {e}",
                 error_code=ErrorCode.FILE_WRITE_ERROR,
             ) from e
 
     def backup_file(self, file_path: Path) -> Path:
-        backup_path = file_path.with_suffix(f"{file_path.suffix}.backup")
         try:
-            backup_path.
+            backup_path = AtomicFileOperations.atomic_backup_and_write(
+                file_path, file_path.read_bytes(), self.base_directory
+            )
+
+            self.security_logger.log_backup_created(file_path, backup_path)
+
             return backup_path
+
+        except ExecutionError:
+            raise
+
         except Exception as e:
+            self.security_logger.log_validation_failed(
+                "backup_creation", file_path, f"Backup creation failed: {e}"
+            )
+
             raise ExecutionError(
                 message=f"Failed to create backup for {file_path}: {e}",
                 error_code=ErrorCode.FILE_WRITE_ERROR,
@@ -121,7 +209,7 @@ class CleaningErrorHandler(BaseModel):
 
     def handle_file_error(self, file_path: Path, error: Exception, step: str) -> None:
         self.console.print(
-            f"[bold bright_yellow]⚠️ Warning: {step} failed for {file_path}: {error}[/bold bright_yellow]",
+            f"[bold bright_yellow]⚠️ Warning: {step} failed for {file_path}: {error}[/ bold bright_yellow]",
         )
 
         self.logger.warning(
@@ -137,18 +225,18 @@ class CleaningErrorHandler(BaseModel):
     def log_cleaning_result(self, result: CleaningResult) -> None:
         if result.success:
             self.console.print(
-                f"[green]✅ Cleaned {result.file_path}[/green] "
+                f"[green]✅ Cleaned {result.file_path}[/ green] "
                 f"({result.original_size} → {result.cleaned_size} bytes)",
             )
         else:
             self.console.print(
-                f"[red]❌ Failed to clean {result.file_path}[/red] "
+                f"[red]❌ Failed to clean {result.file_path}[/ red] "
                 f"({len(result.steps_failed)} steps failed)",
             )
 
         if result.warnings:
             for warning in result.warnings:
-                self.console.print(f"[yellow]⚠️ {warning}[/yellow]")
+                self.console.print(f"[yellow]⚠️ {warning}[/ yellow]")
 
         self.logger.info(
             "File cleaning completed",
@@ -185,7 +273,7 @@ class CleaningPipeline(BaseModel):
         self.logger.info(f"Starting clean_file for {file_path}")
         try:
             original_code = self.file_processor.read_file_safely(file_path)
-            original_size = len(original_code.encode("utf
+            original_size = len(original_code.encode("utf-8"))
 
             result = self._apply_cleaning_pipeline(
                 original_code,
@@ -193,11 +281,10 @@ class CleaningPipeline(BaseModel):
                 cleaning_steps,
             )
 
+            cleaned_size = original_size
             if result.success and result.cleaned_code != original_code:
                 self.file_processor.write_file_safely(file_path, result.cleaned_code)
-                cleaned_size = len(result.cleaned_code.encode("utf
-            else:
-                cleaned_size = original_size
+                cleaned_size = len(result.cleaned_code.encode("utf-8"))
 
             cleaning_result = CleaningResult(
                 file_path=file_path,
@@ -285,13 +372,16 @@ class CleaningPipeline(BaseModel):
 
 
 class CodeCleaner(BaseModel):
-    model_config = ConfigDict(arbitrary_types_allowed=True)
+    model_config = ConfigDict(arbitrary_types_allowed=True, extra="allow")
 
     console: Console
     file_processor: t.Any = None
    error_handler: t.Any = None
    pipeline: t.Any = None
    logger: t.Any = None
+    base_directory: Path | None = None
+    security_logger: t.Any = None
+    backup_service: t.Any = None
 
     def model_post_init(self, _: t.Any) -> None:
         if self.logger is None:
@@ -299,8 +389,13 @@ class CodeCleaner(BaseModel):
 
         self.logger = logging.getLogger("crackerjack.code_cleaner")
 
+        if self.base_directory is None:
+            self.base_directory = Path.cwd()
+
         if self.file_processor is None:
-            self.file_processor = FileProcessor(
+            self.file_processor = FileProcessor(
+                console=self.console, base_directory=self.base_directory
+            )
 
         if self.error_handler is None:
             self.error_handler = CleaningErrorHandler(console=self.console)
@@ -312,6 +407,12 @@ class CodeCleaner(BaseModel):
                 console=self.console,
             )
 
+        if self.security_logger is None:
+            self.security_logger = get_security_logger()
+
+        if self.backup_service is None:
+            self.backup_service = PackageBackupService()
+
     def clean_file(self, file_path: Path) -> CleaningResult:
         cleaning_steps = [
             self._create_line_comment_step(),
@@ -322,49 +423,655 @@ class CodeCleaner(BaseModel):
 
         return self.pipeline.clean_file(file_path, cleaning_steps)
 
-    def clean_files(
+    def clean_files(
+        self, pkg_dir: Path | None = None, use_backup: bool = True
+    ) -> list[CleaningResult] | PackageCleaningResult:
+        """Clean package files with optional backup protection.
+
+        Args:
+            pkg_dir: Package directory to clean (defaults to current directory)
+            use_backup: Whether to use backup protection (default: True for safety)
+
+        Returns:
+            PackageCleaningResult with backup protection (default), list[CleaningResult] if use_backup=False (legacy)
+        """
+        if use_backup:
+            # Use the comprehensive backup system for maximum safety
+            package_result = self.clean_files_with_backup(pkg_dir)
+            self.logger.info(
+                f"Package cleaning with backup completed: "
+                f"success={package_result.overall_success}, "
+                f"restored={package_result.backup_restored}"
+            )
+            return package_result
+
+        # Legacy non-backup mode (deprecated, kept for compatibility)
+        self.console.print(
+            "[yellow]⚠️ WARNING: Running without backup protection. "
+            "Consider using use_backup=True for safety.[/yellow]"
+        )
+
         if pkg_dir is None:
             pkg_dir = Path.cwd()
 
-        python_files =
+        python_files = self._discover_package_files(pkg_dir)
+
+        files_to_process = [
+            file_path
+            for file_path in python_files
+            if self.should_process_file(file_path)
+        ]
+
         results: list[CleaningResult] = []
+        self.logger.info(f"Starting clean_files for {len(files_to_process)} files")
 
-
-
-
-
-
+        cleaning_steps = [
+            self._create_line_comment_step(),
+            self._create_docstring_step(),
+            self._create_whitespace_step(),
+            self._create_formatting_step(),
+        ]
+
+        for file_path in files_to_process:
+            result = self.pipeline.clean_file(file_path, cleaning_steps)
+            results.append(result)
 
         return results
 
-    def
-
+    def clean_files_with_backup(
+        self, pkg_dir: Path | None = None
+    ) -> PackageCleaningResult:
+        validated_pkg_dir = self._prepare_package_directory(pkg_dir)
+
+        self.logger.info(
+            f"Starting safe package cleaning with backup: {validated_pkg_dir}"
+        )
+        self.console.print(
+            "[cyan]🛡️ Starting package cleaning with backup protection...[/cyan]"
+        )
+
+        backup_metadata: BackupMetadata | None = None
+
+        try:
+            backup_metadata = self._create_backup(validated_pkg_dir)
+            files_to_process = self._find_files_to_process(validated_pkg_dir)
+
+            if not files_to_process:
+                return self._handle_no_files_to_process(backup_metadata)
+
+            cleaning_result = self._execute_cleaning_with_backup(
+                files_to_process, backup_metadata
+            )
+
+            return self._finalize_cleaning_result(cleaning_result, backup_metadata)
+
+        except Exception as e:
+            return self._handle_critical_error(e, backup_metadata)
+
+    def _prepare_package_directory(self, pkg_dir: Path | None) -> Path:
+        if pkg_dir is None:
+            pkg_dir = Path.cwd()
+
+        return SecurePathValidator.validate_file_path(pkg_dir, self.base_directory)
+
+    def _create_backup(self, validated_pkg_dir: Path) -> BackupMetadata:
+        self.console.print(
+            "[yellow]📦 Creating backup of all package files...[/yellow]"
+        )
+
+        backup_metadata = self.backup_service.create_package_backup(
+            validated_pkg_dir, self.base_directory
+        )
+
+        self.console.print(
+            f"[green]✅ Backup created: {backup_metadata.backup_id}[/green] "
+            f"({backup_metadata.total_files} files, {backup_metadata.total_size} bytes)"
+        )
+
+        return backup_metadata
+
+    def _find_files_to_process(self, validated_pkg_dir: Path) -> list[Path]:
+        python_files = self._discover_package_files(validated_pkg_dir)
+        return [
+            file_path
+            for file_path in python_files
+            if self.should_process_file(file_path)
+        ]
+
+    def _discover_package_files(self, root_dir: Path) -> list[Path]:
+        """Discover Python files in the main package directory using crackerjack naming convention.
+
+        Crackerjack convention:
+        - Project name with dashes → package name with underscores
+        - Single word → same name lowercase
+        - Package directory determined from pyproject.toml [project.name]
+
+        Args:
+            root_dir: Project root directory
+
+        Returns:
+            List of Python files found only in the main package directory
+        """
+        package_dir = self._find_package_directory(root_dir)
+
+        if not package_dir or not package_dir.exists():
+            # Fallback: look for any directory with __init__.py (excluding common non-package dirs)
+            self.console.print(
+                "[yellow]⚠️ Could not determine package directory, searching for Python packages...[/yellow]"
+            )
+            return self._fallback_discover_packages(root_dir)
+
+        self.logger.debug(f"Using package directory: {package_dir}")
+
+        # Get all Python files from the package directory only
+        package_files = list(package_dir.rglob("*.py"))
+
+        # Filter out any problematic subdirectories that might exist within the package
+        exclude_dirs = {
+            "__pycache__",
+            ".pytest_cache",
+            ".mypy_cache",
+            ".ruff_cache",
+            ".venv",
+            "venv",
+        }
+        filtered_files = [
+            f
+            for f in package_files
+            if not any(excl in f.parts for excl in exclude_dirs)
+        ]
+
+        return filtered_files
+
+    def _find_package_directory(self, root_dir: Path) -> Path | None:
+        """Find the main package directory using crackerjack naming convention.
+
+        Args:
+            root_dir: Project root directory
+
+        Returns:
+            Path to package directory or None if not found
+        """
+        # First, try to get project name from pyproject.toml
+        pyproject_path = root_dir / "pyproject.toml"
+        if pyproject_path.exists():
+            try:
+                import tomllib
+
+                with pyproject_path.open("rb") as f:
+                    config = tomllib.load(f)
+
+                project_name = config.get("project", {}).get("name")
+                if project_name:
+                    # Apply crackerjack naming convention
+                    package_name = project_name.replace("-", "_").lower()
+                    package_dir = root_dir / package_name
+
+                    if package_dir.exists() and (package_dir / "__init__.py").exists():
+                        return package_dir
+
+            except Exception as e:
+                self.logger.debug(f"Could not parse pyproject.toml: {e}")
+
+        # Fallback: infer from directory name
+        package_name = root_dir.name.replace("-", "_").lower()
+        package_dir = root_dir / package_name
+
+        if package_dir.exists() and (package_dir / "__init__.py").exists():
+            return package_dir
+
+        return None
+
+    def _fallback_discover_packages(self, root_dir: Path) -> list[Path]:
+        """Fallback method to discover package files when convention-based detection fails."""
+        python_files = []
+        exclude_dirs = {
             "__pycache__",
             ".git",
             ".venv",
-            "
+            "venv",
+            "site-packages",
             ".pytest_cache",
             "build",
             "dist",
+            ".tox",
+            "node_modules",
+            "tests",
+            "test",
+            "examples",
+            "example",
+            "docs",
+            "doc",
+            ".mypy_cache",
+            ".ruff_cache",
+            "htmlcov",
+            ".coverage",
         }
 
-        for
-        if
+        for item in root_dir.iterdir():
+            if (
+                not item.is_dir()
+                or item.name.startswith(".")
+                or item.name in exclude_dirs
+            ):
+                continue
+
+            if (item / "__init__.py").exists():
+                package_files = [
+                    f
+                    for f in item.rglob("*.py")
+                    if self._should_include_file_path(f, exclude_dirs)
+                ]
+                python_files.extend(package_files)
+
+        return python_files
+
+    def _should_include_file_path(
+        self, file_path: Path, exclude_dirs: set[str]
+    ) -> bool:
+        """Check if a file path should be included (not in excluded directories)."""
+        # Convert path parts to set for efficient lookup
+        path_parts = set(file_path.parts)
+
+        # If any part of the path is in exclude_dirs, exclude it
+        return not bool(path_parts.intersection(exclude_dirs))
+
+    def _handle_no_files_to_process(
+        self, backup_metadata: BackupMetadata
+    ) -> PackageCleaningResult:
+        self.console.print("[yellow]⚠️ No files found to process[/yellow]")
+        self.backup_service.cleanup_backup(backup_metadata)
+
+        return PackageCleaningResult(
+            total_files=0,
+            successful_files=0,
+            failed_files=0,
+            file_results=[],
+            backup_metadata=None,
+            backup_restored=False,
+            overall_success=True,
+        )
+
+    def _execute_cleaning_with_backup(
+        self, files_to_process: list[Path], backup_metadata: BackupMetadata
+    ) -> dict[str, t.Any]:
+        self.console.print(f"[cyan]🧹 Cleaning {len(files_to_process)} files...[/cyan]")
+
+        cleaning_steps = [
+            self._create_line_comment_step(),
+            self._create_docstring_step(),
+            self._create_whitespace_step(),
+            self._create_formatting_step(),
+        ]
+
+        file_results: list[CleaningResult] = []
+        cleaning_errors: list[Exception] = []
+
+        for file_path in files_to_process:
+            try:
+                result = self.pipeline.clean_file(file_path, cleaning_steps)
+                result.backup_metadata = backup_metadata
+                file_results.append(result)
+
+                if not result.success:
+                    cleaning_errors.append(
+                        ExecutionError(
+                            message=f"Cleaning failed for {file_path}: {result.steps_failed}",
+                            error_code=ErrorCode.CODE_CLEANING_ERROR,
+                        )
+                    )
+            except Exception as e:
+                cleaning_errors.append(e)
+                file_results.append(
+                    CleaningResult(
+                        file_path=file_path,
+                        success=False,
+                        steps_completed=[],
+                        steps_failed=["file_processing"],
+                        warnings=[f"Exception during cleaning: {e}"],
+                        original_size=0,
+                        cleaned_size=0,
+                        backup_metadata=backup_metadata,
+                    )
+                )
+
+        return {
+            "file_results": file_results,
+            "cleaning_errors": cleaning_errors,
+            "files_to_process": files_to_process,
+        }
+
+    def _finalize_cleaning_result(
+        self, cleaning_result: dict[str, t.Any], backup_metadata: BackupMetadata
+    ) -> PackageCleaningResult:
+        file_results = cleaning_result["file_results"]
+        cleaning_errors = cleaning_result["cleaning_errors"]
+        files_to_process = cleaning_result["files_to_process"]
+
+        successful_files = sum(1 for result in file_results if result.success)
+        failed_files = len(file_results) - successful_files
+
+        if cleaning_errors or failed_files > 0:
+            return self._handle_cleaning_failure(
+                backup_metadata,
+                file_results,
+                files_to_process,
+                successful_files,
+                failed_files,
+                cleaning_errors,
+            )
+
+        return self._handle_cleaning_success(
+            backup_metadata, file_results, files_to_process, successful_files
+        )
+
+    def _handle_cleaning_failure(
+        self,
+        backup_metadata: BackupMetadata,
+        file_results: list[CleaningResult],
+        files_to_process: list[Path],
+        successful_files: int,
+        failed_files: int,
+        cleaning_errors: list[Exception],
+    ) -> PackageCleaningResult:
+        self.console.print(
+            f"[red]❌ Cleaning failed ({failed_files} files failed). "
+            f"Restoring from backup...[/red]"
+        )
+
+        self.logger.error(
+            f"Package cleaning failed with {len(cleaning_errors)} errors, "
+            f"restoring from backup {backup_metadata.backup_id}"
+        )
+
+        self.backup_service.restore_from_backup(backup_metadata, self.base_directory)
+
+        self.console.print("[green]✅ Files restored from backup successfully[/green]")
+
+        return PackageCleaningResult(
+            total_files=len(files_to_process),
+            successful_files=successful_files,
+            failed_files=failed_files,
+            file_results=file_results,
+            backup_metadata=backup_metadata,
+            backup_restored=True,
+            overall_success=False,
+        )
+
+    def _handle_cleaning_success(
+        self,
+        backup_metadata: BackupMetadata,
+        file_results: list[CleaningResult],
+        files_to_process: list[Path],
+        successful_files: int,
+    ) -> PackageCleaningResult:
+        self.console.print(
+            f"[green]✅ Package cleaning completed successfully![/green] "
+            f"({successful_files} files cleaned)"
+        )
+
+        self.backup_service.cleanup_backup(backup_metadata)
+
+        return PackageCleaningResult(
+            total_files=len(files_to_process),
+            successful_files=successful_files,
+            failed_files=0,
+            file_results=file_results,
+            backup_metadata=None,
+            backup_restored=False,
+            overall_success=True,
+        )
+
+    def _handle_critical_error(
+        self, error: Exception, backup_metadata: BackupMetadata | None
+    ) -> PackageCleaningResult:
+        self.logger.error(f"Critical error during package cleaning: {error}")
+        self.console.print(f"[red]💥 Critical error: {error}[/red]")
+
+        backup_restored = False
+
+        if backup_metadata:
+            backup_restored = self._attempt_emergency_restoration(backup_metadata)
+
+        return PackageCleaningResult(
+            total_files=0,
+            successful_files=0,
+            failed_files=0,
+            file_results=[],
+            backup_metadata=backup_metadata,
+            backup_restored=backup_restored,
+            overall_success=False,
+        )
+
+    def _attempt_emergency_restoration(self, backup_metadata: BackupMetadata) -> bool:
+        try:
+            self.console.print(
+                "[yellow]🔄 Attempting emergency restoration...[/yellow]"
+            )
+            self.backup_service.restore_from_backup(
+                backup_metadata, self.base_directory
+            )
+            self.console.print("[green]✅ Emergency restoration completed[/green]")
+            return True
+
+        except Exception as restore_error:
+            self.logger.error(f"Emergency restoration failed: {restore_error}")
+            self.console.print(
+                f"[red]💥 Emergency restoration failed: {restore_error}[/red]\n"
+                f"[yellow]⚠️ Manual restoration may be needed from: "
+                f"{backup_metadata.backup_directory}[/yellow]"
+            )
+            return False
+
+    def restore_from_backup_metadata(self, backup_metadata: BackupMetadata) -> None:
+        """Manually restore from backup metadata.
+
+        Args:
+            backup_metadata: Backup metadata containing restoration information
+        """
+        self.console.print(
+            f"[yellow]🔄 Manually restoring from backup: {backup_metadata.backup_id}[/yellow]"
+        )
+
+        self.backup_service.restore_from_backup(backup_metadata, self.base_directory)
+
+        self.console.print(
+            f"[green]✅ Manual restoration completed from backup: "
+            f"{backup_metadata.backup_id}[/green]"
+        )
+
+    def create_emergency_backup(self, pkg_dir: Path | None = None) -> BackupMetadata:
+        """Create an emergency backup before potentially risky operations.
+
+        Args:
+            pkg_dir: Package directory to backup (defaults to current directory)
+
+        Returns:
+            BackupMetadata for the created backup
+        """
+        validated_pkg_dir = self._prepare_package_directory(pkg_dir)
+
+        self.console.print(
+            "[cyan]🛡️ Creating emergency backup before risky operation...[/cyan]"
+        )
+
+        backup_metadata = self._create_backup(validated_pkg_dir)
+
+        self.console.print(
+            f"[green]✅ Emergency backup created: {backup_metadata.backup_id}[/green]"
+        )
+
+        return backup_metadata
+
+    def restore_emergency_backup(self, backup_metadata: BackupMetadata) -> bool:
+        """Restore from an emergency backup with enhanced error handling.
+
+        Args:
+            backup_metadata: Backup metadata for restoration
+
+        Returns:
+            True if restoration succeeded, False otherwise
+        """
+        try:
+            self.console.print(
+                f"[yellow]🔄 Restoring emergency backup: {backup_metadata.backup_id}[/yellow]"
+            )
+
+            self.backup_service.restore_from_backup(
+                backup_metadata, self.base_directory
+            )
+
+            self.console.print(
+                f"[green]✅ Emergency backup restored successfully: {backup_metadata.backup_id}[/green]"
+            )
+
+            return True
+
+        except Exception as e:
+            self.logger.error(f"Emergency backup restoration failed: {e}")
+            self.console.print(
+                f"[red]💥 Emergency backup restoration failed: {e}[/red]\n"
+                f"[yellow]⚠️ Manual intervention required. Backup location: "
+                f"{backup_metadata.backup_directory}[/yellow]"
+            )
+
+            return False
+
+    def verify_backup_integrity(self, backup_metadata: BackupMetadata) -> bool:
+        """Verify the integrity of a backup without restoring it.
+
+        Args:
+            backup_metadata: Backup metadata to verify
+
+        Returns:
+            True if backup is valid and can be restored, False otherwise
+        """
+        try:
+            validation_result = self.backup_service._validate_backup(backup_metadata)
+
+            if validation_result.is_valid:
+                self.console.print(
+                    f"[green]✅ Backup verification passed: {backup_metadata.backup_id}[/green] "
+                    f"({validation_result.total_validated} files verified)"
+                )
+                return True
+            else:
+                self.console.print(
+                    f"[red]❌ Backup verification failed: {backup_metadata.backup_id}[/red]"
+                )
+
+                for error in validation_result.validation_errors[
+                    :3
+                ]: # Show first 3 errors
+                    self.console.print(f"[red] • {error}[/red]")
+
+                if len(validation_result.validation_errors) > 3:
+                    remaining = len(validation_result.validation_errors) - 3
+                    self.console.print(f"[red] ... and {remaining} more errors[/red]")
+
                 return False
 
-
+        except Exception as e:
+            self.logger.error(f"Backup verification failed with exception: {e}")
+            self.console.print(f"[red]💥 Backup verification error: {e}[/red]")
+            return False
+
+    def list_available_backups(self) -> list[Path]:
+        """List all available backup directories.
+
+        Returns:
+            List of backup directory paths
+        """
+        if (
+            not self.backup_service.backup_root
+            or not self.backup_service.backup_root.exists()
+        ):
+            self.console.print("[yellow]⚠️ No backup root directory found[/yellow]")
+            return []
+
+        try:
+            backup_dirs = [
+                path
+                for path in self.backup_service.backup_root.iterdir()
+                if path.is_dir() and path.name.startswith("backup_")
+            ]
+
+            if backup_dirs:
+                self.console.print(
+                    f"[cyan]📦 Found {len(backup_dirs)} available backups:[/cyan]"
+                )
+                for backup_dir in sorted(backup_dirs):
+                    self.console.print(f" • {backup_dir.name}")
+            else:
+                self.console.print("[yellow]⚠️ No backups found[/yellow]")
+
+            return backup_dirs
+
+        except Exception as e:
+            self.logger.error(f"Failed to list backups: {e}")
+            self.console.print(f"[red]💥 Error listing backups: {e}[/red]")
+            return []
+
+    def should_process_file(self, file_path: Path) -> bool:
+        try:
+            validated_path = SecurePathValidator.validate_file_path(
+                file_path, self.base_directory
+            )
+
+            SecurePathValidator.validate_file_size(validated_path)
+
+            ignore_patterns = {
+                "__pycache__",
+                ".git",
+                ".venv",
+                "site-packages",
+                ".pytest_cache",
+                "build",
+                "dist",
+                "tests",
+                "test",
+                "examples",
+                "example",
+            }
+
+            for parent in validated_path.parents:
+                if parent.name in ignore_patterns:
+                    return False
+
+            should_process = not (
+                validated_path.name.startswith(".") or validated_path.suffix != ".py"
+            )
+
+            if should_process:
+                self.security_logger.log_security_event(
+                    SecurityEventType.FILE_CLEANED,
+                    SecurityEventLevel.LOW,
+                    f"File approved for processing: {validated_path}",
+                    file_path=validated_path,
+                )
+
+            return should_process
+
+        except ExecutionError as e:
+            self.security_logger.log_validation_failed(
+                "file_processing_check",
+                file_path,
+                f"File failed security validation: {e}",
+            )
+
+            return False
+
+        except Exception as e:
+            self.logger.warning(f"Unexpected error checking file {file_path}: {e}")
+            return False
 
     def _create_line_comment_step(self) -> CleaningStepProtocol:
-        """Create a step for removing line comments while preserving special comments."""
         return self._LineCommentStep()
 
     def _create_docstring_step(self) -> CleaningStepProtocol:
-        """Create a step for removing docstrings."""
         return self._DocstringStep()
 
     class _DocstringStep:
-        """Step implementation for removing docstrings."""
-
         name = "remove_docstrings"
 
         def _is_docstring_node(self, node: ast.AST) -> bool:
@@ -426,10 +1133,10 @@ class CodeCleaner(BaseModel):
             lines_to_remove: set[int] = set()
 
             for node in docstring_nodes:
-                # Most AST nodes have lineno and end_lineno attributes
                 start_line = getattr(node, "lineno", 1)
-                end_line = getattr(node, "end_lineno", start_line
-
+                end_line = getattr(node, "end_lineno", start_line)
+
+                lines_to_remove.update(range(start_line, end_line + 1))
 
             result_lines = [
                 line for i, line in enumerate(lines, 1) if i not in lines_to_remove
@@ -439,95 +1146,58 @@ class CodeCleaner(BaseModel):
                 return self._regex_fallback_removal(result)
 
         def _regex_fallback_removal(self, code: str) -> str:
-
-
-            patterns = [
-                r'^\s*""".*?"""\s*$',
-                r"^\s*'''.*?'''\s*$",
-                r'^\s*""".*?"""\s*$',
-                r"^\s*'''.*?'''\s*$",
-            ]
-            result = code
-            for pattern in patterns:
-                result = re.sub(pattern, "", result, flags=re.MULTILINE | re.DOTALL)
-            return result
+            return _safe_applicator.apply_docstring_patterns(code)
 
     class _LineCommentStep:
-        """Step implementation for removing line comments."""
-
         name = "remove_line_comments"
 
         def __call__(self, code: str, file_path: Path) -> str:
            lines = code.split("\n")
-
+
            processed_lines = [self._process_line_for_comments(line) for line in lines]
            return "\n".join(processed_lines)
 
        def _process_line_for_comments(self, line: str) -> str:
-            """Process a single line to remove comments while preserving strings."""
            if not line.strip() or self._is_preserved_comment_line(line):
                return line
            return self._remove_comment_from_line(line)
 
        def _is_preserved_comment_line(self, line: str) -> bool:
-            """Check if this comment line should be preserved."""
            stripped = line.strip()
            if not stripped.startswith("#"):
                return False
            return self._has_preserved_pattern(stripped)
 
        def _has_preserved_pattern(self, stripped_line: str) -> bool:
-
-            preserved_patterns = ["coding: ", "encoding: ", "type: ", "noqa", "pragma"]
-            return stripped_line.startswith("# !/ ") or any(
-                pattern in stripped_line for pattern in preserved_patterns
-            )
+            return _safe_applicator.has_preserved_comment(stripped_line)
 
        def _remove_comment_from_line(self, line: str) -> str:
-            """
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        def _is_string_start(self, char: str, state: dict[str, t.Any]) -> bool:
-            """Check if character starts a string."""
-            return not state["in_string"] and char in ('"', "'")
-
-        def _is_string_end(
-            self,
-            char: str,
-            index: int,
-            line: str,
-            state: dict[str, t.Any],
-        ) -> bool:
-            """Check if character ends a string."""
-            return (
-                state["in_string"]
-                and char == state["quote_char"]
-                and (index == 0 or line[index - 1] != "\\")
-            )
+            if '"' not in line and "'" not in line and "#" not in line:
+                return line
+
+            result_chars = []
+            in_string = False
+            quote_char = None
+            i = 0
+            length = len(line)
+
+            while i < length:
+                char = line[i]
+
+                if not in_string:
+                    if char == "#":
+                        break
+                    elif char in ('"', "'"):
+                        in_string = True
+                        quote_char = char
+                elif char == quote_char and (i == 0 or line[i - 1] != "\\"):
+                    in_string = False
+                    quote_char = None
+
+                result_chars.append(char)
+                i += 1
+
+            return "".join(result_chars).rstrip()
 
     def _create_docstring_finder_class(
         self,
@@ -570,11 +1240,8 @@ class CodeCleaner(BaseModel):
        name = "remove_extra_whitespace"
 
        def __call__(self, code: str, file_path: Path) -> str:
-            import re
-
            lines = code.split("\n")
            cleaned_lines: list[str] = []
-
            empty_line_count = 0
 
            for line in lines:
@@ -586,13 +1253,13 @@ class CodeCleaner(BaseModel):
                    cleaned_lines.append("")
                else:
                    empty_line_count = 0
-
                    leading_whitespace = len(cleaned_line) - len(
-                            cleaned_line.lstrip()
+                        cleaned_line.lstrip()
                    )
                    content = cleaned_line.lstrip()
 
-
+                    # Use SAFE_PATTERNS for multiple spaces replacement
+                    content = SAFE_PATTERNS["multiple_spaces"].apply(content)
 
                    cleaned_line = cleaned_line[:leading_whitespace] + content
                    cleaned_lines.append(cleaned_line)
@@ -612,33 +1279,26 @@ class CodeCleaner(BaseModel):
    class FormattingStep:
        name = "format_code"
 
-        def
-
+        def _is_preserved_comment_line(self, line: str) -> bool:
+            stripped = line.strip()
+            if not stripped.startswith("#"):
+                return False
+            return _safe_applicator.has_preserved_comment(line)
 
+        def __call__(self, code: str, file_path: Path) -> str:
            lines = code.split("\n")
            formatted_lines: list[str] = []
 
            for line in lines:
                if line.strip():
+                    if self._is_preserved_comment_line(line):
+                        formatted_lines.append(line)
+                        continue
+
                    leading_whitespace = len(line) - len(line.lstrip())
                    content = line.lstrip()
 
-                    content =
-                        r"([ =+ \ -*/%<>!&|^ ])([ ^ =+ \ -*/%<>!&|^ ])",
-                        r"\1 \2",
-                        content,
-                    )
-                    content = re.sub(
-                        r"([ ^ =+ \ -*/%<>!&|^ ])([ =+ \ -*/%<>!&|^ ])",
-                        r"\1 \2",
-                        content,
-                    )
-
-                    content = re.sub(r", ([ ^ \n])", r", \1", content)
-
-                    content = re.sub(r": ([ ^ \n: ])", r": \1", content)
-
-                    content = re.sub(r" {2, }", " ", content)
+                    content = _safe_applicator.apply_formatting_patterns(content)
 
                    formatted_line = line[:leading_whitespace] + content
                    formatted_lines.append(formatted_line)