crackerjack 0.30.3__py3-none-any.whl → 0.31.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of crackerjack might be problematic. Click here for more details.
- crackerjack/CLAUDE.md +1005 -0
- crackerjack/RULES.md +380 -0
- crackerjack/__init__.py +42 -13
- crackerjack/__main__.py +225 -299
- crackerjack/agents/__init__.py +41 -0
- crackerjack/agents/architect_agent.py +281 -0
- crackerjack/agents/base.py +169 -0
- crackerjack/agents/coordinator.py +512 -0
- crackerjack/agents/documentation_agent.py +498 -0
- crackerjack/agents/dry_agent.py +388 -0
- crackerjack/agents/formatting_agent.py +245 -0
- crackerjack/agents/import_optimization_agent.py +281 -0
- crackerjack/agents/performance_agent.py +669 -0
- crackerjack/agents/proactive_agent.py +104 -0
- crackerjack/agents/refactoring_agent.py +788 -0
- crackerjack/agents/security_agent.py +529 -0
- crackerjack/agents/test_creation_agent.py +652 -0
- crackerjack/agents/test_specialist_agent.py +486 -0
- crackerjack/agents/tracker.py +212 -0
- crackerjack/api.py +560 -0
- crackerjack/cli/__init__.py +24 -0
- crackerjack/cli/facade.py +104 -0
- crackerjack/cli/handlers.py +267 -0
- crackerjack/cli/interactive.py +471 -0
- crackerjack/cli/options.py +401 -0
- crackerjack/cli/utils.py +18 -0
- crackerjack/code_cleaner.py +618 -928
- crackerjack/config/__init__.py +19 -0
- crackerjack/config/hooks.py +218 -0
- crackerjack/core/__init__.py +0 -0
- crackerjack/core/async_workflow_orchestrator.py +406 -0
- crackerjack/core/autofix_coordinator.py +200 -0
- crackerjack/core/container.py +104 -0
- crackerjack/core/enhanced_container.py +542 -0
- crackerjack/core/performance.py +243 -0
- crackerjack/core/phase_coordinator.py +561 -0
- crackerjack/core/proactive_workflow.py +316 -0
- crackerjack/core/session_coordinator.py +289 -0
- crackerjack/core/workflow_orchestrator.py +640 -0
- crackerjack/dynamic_config.py +94 -103
- crackerjack/errors.py +263 -41
- crackerjack/executors/__init__.py +11 -0
- crackerjack/executors/async_hook_executor.py +431 -0
- crackerjack/executors/cached_hook_executor.py +242 -0
- crackerjack/executors/hook_executor.py +345 -0
- crackerjack/executors/individual_hook_executor.py +669 -0
- crackerjack/intelligence/__init__.py +44 -0
- crackerjack/intelligence/adaptive_learning.py +751 -0
- crackerjack/intelligence/agent_orchestrator.py +551 -0
- crackerjack/intelligence/agent_registry.py +414 -0
- crackerjack/intelligence/agent_selector.py +502 -0
- crackerjack/intelligence/integration.py +290 -0
- crackerjack/interactive.py +576 -315
- crackerjack/managers/__init__.py +11 -0
- crackerjack/managers/async_hook_manager.py +135 -0
- crackerjack/managers/hook_manager.py +137 -0
- crackerjack/managers/publish_manager.py +411 -0
- crackerjack/managers/test_command_builder.py +151 -0
- crackerjack/managers/test_executor.py +435 -0
- crackerjack/managers/test_manager.py +258 -0
- crackerjack/managers/test_manager_backup.py +1124 -0
- crackerjack/managers/test_progress.py +144 -0
- crackerjack/mcp/__init__.py +0 -0
- crackerjack/mcp/cache.py +336 -0
- crackerjack/mcp/client_runner.py +104 -0
- crackerjack/mcp/context.py +615 -0
- crackerjack/mcp/dashboard.py +636 -0
- crackerjack/mcp/enhanced_progress_monitor.py +479 -0
- crackerjack/mcp/file_monitor.py +336 -0
- crackerjack/mcp/progress_components.py +569 -0
- crackerjack/mcp/progress_monitor.py +949 -0
- crackerjack/mcp/rate_limiter.py +332 -0
- crackerjack/mcp/server.py +22 -0
- crackerjack/mcp/server_core.py +244 -0
- crackerjack/mcp/service_watchdog.py +501 -0
- crackerjack/mcp/state.py +395 -0
- crackerjack/mcp/task_manager.py +257 -0
- crackerjack/mcp/tools/__init__.py +17 -0
- crackerjack/mcp/tools/core_tools.py +249 -0
- crackerjack/mcp/tools/error_analyzer.py +308 -0
- crackerjack/mcp/tools/execution_tools.py +370 -0
- crackerjack/mcp/tools/execution_tools_backup.py +1097 -0
- crackerjack/mcp/tools/intelligence_tool_registry.py +80 -0
- crackerjack/mcp/tools/intelligence_tools.py +314 -0
- crackerjack/mcp/tools/monitoring_tools.py +502 -0
- crackerjack/mcp/tools/proactive_tools.py +384 -0
- crackerjack/mcp/tools/progress_tools.py +141 -0
- crackerjack/mcp/tools/utility_tools.py +341 -0
- crackerjack/mcp/tools/workflow_executor.py +360 -0
- crackerjack/mcp/websocket/__init__.py +14 -0
- crackerjack/mcp/websocket/app.py +39 -0
- crackerjack/mcp/websocket/endpoints.py +559 -0
- crackerjack/mcp/websocket/jobs.py +253 -0
- crackerjack/mcp/websocket/server.py +116 -0
- crackerjack/mcp/websocket/websocket_handler.py +78 -0
- crackerjack/mcp/websocket_server.py +10 -0
- crackerjack/models/__init__.py +31 -0
- crackerjack/models/config.py +93 -0
- crackerjack/models/config_adapter.py +230 -0
- crackerjack/models/protocols.py +118 -0
- crackerjack/models/task.py +154 -0
- crackerjack/monitoring/ai_agent_watchdog.py +450 -0
- crackerjack/monitoring/regression_prevention.py +638 -0
- crackerjack/orchestration/__init__.py +0 -0
- crackerjack/orchestration/advanced_orchestrator.py +970 -0
- crackerjack/orchestration/execution_strategies.py +341 -0
- crackerjack/orchestration/test_progress_streamer.py +636 -0
- crackerjack/plugins/__init__.py +15 -0
- crackerjack/plugins/base.py +200 -0
- crackerjack/plugins/hooks.py +246 -0
- crackerjack/plugins/loader.py +335 -0
- crackerjack/plugins/managers.py +259 -0
- crackerjack/py313.py +8 -3
- crackerjack/services/__init__.py +22 -0
- crackerjack/services/cache.py +314 -0
- crackerjack/services/config.py +347 -0
- crackerjack/services/config_integrity.py +99 -0
- crackerjack/services/contextual_ai_assistant.py +516 -0
- crackerjack/services/coverage_ratchet.py +347 -0
- crackerjack/services/debug.py +736 -0
- crackerjack/services/dependency_monitor.py +617 -0
- crackerjack/services/enhanced_filesystem.py +439 -0
- crackerjack/services/file_hasher.py +151 -0
- crackerjack/services/filesystem.py +395 -0
- crackerjack/services/git.py +165 -0
- crackerjack/services/health_metrics.py +611 -0
- crackerjack/services/initialization.py +847 -0
- crackerjack/services/log_manager.py +286 -0
- crackerjack/services/logging.py +174 -0
- crackerjack/services/metrics.py +578 -0
- crackerjack/services/pattern_cache.py +362 -0
- crackerjack/services/pattern_detector.py +515 -0
- crackerjack/services/performance_benchmarks.py +653 -0
- crackerjack/services/security.py +163 -0
- crackerjack/services/server_manager.py +234 -0
- crackerjack/services/smart_scheduling.py +144 -0
- crackerjack/services/tool_version_service.py +61 -0
- crackerjack/services/unified_config.py +437 -0
- crackerjack/services/version_checker.py +248 -0
- crackerjack/slash_commands/__init__.py +14 -0
- crackerjack/slash_commands/init.md +122 -0
- crackerjack/slash_commands/run.md +163 -0
- crackerjack/slash_commands/status.md +127 -0
- crackerjack-0.31.4.dist-info/METADATA +742 -0
- crackerjack-0.31.4.dist-info/RECORD +148 -0
- crackerjack-0.31.4.dist-info/entry_points.txt +2 -0
- crackerjack/.gitignore +0 -34
- crackerjack/.libcst.codemod.yaml +0 -18
- crackerjack/.pdm.toml +0 -1
- crackerjack/crackerjack.py +0 -3805
- crackerjack/pyproject.toml +0 -286
- crackerjack-0.30.3.dist-info/METADATA +0 -1290
- crackerjack-0.30.3.dist-info/RECORD +0 -16
- {crackerjack-0.30.3.dist-info → crackerjack-0.31.4.dist-info}/WHEEL +0 -0
- {crackerjack-0.30.3.dist-info → crackerjack-0.31.4.dist-info}/licenses/LICENSE +0 -0
crackerjack/code_cleaner.py
CHANGED
|
@@ -1,980 +1,670 @@
|
|
|
1
|
-
import
|
|
2
|
-
import re
|
|
3
|
-
import subprocess
|
|
1
|
+
import ast
|
|
4
2
|
import typing as t
|
|
5
|
-
from
|
|
6
|
-
from
|
|
7
|
-
from functools import lru_cache
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from enum import Enum
|
|
8
5
|
from pathlib import Path
|
|
6
|
+
from typing import Protocol
|
|
9
7
|
|
|
10
|
-
import
|
|
11
|
-
from pydantic import BaseModel
|
|
8
|
+
from pydantic import BaseModel, ConfigDict
|
|
12
9
|
from rich.console import Console
|
|
13
10
|
|
|
14
|
-
from .errors import ErrorCode, ExecutionError
|
|
11
|
+
from .errors import ErrorCode, ExecutionError
|
|
15
12
|
|
|
16
13
|
|
|
17
|
-
class
|
|
18
|
-
|
|
14
|
+
class CleaningStepResult(Enum):
|
|
15
|
+
SUCCESS = "success"
|
|
16
|
+
FAILED = "failed"
|
|
17
|
+
SKIPPED = "skipped"
|
|
19
18
|
|
|
20
|
-
def _analyze_workload_characteristics(self, files: list[Path]) -> dict[str, t.Any]:
|
|
21
|
-
if not files:
|
|
22
|
-
return {
|
|
23
|
-
"total_files": 0,
|
|
24
|
-
"total_size": 0,
|
|
25
|
-
"avg_file_size": 0,
|
|
26
|
-
"complexity": "low",
|
|
27
|
-
}
|
|
28
|
-
total_size = 0
|
|
29
|
-
large_files = 0
|
|
30
|
-
for file_path in files:
|
|
31
|
-
try:
|
|
32
|
-
size = file_path.stat().st_size
|
|
33
|
-
total_size += size
|
|
34
|
-
if size > 50_000:
|
|
35
|
-
large_files += 1
|
|
36
|
-
except (OSError, PermissionError):
|
|
37
|
-
continue
|
|
38
|
-
avg_file_size = total_size / len(files) if files else 0
|
|
39
|
-
large_file_ratio = large_files / len(files) if files else 0
|
|
40
|
-
if len(files) > 100 or avg_file_size > 20_000 or large_file_ratio > 0.3:
|
|
41
|
-
complexity = "high"
|
|
42
|
-
elif len(files) > 50 or avg_file_size > 10_000 or large_file_ratio > 0.1:
|
|
43
|
-
complexity = "medium"
|
|
44
|
-
else:
|
|
45
|
-
complexity = "low"
|
|
46
|
-
|
|
47
|
-
return {
|
|
48
|
-
"total_files": len(files),
|
|
49
|
-
"total_size": total_size,
|
|
50
|
-
"avg_file_size": avg_file_size,
|
|
51
|
-
"large_files": large_files,
|
|
52
|
-
"large_file_ratio": large_file_ratio,
|
|
53
|
-
"complexity": complexity,
|
|
54
|
-
}
|
|
55
19
|
|
|
56
|
-
|
|
57
|
-
|
|
20
|
+
@dataclass
|
|
21
|
+
class CleaningResult:
|
|
22
|
+
file_path: Path
|
|
23
|
+
success: bool
|
|
24
|
+
steps_completed: list[str]
|
|
25
|
+
steps_failed: list[str]
|
|
26
|
+
warnings: list[str]
|
|
27
|
+
original_size: int
|
|
28
|
+
cleaned_size: int
|
|
58
29
|
|
|
59
|
-
cpu_count = os.cpu_count() or 4
|
|
60
|
-
if workload["complexity"] == "high":
|
|
61
|
-
max_workers = min(cpu_count // 2, 3)
|
|
62
|
-
elif workload["complexity"] == "medium":
|
|
63
|
-
max_workers = min(cpu_count, 6)
|
|
64
|
-
else:
|
|
65
|
-
max_workers = min(cpu_count + 2, 8)
|
|
66
30
|
|
|
67
|
-
|
|
31
|
+
class FileProcessorProtocol(Protocol):
|
|
32
|
+
def read_file_safely(self, file_path: Path) -> str: ...
|
|
33
|
+
def write_file_safely(self, file_path: Path, content: str) -> None: ...
|
|
34
|
+
def backup_file(self, file_path: Path) -> Path: ...
|
|
68
35
|
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
36
|
+
|
|
37
|
+
class CleaningStepProtocol(Protocol):
|
|
38
|
+
def __call__(self, code: str, file_path: Path) -> str: ...
|
|
39
|
+
|
|
40
|
+
@property
|
|
41
|
+
def name(self) -> str: ...
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class ErrorHandlerProtocol(Protocol):
|
|
45
|
+
def handle_file_error(
|
|
46
|
+
self,
|
|
47
|
+
file_path: Path,
|
|
48
|
+
error: Exception,
|
|
49
|
+
step: str,
|
|
50
|
+
) -> None: ...
|
|
51
|
+
def log_cleaning_result(self, result: CleaningResult) -> None: ...
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class FileProcessor(BaseModel):
|
|
55
|
+
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
56
|
+
|
|
57
|
+
console: Console
|
|
58
|
+
logger: t.Any = None
|
|
59
|
+
|
|
60
|
+
def model_post_init(self, _: t.Any) -> None:
|
|
61
|
+
if self.logger is None:
|
|
62
|
+
import logging
|
|
63
|
+
|
|
64
|
+
self.logger = logging.getLogger("crackerjack.code_cleaner.file_processor")
|
|
65
|
+
|
|
66
|
+
def read_file_safely(self, file_path: Path) -> str:
|
|
67
|
+
try:
|
|
68
|
+
return file_path.read_text(encoding="utf - 8")
|
|
69
|
+
except UnicodeDecodeError:
|
|
70
|
+
for encoding in ("latin1", "cp1252"):
|
|
93
71
|
try:
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
f"[bold bright_red]❌ Error cleaning {file_path}: {e}[/bold bright_red]"
|
|
72
|
+
content = file_path.read_text(encoding=encoding)
|
|
73
|
+
self.logger.warning(
|
|
74
|
+
f"File {file_path} read with {encoding} encoding",
|
|
98
75
|
)
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
for cache_file in pycache_dir.iterdir():
|
|
106
|
-
with suppress(PermissionError, OSError):
|
|
107
|
-
cache_file.unlink()
|
|
108
|
-
pycache_dir.rmdir()
|
|
109
|
-
parent_pycache = pkg_dir.parent / "__pycache__"
|
|
110
|
-
if parent_pycache.exists():
|
|
111
|
-
for cache_file in parent_pycache.iterdir():
|
|
112
|
-
with suppress(PermissionError, OSError):
|
|
113
|
-
cache_file.unlink()
|
|
114
|
-
parent_pycache.rmdir()
|
|
115
|
-
|
|
116
|
-
def clean_file(self, file_path: Path) -> None:
|
|
117
|
-
try:
|
|
118
|
-
code = file_path.read_text(encoding="utf-8")
|
|
119
|
-
original_code = code
|
|
120
|
-
cleaning_failed = False
|
|
121
|
-
try:
|
|
122
|
-
code = self.remove_line_comments_streaming(code)
|
|
123
|
-
except Exception as e:
|
|
124
|
-
self.console.print(
|
|
125
|
-
f"[bold bright_yellow]⚠️ Warning: Failed to remove line comments from {file_path}: {e}[/bold bright_yellow]"
|
|
126
|
-
)
|
|
127
|
-
code = original_code
|
|
128
|
-
cleaning_failed = True
|
|
129
|
-
try:
|
|
130
|
-
code = self.remove_docstrings_streaming(code)
|
|
131
|
-
except Exception as e:
|
|
132
|
-
self.console.print(
|
|
133
|
-
f"[bold bright_yellow]⚠️ Warning: Failed to remove docstrings from {file_path}: {e}[/bold bright_yellow]"
|
|
134
|
-
)
|
|
135
|
-
code = original_code
|
|
136
|
-
cleaning_failed = True
|
|
137
|
-
try:
|
|
138
|
-
code = self.remove_extra_whitespace_streaming(code)
|
|
139
|
-
except Exception as e:
|
|
140
|
-
self.console.print(
|
|
141
|
-
f"[bold bright_yellow]⚠️ Warning: Failed to remove extra whitespace from {file_path}: {e}[/bold bright_yellow]"
|
|
142
|
-
)
|
|
143
|
-
code = original_code
|
|
144
|
-
cleaning_failed = True
|
|
145
|
-
try:
|
|
146
|
-
code = self.reformat_code(code)
|
|
147
|
-
except Exception as e:
|
|
148
|
-
self.console.print(
|
|
149
|
-
f"[bold bright_yellow]⚠️ Warning: Failed to reformat {file_path}: {e}[/bold bright_yellow]"
|
|
150
|
-
)
|
|
151
|
-
code = original_code
|
|
152
|
-
cleaning_failed = True
|
|
153
|
-
file_path.write_text(code, encoding="utf-8")
|
|
154
|
-
if cleaning_failed:
|
|
155
|
-
self.console.print(
|
|
156
|
-
f"[bold yellow]⚡ Partially cleaned:[/bold yellow] [dim bright_white]{file_path}[/dim bright_white]"
|
|
157
|
-
)
|
|
158
|
-
else:
|
|
159
|
-
self.console.print(
|
|
160
|
-
f"[bold green]✨ Cleaned:[/bold green] [dim bright_white]{file_path}[/dim bright_white]"
|
|
161
|
-
)
|
|
162
|
-
except PermissionError as e:
|
|
163
|
-
self.console.print(
|
|
164
|
-
f"[red]Failed to clean: {file_path} (Permission denied)[/red]"
|
|
165
|
-
)
|
|
166
|
-
handle_error(
|
|
167
|
-
ExecutionError(
|
|
168
|
-
message=f"Permission denied while cleaning {file_path}",
|
|
169
|
-
error_code=ErrorCode.PERMISSION_ERROR,
|
|
170
|
-
details=str(e),
|
|
171
|
-
recovery=f"Check file permissions for {file_path} and ensure you have write access",
|
|
172
|
-
),
|
|
173
|
-
console=self.console,
|
|
174
|
-
exit_on_error=False,
|
|
175
|
-
)
|
|
176
|
-
except OSError as e:
|
|
177
|
-
self.console.print(
|
|
178
|
-
f"[red]Failed to clean: {file_path} (File system error)[/red]"
|
|
179
|
-
)
|
|
180
|
-
handle_error(
|
|
181
|
-
ExecutionError(
|
|
182
|
-
message=f"File system error while cleaning {file_path}",
|
|
183
|
-
error_code=ErrorCode.FILE_WRITE_ERROR,
|
|
184
|
-
details=str(e),
|
|
185
|
-
recovery=f"Check that {file_path} exists and is not being used by another process",
|
|
186
|
-
),
|
|
187
|
-
console=self.console,
|
|
188
|
-
exit_on_error=False,
|
|
189
|
-
)
|
|
190
|
-
except UnicodeDecodeError as e:
|
|
191
|
-
self.console.print(
|
|
192
|
-
f"[red]Failed to clean: {file_path} (Encoding error)[/red]"
|
|
193
|
-
)
|
|
194
|
-
handle_error(
|
|
195
|
-
ExecutionError(
|
|
196
|
-
message=f"Encoding error while reading {file_path}",
|
|
197
|
-
error_code=ErrorCode.FILE_READ_ERROR,
|
|
198
|
-
details=str(e),
|
|
199
|
-
recovery=f"File {file_path} contains non-UTF-8 characters. Please check the file encoding.",
|
|
200
|
-
),
|
|
201
|
-
console=self.console,
|
|
202
|
-
exit_on_error=False,
|
|
76
|
+
return content
|
|
77
|
+
except UnicodeDecodeError:
|
|
78
|
+
continue
|
|
79
|
+
raise ExecutionError(
|
|
80
|
+
message=f"Could not decode file {file_path}",
|
|
81
|
+
error_code=ErrorCode.FILE_READ_ERROR,
|
|
203
82
|
)
|
|
204
83
|
except Exception as e:
|
|
205
|
-
|
|
206
|
-
f"
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
ExecutionError(
|
|
210
|
-
message=f"Unexpected error while cleaning {file_path}",
|
|
211
|
-
error_code=ErrorCode.UNEXPECTED_ERROR,
|
|
212
|
-
details=str(e),
|
|
213
|
-
recovery="This is an unexpected error. Please report this issue with the file content if possible.",
|
|
214
|
-
),
|
|
215
|
-
console=self.console,
|
|
216
|
-
exit_on_error=False,
|
|
217
|
-
)
|
|
84
|
+
raise ExecutionError(
|
|
85
|
+
message=f"Failed to read file {file_path}: {e}",
|
|
86
|
+
error_code=ErrorCode.FILE_READ_ERROR,
|
|
87
|
+
) from e
|
|
218
88
|
|
|
219
|
-
def
|
|
220
|
-
|
|
221
|
-
"
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
}
|
|
89
|
+
def write_file_safely(self, file_path: Path, content: str) -> None:
|
|
90
|
+
try:
|
|
91
|
+
file_path.write_text(content, encoding="utf - 8")
|
|
92
|
+
except Exception as e:
|
|
93
|
+
raise ExecutionError(
|
|
94
|
+
message=f"Failed to write file {file_path}: {e}",
|
|
95
|
+
error_code=ErrorCode.FILE_WRITE_ERROR,
|
|
96
|
+
) from e
|
|
228
97
|
|
|
229
|
-
def
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
def _handle_multiline_definition(
|
|
241
|
-
self, line: str, stripped: str, state: dict[str, t.Any]
|
|
242
|
-
) -> bool:
|
|
243
|
-
if state["in_multiline_def"]:
|
|
244
|
-
if stripped.endswith(":"):
|
|
245
|
-
state["in_multiline_def"] = False
|
|
246
|
-
return True
|
|
247
|
-
return False
|
|
248
|
-
|
|
249
|
-
def _handle_waiting_docstring(
|
|
250
|
-
self, lines: list[str], i: int, stripped: str, state: dict[str, t.Any]
|
|
251
|
-
) -> tuple[bool, str | None]:
|
|
252
|
-
if state["waiting"] and stripped:
|
|
253
|
-
if self._handle_docstring_start(stripped, state):
|
|
254
|
-
pass_line = None
|
|
255
|
-
if not state["in_docstring"]:
|
|
256
|
-
function_indent: int = state["function_indent"]
|
|
257
|
-
if self._needs_pass_statement(lines, i + 1, function_indent):
|
|
258
|
-
pass_line = " " * (function_indent + 4) + "pass"
|
|
259
|
-
state["removed_docstring"] = True
|
|
260
|
-
return True, pass_line
|
|
261
|
-
else:
|
|
262
|
-
state["waiting"] = False
|
|
263
|
-
return False, None
|
|
264
|
-
|
|
265
|
-
def _handle_docstring_content(
|
|
266
|
-
self, lines: list[str], i: int, stripped: str, state: dict[str, t.Any]
|
|
267
|
-
) -> tuple[bool, str | None]:
|
|
268
|
-
if state["in_docstring"]:
|
|
269
|
-
if self._handle_docstring_end(stripped, state):
|
|
270
|
-
pass_line = None
|
|
271
|
-
function_indent: int = state["function_indent"]
|
|
272
|
-
if self._needs_pass_statement(lines, i + 1, function_indent):
|
|
273
|
-
pass_line = " " * (function_indent + 4) + "pass"
|
|
274
|
-
state["removed_docstring"] = False
|
|
275
|
-
return True, pass_line
|
|
276
|
-
else:
|
|
277
|
-
return True, None
|
|
278
|
-
return False, None
|
|
279
|
-
|
|
280
|
-
def _process_line(
|
|
281
|
-
self, lines: list[str], i: int, line: str, state: dict[str, t.Any]
|
|
282
|
-
) -> tuple[bool, str | None]:
|
|
283
|
-
stripped = line.strip()
|
|
284
|
-
if self._handle_function_definition(line, stripped, state):
|
|
285
|
-
return True, line
|
|
286
|
-
if self._handle_multiline_definition(line, stripped, state):
|
|
287
|
-
return True, line
|
|
288
|
-
handled, pass_line = self._handle_waiting_docstring(lines, i, stripped, state)
|
|
289
|
-
if handled:
|
|
290
|
-
return True, pass_line
|
|
291
|
-
handled, pass_line = self._handle_docstring_content(lines, i, stripped, state)
|
|
292
|
-
if handled:
|
|
293
|
-
return True, pass_line
|
|
294
|
-
if state["removed_docstring"] and stripped:
|
|
295
|
-
state["removed_docstring"] = False
|
|
296
|
-
return False, line
|
|
297
|
-
|
|
298
|
-
def remove_docstrings(self, code: str) -> str:
|
|
299
|
-
lines = code.split("\n")
|
|
300
|
-
cleaned_lines: list[str] = []
|
|
301
|
-
docstring_state = self._initialize_docstring_state()
|
|
302
|
-
for i, line in enumerate(lines):
|
|
303
|
-
handled, result_line = self._process_line(lines, i, line, docstring_state)
|
|
304
|
-
if handled:
|
|
305
|
-
if result_line is not None:
|
|
306
|
-
cleaned_lines.append(result_line)
|
|
307
|
-
else:
|
|
308
|
-
cleaned_lines.append(line)
|
|
309
|
-
return "\n".join(cleaned_lines)
|
|
98
|
+
def backup_file(self, file_path: Path) -> Path:
|
|
99
|
+
backup_path = file_path.with_suffix(f"{file_path.suffix}.backup")
|
|
100
|
+
try:
|
|
101
|
+
backup_path.write_bytes(file_path.read_bytes())
|
|
102
|
+
return backup_path
|
|
103
|
+
except Exception as e:
|
|
104
|
+
raise ExecutionError(
|
|
105
|
+
message=f"Failed to create backup for {file_path}: {e}",
|
|
106
|
+
error_code=ErrorCode.FILE_WRITE_ERROR,
|
|
107
|
+
) from e
|
|
310
108
|
|
|
311
|
-
def _is_function_or_class_definition(self, stripped_line: str) -> bool:
|
|
312
|
-
return stripped_line.startswith(("def ", "class ", "async def "))
|
|
313
109
|
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
110
|
+
class CleaningErrorHandler(BaseModel):
|
|
111
|
+
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
112
|
+
|
|
113
|
+
console: Console
|
|
114
|
+
logger: t.Any = None
|
|
115
|
+
|
|
116
|
+
def model_post_init(self, _: t.Any) -> None:
|
|
117
|
+
if self.logger is None:
|
|
118
|
+
import logging
|
|
119
|
+
|
|
120
|
+
self.logger = logging.getLogger("crackerjack.code_cleaner.error_handler")
|
|
121
|
+
|
|
122
|
+
def handle_file_error(self, file_path: Path, error: Exception, step: str) -> None:
|
|
123
|
+
self.console.print(
|
|
124
|
+
f"[bold bright_yellow]⚠️ Warning: {step} failed for {file_path}: {error}[/bold bright_yellow]",
|
|
125
|
+
)
|
|
126
|
+
|
|
127
|
+
self.logger.warning(
|
|
128
|
+
"Cleaning step failed",
|
|
129
|
+
extra={
|
|
130
|
+
"file_path": str(file_path),
|
|
131
|
+
"step": step,
|
|
132
|
+
"error": str(error),
|
|
133
|
+
"error_type": type(error).__name__,
|
|
134
|
+
},
|
|
135
|
+
)
|
|
136
|
+
|
|
137
|
+
def log_cleaning_result(self, result: CleaningResult) -> None:
|
|
138
|
+
if result.success:
|
|
139
|
+
self.console.print(
|
|
140
|
+
f"[green]✅ Cleaned {result.file_path}[/green] "
|
|
141
|
+
f"({result.original_size} → {result.cleaned_size} bytes)",
|
|
142
|
+
)
|
|
325
143
|
else:
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
def _is_single_line_docstring(self, stripped: str, delimiter: str) -> bool:
|
|
331
|
-
return stripped.endswith(delimiter) and len(stripped) > len(delimiter)
|
|
332
|
-
|
|
333
|
-
def _handle_docstring_end(self, stripped: str, state: dict[str, t.Any]) -> bool:
|
|
334
|
-
if state["delimiter"] and stripped.endswith(state["delimiter"]):
|
|
335
|
-
state["in_docstring"] = False
|
|
336
|
-
state["delimiter"] = None
|
|
337
|
-
return True
|
|
338
|
-
return False
|
|
339
|
-
|
|
340
|
-
def _needs_pass_statement(
|
|
341
|
-
self, lines: list[str], start_index: int, function_indent: int
|
|
342
|
-
) -> bool:
|
|
343
|
-
for i in range(start_index, len(lines)):
|
|
344
|
-
line = lines[i]
|
|
345
|
-
stripped = line.strip()
|
|
346
|
-
if not stripped:
|
|
347
|
-
continue
|
|
348
|
-
line_indent = len(line) - len(line.lstrip())
|
|
349
|
-
if line_indent <= function_indent:
|
|
350
|
-
return True
|
|
351
|
-
if line_indent > function_indent:
|
|
352
|
-
return False
|
|
353
|
-
return True
|
|
354
|
-
|
|
355
|
-
def remove_line_comments(self, code: str) -> str:
|
|
356
|
-
lines = code.split("\n")
|
|
357
|
-
cleaned_lines: list[str] = []
|
|
358
|
-
for line in lines:
|
|
359
|
-
if not line.strip():
|
|
360
|
-
cleaned_lines.append(line)
|
|
361
|
-
continue
|
|
362
|
-
cleaned_line = self._process_line_for_comments(line)
|
|
363
|
-
if cleaned_line or not line.strip():
|
|
364
|
-
cleaned_lines.append(cleaned_line or line)
|
|
365
|
-
return "\n".join(cleaned_lines)
|
|
366
|
-
|
|
367
|
-
def _process_line_for_comments(self, line: str) -> str:
|
|
368
|
-
result: list[str] = []
|
|
369
|
-
string_state = {"in_string": None}
|
|
370
|
-
for i, char in enumerate(line):
|
|
371
|
-
if self._handle_string_character(char, i, line, string_state, result):
|
|
372
|
-
continue
|
|
373
|
-
elif self._handle_comment_character(char, i, line, string_state, result):
|
|
374
|
-
break
|
|
375
|
-
else:
|
|
376
|
-
result.append(char)
|
|
377
|
-
return "".join(result).rstrip()
|
|
144
|
+
self.console.print(
|
|
145
|
+
f"[red]❌ Failed to clean {result.file_path}[/red] "
|
|
146
|
+
f"({len(result.steps_failed)} steps failed)",
|
|
147
|
+
)
|
|
378
148
|
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
elif string_state["in_string"] == char:
|
|
394
|
-
string_state["in_string"] = None
|
|
395
|
-
result.append(char)
|
|
396
|
-
return True
|
|
397
|
-
|
|
398
|
-
def _handle_comment_character(
|
|
399
|
-
self,
|
|
400
|
-
char: str,
|
|
401
|
-
index: int,
|
|
402
|
-
line: str,
|
|
403
|
-
string_state: dict[str, t.Any],
|
|
404
|
-
result: list[str],
|
|
405
|
-
) -> bool:
|
|
406
|
-
if char != "#" or string_state["in_string"] is not None:
|
|
407
|
-
return False
|
|
408
|
-
comment = line[index:].strip()
|
|
409
|
-
if self._is_special_comment_line(comment):
|
|
410
|
-
result.append(line[index:])
|
|
411
|
-
return True
|
|
412
|
-
|
|
413
|
-
def _is_special_comment_line(self, comment: str) -> bool:
|
|
414
|
-
special_comment_pattern = (
|
|
415
|
-
r"^#\s*(?:type:\s*ignore(?:\[.*?\])?|noqa|nosec|pragma:\s*no\s*cover"
|
|
416
|
-
r"|pylint:\s*disable|mypy:\s*ignore)"
|
|
149
|
+
if result.warnings:
|
|
150
|
+
for warning in result.warnings:
|
|
151
|
+
self.console.print(f"[yellow]⚠️ {warning}[/yellow]")
|
|
152
|
+
|
|
153
|
+
self.logger.info(
|
|
154
|
+
"File cleaning completed",
|
|
155
|
+
extra={
|
|
156
|
+
"file_path": str(result.file_path),
|
|
157
|
+
"success": result.success,
|
|
158
|
+
"steps_completed": result.steps_completed,
|
|
159
|
+
"steps_failed": result.steps_failed,
|
|
160
|
+
"original_size": result.original_size,
|
|
161
|
+
"cleaned_size": result.cleaned_size,
|
|
162
|
+
},
|
|
417
163
|
)
|
|
418
|
-
return bool(re.match(special_comment_pattern, comment))
|
|
419
|
-
|
|
420
|
-
def remove_extra_whitespace(self, code: str) -> str:
|
|
421
|
-
lines = code.split("\n")
|
|
422
|
-
cleaned_lines: list[str] = []
|
|
423
|
-
function_tracker = {"in_function": False, "function_indent": 0}
|
|
424
|
-
import_tracker = {"in_imports": False, "last_import_type": None}
|
|
425
|
-
for i, line in enumerate(lines):
|
|
426
|
-
line = line.rstrip()
|
|
427
|
-
stripped_line = line.lstrip()
|
|
428
|
-
self._update_function_state(line, stripped_line, function_tracker)
|
|
429
|
-
self._update_import_state(line, stripped_line, import_tracker)
|
|
430
|
-
if not line:
|
|
431
|
-
if self._should_skip_empty_line(
|
|
432
|
-
i, lines, cleaned_lines, function_tracker, import_tracker
|
|
433
|
-
):
|
|
434
|
-
continue
|
|
435
|
-
cleaned_lines.append(line)
|
|
436
|
-
return "\n".join(self._remove_trailing_empty_lines(cleaned_lines))
|
|
437
164
|
|
|
438
|
-
def remove_docstrings_streaming(self, code: str) -> str:
|
|
439
|
-
if len(code) < 10000:
|
|
440
|
-
return self.remove_docstrings(code)
|
|
441
165
|
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
docstring_state = self._initialize_docstring_state()
|
|
445
|
-
for i, line in enumerate(lines):
|
|
446
|
-
handled, result_line = self._process_line(
|
|
447
|
-
lines, i, line, docstring_state
|
|
448
|
-
)
|
|
449
|
-
if handled:
|
|
450
|
-
if result_line is not None:
|
|
451
|
-
yield result_line
|
|
452
|
-
else:
|
|
453
|
-
yield line
|
|
454
|
-
|
|
455
|
-
return "\n".join(process_lines())
|
|
456
|
-
|
|
457
|
-
def remove_line_comments_streaming(self, code: str) -> str:
|
|
458
|
-
if len(code) < 10000:
|
|
459
|
-
return self.remove_line_comments(code)
|
|
460
|
-
|
|
461
|
-
def process_lines():
|
|
462
|
-
for line in code.split("\n"):
|
|
463
|
-
if not line.strip():
|
|
464
|
-
yield line
|
|
465
|
-
continue
|
|
466
|
-
cleaned_line = self._process_line_for_comments(line)
|
|
467
|
-
if cleaned_line or not line.strip():
|
|
468
|
-
yield cleaned_line or line
|
|
166
|
+
class CleaningPipeline(BaseModel):
|
|
167
|
+
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
469
168
|
|
|
470
|
-
|
|
169
|
+
file_processor: t.Any
|
|
170
|
+
error_handler: t.Any
|
|
171
|
+
console: Console
|
|
172
|
+
logger: t.Any = None
|
|
471
173
|
|
|
472
|
-
def
|
|
473
|
-
if
|
|
474
|
-
|
|
174
|
+
def model_post_init(self, _: t.Any) -> None:
|
|
175
|
+
if self.logger is None:
|
|
176
|
+
import logging
|
|
475
177
|
|
|
476
|
-
|
|
477
|
-
lines = code.split("\n")
|
|
478
|
-
function_tracker: dict[str, t.Any] = {
|
|
479
|
-
"in_function": False,
|
|
480
|
-
"function_indent": 0,
|
|
481
|
-
}
|
|
482
|
-
import_tracker: dict[str, t.Any] = {
|
|
483
|
-
"in_imports": False,
|
|
484
|
-
"last_import_type": None,
|
|
485
|
-
}
|
|
486
|
-
previous_lines: list[str] = []
|
|
487
|
-
for i, line in enumerate(lines):
|
|
488
|
-
line = line.rstrip()
|
|
489
|
-
stripped_line = line.lstrip()
|
|
490
|
-
self._update_function_state(line, stripped_line, function_tracker)
|
|
491
|
-
self._update_import_state(line, stripped_line, import_tracker)
|
|
492
|
-
if not line:
|
|
493
|
-
if self._should_skip_empty_line(
|
|
494
|
-
i, lines, previous_lines, function_tracker, import_tracker
|
|
495
|
-
):
|
|
496
|
-
continue
|
|
497
|
-
previous_lines.append(line)
|
|
498
|
-
yield line
|
|
499
|
-
|
|
500
|
-
processed_lines = list(process_lines())
|
|
501
|
-
return "\n".join(self._remove_trailing_empty_lines(processed_lines))
|
|
502
|
-
|
|
503
|
-
def _update_function_state(
|
|
504
|
-
self, line: str, stripped_line: str, function_tracker: dict[str, t.Any]
|
|
505
|
-
) -> None:
|
|
506
|
-
if stripped_line.startswith(("def ", "async def ")):
|
|
507
|
-
function_tracker["in_function"] = True
|
|
508
|
-
function_tracker["function_indent"] = len(line) - len(stripped_line)
|
|
509
|
-
elif self._is_function_end(line, stripped_line, function_tracker):
|
|
510
|
-
function_tracker["in_function"] = False
|
|
511
|
-
function_tracker["function_indent"] = 0
|
|
512
|
-
|
|
513
|
-
def _update_import_state(
|
|
514
|
-
self, line: str, stripped_line: str, import_tracker: dict[str, t.Any]
|
|
515
|
-
) -> None:
|
|
516
|
-
if stripped_line.startswith(("import ", "from ")):
|
|
517
|
-
import_tracker["in_imports"] = True
|
|
518
|
-
if self._is_stdlib_import(stripped_line):
|
|
519
|
-
current_type = "stdlib"
|
|
520
|
-
elif self._is_local_import(stripped_line):
|
|
521
|
-
current_type = "local"
|
|
522
|
-
else:
|
|
523
|
-
current_type = "third_party"
|
|
524
|
-
import_tracker["last_import_type"] = current_type
|
|
525
|
-
elif stripped_line and not stripped_line.startswith("#"):
|
|
526
|
-
import_tracker["in_imports"] = False
|
|
527
|
-
import_tracker["last_import_type"] = None
|
|
528
|
-
|
|
529
|
-
@staticmethod
|
|
530
|
-
@lru_cache(maxsize=256)
|
|
531
|
-
def _is_stdlib_module(module: str) -> bool:
|
|
532
|
-
stdlib_modules = {
|
|
533
|
-
"os",
|
|
534
|
-
"sys",
|
|
535
|
-
"re",
|
|
536
|
-
"json",
|
|
537
|
-
"datetime",
|
|
538
|
-
"time",
|
|
539
|
-
"pathlib",
|
|
540
|
-
"typing",
|
|
541
|
-
"collections",
|
|
542
|
-
"itertools",
|
|
543
|
-
"functools",
|
|
544
|
-
"operator",
|
|
545
|
-
"math",
|
|
546
|
-
"random",
|
|
547
|
-
"uuid",
|
|
548
|
-
"urllib",
|
|
549
|
-
"http",
|
|
550
|
-
"html",
|
|
551
|
-
"xml",
|
|
552
|
-
"email",
|
|
553
|
-
"csv",
|
|
554
|
-
"sqlite3",
|
|
555
|
-
"subprocess",
|
|
556
|
-
"threading",
|
|
557
|
-
"multiprocessing",
|
|
558
|
-
"asyncio",
|
|
559
|
-
"contextlib",
|
|
560
|
-
"dataclasses",
|
|
561
|
-
"enum",
|
|
562
|
-
"abc",
|
|
563
|
-
"io",
|
|
564
|
-
"tempfile",
|
|
565
|
-
"shutil",
|
|
566
|
-
"glob",
|
|
567
|
-
"pickle",
|
|
568
|
-
"copy",
|
|
569
|
-
"heapq",
|
|
570
|
-
"bisect",
|
|
571
|
-
"array",
|
|
572
|
-
"struct",
|
|
573
|
-
"zlib",
|
|
574
|
-
"hashlib",
|
|
575
|
-
"hmac",
|
|
576
|
-
"secrets",
|
|
577
|
-
"base64",
|
|
578
|
-
"binascii",
|
|
579
|
-
"codecs",
|
|
580
|
-
"locale",
|
|
581
|
-
"platform",
|
|
582
|
-
"socket",
|
|
583
|
-
"ssl",
|
|
584
|
-
"ipaddress",
|
|
585
|
-
"logging",
|
|
586
|
-
"warnings",
|
|
587
|
-
"inspect",
|
|
588
|
-
"ast",
|
|
589
|
-
"dis",
|
|
590
|
-
"tokenize",
|
|
591
|
-
"keyword",
|
|
592
|
-
"linecache",
|
|
593
|
-
"traceback",
|
|
594
|
-
"weakref",
|
|
595
|
-
"gc",
|
|
596
|
-
"ctypes",
|
|
597
|
-
"unittest",
|
|
598
|
-
"doctest",
|
|
599
|
-
"pdb",
|
|
600
|
-
"profile",
|
|
601
|
-
"cProfile",
|
|
602
|
-
"timeit",
|
|
603
|
-
"trace",
|
|
604
|
-
"calendar",
|
|
605
|
-
"decimal",
|
|
606
|
-
"fractions",
|
|
607
|
-
"statistics",
|
|
608
|
-
"tomllib",
|
|
609
|
-
}
|
|
610
|
-
return module in stdlib_modules
|
|
178
|
+
self.logger = logging.getLogger("crackerjack.code_cleaner.pipeline")
|
|
611
179
|
|
|
612
|
-
def
|
|
180
|
+
def clean_file(
    self,
    file_path: Path,
    cleaning_steps: list[CleaningStepProtocol],
) -> CleaningResult:
    """Read one file, run the cleaning steps over it, and write it back if changed.

    The file is rewritten only when the pipeline reports success and the
    cleaned text differs from what was read. Sizes are measured in UTF-8
    encoded bytes.

    Args:
        file_path: The file to clean.
        cleaning_steps: Steps to apply, in order.

    Returns:
        A ``CleaningResult`` describing the run. If anything raises, the error
        is passed to ``self.error_handler.handle_file_error`` and a failed
        result with ``steps_failed=["file_processing"]`` and zero sizes is
        returned instead of propagating the exception.
    """
    self.logger.info(f"Starting clean_file for {file_path}")
    try:
        original_code = self.file_processor.read_file_safely(file_path)
        # Canonical codec name; the previous "utf - 8" spelling only worked
        # because CPython normalises punctuation in codec names.
        original_size = len(original_code.encode("utf-8"))

        result = self._apply_cleaning_pipeline(
            original_code,
            file_path,
            cleaning_steps,
        )

        if result.success and result.cleaned_code != original_code:
            self.file_processor.write_file_safely(file_path, result.cleaned_code)
            cleaned_size = len(result.cleaned_code.encode("utf-8"))
        else:
            # Nothing was written, so the on-disk size is unchanged.
            cleaned_size = original_size

        cleaning_result = CleaningResult(
            file_path=file_path,
            success=result.success,
            steps_completed=result.steps_completed,
            steps_failed=result.steps_failed,
            warnings=result.warnings,
            original_size=original_size,
            cleaned_size=cleaned_size,
        )

        self.error_handler.log_cleaning_result(cleaning_result)
        return cleaning_result

    except Exception as e:
        # Top-level boundary for a single file: report and carry on.
        self.error_handler.handle_file_error(file_path, e, "file_processing")
        return CleaningResult(
            file_path=file_path,
            success=False,
            steps_completed=[],
            steps_failed=["file_processing"],
            warnings=[],
            original_size=0,
            cleaned_size=0,
        )
|
|
707
226
|
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
|
|
227
|
+
@dataclass
class PipelineResult:
    """Outcome of running a sequence of cleaning steps over one source text."""

    # Source text after the last step that ran without raising.
    cleaned_code: str
    # Overall verdict computed by the pipeline from the two step lists.
    success: bool
    # Names of the steps that finished, in execution order.
    steps_completed: list[str]
    # Names of the steps that raised, in execution order.
    steps_failed: list[str]
    # One human-readable message per failed step.
    warnings: list[str]
|
|
234
|
+
|
|
235
|
+
def _apply_cleaning_pipeline(
    self,
    code: str,
    file_path: Path,
    cleaning_steps: list[CleaningStepProtocol],
    *,
    min_success_ratio: float = 0.7,
) -> PipelineResult:
    """Run each cleaning step in order, tolerating individual failures.

    A step that raises, or that returns something other than ``str``, is
    recorded as failed and skipped: the next step receives the text produced
    by the last step that succeeded.

    Args:
        code: The source text to clean.
        file_path: Passed through to every step and used in log records.
        cleaning_steps: Callables with a ``name`` attribute, applied in order.
        min_success_ratio: When at least one step failed, the fraction of
            steps that must have succeeded for the run to count as a success.

    Returns:
        A ``PipelineResult`` with the final text, the verdict, the names of
        completed and failed steps, and one warning per failure.
    """
    current_code = code
    steps_completed: list[str] = []
    steps_failed: list[str] = []
    warnings: list[str] = []

    for step in cleaning_steps:
        try:
            step_result = step(current_code, file_path)
            if not isinstance(step_result, str):
                # A non-string result would break every later step and the
                # final file write, so treat it like any other step failure.
                raise TypeError(
                    f"{step.name} returned {type(step_result).__name__}, expected str"
                )
        except Exception as e:
            self.error_handler.handle_file_error(file_path, e, step.name)
            steps_failed.append(step.name)
            warnings.append(f"{step.name} failed: {e}")

            # "original code" here means the text as it was before this step.
            self.logger.warning(
                "Cleaning step failed, continuing with original code",
                extra={
                    "step": step.name,
                    "file_path": str(file_path),
                    "error": str(e),
                },
            )
            continue

        current_code = step_result
        steps_completed.append(step.name)
        self.logger.debug(
            "Cleaning step completed",
            extra={"step": step.name, "file_path": str(file_path)},
        )

    overall_success = True
    if steps_failed:
        # The denominator is non-zero because steps_failed is non-empty.
        success_ratio = len(steps_completed) / (
            len(steps_completed) + len(steps_failed)
        )
        overall_success = success_ratio >= min_success_ratio

    return self.PipelineResult(
        cleaned_code=current_code,
        success=overall_success,
        steps_completed=steps_completed,
        steps_failed=steps_failed,
        warnings=warnings,
    )
|
|
285
|
+
|
|
286
|
+
|
|
287
|
+
class CodeCleaner(BaseModel):
|
|
288
|
+
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
289
|
+
|
|
290
|
+
console: Console
|
|
291
|
+
file_processor: t.Any = None
|
|
292
|
+
error_handler: t.Any = None
|
|
293
|
+
pipeline: t.Any = None
|
|
294
|
+
logger: t.Any = None
|
|
295
|
+
|
|
296
|
+
def model_post_init(self, _: t.Any) -> None:
    """Create default collaborators for every one that was left as ``None``.

    The logger, file processor, error handler and pipeline are filled in, in
    that order; the pipeline is wired to whichever processor and handler are
    in place by then.
    """
    console = self.console

    if self.logger is None:
        from logging import getLogger

        self.logger = getLogger("crackerjack.code_cleaner")

    processor = self.file_processor
    if processor is None:
        processor = self.file_processor = FileProcessor(console=console)

    handler = self.error_handler
    if handler is None:
        handler = self.error_handler = CleaningErrorHandler(console=console)

    if self.pipeline is None:
        self.pipeline = CleaningPipeline(
            file_processor=processor,
            error_handler=handler,
            console=console,
        )
|
|
769
|
-
return code
|
|
770
314
|
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
|
|
774
|
-
|
|
775
|
-
|
|
776
|
-
|
|
777
|
-
if not str(file_path.parent).startswith("__")
|
|
315
|
+
def clean_file(self, file_path: Path) -> CleaningResult:
    """Clean one file with the four built-in steps, in their fixed order.

    The order is: line comments, docstrings, extra whitespace, formatting.
    The actual read/transform/write work is delegated to ``self.pipeline``.
    """
    step_factories = (
        self._create_line_comment_step,
        self._create_docstring_step,
        self._create_whitespace_step,
        self._create_formatting_step,
    )
    cleaning_steps = [make_step() for make_step in step_factories]
    return self.pipeline.clean_file(file_path, cleaning_steps)
|
|
787
324
|
|
|
788
|
-
|
|
789
|
-
|
|
325
|
+
def clean_files(self, pkg_dir: Path | None = None) -> list[CleaningResult]:
    """Clean every eligible ``.py`` file below *pkg_dir*.

    Args:
        pkg_dir: Root directory to search recursively; defaults to the
            current working directory.

    Returns:
        One ``CleaningResult`` per file accepted by ``should_process_file``,
        in sorted path order.
    """
    if pkg_dir is None:
        pkg_dir = Path.cwd()

    # The pattern must be exactly "*.py": the previous " * .py" contained
    # literal spaces and therefore matched no ordinary Python file.
    # Sorting makes the processing order deterministic across platforms.
    python_files = sorted(pkg_dir.rglob("*.py"))

    self.logger.info(f"Starting clean_files for {len(python_files)} files")
    return [
        self.clean_file(file_path)
        for file_path in python_files
        if self.should_process_file(file_path)
    ]
|
|
339
|
+
|
|
340
|
+
def should_process_file(self, file_path: Path) -> bool:
    """Decide whether *file_path* is a Python source file that should be cleaned.

    A file is rejected when any of its parent directories is a cache, VCS,
    virtual-environment or build directory, when its name starts with a dot,
    or when its suffix is not ``.py``.

    Args:
        file_path: Candidate file path.

    Returns:
        True when the file should be processed.
    """
    ignore_patterns = {
        "__pycache__",
        ".git",
        ".venv",
        # Must match the real directory name; "site - packages" never did.
        "site-packages",
        ".pytest_cache",
        "build",
        "dist",
    }

    if any(parent.name in ignore_patterns for parent in file_path.parents):
        return False

    return file_path.suffix == ".py" and not file_path.name.startswith(".")
|
|
356
|
+
|
|
357
|
+
def _create_line_comment_step(self) -> CleaningStepProtocol:
    """Build the pipeline step that strips line comments."""
    step_class = self._LineCommentStep
    return step_class()
|
|
360
|
+
|
|
361
|
+
def _create_docstring_step(self) -> CleaningStepProtocol:
    """Build the pipeline step that strips docstrings."""
    step_class = self._DocstringStep
    return step_class()
|
|
364
|
+
|
|
365
|
+
class _DocstringStep:
|
|
366
|
+
"""Step implementation for removing docstrings."""
|
|
367
|
+
|
|
368
|
+
name = "remove_docstrings"
|
|
369
|
+
|
|
370
|
+
def _is_docstring_node(self, node: ast.AST) -> bool:
|
|
371
|
+
body = getattr(node, "body", None)
|
|
372
|
+
return (
|
|
373
|
+
hasattr(node, "body")
|
|
374
|
+
and body is not None
|
|
375
|
+
and len(body) > 0
|
|
376
|
+
and isinstance(body[0], ast.Expr)
|
|
377
|
+
and isinstance(body[0].value, ast.Constant)
|
|
378
|
+
and isinstance(body[0].value.value, str)
|
|
894
379
|
)
|
|
895
380
|
|
|
896
|
-
|
|
897
|
-
|
|
898
|
-
|
|
899
|
-
|
|
900
|
-
|
|
901
|
-
|
|
902
|
-
|
|
903
|
-
|
|
904
|
-
|
|
905
|
-
|
|
381
|
+
def _find_docstrings(self, tree: ast.AST) -> list[ast.AST]:
|
|
382
|
+
docstring_nodes: list[ast.AST] = []
|
|
383
|
+
finder = self._DocstringFinder(docstring_nodes, self._is_docstring_node)
|
|
384
|
+
finder.visit(tree)
|
|
385
|
+
return docstring_nodes
|
|
386
|
+
|
|
387
|
+
class _DocstringFinder(ast.NodeVisitor):
|
|
388
|
+
def __init__(
|
|
389
|
+
self,
|
|
390
|
+
docstring_nodes: list[ast.AST],
|
|
391
|
+
is_docstring_node: t.Callable[[ast.AST], bool],
|
|
392
|
+
):
|
|
393
|
+
self.docstring_nodes = docstring_nodes
|
|
394
|
+
self.is_docstring_node = is_docstring_node
|
|
395
|
+
|
|
396
|
+
def _add_if_docstring(self, node: ast.AST) -> None:
|
|
397
|
+
if self.is_docstring_node(node) and hasattr(node, "body"):
|
|
398
|
+
body: list[ast.stmt] = getattr(node, "body")
|
|
399
|
+
self.docstring_nodes.append(body[0])
|
|
400
|
+
self.generic_visit(node)
|
|
401
|
+
|
|
402
|
+
def visit_Module(self, node: ast.Module) -> None:
|
|
403
|
+
self._add_if_docstring(node)
|
|
404
|
+
|
|
405
|
+
def visit_FunctionDef(self, node: ast.FunctionDef) -> None:
|
|
406
|
+
self._add_if_docstring(node)
|
|
407
|
+
|
|
408
|
+
def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> None:
|
|
409
|
+
self._add_if_docstring(node)
|
|
410
|
+
|
|
411
|
+
def visit_ClassDef(self, node: ast.ClassDef) -> None:
|
|
412
|
+
self._add_if_docstring(node)
|
|
413
|
+
|
|
414
|
+
def __call__(self, code: str, file_path: Path) -> str:
|
|
906
415
|
try:
|
|
907
|
-
|
|
908
|
-
|
|
909
|
-
|
|
910
|
-
"ruff",
|
|
911
|
-
"format",
|
|
912
|
-
str(temp_path),
|
|
913
|
-
stdout=asyncio.subprocess.PIPE,
|
|
914
|
-
stderr=asyncio.subprocess.PIPE,
|
|
915
|
-
)
|
|
916
|
-
_, stderr = await proc.communicate()
|
|
917
|
-
if proc.returncode == 0:
|
|
918
|
-
async with aiofiles.open(temp_path, encoding="utf-8") as f: # type: ignore[misc]
|
|
919
|
-
formatted_code = await f.read() # type: ignore[misc]
|
|
920
|
-
else:
|
|
921
|
-
self.console.print(
|
|
922
|
-
f"[bold bright_yellow]⚠️ Warning: Ruff format failed with return code {proc.returncode}[/bold bright_yellow]"
|
|
923
|
-
)
|
|
924
|
-
if stderr:
|
|
925
|
-
self.console.print(f"[dim]Ruff stderr: {stderr.decode()}[/dim]")
|
|
926
|
-
formatted_code = code
|
|
927
|
-
except Exception as e:
|
|
928
|
-
self.console.print(
|
|
929
|
-
f"[bold bright_red]❌ Error running Ruff: {e}[/bold bright_red]"
|
|
930
|
-
)
|
|
931
|
-
handle_error(
|
|
932
|
-
ExecutionError(
|
|
933
|
-
message="Error running Ruff",
|
|
934
|
-
error_code=ErrorCode.FORMATTING_ERROR,
|
|
935
|
-
details=str(e),
|
|
936
|
-
recovery="Verify Ruff is installed and configured correctly",
|
|
937
|
-
),
|
|
938
|
-
console=self.console,
|
|
939
|
-
exit_on_error=False,
|
|
940
|
-
)
|
|
941
|
-
formatted_code = code
|
|
942
|
-
finally:
|
|
943
|
-
with suppress(FileNotFoundError):
|
|
944
|
-
temp_path.unlink()
|
|
416
|
+
tree = ast.parse(code, filename=str(file_path))
|
|
417
|
+
except SyntaxError:
|
|
418
|
+
return self._regex_fallback_removal(code)
|
|
945
419
|
|
|
946
|
-
|
|
947
|
-
|
|
948
|
-
|
|
949
|
-
|
|
420
|
+
docstring_nodes = self._find_docstrings(tree)
|
|
421
|
+
|
|
422
|
+
if not docstring_nodes:
|
|
423
|
+
return code
|
|
424
|
+
|
|
425
|
+
lines = code.split("\n")
|
|
426
|
+
lines_to_remove: set[int] = set()
|
|
427
|
+
|
|
428
|
+
for node in docstring_nodes:
|
|
429
|
+
# Most AST nodes have lineno and end_lineno attributes
|
|
430
|
+
start_line = getattr(node, "lineno", 1)
|
|
431
|
+
end_line = getattr(node, "end_lineno", start_line + 1)
|
|
432
|
+
lines_to_remove.update(range(start_line, end_line))
|
|
433
|
+
|
|
434
|
+
result_lines = [
|
|
435
|
+
line for i, line in enumerate(lines, 1) if i not in lines_to_remove
|
|
436
|
+
]
|
|
437
|
+
|
|
438
|
+
result = "\n".join(result_lines)
|
|
439
|
+
return self._regex_fallback_removal(result)
|
|
440
|
+
|
|
441
|
+
def _regex_fallback_removal(self, code: str) -> str:
|
|
442
|
+
import re
|
|
443
|
+
|
|
444
|
+
patterns = [
|
|
445
|
+
r'^\s*""".*?"""\s*$',
|
|
446
|
+
r"^\s*'''.*?'''\s*$",
|
|
447
|
+
r'^\s*""".*?"""\s*$',
|
|
448
|
+
r"^\s*'''.*?'''\s*$",
|
|
449
|
+
]
|
|
450
|
+
result = code
|
|
451
|
+
for pattern in patterns:
|
|
452
|
+
result = re.sub(pattern, "", result, flags=re.MULTILINE | re.DOTALL)
|
|
453
|
+
return result
|
|
454
|
+
|
|
455
|
+
class _LineCommentStep:
|
|
456
|
+
"""Step implementation for removing line comments."""
|
|
457
|
+
|
|
458
|
+
name = "remove_line_comments"
|
|
459
|
+
|
|
460
|
+
def __call__(self, code: str, file_path: Path) -> str:
|
|
461
|
+
lines = code.split("\n")
|
|
462
|
+
# Performance: Use list comprehension instead of generator for small-to-medium files
|
|
463
|
+
processed_lines = [self._process_line_for_comments(line) for line in lines]
|
|
464
|
+
return "\n".join(processed_lines)
|
|
465
|
+
|
|
466
|
+
def _process_line_for_comments(self, line: str) -> str:
|
|
467
|
+
"""Process a single line to remove comments while preserving strings."""
|
|
468
|
+
if not line.strip() or self._is_preserved_comment_line(line):
|
|
469
|
+
return line
|
|
470
|
+
return self._remove_comment_from_line(line)
|
|
471
|
+
|
|
472
|
+
def _is_preserved_comment_line(self, line: str) -> bool:
|
|
473
|
+
"""Check if this comment line should be preserved."""
|
|
474
|
+
stripped = line.strip()
|
|
475
|
+
if not stripped.startswith("#"):
|
|
476
|
+
return False
|
|
477
|
+
return self._has_preserved_pattern(stripped)
|
|
478
|
+
|
|
479
|
+
def _has_preserved_pattern(self, stripped_line: str) -> bool:
|
|
480
|
+
"""Check if line contains preserved comment patterns."""
|
|
481
|
+
preserved_patterns = ["coding: ", "encoding: ", "type: ", "noqa", "pragma"]
|
|
482
|
+
return stripped_line.startswith("# !/ ") or any(
|
|
483
|
+
pattern in stripped_line for pattern in preserved_patterns
|
|
950
484
|
)
|
|
951
|
-
|
|
952
|
-
|
|
953
|
-
|
|
954
|
-
|
|
955
|
-
|
|
956
|
-
|
|
957
|
-
)
|
|
958
|
-
|
|
959
|
-
|
|
485
|
+
|
|
486
|
+
def _remove_comment_from_line(self, line: str) -> str:
|
|
487
|
+
"""Remove comments from a line while preserving string literals."""
|
|
488
|
+
result: list[str] = []
|
|
489
|
+
string_state: dict[str, t.Any] = {"in_string": False, "quote_char": None}
|
|
490
|
+
for i, char in enumerate(line):
|
|
491
|
+
if self._should_break_at_comment(char, string_state):
|
|
492
|
+
break
|
|
493
|
+
self._update_string_state(char, i, line, string_state)
|
|
494
|
+
result.append(char)
|
|
495
|
+
return "".join(result).rstrip()
|
|
496
|
+
|
|
497
|
+
def _should_break_at_comment(self, char: str, state: dict[str, t.Any]) -> bool:
|
|
498
|
+
"""Check if we should break at a comment character."""
|
|
499
|
+
return not state["in_string"] and char == "#"
|
|
500
|
+
|
|
501
|
+
def _update_string_state(
|
|
502
|
+
self,
|
|
503
|
+
char: str,
|
|
504
|
+
index: int,
|
|
505
|
+
line: str,
|
|
506
|
+
state: dict[str, t.Any],
|
|
507
|
+
) -> None:
|
|
508
|
+
"""Update string parsing state based on current character."""
|
|
509
|
+
if self._is_string_start(char, state):
|
|
510
|
+
state["in_string"], state["quote_char"] = True, char
|
|
511
|
+
elif self._is_string_end(char, index, line, state):
|
|
512
|
+
state["in_string"], state["quote_char"] = False, None
|
|
513
|
+
|
|
514
|
+
def _is_string_start(self, char: str, state: dict[str, t.Any]) -> bool:
|
|
515
|
+
"""Check if character starts a string."""
|
|
516
|
+
return not state["in_string"] and char in ('"', "'")
|
|
517
|
+
|
|
518
|
+
def _is_string_end(
|
|
519
|
+
self,
|
|
520
|
+
char: str,
|
|
521
|
+
index: int,
|
|
522
|
+
line: str,
|
|
523
|
+
state: dict[str, t.Any],
|
|
524
|
+
) -> bool:
|
|
525
|
+
"""Check if character ends a string."""
|
|
526
|
+
return (
|
|
527
|
+
state["in_string"]
|
|
528
|
+
and char == state["quote_char"]
|
|
529
|
+
and (index == 0 or line[index - 1] != "\\")
|
|
960
530
|
)
|
|
961
|
-
|
|
962
|
-
|
|
963
|
-
|
|
964
|
-
|
|
965
|
-
|
|
966
|
-
|
|
967
|
-
|
|
968
|
-
|
|
969
|
-
|
|
970
|
-
|
|
971
|
-
|
|
972
|
-
|
|
973
|
-
|
|
974
|
-
|
|
975
|
-
|
|
976
|
-
|
|
977
|
-
|
|
978
|
-
|
|
979
|
-
|
|
980
|
-
|
|
531
|
+
|
|
532
|
+
def _create_docstring_finder_class(
|
|
533
|
+
self,
|
|
534
|
+
docstring_nodes: list[ast.AST],
|
|
535
|
+
) -> type[ast.NodeVisitor]:
|
|
536
|
+
class DocstringFinder(ast.NodeVisitor):
|
|
537
|
+
def _is_docstring_node(self, node: ast.AST) -> bool:
|
|
538
|
+
body = getattr(node, "body", None)
|
|
539
|
+
return (
|
|
540
|
+
hasattr(node, "body")
|
|
541
|
+
and body is not None
|
|
542
|
+
and len(body) > 0
|
|
543
|
+
and isinstance(body[0], ast.Expr)
|
|
544
|
+
and isinstance(body[0].value, ast.Constant)
|
|
545
|
+
and isinstance(body[0].value.value, str)
|
|
546
|
+
)
|
|
547
|
+
|
|
548
|
+
def _add_if_docstring(self, node: ast.AST) -> None:
|
|
549
|
+
if self._is_docstring_node(node) and hasattr(node, "body"):
|
|
550
|
+
body: list[ast.stmt] = getattr(node, "body")
|
|
551
|
+
docstring_nodes.append(body[0])
|
|
552
|
+
self.generic_visit(node)
|
|
553
|
+
|
|
554
|
+
def visit_Module(self, node: ast.Module) -> None:
|
|
555
|
+
self._add_if_docstring(node)
|
|
556
|
+
|
|
557
|
+
def visit_FunctionDef(self, node: ast.FunctionDef) -> None:
|
|
558
|
+
self._add_if_docstring(node)
|
|
559
|
+
|
|
560
|
+
def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> None:
|
|
561
|
+
self._add_if_docstring(node)
|
|
562
|
+
|
|
563
|
+
def visit_ClassDef(self, node: ast.ClassDef) -> None:
|
|
564
|
+
self._add_if_docstring(node)
|
|
565
|
+
|
|
566
|
+
return DocstringFinder
|
|
567
|
+
|
|
568
|
+
def _create_whitespace_step(self) -> CleaningStepProtocol:
|
|
569
|
+
class WhitespaceStep:
|
|
570
|
+
name = "remove_extra_whitespace"
|
|
571
|
+
|
|
572
|
+
def __call__(self, code: str, file_path: Path) -> str:
|
|
573
|
+
import re
|
|
574
|
+
|
|
575
|
+
lines = code.split("\n")
|
|
576
|
+
cleaned_lines: list[str] = []
|
|
577
|
+
|
|
578
|
+
empty_line_count = 0
|
|
579
|
+
|
|
580
|
+
for line in lines:
|
|
581
|
+
cleaned_line = line.rstrip()
|
|
582
|
+
|
|
583
|
+
if not cleaned_line.strip():
|
|
584
|
+
empty_line_count += 1
|
|
585
|
+
if empty_line_count <= 2:
|
|
586
|
+
cleaned_lines.append("")
|
|
587
|
+
else:
|
|
588
|
+
empty_line_count = 0
|
|
589
|
+
|
|
590
|
+
leading_whitespace = len(cleaned_line) - len(
|
|
591
|
+
cleaned_line.lstrip(),
|
|
592
|
+
)
|
|
593
|
+
content = cleaned_line.lstrip()
|
|
594
|
+
|
|
595
|
+
content = re.sub(r" {2, }", " ", content)
|
|
596
|
+
|
|
597
|
+
cleaned_line = cleaned_line[:leading_whitespace] + content
|
|
598
|
+
cleaned_lines.append(cleaned_line)
|
|
599
|
+
|
|
600
|
+
while cleaned_lines and not cleaned_lines[-1].strip():
|
|
601
|
+
cleaned_lines.pop()
|
|
602
|
+
|
|
603
|
+
result = "\n".join(cleaned_lines)
|
|
604
|
+
if result and not result.endswith("\n"):
|
|
605
|
+
result += "\n"
|
|
606
|
+
|
|
607
|
+
return result
|
|
608
|
+
|
|
609
|
+
return WhitespaceStep()
|
|
610
|
+
|
|
611
|
+
def _create_formatting_step(self) -> CleaningStepProtocol:
|
|
612
|
+
class FormattingStep:
|
|
613
|
+
name = "format_code"
|
|
614
|
+
|
|
615
|
+
def __call__(self, code: str, file_path: Path) -> str:
|
|
616
|
+
import re
|
|
617
|
+
|
|
618
|
+
lines = code.split("\n")
|
|
619
|
+
formatted_lines: list[str] = []
|
|
620
|
+
|
|
621
|
+
for line in lines:
|
|
622
|
+
if line.strip():
|
|
623
|
+
leading_whitespace = len(line) - len(line.lstrip())
|
|
624
|
+
content = line.lstrip()
|
|
625
|
+
|
|
626
|
+
content = re.sub(
|
|
627
|
+
r"([ =+ \ -*/%<>!&|^ ])([ ^ =+ \ -*/%<>!&|^ ])",
|
|
628
|
+
r"\1 \2",
|
|
629
|
+
content,
|
|
630
|
+
)
|
|
631
|
+
content = re.sub(
|
|
632
|
+
r"([ ^ =+ \ -*/%<>!&|^ ])([ =+ \ -*/%<>!&|^ ])",
|
|
633
|
+
r"\1 \2",
|
|
634
|
+
content,
|
|
635
|
+
)
|
|
636
|
+
|
|
637
|
+
content = re.sub(r", ([ ^ \n])", r", \1", content)
|
|
638
|
+
|
|
639
|
+
content = re.sub(r": ([ ^ \n: ])", r": \1", content)
|
|
640
|
+
|
|
641
|
+
content = re.sub(r" {2, }", " ", content)
|
|
642
|
+
|
|
643
|
+
formatted_line = line[:leading_whitespace] + content
|
|
644
|
+
formatted_lines.append(formatted_line)
|
|
645
|
+
else:
|
|
646
|
+
formatted_lines.append(line)
|
|
647
|
+
|
|
648
|
+
return "\n".join(formatted_lines)
|
|
649
|
+
|
|
650
|
+
return FormattingStep()
|
|
651
|
+
|
|
652
|
+
def remove_line_comments(self, code: str, file_path: Path | None = None) -> str:
    """Apply the line-comment step to *code* and return the result.

    Args:
        code: Source text to process.
        file_path: Path handed to the step; ``Path("temp.py")`` when omitted.
    """
    target = file_path if file_path else Path("temp.py")
    return self._create_line_comment_step()(code, target)
|
|
656
|
+
|
|
657
|
+
def remove_docstrings(self, code: str, file_path: Path | None = None) -> str:
    """Apply the docstring step to *code* and return the result.

    Args:
        code: Source text to process.
        file_path: Path handed to the step; ``Path("temp.py")`` when omitted.
    """
    target = file_path if file_path else Path("temp.py")
    return self._create_docstring_step()(code, target)
|
|
661
|
+
|
|
662
|
+
def remove_extra_whitespace(self, code: str, file_path: Path | None = None) -> str:
    """Apply the whitespace step to *code* and return the result.

    Args:
        code: Source text to process.
        file_path: Path handed to the step; ``Path("temp.py")`` when omitted.
    """
    target = file_path if file_path else Path("temp.py")
    return self._create_whitespace_step()(code, target)
|
|
666
|
+
|
|
667
|
+
def format_code(self, code: str, file_path: Path | None = None) -> str:
    """Apply the formatting step to *code* and return the result.

    Args:
        code: Source text to process.
        file_path: Path handed to the step; ``Path("temp.py")`` when omitted.
    """
    target = file_path if file_path else Path("temp.py")
    return self._create_formatting_step()(code, target)
|