crackerjack 0.30.3__py3-none-any.whl → 0.31.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of crackerjack might be problematic. Click here for more details.

Files changed (155) hide show
  1. crackerjack/CLAUDE.md +1005 -0
  2. crackerjack/RULES.md +380 -0
  3. crackerjack/__init__.py +42 -13
  4. crackerjack/__main__.py +225 -299
  5. crackerjack/agents/__init__.py +41 -0
  6. crackerjack/agents/architect_agent.py +281 -0
  7. crackerjack/agents/base.py +169 -0
  8. crackerjack/agents/coordinator.py +512 -0
  9. crackerjack/agents/documentation_agent.py +498 -0
  10. crackerjack/agents/dry_agent.py +388 -0
  11. crackerjack/agents/formatting_agent.py +245 -0
  12. crackerjack/agents/import_optimization_agent.py +281 -0
  13. crackerjack/agents/performance_agent.py +669 -0
  14. crackerjack/agents/proactive_agent.py +104 -0
  15. crackerjack/agents/refactoring_agent.py +788 -0
  16. crackerjack/agents/security_agent.py +529 -0
  17. crackerjack/agents/test_creation_agent.py +652 -0
  18. crackerjack/agents/test_specialist_agent.py +486 -0
  19. crackerjack/agents/tracker.py +212 -0
  20. crackerjack/api.py +560 -0
  21. crackerjack/cli/__init__.py +24 -0
  22. crackerjack/cli/facade.py +104 -0
  23. crackerjack/cli/handlers.py +267 -0
  24. crackerjack/cli/interactive.py +471 -0
  25. crackerjack/cli/options.py +401 -0
  26. crackerjack/cli/utils.py +18 -0
  27. crackerjack/code_cleaner.py +618 -928
  28. crackerjack/config/__init__.py +19 -0
  29. crackerjack/config/hooks.py +218 -0
  30. crackerjack/core/__init__.py +0 -0
  31. crackerjack/core/async_workflow_orchestrator.py +406 -0
  32. crackerjack/core/autofix_coordinator.py +200 -0
  33. crackerjack/core/container.py +104 -0
  34. crackerjack/core/enhanced_container.py +542 -0
  35. crackerjack/core/performance.py +243 -0
  36. crackerjack/core/phase_coordinator.py +561 -0
  37. crackerjack/core/proactive_workflow.py +316 -0
  38. crackerjack/core/session_coordinator.py +289 -0
  39. crackerjack/core/workflow_orchestrator.py +640 -0
  40. crackerjack/dynamic_config.py +94 -103
  41. crackerjack/errors.py +263 -41
  42. crackerjack/executors/__init__.py +11 -0
  43. crackerjack/executors/async_hook_executor.py +431 -0
  44. crackerjack/executors/cached_hook_executor.py +242 -0
  45. crackerjack/executors/hook_executor.py +345 -0
  46. crackerjack/executors/individual_hook_executor.py +669 -0
  47. crackerjack/intelligence/__init__.py +44 -0
  48. crackerjack/intelligence/adaptive_learning.py +751 -0
  49. crackerjack/intelligence/agent_orchestrator.py +551 -0
  50. crackerjack/intelligence/agent_registry.py +414 -0
  51. crackerjack/intelligence/agent_selector.py +502 -0
  52. crackerjack/intelligence/integration.py +290 -0
  53. crackerjack/interactive.py +576 -315
  54. crackerjack/managers/__init__.py +11 -0
  55. crackerjack/managers/async_hook_manager.py +135 -0
  56. crackerjack/managers/hook_manager.py +137 -0
  57. crackerjack/managers/publish_manager.py +411 -0
  58. crackerjack/managers/test_command_builder.py +151 -0
  59. crackerjack/managers/test_executor.py +435 -0
  60. crackerjack/managers/test_manager.py +258 -0
  61. crackerjack/managers/test_manager_backup.py +1124 -0
  62. crackerjack/managers/test_progress.py +144 -0
  63. crackerjack/mcp/__init__.py +0 -0
  64. crackerjack/mcp/cache.py +336 -0
  65. crackerjack/mcp/client_runner.py +104 -0
  66. crackerjack/mcp/context.py +615 -0
  67. crackerjack/mcp/dashboard.py +636 -0
  68. crackerjack/mcp/enhanced_progress_monitor.py +479 -0
  69. crackerjack/mcp/file_monitor.py +336 -0
  70. crackerjack/mcp/progress_components.py +569 -0
  71. crackerjack/mcp/progress_monitor.py +949 -0
  72. crackerjack/mcp/rate_limiter.py +332 -0
  73. crackerjack/mcp/server.py +22 -0
  74. crackerjack/mcp/server_core.py +244 -0
  75. crackerjack/mcp/service_watchdog.py +501 -0
  76. crackerjack/mcp/state.py +395 -0
  77. crackerjack/mcp/task_manager.py +257 -0
  78. crackerjack/mcp/tools/__init__.py +17 -0
  79. crackerjack/mcp/tools/core_tools.py +249 -0
  80. crackerjack/mcp/tools/error_analyzer.py +308 -0
  81. crackerjack/mcp/tools/execution_tools.py +370 -0
  82. crackerjack/mcp/tools/execution_tools_backup.py +1097 -0
  83. crackerjack/mcp/tools/intelligence_tool_registry.py +80 -0
  84. crackerjack/mcp/tools/intelligence_tools.py +314 -0
  85. crackerjack/mcp/tools/monitoring_tools.py +502 -0
  86. crackerjack/mcp/tools/proactive_tools.py +384 -0
  87. crackerjack/mcp/tools/progress_tools.py +141 -0
  88. crackerjack/mcp/tools/utility_tools.py +341 -0
  89. crackerjack/mcp/tools/workflow_executor.py +360 -0
  90. crackerjack/mcp/websocket/__init__.py +14 -0
  91. crackerjack/mcp/websocket/app.py +39 -0
  92. crackerjack/mcp/websocket/endpoints.py +559 -0
  93. crackerjack/mcp/websocket/jobs.py +253 -0
  94. crackerjack/mcp/websocket/server.py +116 -0
  95. crackerjack/mcp/websocket/websocket_handler.py +78 -0
  96. crackerjack/mcp/websocket_server.py +10 -0
  97. crackerjack/models/__init__.py +31 -0
  98. crackerjack/models/config.py +93 -0
  99. crackerjack/models/config_adapter.py +230 -0
  100. crackerjack/models/protocols.py +118 -0
  101. crackerjack/models/task.py +154 -0
  102. crackerjack/monitoring/ai_agent_watchdog.py +450 -0
  103. crackerjack/monitoring/regression_prevention.py +638 -0
  104. crackerjack/orchestration/__init__.py +0 -0
  105. crackerjack/orchestration/advanced_orchestrator.py +970 -0
  106. crackerjack/orchestration/execution_strategies.py +341 -0
  107. crackerjack/orchestration/test_progress_streamer.py +636 -0
  108. crackerjack/plugins/__init__.py +15 -0
  109. crackerjack/plugins/base.py +200 -0
  110. crackerjack/plugins/hooks.py +246 -0
  111. crackerjack/plugins/loader.py +335 -0
  112. crackerjack/plugins/managers.py +259 -0
  113. crackerjack/py313.py +8 -3
  114. crackerjack/services/__init__.py +22 -0
  115. crackerjack/services/cache.py +314 -0
  116. crackerjack/services/config.py +347 -0
  117. crackerjack/services/config_integrity.py +99 -0
  118. crackerjack/services/contextual_ai_assistant.py +516 -0
  119. crackerjack/services/coverage_ratchet.py +347 -0
  120. crackerjack/services/debug.py +736 -0
  121. crackerjack/services/dependency_monitor.py +617 -0
  122. crackerjack/services/enhanced_filesystem.py +439 -0
  123. crackerjack/services/file_hasher.py +151 -0
  124. crackerjack/services/filesystem.py +395 -0
  125. crackerjack/services/git.py +165 -0
  126. crackerjack/services/health_metrics.py +611 -0
  127. crackerjack/services/initialization.py +847 -0
  128. crackerjack/services/log_manager.py +286 -0
  129. crackerjack/services/logging.py +174 -0
  130. crackerjack/services/metrics.py +578 -0
  131. crackerjack/services/pattern_cache.py +362 -0
  132. crackerjack/services/pattern_detector.py +515 -0
  133. crackerjack/services/performance_benchmarks.py +653 -0
  134. crackerjack/services/security.py +163 -0
  135. crackerjack/services/server_manager.py +234 -0
  136. crackerjack/services/smart_scheduling.py +144 -0
  137. crackerjack/services/tool_version_service.py +61 -0
  138. crackerjack/services/unified_config.py +437 -0
  139. crackerjack/services/version_checker.py +248 -0
  140. crackerjack/slash_commands/__init__.py +14 -0
  141. crackerjack/slash_commands/init.md +122 -0
  142. crackerjack/slash_commands/run.md +163 -0
  143. crackerjack/slash_commands/status.md +127 -0
  144. crackerjack-0.31.4.dist-info/METADATA +742 -0
  145. crackerjack-0.31.4.dist-info/RECORD +148 -0
  146. crackerjack-0.31.4.dist-info/entry_points.txt +2 -0
  147. crackerjack/.gitignore +0 -34
  148. crackerjack/.libcst.codemod.yaml +0 -18
  149. crackerjack/.pdm.toml +0 -1
  150. crackerjack/crackerjack.py +0 -3805
  151. crackerjack/pyproject.toml +0 -286
  152. crackerjack-0.30.3.dist-info/METADATA +0 -1290
  153. crackerjack-0.30.3.dist-info/RECORD +0 -16
  154. {crackerjack-0.30.3.dist-info → crackerjack-0.31.4.dist-info}/WHEEL +0 -0
  155. {crackerjack-0.30.3.dist-info → crackerjack-0.31.4.dist-info}/licenses/LICENSE +0 -0
@@ -1,980 +1,670 @@
1
- import asyncio
2
- import re
3
- import subprocess
1
+ import ast
4
2
  import typing as t
5
- from concurrent.futures import ThreadPoolExecutor, as_completed
6
- from contextlib import suppress
7
- from functools import lru_cache
3
+ from dataclasses import dataclass
4
+ from enum import Enum
8
5
  from pathlib import Path
6
+ from typing import Protocol
9
7
 
10
- import aiofiles
11
- from pydantic import BaseModel
8
+ from pydantic import BaseModel, ConfigDict
12
9
  from rich.console import Console
13
10
 
14
- from .errors import ErrorCode, ExecutionError, handle_error
11
+ from .errors import ErrorCode, ExecutionError
15
12
 
16
13
 
17
- class CodeCleaner(BaseModel, arbitrary_types_allowed=True):
18
- console: Console
14
+ class CleaningStepResult(Enum):
15
+ SUCCESS = "success"
16
+ FAILED = "failed"
17
+ SKIPPED = "skipped"
19
18
 
20
- def _analyze_workload_characteristics(self, files: list[Path]) -> dict[str, t.Any]:
21
- if not files:
22
- return {
23
- "total_files": 0,
24
- "total_size": 0,
25
- "avg_file_size": 0,
26
- "complexity": "low",
27
- }
28
- total_size = 0
29
- large_files = 0
30
- for file_path in files:
31
- try:
32
- size = file_path.stat().st_size
33
- total_size += size
34
- if size > 50_000:
35
- large_files += 1
36
- except (OSError, PermissionError):
37
- continue
38
- avg_file_size = total_size / len(files) if files else 0
39
- large_file_ratio = large_files / len(files) if files else 0
40
- if len(files) > 100 or avg_file_size > 20_000 or large_file_ratio > 0.3:
41
- complexity = "high"
42
- elif len(files) > 50 or avg_file_size > 10_000 or large_file_ratio > 0.1:
43
- complexity = "medium"
44
- else:
45
- complexity = "low"
46
-
47
- return {
48
- "total_files": len(files),
49
- "total_size": total_size,
50
- "avg_file_size": avg_file_size,
51
- "large_files": large_files,
52
- "large_file_ratio": large_file_ratio,
53
- "complexity": complexity,
54
- }
55
19
 
56
- def _calculate_optimal_workers(self, workload: dict[str, t.Any]) -> int:
57
- import os
20
+ @dataclass
21
+ class CleaningResult:
22
+ file_path: Path
23
+ success: bool
24
+ steps_completed: list[str]
25
+ steps_failed: list[str]
26
+ warnings: list[str]
27
+ original_size: int
28
+ cleaned_size: int
58
29
 
59
- cpu_count = os.cpu_count() or 4
60
- if workload["complexity"] == "high":
61
- max_workers = min(cpu_count // 2, 3)
62
- elif workload["complexity"] == "medium":
63
- max_workers = min(cpu_count, 6)
64
- else:
65
- max_workers = min(cpu_count + 2, 8)
66
30
 
67
- return min(max_workers, workload["total_files"])
31
+ class FileProcessorProtocol(Protocol):
32
+ def read_file_safely(self, file_path: Path) -> str: ...
33
+ def write_file_safely(self, file_path: Path, content: str) -> None: ...
34
+ def backup_file(self, file_path: Path) -> Path: ...
68
35
 
69
- def clean_files(self, pkg_dir: Path | None) -> None:
70
- if pkg_dir is None:
71
- return
72
- python_files = [
73
- file_path
74
- for file_path in pkg_dir.rglob("*.py")
75
- if not str(file_path.parent).startswith("__")
76
- ]
77
- if not python_files:
78
- return
79
- workload = self._analyze_workload_characteristics(python_files)
80
- max_workers = self._calculate_optimal_workers(workload)
81
- if len(python_files) > 10:
82
- self.console.print(
83
- f"[dim]Cleaning {workload['total_files']} files "
84
- f"({workload['complexity']} complexity) with {max_workers} workers[/dim]"
85
- )
86
- with ThreadPoolExecutor(max_workers=max_workers) as executor:
87
- future_to_file = {
88
- executor.submit(self.clean_file, file_path): file_path
89
- for file_path in python_files
90
- }
91
- for future in as_completed(future_to_file):
92
- file_path = future_to_file[future]
36
+
37
+ class CleaningStepProtocol(Protocol):
38
+ def __call__(self, code: str, file_path: Path) -> str: ...
39
+
40
+ @property
41
+ def name(self) -> str: ...
42
+
43
+
44
+ class ErrorHandlerProtocol(Protocol):
45
+ def handle_file_error(
46
+ self,
47
+ file_path: Path,
48
+ error: Exception,
49
+ step: str,
50
+ ) -> None: ...
51
+ def log_cleaning_result(self, result: CleaningResult) -> None: ...
52
+
53
+
54
+ class FileProcessor(BaseModel):
55
+ model_config = ConfigDict(arbitrary_types_allowed=True)
56
+
57
+ console: Console
58
+ logger: t.Any = None
59
+
60
+ def model_post_init(self, _: t.Any) -> None:
61
+ if self.logger is None:
62
+ import logging
63
+
64
+ self.logger = logging.getLogger("crackerjack.code_cleaner.file_processor")
65
+
66
+ def read_file_safely(self, file_path: Path) -> str:
67
+ try:
68
+ return file_path.read_text(encoding="utf - 8")
69
+ except UnicodeDecodeError:
70
+ for encoding in ("latin1", "cp1252"):
93
71
  try:
94
- future.result()
95
- except Exception as e:
96
- self.console.print(
97
- f"[bold bright_red]❌ Error cleaning {file_path}: {e}[/bold bright_red]"
72
+ content = file_path.read_text(encoding=encoding)
73
+ self.logger.warning(
74
+ f"File {file_path} read with {encoding} encoding",
98
75
  )
99
- self._cleanup_cache_directories(pkg_dir)
100
-
101
- def _cleanup_cache_directories(self, pkg_dir: Path) -> None:
102
- with suppress(PermissionError, OSError):
103
- pycache_dir = pkg_dir / "__pycache__"
104
- if pycache_dir.exists():
105
- for cache_file in pycache_dir.iterdir():
106
- with suppress(PermissionError, OSError):
107
- cache_file.unlink()
108
- pycache_dir.rmdir()
109
- parent_pycache = pkg_dir.parent / "__pycache__"
110
- if parent_pycache.exists():
111
- for cache_file in parent_pycache.iterdir():
112
- with suppress(PermissionError, OSError):
113
- cache_file.unlink()
114
- parent_pycache.rmdir()
115
-
116
- def clean_file(self, file_path: Path) -> None:
117
- try:
118
- code = file_path.read_text(encoding="utf-8")
119
- original_code = code
120
- cleaning_failed = False
121
- try:
122
- code = self.remove_line_comments_streaming(code)
123
- except Exception as e:
124
- self.console.print(
125
- f"[bold bright_yellow]⚠️ Warning: Failed to remove line comments from {file_path}: {e}[/bold bright_yellow]"
126
- )
127
- code = original_code
128
- cleaning_failed = True
129
- try:
130
- code = self.remove_docstrings_streaming(code)
131
- except Exception as e:
132
- self.console.print(
133
- f"[bold bright_yellow]⚠️ Warning: Failed to remove docstrings from {file_path}: {e}[/bold bright_yellow]"
134
- )
135
- code = original_code
136
- cleaning_failed = True
137
- try:
138
- code = self.remove_extra_whitespace_streaming(code)
139
- except Exception as e:
140
- self.console.print(
141
- f"[bold bright_yellow]⚠️ Warning: Failed to remove extra whitespace from {file_path}: {e}[/bold bright_yellow]"
142
- )
143
- code = original_code
144
- cleaning_failed = True
145
- try:
146
- code = self.reformat_code(code)
147
- except Exception as e:
148
- self.console.print(
149
- f"[bold bright_yellow]⚠️ Warning: Failed to reformat {file_path}: {e}[/bold bright_yellow]"
150
- )
151
- code = original_code
152
- cleaning_failed = True
153
- file_path.write_text(code, encoding="utf-8")
154
- if cleaning_failed:
155
- self.console.print(
156
- f"[bold yellow]⚡ Partially cleaned:[/bold yellow] [dim bright_white]{file_path}[/dim bright_white]"
157
- )
158
- else:
159
- self.console.print(
160
- f"[bold green]✨ Cleaned:[/bold green] [dim bright_white]{file_path}[/dim bright_white]"
161
- )
162
- except PermissionError as e:
163
- self.console.print(
164
- f"[red]Failed to clean: {file_path} (Permission denied)[/red]"
165
- )
166
- handle_error(
167
- ExecutionError(
168
- message=f"Permission denied while cleaning {file_path}",
169
- error_code=ErrorCode.PERMISSION_ERROR,
170
- details=str(e),
171
- recovery=f"Check file permissions for {file_path} and ensure you have write access",
172
- ),
173
- console=self.console,
174
- exit_on_error=False,
175
- )
176
- except OSError as e:
177
- self.console.print(
178
- f"[red]Failed to clean: {file_path} (File system error)[/red]"
179
- )
180
- handle_error(
181
- ExecutionError(
182
- message=f"File system error while cleaning {file_path}",
183
- error_code=ErrorCode.FILE_WRITE_ERROR,
184
- details=str(e),
185
- recovery=f"Check that {file_path} exists and is not being used by another process",
186
- ),
187
- console=self.console,
188
- exit_on_error=False,
189
- )
190
- except UnicodeDecodeError as e:
191
- self.console.print(
192
- f"[red]Failed to clean: {file_path} (Encoding error)[/red]"
193
- )
194
- handle_error(
195
- ExecutionError(
196
- message=f"Encoding error while reading {file_path}",
197
- error_code=ErrorCode.FILE_READ_ERROR,
198
- details=str(e),
199
- recovery=f"File {file_path} contains non-UTF-8 characters. Please check the file encoding.",
200
- ),
201
- console=self.console,
202
- exit_on_error=False,
76
+ return content
77
+ except UnicodeDecodeError:
78
+ continue
79
+ raise ExecutionError(
80
+ message=f"Could not decode file {file_path}",
81
+ error_code=ErrorCode.FILE_READ_ERROR,
203
82
  )
204
83
  except Exception as e:
205
- self.console.print(
206
- f"[red]Failed to clean: {file_path} (Unexpected error)[/red]"
207
- )
208
- handle_error(
209
- ExecutionError(
210
- message=f"Unexpected error while cleaning {file_path}",
211
- error_code=ErrorCode.UNEXPECTED_ERROR,
212
- details=str(e),
213
- recovery="This is an unexpected error. Please report this issue with the file content if possible.",
214
- ),
215
- console=self.console,
216
- exit_on_error=False,
217
- )
84
+ raise ExecutionError(
85
+ message=f"Failed to read file {file_path}: {e}",
86
+ error_code=ErrorCode.FILE_READ_ERROR,
87
+ ) from e
218
88
 
219
- def _initialize_docstring_state(self) -> dict[str, t.Any]:
220
- return {
221
- "in_docstring": False,
222
- "delimiter": None,
223
- "waiting": False,
224
- "function_indent": 0,
225
- "removed_docstring": False,
226
- "in_multiline_def": False,
227
- }
89
+ def write_file_safely(self, file_path: Path, content: str) -> None:
90
+ try:
91
+ file_path.write_text(content, encoding="utf - 8")
92
+ except Exception as e:
93
+ raise ExecutionError(
94
+ message=f"Failed to write file {file_path}: {e}",
95
+ error_code=ErrorCode.FILE_WRITE_ERROR,
96
+ ) from e
228
97
 
229
- def _handle_function_definition(
230
- self, line: str, stripped: str, state: dict[str, t.Any]
231
- ) -> bool:
232
- if self._is_function_or_class_definition(stripped):
233
- state["waiting"] = True
234
- state["function_indent"] = len(line) - len(line.lstrip())
235
- state["removed_docstring"] = False
236
- state["in_multiline_def"] = not stripped.endswith(":")
237
- return True
238
- return False
239
-
240
- def _handle_multiline_definition(
241
- self, line: str, stripped: str, state: dict[str, t.Any]
242
- ) -> bool:
243
- if state["in_multiline_def"]:
244
- if stripped.endswith(":"):
245
- state["in_multiline_def"] = False
246
- return True
247
- return False
248
-
249
- def _handle_waiting_docstring(
250
- self, lines: list[str], i: int, stripped: str, state: dict[str, t.Any]
251
- ) -> tuple[bool, str | None]:
252
- if state["waiting"] and stripped:
253
- if self._handle_docstring_start(stripped, state):
254
- pass_line = None
255
- if not state["in_docstring"]:
256
- function_indent: int = state["function_indent"]
257
- if self._needs_pass_statement(lines, i + 1, function_indent):
258
- pass_line = " " * (function_indent + 4) + "pass"
259
- state["removed_docstring"] = True
260
- return True, pass_line
261
- else:
262
- state["waiting"] = False
263
- return False, None
264
-
265
- def _handle_docstring_content(
266
- self, lines: list[str], i: int, stripped: str, state: dict[str, t.Any]
267
- ) -> tuple[bool, str | None]:
268
- if state["in_docstring"]:
269
- if self._handle_docstring_end(stripped, state):
270
- pass_line = None
271
- function_indent: int = state["function_indent"]
272
- if self._needs_pass_statement(lines, i + 1, function_indent):
273
- pass_line = " " * (function_indent + 4) + "pass"
274
- state["removed_docstring"] = False
275
- return True, pass_line
276
- else:
277
- return True, None
278
- return False, None
279
-
280
- def _process_line(
281
- self, lines: list[str], i: int, line: str, state: dict[str, t.Any]
282
- ) -> tuple[bool, str | None]:
283
- stripped = line.strip()
284
- if self._handle_function_definition(line, stripped, state):
285
- return True, line
286
- if self._handle_multiline_definition(line, stripped, state):
287
- return True, line
288
- handled, pass_line = self._handle_waiting_docstring(lines, i, stripped, state)
289
- if handled:
290
- return True, pass_line
291
- handled, pass_line = self._handle_docstring_content(lines, i, stripped, state)
292
- if handled:
293
- return True, pass_line
294
- if state["removed_docstring"] and stripped:
295
- state["removed_docstring"] = False
296
- return False, line
297
-
298
- def remove_docstrings(self, code: str) -> str:
299
- lines = code.split("\n")
300
- cleaned_lines: list[str] = []
301
- docstring_state = self._initialize_docstring_state()
302
- for i, line in enumerate(lines):
303
- handled, result_line = self._process_line(lines, i, line, docstring_state)
304
- if handled:
305
- if result_line is not None:
306
- cleaned_lines.append(result_line)
307
- else:
308
- cleaned_lines.append(line)
309
- return "\n".join(cleaned_lines)
98
+ def backup_file(self, file_path: Path) -> Path:
99
+ backup_path = file_path.with_suffix(f"{file_path.suffix}.backup")
100
+ try:
101
+ backup_path.write_bytes(file_path.read_bytes())
102
+ return backup_path
103
+ except Exception as e:
104
+ raise ExecutionError(
105
+ message=f"Failed to create backup for {file_path}: {e}",
106
+ error_code=ErrorCode.FILE_WRITE_ERROR,
107
+ ) from e
310
108
 
311
- def _is_function_or_class_definition(self, stripped_line: str) -> bool:
312
- return stripped_line.startswith(("def ", "class ", "async def "))
313
109
 
314
- def _handle_docstring_start(self, stripped: str, state: dict[str, t.Any]) -> bool:
315
- if not stripped.startswith(('"""', "'''", '"', "'")):
316
- return False
317
- if stripped.startswith(('"""', "'''")):
318
- delimiter = stripped[:3]
319
- else:
320
- delimiter = stripped[0]
321
- state["delimiter"] = delimiter
322
- if self._is_single_line_docstring(stripped, delimiter):
323
- state["waiting"] = False
324
- return True
110
+ class CleaningErrorHandler(BaseModel):
111
+ model_config = ConfigDict(arbitrary_types_allowed=True)
112
+
113
+ console: Console
114
+ logger: t.Any = None
115
+
116
+ def model_post_init(self, _: t.Any) -> None:
117
+ if self.logger is None:
118
+ import logging
119
+
120
+ self.logger = logging.getLogger("crackerjack.code_cleaner.error_handler")
121
+
122
+ def handle_file_error(self, file_path: Path, error: Exception, step: str) -> None:
123
+ self.console.print(
124
+ f"[bold bright_yellow]⚠️ Warning: {step} failed for {file_path}: {error}[/bold bright_yellow]",
125
+ )
126
+
127
+ self.logger.warning(
128
+ "Cleaning step failed",
129
+ extra={
130
+ "file_path": str(file_path),
131
+ "step": step,
132
+ "error": str(error),
133
+ "error_type": type(error).__name__,
134
+ },
135
+ )
136
+
137
+ def log_cleaning_result(self, result: CleaningResult) -> None:
138
+ if result.success:
139
+ self.console.print(
140
+ f"[green]✅ Cleaned {result.file_path}[/green] "
141
+ f"({result.original_size} → {result.cleaned_size} bytes)",
142
+ )
325
143
  else:
326
- state["in_docstring"] = True
327
- state["waiting"] = False
328
- return True
329
-
330
- def _is_single_line_docstring(self, stripped: str, delimiter: str) -> bool:
331
- return stripped.endswith(delimiter) and len(stripped) > len(delimiter)
332
-
333
- def _handle_docstring_end(self, stripped: str, state: dict[str, t.Any]) -> bool:
334
- if state["delimiter"] and stripped.endswith(state["delimiter"]):
335
- state["in_docstring"] = False
336
- state["delimiter"] = None
337
- return True
338
- return False
339
-
340
- def _needs_pass_statement(
341
- self, lines: list[str], start_index: int, function_indent: int
342
- ) -> bool:
343
- for i in range(start_index, len(lines)):
344
- line = lines[i]
345
- stripped = line.strip()
346
- if not stripped:
347
- continue
348
- line_indent = len(line) - len(line.lstrip())
349
- if line_indent <= function_indent:
350
- return True
351
- if line_indent > function_indent:
352
- return False
353
- return True
354
-
355
- def remove_line_comments(self, code: str) -> str:
356
- lines = code.split("\n")
357
- cleaned_lines: list[str] = []
358
- for line in lines:
359
- if not line.strip():
360
- cleaned_lines.append(line)
361
- continue
362
- cleaned_line = self._process_line_for_comments(line)
363
- if cleaned_line or not line.strip():
364
- cleaned_lines.append(cleaned_line or line)
365
- return "\n".join(cleaned_lines)
366
-
367
- def _process_line_for_comments(self, line: str) -> str:
368
- result: list[str] = []
369
- string_state = {"in_string": None}
370
- for i, char in enumerate(line):
371
- if self._handle_string_character(char, i, line, string_state, result):
372
- continue
373
- elif self._handle_comment_character(char, i, line, string_state, result):
374
- break
375
- else:
376
- result.append(char)
377
- return "".join(result).rstrip()
144
+ self.console.print(
145
+ f"[red] Failed to clean {result.file_path}[/red] "
146
+ f"({len(result.steps_failed)} steps failed)",
147
+ )
378
148
 
379
- def _handle_string_character(
380
- self,
381
- char: str,
382
- index: int,
383
- line: str,
384
- string_state: dict[str, t.Any],
385
- result: list[str],
386
- ) -> bool:
387
- if char not in ("'", '"'):
388
- return False
389
- if index > 0 and line[index - 1] == "\\":
390
- return False
391
- if string_state["in_string"] is None:
392
- string_state["in_string"] = char
393
- elif string_state["in_string"] == char:
394
- string_state["in_string"] = None
395
- result.append(char)
396
- return True
397
-
398
- def _handle_comment_character(
399
- self,
400
- char: str,
401
- index: int,
402
- line: str,
403
- string_state: dict[str, t.Any],
404
- result: list[str],
405
- ) -> bool:
406
- if char != "#" or string_state["in_string"] is not None:
407
- return False
408
- comment = line[index:].strip()
409
- if self._is_special_comment_line(comment):
410
- result.append(line[index:])
411
- return True
412
-
413
- def _is_special_comment_line(self, comment: str) -> bool:
414
- special_comment_pattern = (
415
- r"^#\s*(?:type:\s*ignore(?:\[.*?\])?|noqa|nosec|pragma:\s*no\s*cover"
416
- r"|pylint:\s*disable|mypy:\s*ignore)"
149
+ if result.warnings:
150
+ for warning in result.warnings:
151
+ self.console.print(f"[yellow]⚠️ {warning}[/yellow]")
152
+
153
+ self.logger.info(
154
+ "File cleaning completed",
155
+ extra={
156
+ "file_path": str(result.file_path),
157
+ "success": result.success,
158
+ "steps_completed": result.steps_completed,
159
+ "steps_failed": result.steps_failed,
160
+ "original_size": result.original_size,
161
+ "cleaned_size": result.cleaned_size,
162
+ },
417
163
  )
418
- return bool(re.match(special_comment_pattern, comment))
419
-
420
- def remove_extra_whitespace(self, code: str) -> str:
421
- lines = code.split("\n")
422
- cleaned_lines: list[str] = []
423
- function_tracker = {"in_function": False, "function_indent": 0}
424
- import_tracker = {"in_imports": False, "last_import_type": None}
425
- for i, line in enumerate(lines):
426
- line = line.rstrip()
427
- stripped_line = line.lstrip()
428
- self._update_function_state(line, stripped_line, function_tracker)
429
- self._update_import_state(line, stripped_line, import_tracker)
430
- if not line:
431
- if self._should_skip_empty_line(
432
- i, lines, cleaned_lines, function_tracker, import_tracker
433
- ):
434
- continue
435
- cleaned_lines.append(line)
436
- return "\n".join(self._remove_trailing_empty_lines(cleaned_lines))
437
164
 
438
- def remove_docstrings_streaming(self, code: str) -> str:
439
- if len(code) < 10000:
440
- return self.remove_docstrings(code)
441
165
 
442
- def process_lines():
443
- lines = code.split("\n")
444
- docstring_state = self._initialize_docstring_state()
445
- for i, line in enumerate(lines):
446
- handled, result_line = self._process_line(
447
- lines, i, line, docstring_state
448
- )
449
- if handled:
450
- if result_line is not None:
451
- yield result_line
452
- else:
453
- yield line
454
-
455
- return "\n".join(process_lines())
456
-
457
- def remove_line_comments_streaming(self, code: str) -> str:
458
- if len(code) < 10000:
459
- return self.remove_line_comments(code)
460
-
461
- def process_lines():
462
- for line in code.split("\n"):
463
- if not line.strip():
464
- yield line
465
- continue
466
- cleaned_line = self._process_line_for_comments(line)
467
- if cleaned_line or not line.strip():
468
- yield cleaned_line or line
166
+ class CleaningPipeline(BaseModel):
167
+ model_config = ConfigDict(arbitrary_types_allowed=True)
469
168
 
470
- return "\n".join(process_lines())
169
+ file_processor: t.Any
170
+ error_handler: t.Any
171
+ console: Console
172
+ logger: t.Any = None
471
173
 
472
- def remove_extra_whitespace_streaming(self, code: str) -> str:
473
- if len(code) < 10000:
474
- return self.remove_extra_whitespace(code)
174
+ def model_post_init(self, _: t.Any) -> None:
175
+ if self.logger is None:
176
+ import logging
475
177
 
476
- def process_lines():
477
- lines = code.split("\n")
478
- function_tracker: dict[str, t.Any] = {
479
- "in_function": False,
480
- "function_indent": 0,
481
- }
482
- import_tracker: dict[str, t.Any] = {
483
- "in_imports": False,
484
- "last_import_type": None,
485
- }
486
- previous_lines: list[str] = []
487
- for i, line in enumerate(lines):
488
- line = line.rstrip()
489
- stripped_line = line.lstrip()
490
- self._update_function_state(line, stripped_line, function_tracker)
491
- self._update_import_state(line, stripped_line, import_tracker)
492
- if not line:
493
- if self._should_skip_empty_line(
494
- i, lines, previous_lines, function_tracker, import_tracker
495
- ):
496
- continue
497
- previous_lines.append(line)
498
- yield line
499
-
500
- processed_lines = list(process_lines())
501
- return "\n".join(self._remove_trailing_empty_lines(processed_lines))
502
-
503
- def _update_function_state(
504
- self, line: str, stripped_line: str, function_tracker: dict[str, t.Any]
505
- ) -> None:
506
- if stripped_line.startswith(("def ", "async def ")):
507
- function_tracker["in_function"] = True
508
- function_tracker["function_indent"] = len(line) - len(stripped_line)
509
- elif self._is_function_end(line, stripped_line, function_tracker):
510
- function_tracker["in_function"] = False
511
- function_tracker["function_indent"] = 0
512
-
513
- def _update_import_state(
514
- self, line: str, stripped_line: str, import_tracker: dict[str, t.Any]
515
- ) -> None:
516
- if stripped_line.startswith(("import ", "from ")):
517
- import_tracker["in_imports"] = True
518
- if self._is_stdlib_import(stripped_line):
519
- current_type = "stdlib"
520
- elif self._is_local_import(stripped_line):
521
- current_type = "local"
522
- else:
523
- current_type = "third_party"
524
- import_tracker["last_import_type"] = current_type
525
- elif stripped_line and not stripped_line.startswith("#"):
526
- import_tracker["in_imports"] = False
527
- import_tracker["last_import_type"] = None
528
-
529
- @staticmethod
530
- @lru_cache(maxsize=256)
531
- def _is_stdlib_module(module: str) -> bool:
532
- stdlib_modules = {
533
- "os",
534
- "sys",
535
- "re",
536
- "json",
537
- "datetime",
538
- "time",
539
- "pathlib",
540
- "typing",
541
- "collections",
542
- "itertools",
543
- "functools",
544
- "operator",
545
- "math",
546
- "random",
547
- "uuid",
548
- "urllib",
549
- "http",
550
- "html",
551
- "xml",
552
- "email",
553
- "csv",
554
- "sqlite3",
555
- "subprocess",
556
- "threading",
557
- "multiprocessing",
558
- "asyncio",
559
- "contextlib",
560
- "dataclasses",
561
- "enum",
562
- "abc",
563
- "io",
564
- "tempfile",
565
- "shutil",
566
- "glob",
567
- "pickle",
568
- "copy",
569
- "heapq",
570
- "bisect",
571
- "array",
572
- "struct",
573
- "zlib",
574
- "hashlib",
575
- "hmac",
576
- "secrets",
577
- "base64",
578
- "binascii",
579
- "codecs",
580
- "locale",
581
- "platform",
582
- "socket",
583
- "ssl",
584
- "ipaddress",
585
- "logging",
586
- "warnings",
587
- "inspect",
588
- "ast",
589
- "dis",
590
- "tokenize",
591
- "keyword",
592
- "linecache",
593
- "traceback",
594
- "weakref",
595
- "gc",
596
- "ctypes",
597
- "unittest",
598
- "doctest",
599
- "pdb",
600
- "profile",
601
- "cProfile",
602
- "timeit",
603
- "trace",
604
- "calendar",
605
- "decimal",
606
- "fractions",
607
- "statistics",
608
- "tomllib",
609
- }
610
- return module in stdlib_modules
178
+ self.logger = logging.getLogger("crackerjack.code_cleaner.pipeline")
611
179
 
612
- def _is_stdlib_import(self, stripped_line: str) -> bool:
180
+ def clean_file(
181
+ self,
182
+ file_path: Path,
183
+ cleaning_steps: list[CleaningStepProtocol],
184
+ ) -> CleaningResult:
185
+ self.logger.info(f"Starting clean_file for {file_path}")
613
186
  try:
614
- if stripped_line.startswith("from "):
615
- module = stripped_line.split()[1].split(".")[0]
187
+ original_code = self.file_processor.read_file_safely(file_path)
188
+ original_size = len(original_code.encode("utf - 8"))
189
+
190
+ result = self._apply_cleaning_pipeline(
191
+ original_code,
192
+ file_path,
193
+ cleaning_steps,
194
+ )
195
+
196
+ if result.success and result.cleaned_code != original_code:
197
+ self.file_processor.write_file_safely(file_path, result.cleaned_code)
198
+ cleaned_size = len(result.cleaned_code.encode("utf - 8"))
616
199
  else:
617
- module = stripped_line.split()[1].split(".")[0]
618
- except IndexError:
619
- return False
620
- return CodeCleaner._is_stdlib_module(module)
621
-
622
- def _is_local_import(self, stripped_line: str) -> bool:
623
- return stripped_line.startswith("from .") or " . " in stripped_line
624
-
625
- def _is_function_end(
626
- self, line: str, stripped_line: str, function_tracker: dict[str, t.Any]
627
- ) -> bool:
628
- return (
629
- function_tracker["in_function"]
630
- and bool(line)
631
- and (len(line) - len(stripped_line) <= function_tracker["function_indent"])
632
- and (not stripped_line.startswith(("@", "#")))
633
- )
200
+ cleaned_size = original_size
201
+
202
+ cleaning_result = CleaningResult(
203
+ file_path=file_path,
204
+ success=result.success,
205
+ steps_completed=result.steps_completed,
206
+ steps_failed=result.steps_failed,
207
+ warnings=result.warnings,
208
+ original_size=original_size,
209
+ cleaned_size=cleaned_size,
210
+ )
634
211
 
635
- def _should_skip_empty_line(
636
- self,
637
- line_idx: int,
638
- lines: list[str],
639
- cleaned_lines: list[str],
640
- function_tracker: dict[str, t.Any],
641
- import_tracker: dict[str, t.Any],
642
- ) -> bool:
643
- if line_idx > 0 and cleaned_lines and (not cleaned_lines[-1]):
644
- return True
645
-
646
- if self._is_import_section_separator(line_idx, lines, import_tracker):
647
- return False
648
-
649
- if function_tracker["in_function"]:
650
- return self._should_skip_function_empty_line(line_idx, lines)
651
- return False
652
-
653
- def _is_import_section_separator(
654
- self, line_idx: int, lines: list[str], import_tracker: dict[str, t.Any]
655
- ) -> bool:
656
- if not import_tracker["in_imports"]:
657
- return False
658
-
659
- next_line_idx = line_idx + 1
660
- while next_line_idx < len(lines) and not lines[next_line_idx].strip():
661
- next_line_idx += 1
662
-
663
- if next_line_idx >= len(lines):
664
- return False
665
-
666
- next_line = lines[next_line_idx].strip()
667
- if not next_line.startswith(("import ", "from ")):
668
- return False
669
-
670
- if self._is_stdlib_import(next_line):
671
- next_type = "stdlib"
672
- elif self._is_local_import(next_line):
673
- next_type = "local"
674
- else:
675
- next_type = "third_party"
676
-
677
- return import_tracker["last_import_type"] != next_type
678
-
679
- def _should_skip_function_empty_line(self, line_idx: int, lines: list[str]) -> bool:
680
- next_line_idx = line_idx + 1
681
- if next_line_idx >= len(lines):
682
- return False
683
- next_line = lines[next_line_idx].strip()
684
- return not self._is_significant_next_line(next_line)
685
-
686
- def _is_significant_next_line(self, next_line: str) -> bool:
687
- if next_line.startswith(("return", "class ", "def ", "async def ", "@")):
688
- return True
689
- if next_line in ("pass", "break", "continue", "raise"):
690
- return True
691
- return self._is_special_comment(next_line)
692
-
693
- def _is_special_comment(self, line: str) -> bool:
694
- if not line.startswith("#"):
695
- return False
696
- special_patterns = ("type:", "noqa", "nosec", "pragma:", "pylint:", "mypy:")
697
- return any(pattern in line for pattern in special_patterns)
698
-
699
- def _remove_trailing_empty_lines(self, lines: list[str]) -> list[str]:
700
- while lines and (not lines[-1]):
701
- lines.pop()
702
- return lines
703
-
704
- def reformat_code(self, code: str) -> str:
705
- try:
706
- import tempfile
212
+ self.error_handler.log_cleaning_result(cleaning_result)
213
+ return cleaning_result
214
+
215
+ except Exception as e:
216
+ self.error_handler.handle_file_error(file_path, e, "file_processing")
217
+ return CleaningResult(
218
+ file_path=file_path,
219
+ success=False,
220
+ steps_completed=[],
221
+ steps_failed=["file_processing"],
222
+ warnings=[],
223
+ original_size=0,
224
+ cleaned_size=0,
225
+ )
707
226
 
708
- with tempfile.NamedTemporaryFile(
709
- suffix=".py", mode="w+", delete=False
710
- ) as temp:
711
- temp_path = Path(temp.name)
712
- temp_path.write_text(code)
227
+ @dataclass
228
+ class PipelineResult:
229
+ cleaned_code: str
230
+ success: bool
231
+ steps_completed: list[str]
232
+ steps_failed: list[str]
233
+ warnings: list[str]
234
+
235
+ def _apply_cleaning_pipeline(
236
+ self,
237
+ code: str,
238
+ file_path: Path,
239
+ cleaning_steps: list[CleaningStepProtocol],
240
+ ) -> PipelineResult:
241
+ current_code = code
242
+ steps_completed: list[str] = []
243
+ steps_failed: list[str] = []
244
+ warnings: list[str] = []
245
+ overall_success = True
246
+
247
+ for step in cleaning_steps:
713
248
  try:
714
- result = subprocess.run(
715
- ["uv", "run", "ruff", "format", str(temp_path)],
716
- check=False,
717
- capture_output=True,
718
- text=True,
249
+ step_result = step(current_code, file_path)
250
+ current_code = step_result
251
+ steps_completed.append(step.name)
252
+
253
+ self.logger.debug(
254
+ "Cleaning step completed",
255
+ extra={"step": step.name, "file_path": str(file_path)},
719
256
  )
720
- if result.returncode == 0:
721
- formatted_code = temp_path.read_text()
722
- else:
723
- self.console.print(
724
- f"[bold bright_yellow]⚠️ Ruff formatting failed: {result.stderr}[/bold bright_yellow]"
725
- )
726
- handle_error(
727
- ExecutionError(
728
- message="Code formatting failed",
729
- error_code=ErrorCode.FORMATTING_ERROR,
730
- details=result.stderr,
731
- recovery="Check Ruff configuration and formatting rules",
732
- ),
733
- console=self.console,
734
- exit_on_error=False,
735
- )
736
- formatted_code = code
257
+
737
258
  except Exception as e:
738
- self.console.print(
739
- f"[bold bright_red]❌ Error running Ruff: {e}[/bold bright_red]"
740
- )
741
- handle_error(
742
- ExecutionError(
743
- message="Error running Ruff",
744
- error_code=ErrorCode.FORMATTING_ERROR,
745
- details=str(e),
746
- recovery="Verify Ruff is installed and configured correctly",
747
- ),
748
- console=self.console,
749
- exit_on_error=False,
259
+ self.error_handler.handle_file_error(file_path, e, step.name)
260
+ steps_failed.append(step.name)
261
+ warnings.append(f"{step.name} failed: {e}")
262
+
263
+ self.logger.warning(
264
+ "Cleaning step failed, continuing with original code",
265
+ extra={
266
+ "step": step.name,
267
+ "file_path": str(file_path),
268
+ "error": str(e),
269
+ },
750
270
  )
751
- formatted_code = code
752
- finally:
753
- with suppress(FileNotFoundError):
754
- temp_path.unlink()
755
- return formatted_code
756
- except Exception as e:
757
- self.console.print(
758
- f"[bold bright_red]❌ Error during reformatting: {e}[/bold bright_red]"
271
+
272
+ if steps_failed:
273
+ success_ratio = len(steps_completed) / (
274
+ len(steps_completed) + len(steps_failed)
759
275
  )
760
- handle_error(
761
- ExecutionError(
762
- message="Error during reformatting",
763
- error_code=ErrorCode.FORMATTING_ERROR,
764
- details=str(e),
765
- recovery="Check file permissions and disk space",
766
- ),
276
+ overall_success = success_ratio >= 0.7
277
+
278
+ return self.PipelineResult(
279
+ cleaned_code=current_code,
280
+ success=overall_success,
281
+ steps_completed=steps_completed,
282
+ steps_failed=steps_failed,
283
+ warnings=warnings,
284
+ )
285
+
286
+
287
+ class CodeCleaner(BaseModel):
288
+ model_config = ConfigDict(arbitrary_types_allowed=True)
289
+
290
+ console: Console
291
+ file_processor: t.Any = None
292
+ error_handler: t.Any = None
293
+ pipeline: t.Any = None
294
+ logger: t.Any = None
295
+
296
+ def model_post_init(self, _: t.Any) -> None:
297
+ if self.logger is None:
298
+ import logging
299
+
300
+ self.logger = logging.getLogger("crackerjack.code_cleaner")
301
+
302
+ if self.file_processor is None:
303
+ self.file_processor = FileProcessor(console=self.console)
304
+
305
+ if self.error_handler is None:
306
+ self.error_handler = CleaningErrorHandler(console=self.console)
307
+
308
+ if self.pipeline is None:
309
+ self.pipeline = CleaningPipeline(
310
+ file_processor=self.file_processor,
311
+ error_handler=self.error_handler,
767
312
  console=self.console,
768
313
  )
769
- return code
770
314
 
771
- async def clean_files_async(self, pkg_dir: Path | None) -> None:
772
- if pkg_dir is None:
773
- return
774
- python_files = [
775
- file_path
776
- for file_path in pkg_dir.rglob("*.py")
777
- if not str(file_path.parent).startswith("__")
315
+ def clean_file(self, file_path: Path) -> CleaningResult:
316
+ cleaning_steps = [
317
+ self._create_line_comment_step(),
318
+ self._create_docstring_step(),
319
+ self._create_whitespace_step(),
320
+ self._create_formatting_step(),
778
321
  ]
779
- if not python_files:
780
- return
781
- max_concurrent = min(len(python_files), 8)
782
- semaphore = asyncio.Semaphore(max_concurrent)
783
322
 
784
- async def clean_with_semaphore(file_path: Path) -> None:
785
- async with semaphore:
786
- await self.clean_file_async(file_path)
323
+ return self.pipeline.clean_file(file_path, cleaning_steps)
787
324
 
788
- tasks = [clean_with_semaphore(file_path) for file_path in python_files]
789
- await asyncio.gather(*tasks, return_exceptions=True)
325
+ def clean_files(self, pkg_dir: Path | None = None) -> list[CleaningResult]:
326
+ if pkg_dir is None:
327
+ pkg_dir = Path.cwd()
328
+
329
+ python_files = list(pkg_dir.rglob(" * .py"))
330
+ results: list[CleaningResult] = []
331
+
332
+ self.logger.info(f"Starting clean_files for {len(python_files)} files")
333
+ for file_path in python_files:
334
+ if self.should_process_file(file_path):
335
+ result = self.clean_file(file_path)
336
+ results.append(result)
337
+
338
+ return results
339
+
340
+ def should_process_file(self, file_path: Path) -> bool:
341
+ ignore_patterns = {
342
+ "__pycache__",
343
+ ".git",
344
+ ".venv",
345
+ "site - packages",
346
+ ".pytest_cache",
347
+ "build",
348
+ "dist",
349
+ }
790
350
 
791
- await self._cleanup_cache_directories_async(pkg_dir)
351
+ for parent in file_path.parents:
352
+ if parent.name in ignore_patterns:
353
+ return False
792
354
 
793
- async def clean_file_async(self, file_path: Path) -> None:
794
- try:
795
- async with aiofiles.open(file_path, encoding="utf-8") as f: # type: ignore[misc]
796
- code: str = await f.read() # type: ignore[misc]
797
- original_code: str = code
798
- cleaning_failed = False
799
- try:
800
- code = self.remove_line_comments_streaming(code)
801
- except Exception as e:
802
- self.console.print(
803
- f"[bold bright_yellow]⚠️ Warning: Failed to remove line comments from {file_path}: {e}[/bold bright_yellow]"
804
- )
805
- code = original_code
806
- cleaning_failed = True
807
- try:
808
- code = self.remove_docstrings_streaming(code)
809
- except Exception as e:
810
- self.console.print(
811
- f"[bold bright_yellow]⚠️ Warning: Failed to remove docstrings from {file_path}: {e}[/bold bright_yellow]"
812
- )
813
- code = original_code
814
- cleaning_failed = True
815
- try:
816
- code = self.remove_extra_whitespace_streaming(code)
817
- except Exception as e:
818
- self.console.print(
819
- f"[bold bright_yellow]⚠️ Warning: Failed to remove extra whitespace from {file_path}: {e}[/bold bright_yellow]"
820
- )
821
- code = original_code
822
- cleaning_failed = True
823
- try:
824
- code = await self.reformat_code_async(code)
825
- except Exception as e:
826
- self.console.print(
827
- f"[bold bright_yellow]⚠️ Warning: Failed to reformat {file_path}: {e}[/bold bright_yellow]"
828
- )
829
- code = original_code
830
- cleaning_failed = True
831
- async with aiofiles.open(file_path, "w", encoding="utf-8") as f: # type: ignore[misc]
832
- await f.write(code) # type: ignore[misc]
833
- if cleaning_failed:
834
- self.console.print(
835
- f"[bold yellow]⚡ Partially cleaned:[/bold yellow] [dim bright_white]{file_path}[/dim bright_white]"
836
- )
837
- else:
838
- self.console.print(
839
- f"[bold green]✨ Cleaned:[/bold green] [dim bright_white]{file_path}[/dim bright_white]"
840
- )
841
- except PermissionError as e:
842
- self.console.print(
843
- f"[red]Failed to clean: {file_path} (Permission denied)[/red]"
844
- )
845
- handle_error(
846
- ExecutionError(
847
- message=f"Permission denied while cleaning {file_path}",
848
- error_code=ErrorCode.PERMISSION_ERROR,
849
- details=str(e),
850
- recovery=f"Check file permissions for {file_path} and ensure you have write access",
851
- ),
852
- console=self.console,
853
- exit_on_error=False,
854
- )
855
- except OSError as e:
856
- self.console.print(
857
- f"[red]Failed to clean: {file_path} (File system error)[/red]"
858
- )
859
- handle_error(
860
- ExecutionError(
861
- message=f"File system error while cleaning {file_path}",
862
- error_code=ErrorCode.FILE_WRITE_ERROR,
863
- details=str(e),
864
- recovery=f"Check that {file_path} exists and is not being used by another process",
865
- ),
866
- console=self.console,
867
- exit_on_error=False,
868
- )
869
- except UnicodeDecodeError as e:
870
- self.console.print(
871
- f"[red]Failed to clean: {file_path} (Encoding error)[/red]"
872
- )
873
- handle_error(
874
- ExecutionError(
875
- message=f"Encoding error while cleaning {file_path}",
876
- error_code=ErrorCode.FILE_READ_ERROR,
877
- details=str(e),
878
- recovery=f"Check the file encoding of {file_path} - it may not be UTF-8",
879
- ),
880
- console=self.console,
881
- exit_on_error=False,
882
- )
883
- except Exception as e:
884
- self.console.print(f"[red]Unexpected error cleaning {file_path}: {e}[/red]")
885
- handle_error(
886
- ExecutionError(
887
- message=f"Unexpected error while cleaning {file_path}",
888
- error_code=ErrorCode.UNEXPECTED_ERROR,
889
- details=str(e),
890
- recovery="Please report this issue with the full error details",
891
- ),
892
- console=self.console,
893
- exit_on_error=False,
355
+ return not (file_path.name.startswith(".") or file_path.suffix != ".py")
356
+
357
+ def _create_line_comment_step(self) -> CleaningStepProtocol:
358
+ """Create a step for removing line comments while preserving special comments."""
359
+ return self._LineCommentStep()
360
+
361
+ def _create_docstring_step(self) -> CleaningStepProtocol:
362
+ """Create a step for removing docstrings."""
363
+ return self._DocstringStep()
364
+
365
+ class _DocstringStep:
366
+ """Step implementation for removing docstrings."""
367
+
368
+ name = "remove_docstrings"
369
+
370
+ def _is_docstring_node(self, node: ast.AST) -> bool:
371
+ body = getattr(node, "body", None)
372
+ return (
373
+ hasattr(node, "body")
374
+ and body is not None
375
+ and len(body) > 0
376
+ and isinstance(body[0], ast.Expr)
377
+ and isinstance(body[0].value, ast.Constant)
378
+ and isinstance(body[0].value.value, str)
894
379
  )
895
380
 
896
- async def reformat_code_async(self, code: str) -> str:
897
- try:
898
- import tempfile
899
-
900
- with tempfile.NamedTemporaryFile(
901
- suffix=".py", mode="w+", delete=False
902
- ) as temp:
903
- temp_path = Path(temp.name)
904
- async with aiofiles.open(temp_path, "w", encoding="utf-8") as f: # type: ignore[misc]
905
- await f.write(code) # type: ignore[misc]
381
+ def _find_docstrings(self, tree: ast.AST) -> list[ast.AST]:
382
+ docstring_nodes: list[ast.AST] = []
383
+ finder = self._DocstringFinder(docstring_nodes, self._is_docstring_node)
384
+ finder.visit(tree)
385
+ return docstring_nodes
386
+
387
+ class _DocstringFinder(ast.NodeVisitor):
388
+ def __init__(
389
+ self,
390
+ docstring_nodes: list[ast.AST],
391
+ is_docstring_node: t.Callable[[ast.AST], bool],
392
+ ):
393
+ self.docstring_nodes = docstring_nodes
394
+ self.is_docstring_node = is_docstring_node
395
+
396
+ def _add_if_docstring(self, node: ast.AST) -> None:
397
+ if self.is_docstring_node(node) and hasattr(node, "body"):
398
+ body: list[ast.stmt] = getattr(node, "body")
399
+ self.docstring_nodes.append(body[0])
400
+ self.generic_visit(node)
401
+
402
+ def visit_Module(self, node: ast.Module) -> None:
403
+ self._add_if_docstring(node)
404
+
405
+ def visit_FunctionDef(self, node: ast.FunctionDef) -> None:
406
+ self._add_if_docstring(node)
407
+
408
+ def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> None:
409
+ self._add_if_docstring(node)
410
+
411
+ def visit_ClassDef(self, node: ast.ClassDef) -> None:
412
+ self._add_if_docstring(node)
413
+
414
+ def __call__(self, code: str, file_path: Path) -> str:
906
415
  try:
907
- proc = await asyncio.create_subprocess_exec(
908
- "uv",
909
- "run",
910
- "ruff",
911
- "format",
912
- str(temp_path),
913
- stdout=asyncio.subprocess.PIPE,
914
- stderr=asyncio.subprocess.PIPE,
915
- )
916
- _, stderr = await proc.communicate()
917
- if proc.returncode == 0:
918
- async with aiofiles.open(temp_path, encoding="utf-8") as f: # type: ignore[misc]
919
- formatted_code = await f.read() # type: ignore[misc]
920
- else:
921
- self.console.print(
922
- f"[bold bright_yellow]⚠️ Warning: Ruff format failed with return code {proc.returncode}[/bold bright_yellow]"
923
- )
924
- if stderr:
925
- self.console.print(f"[dim]Ruff stderr: {stderr.decode()}[/dim]")
926
- formatted_code = code
927
- except Exception as e:
928
- self.console.print(
929
- f"[bold bright_red]❌ Error running Ruff: {e}[/bold bright_red]"
930
- )
931
- handle_error(
932
- ExecutionError(
933
- message="Error running Ruff",
934
- error_code=ErrorCode.FORMATTING_ERROR,
935
- details=str(e),
936
- recovery="Verify Ruff is installed and configured correctly",
937
- ),
938
- console=self.console,
939
- exit_on_error=False,
940
- )
941
- formatted_code = code
942
- finally:
943
- with suppress(FileNotFoundError):
944
- temp_path.unlink()
416
+ tree = ast.parse(code, filename=str(file_path))
417
+ except SyntaxError:
418
+ return self._regex_fallback_removal(code)
945
419
 
946
- return formatted_code
947
- except Exception as e:
948
- self.console.print(
949
- f"[bold bright_red]❌ Error during reformatting: {e}[/bold bright_red]"
420
+ docstring_nodes = self._find_docstrings(tree)
421
+
422
+ if not docstring_nodes:
423
+ return code
424
+
425
+ lines = code.split("\n")
426
+ lines_to_remove: set[int] = set()
427
+
428
+ for node in docstring_nodes:
429
+ # Most AST nodes have lineno and end_lineno attributes
430
+ start_line = getattr(node, "lineno", 1)
431
+ end_line = getattr(node, "end_lineno", start_line + 1)
432
+ lines_to_remove.update(range(start_line, end_line))
433
+
434
+ result_lines = [
435
+ line for i, line in enumerate(lines, 1) if i not in lines_to_remove
436
+ ]
437
+
438
+ result = "\n".join(result_lines)
439
+ return self._regex_fallback_removal(result)
440
+
441
+ def _regex_fallback_removal(self, code: str) -> str:
442
+ import re
443
+
444
+ patterns = [
445
+ r'^\s*""".*?"""\s*$',
446
+ r"^\s*'''.*?'''\s*$",
447
+ r'^\s*""".*?"""\s*$',
448
+ r"^\s*'''.*?'''\s*$",
449
+ ]
450
+ result = code
451
+ for pattern in patterns:
452
+ result = re.sub(pattern, "", result, flags=re.MULTILINE | re.DOTALL)
453
+ return result
454
+
455
+ class _LineCommentStep:
456
+ """Step implementation for removing line comments."""
457
+
458
+ name = "remove_line_comments"
459
+
460
+ def __call__(self, code: str, file_path: Path) -> str:
461
+ lines = code.split("\n")
462
+ # Performance: Use list comprehension instead of generator for small-to-medium files
463
+ processed_lines = [self._process_line_for_comments(line) for line in lines]
464
+ return "\n".join(processed_lines)
465
+
466
+ def _process_line_for_comments(self, line: str) -> str:
467
+ """Process a single line to remove comments while preserving strings."""
468
+ if not line.strip() or self._is_preserved_comment_line(line):
469
+ return line
470
+ return self._remove_comment_from_line(line)
471
+
472
+ def _is_preserved_comment_line(self, line: str) -> bool:
473
+ """Check if this comment line should be preserved."""
474
+ stripped = line.strip()
475
+ if not stripped.startswith("#"):
476
+ return False
477
+ return self._has_preserved_pattern(stripped)
478
+
479
+ def _has_preserved_pattern(self, stripped_line: str) -> bool:
480
+ """Check if line contains preserved comment patterns."""
481
+ preserved_patterns = ["coding: ", "encoding: ", "type: ", "noqa", "pragma"]
482
+ return stripped_line.startswith("# !/ ") or any(
483
+ pattern in stripped_line for pattern in preserved_patterns
950
484
  )
951
- handle_error(
952
- ExecutionError(
953
- message="Error during reformatting",
954
- error_code=ErrorCode.FORMATTING_ERROR,
955
- details=str(e),
956
- recovery="Check file permissions and disk space",
957
- ),
958
- console=self.console,
959
- exit_on_error=False,
485
+
486
+ def _remove_comment_from_line(self, line: str) -> str:
487
+ """Remove comments from a line while preserving string literals."""
488
+ result: list[str] = []
489
+ string_state: dict[str, t.Any] = {"in_string": False, "quote_char": None}
490
+ for i, char in enumerate(line):
491
+ if self._should_break_at_comment(char, string_state):
492
+ break
493
+ self._update_string_state(char, i, line, string_state)
494
+ result.append(char)
495
+ return "".join(result).rstrip()
496
+
497
+ def _should_break_at_comment(self, char: str, state: dict[str, t.Any]) -> bool:
498
+ """Check if we should break at a comment character."""
499
+ return not state["in_string"] and char == "#"
500
+
501
+ def _update_string_state(
502
+ self,
503
+ char: str,
504
+ index: int,
505
+ line: str,
506
+ state: dict[str, t.Any],
507
+ ) -> None:
508
+ """Update string parsing state based on current character."""
509
+ if self._is_string_start(char, state):
510
+ state["in_string"], state["quote_char"] = True, char
511
+ elif self._is_string_end(char, index, line, state):
512
+ state["in_string"], state["quote_char"] = False, None
513
+
514
+ def _is_string_start(self, char: str, state: dict[str, t.Any]) -> bool:
515
+ """Check if character starts a string."""
516
+ return not state["in_string"] and char in ('"', "'")
517
+
518
+ def _is_string_end(
519
+ self,
520
+ char: str,
521
+ index: int,
522
+ line: str,
523
+ state: dict[str, t.Any],
524
+ ) -> bool:
525
+ """Check if character ends a string."""
526
+ return (
527
+ state["in_string"]
528
+ and char == state["quote_char"]
529
+ and (index == 0 or line[index - 1] != "\\")
960
530
  )
961
- return code
962
-
963
- async def _cleanup_cache_directories_async(self, pkg_dir: Path) -> None:
964
- def cleanup_sync() -> None:
965
- with suppress(PermissionError, OSError):
966
- pycache_dir = pkg_dir / "__pycache__"
967
- if pycache_dir.exists():
968
- for cache_file in pycache_dir.iterdir():
969
- with suppress(PermissionError, OSError):
970
- cache_file.unlink()
971
- pycache_dir.rmdir()
972
- parent_pycache = pkg_dir.parent / "__pycache__"
973
- if parent_pycache.exists():
974
- for cache_file in parent_pycache.iterdir():
975
- with suppress(PermissionError, OSError):
976
- cache_file.unlink()
977
- parent_pycache.rmdir()
978
-
979
- loop = asyncio.get_event_loop()
980
- await loop.run_in_executor(None, cleanup_sync)
531
+
532
+ def _create_docstring_finder_class(
533
+ self,
534
+ docstring_nodes: list[ast.AST],
535
+ ) -> type[ast.NodeVisitor]:
536
+ class DocstringFinder(ast.NodeVisitor):
537
+ def _is_docstring_node(self, node: ast.AST) -> bool:
538
+ body = getattr(node, "body", None)
539
+ return (
540
+ hasattr(node, "body")
541
+ and body is not None
542
+ and len(body) > 0
543
+ and isinstance(body[0], ast.Expr)
544
+ and isinstance(body[0].value, ast.Constant)
545
+ and isinstance(body[0].value.value, str)
546
+ )
547
+
548
+ def _add_if_docstring(self, node: ast.AST) -> None:
549
+ if self._is_docstring_node(node) and hasattr(node, "body"):
550
+ body: list[ast.stmt] = getattr(node, "body")
551
+ docstring_nodes.append(body[0])
552
+ self.generic_visit(node)
553
+
554
+ def visit_Module(self, node: ast.Module) -> None:
555
+ self._add_if_docstring(node)
556
+
557
+ def visit_FunctionDef(self, node: ast.FunctionDef) -> None:
558
+ self._add_if_docstring(node)
559
+
560
+ def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> None:
561
+ self._add_if_docstring(node)
562
+
563
+ def visit_ClassDef(self, node: ast.ClassDef) -> None:
564
+ self._add_if_docstring(node)
565
+
566
+ return DocstringFinder
567
+
568
+ def _create_whitespace_step(self) -> CleaningStepProtocol:
569
+ class WhitespaceStep:
570
+ name = "remove_extra_whitespace"
571
+
572
+ def __call__(self, code: str, file_path: Path) -> str:
573
+ import re
574
+
575
+ lines = code.split("\n")
576
+ cleaned_lines: list[str] = []
577
+
578
+ empty_line_count = 0
579
+
580
+ for line in lines:
581
+ cleaned_line = line.rstrip()
582
+
583
+ if not cleaned_line.strip():
584
+ empty_line_count += 1
585
+ if empty_line_count <= 2:
586
+ cleaned_lines.append("")
587
+ else:
588
+ empty_line_count = 0
589
+
590
+ leading_whitespace = len(cleaned_line) - len(
591
+ cleaned_line.lstrip(),
592
+ )
593
+ content = cleaned_line.lstrip()
594
+
595
+ content = re.sub(r" {2, }", " ", content)
596
+
597
+ cleaned_line = cleaned_line[:leading_whitespace] + content
598
+ cleaned_lines.append(cleaned_line)
599
+
600
+ while cleaned_lines and not cleaned_lines[-1].strip():
601
+ cleaned_lines.pop()
602
+
603
+ result = "\n".join(cleaned_lines)
604
+ if result and not result.endswith("\n"):
605
+ result += "\n"
606
+
607
+ return result
608
+
609
+ return WhitespaceStep()
610
+
611
+ def _create_formatting_step(self) -> CleaningStepProtocol:
612
+ class FormattingStep:
613
+ name = "format_code"
614
+
615
+ def __call__(self, code: str, file_path: Path) -> str:
616
+ import re
617
+
618
+ lines = code.split("\n")
619
+ formatted_lines: list[str] = []
620
+
621
+ for line in lines:
622
+ if line.strip():
623
+ leading_whitespace = len(line) - len(line.lstrip())
624
+ content = line.lstrip()
625
+
626
+ content = re.sub(
627
+ r"([ =+ \ -*/%<>!&|^ ])([ ^ =+ \ -*/%<>!&|^ ])",
628
+ r"\1 \2",
629
+ content,
630
+ )
631
+ content = re.sub(
632
+ r"([ ^ =+ \ -*/%<>!&|^ ])([ =+ \ -*/%<>!&|^ ])",
633
+ r"\1 \2",
634
+ content,
635
+ )
636
+
637
+ content = re.sub(r", ([ ^ \n])", r", \1", content)
638
+
639
+ content = re.sub(r": ([ ^ \n: ])", r": \1", content)
640
+
641
+ content = re.sub(r" {2, }", " ", content)
642
+
643
+ formatted_line = line[:leading_whitespace] + content
644
+ formatted_lines.append(formatted_line)
645
+ else:
646
+ formatted_lines.append(line)
647
+
648
+ return "\n".join(formatted_lines)
649
+
650
+ return FormattingStep()
651
+
652
+ def remove_line_comments(self, code: str, file_path: Path | None = None) -> str:
653
+ file_path = file_path or Path("temp.py")
654
+ step = self._create_line_comment_step()
655
+ return step(code, file_path)
656
+
657
+ def remove_docstrings(self, code: str, file_path: Path | None = None) -> str:
658
+ file_path = file_path or Path("temp.py")
659
+ step = self._create_docstring_step()
660
+ return step(code, file_path)
661
+
662
+ def remove_extra_whitespace(self, code: str, file_path: Path | None = None) -> str:
663
+ file_path = file_path or Path("temp.py")
664
+ step = self._create_whitespace_step()
665
+ return step(code, file_path)
666
+
667
+ def format_code(self, code: str, file_path: Path | None = None) -> str:
668
+ file_path = file_path or Path("temp.py")
669
+ step = self._create_formatting_step()
670
+ return step(code, file_path)