crackerjack-0.31.10-py3-none-any.whl → crackerjack-0.31.12-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of crackerjack might be problematic. Click here for more details.

Files changed (155)
  1. crackerjack/CLAUDE.md +288 -705
  2. crackerjack/__main__.py +22 -8
  3. crackerjack/agents/__init__.py +0 -3
  4. crackerjack/agents/architect_agent.py +0 -43
  5. crackerjack/agents/base.py +1 -9
  6. crackerjack/agents/coordinator.py +2 -148
  7. crackerjack/agents/documentation_agent.py +109 -81
  8. crackerjack/agents/dry_agent.py +122 -97
  9. crackerjack/agents/formatting_agent.py +3 -16
  10. crackerjack/agents/import_optimization_agent.py +1174 -130
  11. crackerjack/agents/performance_agent.py +956 -188
  12. crackerjack/agents/performance_helpers.py +229 -0
  13. crackerjack/agents/proactive_agent.py +1 -48
  14. crackerjack/agents/refactoring_agent.py +516 -246
  15. crackerjack/agents/refactoring_helpers.py +282 -0
  16. crackerjack/agents/security_agent.py +393 -90
  17. crackerjack/agents/test_creation_agent.py +1776 -120
  18. crackerjack/agents/test_specialist_agent.py +59 -15
  19. crackerjack/agents/tracker.py +0 -102
  20. crackerjack/api.py +145 -37
  21. crackerjack/cli/handlers.py +48 -30
  22. crackerjack/cli/interactive.py +11 -11
  23. crackerjack/cli/options.py +66 -4
  24. crackerjack/code_cleaner.py +808 -148
  25. crackerjack/config/global_lock_config.py +110 -0
  26. crackerjack/config/hooks.py +43 -64
  27. crackerjack/core/async_workflow_orchestrator.py +247 -97
  28. crackerjack/core/autofix_coordinator.py +192 -109
  29. crackerjack/core/enhanced_container.py +46 -63
  30. crackerjack/core/file_lifecycle.py +549 -0
  31. crackerjack/core/performance.py +9 -8
  32. crackerjack/core/performance_monitor.py +395 -0
  33. crackerjack/core/phase_coordinator.py +281 -94
  34. crackerjack/core/proactive_workflow.py +9 -58
  35. crackerjack/core/resource_manager.py +501 -0
  36. crackerjack/core/service_watchdog.py +490 -0
  37. crackerjack/core/session_coordinator.py +4 -8
  38. crackerjack/core/timeout_manager.py +504 -0
  39. crackerjack/core/websocket_lifecycle.py +475 -0
  40. crackerjack/core/workflow_orchestrator.py +343 -209
  41. crackerjack/dynamic_config.py +47 -6
  42. crackerjack/errors.py +3 -4
  43. crackerjack/executors/async_hook_executor.py +63 -13
  44. crackerjack/executors/cached_hook_executor.py +14 -14
  45. crackerjack/executors/hook_executor.py +100 -37
  46. crackerjack/executors/hook_lock_manager.py +856 -0
  47. crackerjack/executors/individual_hook_executor.py +120 -86
  48. crackerjack/intelligence/__init__.py +0 -7
  49. crackerjack/intelligence/adaptive_learning.py +13 -86
  50. crackerjack/intelligence/agent_orchestrator.py +15 -78
  51. crackerjack/intelligence/agent_registry.py +12 -59
  52. crackerjack/intelligence/agent_selector.py +31 -92
  53. crackerjack/intelligence/integration.py +1 -41
  54. crackerjack/interactive.py +9 -9
  55. crackerjack/managers/async_hook_manager.py +25 -8
  56. crackerjack/managers/hook_manager.py +9 -9
  57. crackerjack/managers/publish_manager.py +57 -59
  58. crackerjack/managers/test_command_builder.py +6 -36
  59. crackerjack/managers/test_executor.py +9 -61
  60. crackerjack/managers/test_manager.py +17 -63
  61. crackerjack/managers/test_manager_backup.py +77 -127
  62. crackerjack/managers/test_progress.py +4 -23
  63. crackerjack/mcp/cache.py +5 -12
  64. crackerjack/mcp/client_runner.py +10 -10
  65. crackerjack/mcp/context.py +64 -6
  66. crackerjack/mcp/dashboard.py +14 -11
  67. crackerjack/mcp/enhanced_progress_monitor.py +55 -55
  68. crackerjack/mcp/file_monitor.py +72 -42
  69. crackerjack/mcp/progress_components.py +103 -84
  70. crackerjack/mcp/progress_monitor.py +122 -49
  71. crackerjack/mcp/rate_limiter.py +12 -12
  72. crackerjack/mcp/server_core.py +16 -22
  73. crackerjack/mcp/service_watchdog.py +26 -26
  74. crackerjack/mcp/state.py +15 -0
  75. crackerjack/mcp/tools/core_tools.py +95 -39
  76. crackerjack/mcp/tools/error_analyzer.py +6 -32
  77. crackerjack/mcp/tools/execution_tools.py +1 -56
  78. crackerjack/mcp/tools/execution_tools_backup.py +35 -131
  79. crackerjack/mcp/tools/intelligence_tool_registry.py +0 -36
  80. crackerjack/mcp/tools/intelligence_tools.py +2 -55
  81. crackerjack/mcp/tools/monitoring_tools.py +308 -145
  82. crackerjack/mcp/tools/proactive_tools.py +12 -42
  83. crackerjack/mcp/tools/progress_tools.py +23 -15
  84. crackerjack/mcp/tools/utility_tools.py +3 -40
  85. crackerjack/mcp/tools/workflow_executor.py +40 -60
  86. crackerjack/mcp/websocket/app.py +0 -3
  87. crackerjack/mcp/websocket/endpoints.py +206 -268
  88. crackerjack/mcp/websocket/jobs.py +213 -66
  89. crackerjack/mcp/websocket/server.py +84 -6
  90. crackerjack/mcp/websocket/websocket_handler.py +137 -29
  91. crackerjack/models/config_adapter.py +3 -16
  92. crackerjack/models/protocols.py +162 -3
  93. crackerjack/models/resource_protocols.py +454 -0
  94. crackerjack/models/task.py +3 -3
  95. crackerjack/monitoring/__init__.py +0 -0
  96. crackerjack/monitoring/ai_agent_watchdog.py +25 -71
  97. crackerjack/monitoring/regression_prevention.py +28 -87
  98. crackerjack/orchestration/advanced_orchestrator.py +44 -78
  99. crackerjack/orchestration/coverage_improvement.py +10 -60
  100. crackerjack/orchestration/execution_strategies.py +16 -16
  101. crackerjack/orchestration/test_progress_streamer.py +61 -53
  102. crackerjack/plugins/base.py +1 -1
  103. crackerjack/plugins/managers.py +22 -20
  104. crackerjack/py313.py +65 -21
  105. crackerjack/services/backup_service.py +467 -0
  106. crackerjack/services/bounded_status_operations.py +627 -0
  107. crackerjack/services/cache.py +7 -9
  108. crackerjack/services/config.py +35 -52
  109. crackerjack/services/config_integrity.py +5 -16
  110. crackerjack/services/config_merge.py +542 -0
  111. crackerjack/services/contextual_ai_assistant.py +17 -19
  112. crackerjack/services/coverage_ratchet.py +44 -73
  113. crackerjack/services/debug.py +25 -39
  114. crackerjack/services/dependency_monitor.py +52 -50
  115. crackerjack/services/enhanced_filesystem.py +14 -11
  116. crackerjack/services/file_hasher.py +1 -1
  117. crackerjack/services/filesystem.py +1 -12
  118. crackerjack/services/git.py +71 -47
  119. crackerjack/services/health_metrics.py +31 -27
  120. crackerjack/services/initialization.py +276 -428
  121. crackerjack/services/input_validator.py +760 -0
  122. crackerjack/services/log_manager.py +16 -16
  123. crackerjack/services/logging.py +7 -6
  124. crackerjack/services/metrics.py +43 -43
  125. crackerjack/services/pattern_cache.py +2 -31
  126. crackerjack/services/pattern_detector.py +26 -63
  127. crackerjack/services/performance_benchmarks.py +20 -45
  128. crackerjack/services/regex_patterns.py +2887 -0
  129. crackerjack/services/regex_utils.py +537 -0
  130. crackerjack/services/secure_path_utils.py +683 -0
  131. crackerjack/services/secure_status_formatter.py +534 -0
  132. crackerjack/services/secure_subprocess.py +605 -0
  133. crackerjack/services/security.py +47 -10
  134. crackerjack/services/security_logger.py +492 -0
  135. crackerjack/services/server_manager.py +109 -50
  136. crackerjack/services/smart_scheduling.py +8 -25
  137. crackerjack/services/status_authentication.py +603 -0
  138. crackerjack/services/status_security_manager.py +442 -0
  139. crackerjack/services/thread_safe_status_collector.py +546 -0
  140. crackerjack/services/tool_version_service.py +1 -23
  141. crackerjack/services/unified_config.py +36 -58
  142. crackerjack/services/validation_rate_limiter.py +269 -0
  143. crackerjack/services/version_checker.py +9 -40
  144. crackerjack/services/websocket_resource_limiter.py +572 -0
  145. crackerjack/slash_commands/__init__.py +52 -2
  146. crackerjack/tools/__init__.py +0 -0
  147. crackerjack/tools/validate_input_validator_patterns.py +262 -0
  148. crackerjack/tools/validate_regex_patterns.py +198 -0
  149. {crackerjack-0.31.10.dist-info → crackerjack-0.31.12.dist-info}/METADATA +197 -12
  150. crackerjack-0.31.12.dist-info/RECORD +178 -0
  151. crackerjack/cli/facade.py +0 -104
  152. crackerjack-0.31.10.dist-info/RECORD +0 -149
  153. {crackerjack-0.31.10.dist-info → crackerjack-0.31.12.dist-info}/WHEEL +0 -0
  154. {crackerjack-0.31.10.dist-info → crackerjack-0.31.12.dist-info}/entry_points.txt +0 -0
  155. {crackerjack-0.31.10.dist-info → crackerjack-0.31.12.dist-info}/licenses/LICENSE +0 -0
@@ -1,7 +1,6 @@
1
1
  import ast
2
2
  import typing as t
3
3
  from dataclasses import dataclass
4
- from enum import Enum
5
4
  from pathlib import Path
6
5
  from typing import Protocol
7
6
 
@@ -9,12 +8,49 @@ from pydantic import BaseModel, ConfigDict
9
8
  from rich.console import Console
10
9
 
11
10
  from .errors import ErrorCode, ExecutionError
12
-
13
-
14
- class CleaningStepResult(Enum):
15
- SUCCESS = "success"
16
- FAILED = "failed"
17
- SKIPPED = "skipped"
11
+ from .services.backup_service import BackupMetadata, PackageBackupService
12
+ from .services.regex_patterns import SAFE_PATTERNS
13
+ from .services.secure_path_utils import (
14
+ AtomicFileOperations,
15
+ SecurePathValidator,
16
+ )
17
+ from .services.security_logger import (
18
+ SecurityEventLevel,
19
+ SecurityEventType,
20
+ get_security_logger,
21
+ )
22
+
23
+
24
+ class SafePatternApplicator:
25
+ """Safe pattern applicator using centralized SAFE_PATTERNS."""
26
+
27
+ def apply_docstring_patterns(self, code: str) -> str:
28
+ """Apply docstring removal patterns safely."""
29
+ result = code
30
+ result = SAFE_PATTERNS["docstring_triple_double"].apply(result)
31
+ result = SAFE_PATTERNS["docstring_triple_single"].apply(result)
32
+ return result
33
+
34
+ def apply_formatting_patterns(self, content: str) -> str:
35
+ """Apply formatting patterns safely."""
36
+ # Apply spacing patterns
37
+ content = SAFE_PATTERNS["spacing_after_comma"].apply(content)
38
+ content = SAFE_PATTERNS["spacing_after_colon"].apply(content)
39
+ content = SAFE_PATTERNS["multiple_spaces"].apply(content)
40
+ return content
41
+
42
+ def has_preserved_comment(self, line: str) -> bool:
43
+ """Check if a line contains preserved comments."""
44
+ if line.strip().startswith("#! /"):
45
+ return True
46
+
47
+ # Check for preserved comment keywords
48
+ line_lower = line.lower()
49
+ preserved_keywords = ["coding:", "encoding:", "type:", "noqa", "pragma"]
50
+ return any(keyword in line_lower for keyword in preserved_keywords)
51
+
52
+
53
+ _safe_applicator = SafePatternApplicator()
18
54
 
19
55
 
20
56
  @dataclass
@@ -26,12 +62,18 @@ class CleaningResult:
26
62
  warnings: list[str]
27
63
  original_size: int
28
64
  cleaned_size: int
65
+ backup_metadata: BackupMetadata | None = None
29
66
 
30
67
 
31
- class FileProcessorProtocol(Protocol):
32
- def read_file_safely(self, file_path: Path) -> str: ...
33
- def write_file_safely(self, file_path: Path, content: str) -> None: ...
34
- def backup_file(self, file_path: Path) -> Path: ...
68
+ @dataclass
69
+ class PackageCleaningResult:
70
+ total_files: int
71
+ successful_files: int
72
+ failed_files: int
73
+ file_results: list[CleaningResult]
74
+ backup_metadata: BackupMetadata | None
75
+ backup_restored: bool = False
76
+ overall_success: bool = False
35
77
 
36
78
 
37
79
  class CleaningStepProtocol(Protocol):
@@ -41,21 +83,13 @@ class CleaningStepProtocol(Protocol):
41
83
  def name(self) -> str: ...
42
84
 
43
85
 
44
- class ErrorHandlerProtocol(Protocol):
45
- def handle_file_error(
46
- self,
47
- file_path: Path,
48
- error: Exception,
49
- step: str,
50
- ) -> None: ...
51
- def log_cleaning_result(self, result: CleaningResult) -> None: ...
52
-
53
-
54
86
  class FileProcessor(BaseModel):
55
- model_config = ConfigDict(arbitrary_types_allowed=True)
87
+ model_config = ConfigDict(arbitrary_types_allowed=True, extra="allow")
56
88
 
57
89
  console: Console
58
90
  logger: t.Any = None
91
+ base_directory: Path | None = None
92
+ security_logger: t.Any = None
59
93
 
60
94
  def model_post_init(self, _: t.Any) -> None:
61
95
  if self.logger is None:
@@ -63,24 +97,55 @@ class FileProcessor(BaseModel):
63
97
 
64
98
  self.logger = logging.getLogger("crackerjack.code_cleaner.file_processor")
65
99
 
100
+ if self.security_logger is None:
101
+ self.security_logger = get_security_logger()
102
+
66
103
  def read_file_safely(self, file_path: Path) -> str:
104
+ validated_path = SecurePathValidator.validate_file_path(
105
+ file_path, self.base_directory
106
+ )
107
+ SecurePathValidator.validate_file_size(validated_path)
108
+
109
+ self.security_logger.log_security_event(
110
+ SecurityEventType.FILE_CLEANED,
111
+ SecurityEventLevel.LOW,
112
+ f"Reading file for cleaning: {validated_path}",
113
+ file_path=validated_path,
114
+ )
115
+
67
116
  try:
68
- return file_path.read_text(encoding="utf - 8")
117
+ return validated_path.read_text(encoding="utf-8")
118
+
69
119
  except UnicodeDecodeError:
70
120
  for encoding in ("latin1", "cp1252"):
71
121
  try:
72
- content = file_path.read_text(encoding=encoding)
122
+ content = validated_path.read_text(encoding=encoding)
73
123
  self.logger.warning(
74
- f"File {file_path} read with {encoding} encoding",
124
+ f"File {validated_path} read with {encoding} encoding",
75
125
  )
76
126
  return content
77
127
  except UnicodeDecodeError:
78
128
  continue
129
+
130
+ self.security_logger.log_validation_failed(
131
+ "encoding",
132
+ file_path,
133
+ "Could not decode file with any supported encoding",
134
+ )
135
+
79
136
  raise ExecutionError(
80
137
  message=f"Could not decode file {file_path}",
81
138
  error_code=ErrorCode.FILE_READ_ERROR,
82
139
  )
140
+
141
+ except ExecutionError:
142
+ raise
143
+
83
144
  except Exception as e:
145
+ self.security_logger.log_validation_failed(
146
+ "file_read", file_path, f"Unexpected error during file read: {e}"
147
+ )
148
+
84
149
  raise ExecutionError(
85
150
  message=f"Failed to read file {file_path}: {e}",
86
151
  error_code=ErrorCode.FILE_READ_ERROR,
@@ -88,19 +153,42 @@ class FileProcessor(BaseModel):
88
153
 
89
154
  def write_file_safely(self, file_path: Path, content: str) -> None:
90
155
  try:
91
- file_path.write_text(content, encoding="utf - 8")
156
+ AtomicFileOperations.atomic_write(file_path, content, self.base_directory)
157
+
158
+ self.security_logger.log_atomic_operation("write", file_path, True)
159
+
160
+ except ExecutionError:
161
+ self.security_logger.log_atomic_operation("write", file_path, False)
162
+ raise
163
+
92
164
  except Exception as e:
165
+ self.security_logger.log_atomic_operation(
166
+ "write", file_path, False, error=str(e)
167
+ )
168
+
93
169
  raise ExecutionError(
94
170
  message=f"Failed to write file {file_path}: {e}",
95
171
  error_code=ErrorCode.FILE_WRITE_ERROR,
96
172
  ) from e
97
173
 
98
174
  def backup_file(self, file_path: Path) -> Path:
99
- backup_path = file_path.with_suffix(f"{file_path.suffix}.backup")
100
175
  try:
101
- backup_path.write_bytes(file_path.read_bytes())
176
+ backup_path = AtomicFileOperations.atomic_backup_and_write(
177
+ file_path, file_path.read_bytes(), self.base_directory
178
+ )
179
+
180
+ self.security_logger.log_backup_created(file_path, backup_path)
181
+
102
182
  return backup_path
183
+
184
+ except ExecutionError:
185
+ raise
186
+
103
187
  except Exception as e:
188
+ self.security_logger.log_validation_failed(
189
+ "backup_creation", file_path, f"Backup creation failed: {e}"
190
+ )
191
+
104
192
  raise ExecutionError(
105
193
  message=f"Failed to create backup for {file_path}: {e}",
106
194
  error_code=ErrorCode.FILE_WRITE_ERROR,
@@ -121,7 +209,7 @@ class CleaningErrorHandler(BaseModel):
121
209
 
122
210
  def handle_file_error(self, file_path: Path, error: Exception, step: str) -> None:
123
211
  self.console.print(
124
- f"[bold bright_yellow]⚠️ Warning: {step} failed for {file_path}: {error}[/bold bright_yellow]",
212
+ f"[bold bright_yellow]⚠️ Warning: {step} failed for {file_path}: {error}[/ bold bright_yellow]",
125
213
  )
126
214
 
127
215
  self.logger.warning(
@@ -137,18 +225,18 @@ class CleaningErrorHandler(BaseModel):
137
225
  def log_cleaning_result(self, result: CleaningResult) -> None:
138
226
  if result.success:
139
227
  self.console.print(
140
- f"[green]✅ Cleaned {result.file_path}[/green] "
228
+ f"[green]✅ Cleaned {result.file_path}[/ green] "
141
229
  f"({result.original_size} → {result.cleaned_size} bytes)",
142
230
  )
143
231
  else:
144
232
  self.console.print(
145
- f"[red]❌ Failed to clean {result.file_path}[/red] "
233
+ f"[red]❌ Failed to clean {result.file_path}[/ red] "
146
234
  f"({len(result.steps_failed)} steps failed)",
147
235
  )
148
236
 
149
237
  if result.warnings:
150
238
  for warning in result.warnings:
151
- self.console.print(f"[yellow]⚠️ {warning}[/yellow]")
239
+ self.console.print(f"[yellow]⚠️ {warning}[/ yellow]")
152
240
 
153
241
  self.logger.info(
154
242
  "File cleaning completed",
@@ -185,7 +273,7 @@ class CleaningPipeline(BaseModel):
185
273
  self.logger.info(f"Starting clean_file for {file_path}")
186
274
  try:
187
275
  original_code = self.file_processor.read_file_safely(file_path)
188
- original_size = len(original_code.encode("utf - 8"))
276
+ original_size = len(original_code.encode("utf-8"))
189
277
 
190
278
  result = self._apply_cleaning_pipeline(
191
279
  original_code,
@@ -193,11 +281,10 @@ class CleaningPipeline(BaseModel):
193
281
  cleaning_steps,
194
282
  )
195
283
 
284
+ cleaned_size = original_size
196
285
  if result.success and result.cleaned_code != original_code:
197
286
  self.file_processor.write_file_safely(file_path, result.cleaned_code)
198
- cleaned_size = len(result.cleaned_code.encode("utf - 8"))
199
- else:
200
- cleaned_size = original_size
287
+ cleaned_size = len(result.cleaned_code.encode("utf-8"))
201
288
 
202
289
  cleaning_result = CleaningResult(
203
290
  file_path=file_path,
@@ -285,13 +372,16 @@ class CleaningPipeline(BaseModel):
285
372
 
286
373
 
287
374
  class CodeCleaner(BaseModel):
288
- model_config = ConfigDict(arbitrary_types_allowed=True)
375
+ model_config = ConfigDict(arbitrary_types_allowed=True, extra="allow")
289
376
 
290
377
  console: Console
291
378
  file_processor: t.Any = None
292
379
  error_handler: t.Any = None
293
380
  pipeline: t.Any = None
294
381
  logger: t.Any = None
382
+ base_directory: Path | None = None
383
+ security_logger: t.Any = None
384
+ backup_service: t.Any = None
295
385
 
296
386
  def model_post_init(self, _: t.Any) -> None:
297
387
  if self.logger is None:
@@ -299,8 +389,13 @@ class CodeCleaner(BaseModel):
299
389
 
300
390
  self.logger = logging.getLogger("crackerjack.code_cleaner")
301
391
 
392
+ if self.base_directory is None:
393
+ self.base_directory = Path.cwd()
394
+
302
395
  if self.file_processor is None:
303
- self.file_processor = FileProcessor(console=self.console)
396
+ self.file_processor = FileProcessor(
397
+ console=self.console, base_directory=self.base_directory
398
+ )
304
399
 
305
400
  if self.error_handler is None:
306
401
  self.error_handler = CleaningErrorHandler(console=self.console)
@@ -312,6 +407,12 @@ class CodeCleaner(BaseModel):
312
407
  console=self.console,
313
408
  )
314
409
 
410
+ if self.security_logger is None:
411
+ self.security_logger = get_security_logger()
412
+
413
+ if self.backup_service is None:
414
+ self.backup_service = PackageBackupService()
415
+
315
416
  def clean_file(self, file_path: Path) -> CleaningResult:
316
417
  cleaning_steps = [
317
418
  self._create_line_comment_step(),
@@ -322,49 +423,655 @@ class CodeCleaner(BaseModel):
322
423
 
323
424
  return self.pipeline.clean_file(file_path, cleaning_steps)
324
425
 
325
- def clean_files(self, pkg_dir: Path | None = None) -> list[CleaningResult]:
426
+ def clean_files(
427
+ self, pkg_dir: Path | None = None, use_backup: bool = True
428
+ ) -> list[CleaningResult] | PackageCleaningResult:
429
+ """Clean package files with optional backup protection.
430
+
431
+ Args:
432
+ pkg_dir: Package directory to clean (defaults to current directory)
433
+ use_backup: Whether to use backup protection (default: True for safety)
434
+
435
+ Returns:
436
+ PackageCleaningResult with backup protection (default), list[CleaningResult] if use_backup=False (legacy)
437
+ """
438
+ if use_backup:
439
+ # Use the comprehensive backup system for maximum safety
440
+ package_result = self.clean_files_with_backup(pkg_dir)
441
+ self.logger.info(
442
+ f"Package cleaning with backup completed: "
443
+ f"success={package_result.overall_success}, "
444
+ f"restored={package_result.backup_restored}"
445
+ )
446
+ return package_result
447
+
448
+ # Legacy non-backup mode (deprecated, kept for compatibility)
449
+ self.console.print(
450
+ "[yellow]⚠️ WARNING: Running without backup protection. "
451
+ "Consider using use_backup=True for safety.[/yellow]"
452
+ )
453
+
326
454
  if pkg_dir is None:
327
455
  pkg_dir = Path.cwd()
328
456
 
329
- python_files = list(pkg_dir.rglob(" * .py"))
457
+ python_files = self._discover_package_files(pkg_dir)
458
+
459
+ files_to_process = [
460
+ file_path
461
+ for file_path in python_files
462
+ if self.should_process_file(file_path)
463
+ ]
464
+
330
465
  results: list[CleaningResult] = []
466
+ self.logger.info(f"Starting clean_files for {len(files_to_process)} files")
331
467
 
332
- self.logger.info(f"Starting clean_files for {len(python_files)} files")
333
- for file_path in python_files:
334
- if self.should_process_file(file_path):
335
- result = self.clean_file(file_path)
336
- results.append(result)
468
+ cleaning_steps = [
469
+ self._create_line_comment_step(),
470
+ self._create_docstring_step(),
471
+ self._create_whitespace_step(),
472
+ self._create_formatting_step(),
473
+ ]
474
+
475
+ for file_path in files_to_process:
476
+ result = self.pipeline.clean_file(file_path, cleaning_steps)
477
+ results.append(result)
337
478
 
338
479
  return results
339
480
 
340
- def should_process_file(self, file_path: Path) -> bool:
341
- ignore_patterns = {
481
+ def clean_files_with_backup(
482
+ self, pkg_dir: Path | None = None
483
+ ) -> PackageCleaningResult:
484
+ validated_pkg_dir = self._prepare_package_directory(pkg_dir)
485
+
486
+ self.logger.info(
487
+ f"Starting safe package cleaning with backup: {validated_pkg_dir}"
488
+ )
489
+ self.console.print(
490
+ "[cyan]🛡️ Starting package cleaning with backup protection...[/cyan]"
491
+ )
492
+
493
+ backup_metadata: BackupMetadata | None = None
494
+
495
+ try:
496
+ backup_metadata = self._create_backup(validated_pkg_dir)
497
+ files_to_process = self._find_files_to_process(validated_pkg_dir)
498
+
499
+ if not files_to_process:
500
+ return self._handle_no_files_to_process(backup_metadata)
501
+
502
+ cleaning_result = self._execute_cleaning_with_backup(
503
+ files_to_process, backup_metadata
504
+ )
505
+
506
+ return self._finalize_cleaning_result(cleaning_result, backup_metadata)
507
+
508
+ except Exception as e:
509
+ return self._handle_critical_error(e, backup_metadata)
510
+
511
+ def _prepare_package_directory(self, pkg_dir: Path | None) -> Path:
512
+ if pkg_dir is None:
513
+ pkg_dir = Path.cwd()
514
+
515
+ return SecurePathValidator.validate_file_path(pkg_dir, self.base_directory)
516
+
517
+ def _create_backup(self, validated_pkg_dir: Path) -> BackupMetadata:
518
+ self.console.print(
519
+ "[yellow]📦 Creating backup of all package files...[/yellow]"
520
+ )
521
+
522
+ backup_metadata = self.backup_service.create_package_backup(
523
+ validated_pkg_dir, self.base_directory
524
+ )
525
+
526
+ self.console.print(
527
+ f"[green]✅ Backup created: {backup_metadata.backup_id}[/green] "
528
+ f"({backup_metadata.total_files} files, {backup_metadata.total_size} bytes)"
529
+ )
530
+
531
+ return backup_metadata
532
+
533
+ def _find_files_to_process(self, validated_pkg_dir: Path) -> list[Path]:
534
+ python_files = self._discover_package_files(validated_pkg_dir)
535
+ return [
536
+ file_path
537
+ for file_path in python_files
538
+ if self.should_process_file(file_path)
539
+ ]
540
+
541
+ def _discover_package_files(self, root_dir: Path) -> list[Path]:
542
+ """Discover Python files in the main package directory using crackerjack naming convention.
543
+
544
+ Crackerjack convention:
545
+ - Project name with dashes → package name with underscores
546
+ - Single word → same name lowercase
547
+ - Package directory determined from pyproject.toml [project.name]
548
+
549
+ Args:
550
+ root_dir: Project root directory
551
+
552
+ Returns:
553
+ List of Python files found only in the main package directory
554
+ """
555
+ package_dir = self._find_package_directory(root_dir)
556
+
557
+ if not package_dir or not package_dir.exists():
558
+ # Fallback: look for any directory with __init__.py (excluding common non-package dirs)
559
+ self.console.print(
560
+ "[yellow]⚠️ Could not determine package directory, searching for Python packages...[/yellow]"
561
+ )
562
+ return self._fallback_discover_packages(root_dir)
563
+
564
+ self.logger.debug(f"Using package directory: {package_dir}")
565
+
566
+ # Get all Python files from the package directory only
567
+ package_files = list(package_dir.rglob("*.py"))
568
+
569
+ # Filter out any problematic subdirectories that might exist within the package
570
+ exclude_dirs = {
571
+ "__pycache__",
572
+ ".pytest_cache",
573
+ ".mypy_cache",
574
+ ".ruff_cache",
575
+ ".venv",
576
+ "venv",
577
+ }
578
+ filtered_files = [
579
+ f
580
+ for f in package_files
581
+ if not any(excl in f.parts for excl in exclude_dirs)
582
+ ]
583
+
584
+ return filtered_files
585
+
586
+ def _find_package_directory(self, root_dir: Path) -> Path | None:
587
+ """Find the main package directory using crackerjack naming convention.
588
+
589
+ Args:
590
+ root_dir: Project root directory
591
+
592
+ Returns:
593
+ Path to package directory or None if not found
594
+ """
595
+ # First, try to get project name from pyproject.toml
596
+ pyproject_path = root_dir / "pyproject.toml"
597
+ if pyproject_path.exists():
598
+ try:
599
+ import tomllib
600
+
601
+ with pyproject_path.open("rb") as f:
602
+ config = tomllib.load(f)
603
+
604
+ project_name = config.get("project", {}).get("name")
605
+ if project_name:
606
+ # Apply crackerjack naming convention
607
+ package_name = project_name.replace("-", "_").lower()
608
+ package_dir = root_dir / package_name
609
+
610
+ if package_dir.exists() and (package_dir / "__init__.py").exists():
611
+ return package_dir
612
+
613
+ except Exception as e:
614
+ self.logger.debug(f"Could not parse pyproject.toml: {e}")
615
+
616
+ # Fallback: infer from directory name
617
+ package_name = root_dir.name.replace("-", "_").lower()
618
+ package_dir = root_dir / package_name
619
+
620
+ if package_dir.exists() and (package_dir / "__init__.py").exists():
621
+ return package_dir
622
+
623
+ return None
624
+
625
+ def _fallback_discover_packages(self, root_dir: Path) -> list[Path]:
626
+ """Fallback method to discover package files when convention-based detection fails."""
627
+ python_files = []
628
+ exclude_dirs = {
342
629
  "__pycache__",
343
630
  ".git",
344
631
  ".venv",
345
- "site - packages",
632
+ "venv",
633
+ "site-packages",
346
634
  ".pytest_cache",
347
635
  "build",
348
636
  "dist",
637
+ ".tox",
638
+ "node_modules",
639
+ "tests",
640
+ "test",
641
+ "examples",
642
+ "example",
643
+ "docs",
644
+ "doc",
645
+ ".mypy_cache",
646
+ ".ruff_cache",
647
+ "htmlcov",
648
+ ".coverage",
349
649
  }
350
650
 
351
- for parent in file_path.parents:
352
- if parent.name in ignore_patterns:
651
+ for item in root_dir.iterdir():
652
+ if (
653
+ not item.is_dir()
654
+ or item.name.startswith(".")
655
+ or item.name in exclude_dirs
656
+ ):
657
+ continue
658
+
659
+ if (item / "__init__.py").exists():
660
+ package_files = [
661
+ f
662
+ for f in item.rglob("*.py")
663
+ if self._should_include_file_path(f, exclude_dirs)
664
+ ]
665
+ python_files.extend(package_files)
666
+
667
+ return python_files
668
+
669
+ def _should_include_file_path(
670
+ self, file_path: Path, exclude_dirs: set[str]
671
+ ) -> bool:
672
+ """Check if a file path should be included (not in excluded directories)."""
673
+ # Convert path parts to set for efficient lookup
674
+ path_parts = set(file_path.parts)
675
+
676
+ # If any part of the path is in exclude_dirs, exclude it
677
+ return not bool(path_parts.intersection(exclude_dirs))
678
+
679
+ def _handle_no_files_to_process(
680
+ self, backup_metadata: BackupMetadata
681
+ ) -> PackageCleaningResult:
682
+ self.console.print("[yellow]⚠️ No files found to process[/yellow]")
683
+ self.backup_service.cleanup_backup(backup_metadata)
684
+
685
+ return PackageCleaningResult(
686
+ total_files=0,
687
+ successful_files=0,
688
+ failed_files=0,
689
+ file_results=[],
690
+ backup_metadata=None,
691
+ backup_restored=False,
692
+ overall_success=True,
693
+ )
694
+
695
+ def _execute_cleaning_with_backup(
696
+ self, files_to_process: list[Path], backup_metadata: BackupMetadata
697
+ ) -> dict[str, t.Any]:
698
+ self.console.print(f"[cyan]🧹 Cleaning {len(files_to_process)} files...[/cyan]")
699
+
700
+ cleaning_steps = [
701
+ self._create_line_comment_step(),
702
+ self._create_docstring_step(),
703
+ self._create_whitespace_step(),
704
+ self._create_formatting_step(),
705
+ ]
706
+
707
+ file_results: list[CleaningResult] = []
708
+ cleaning_errors: list[Exception] = []
709
+
710
+ for file_path in files_to_process:
711
+ try:
712
+ result = self.pipeline.clean_file(file_path, cleaning_steps)
713
+ result.backup_metadata = backup_metadata
714
+ file_results.append(result)
715
+
716
+ if not result.success:
717
+ cleaning_errors.append(
718
+ ExecutionError(
719
+ message=f"Cleaning failed for {file_path}: {result.steps_failed}",
720
+ error_code=ErrorCode.CODE_CLEANING_ERROR,
721
+ )
722
+ )
723
+ except Exception as e:
724
+ cleaning_errors.append(e)
725
+ file_results.append(
726
+ CleaningResult(
727
+ file_path=file_path,
728
+ success=False,
729
+ steps_completed=[],
730
+ steps_failed=["file_processing"],
731
+ warnings=[f"Exception during cleaning: {e}"],
732
+ original_size=0,
733
+ cleaned_size=0,
734
+ backup_metadata=backup_metadata,
735
+ )
736
+ )
737
+
738
+ return {
739
+ "file_results": file_results,
740
+ "cleaning_errors": cleaning_errors,
741
+ "files_to_process": files_to_process,
742
+ }
743
+
744
+ def _finalize_cleaning_result(
745
+ self, cleaning_result: dict[str, t.Any], backup_metadata: BackupMetadata
746
+ ) -> PackageCleaningResult:
747
+ file_results = cleaning_result["file_results"]
748
+ cleaning_errors = cleaning_result["cleaning_errors"]
749
+ files_to_process = cleaning_result["files_to_process"]
750
+
751
+ successful_files = sum(1 for result in file_results if result.success)
752
+ failed_files = len(file_results) - successful_files
753
+
754
+ if cleaning_errors or failed_files > 0:
755
+ return self._handle_cleaning_failure(
756
+ backup_metadata,
757
+ file_results,
758
+ files_to_process,
759
+ successful_files,
760
+ failed_files,
761
+ cleaning_errors,
762
+ )
763
+
764
+ return self._handle_cleaning_success(
765
+ backup_metadata, file_results, files_to_process, successful_files
766
+ )
767
+
768
+ def _handle_cleaning_failure(
769
+ self,
770
+ backup_metadata: BackupMetadata,
771
+ file_results: list[CleaningResult],
772
+ files_to_process: list[Path],
773
+ successful_files: int,
774
+ failed_files: int,
775
+ cleaning_errors: list[Exception],
776
+ ) -> PackageCleaningResult:
777
+ self.console.print(
778
+ f"[red]❌ Cleaning failed ({failed_files} files failed). "
779
+ f"Restoring from backup...[/red]"
780
+ )
781
+
782
+ self.logger.error(
783
+ f"Package cleaning failed with {len(cleaning_errors)} errors, "
784
+ f"restoring from backup {backup_metadata.backup_id}"
785
+ )
786
+
787
+ self.backup_service.restore_from_backup(backup_metadata, self.base_directory)
788
+
789
+ self.console.print("[green]✅ Files restored from backup successfully[/green]")
790
+
791
+ return PackageCleaningResult(
792
+ total_files=len(files_to_process),
793
+ successful_files=successful_files,
794
+ failed_files=failed_files,
795
+ file_results=file_results,
796
+ backup_metadata=backup_metadata,
797
+ backup_restored=True,
798
+ overall_success=False,
799
+ )
800
+
801
+ def _handle_cleaning_success(
802
+ self,
803
+ backup_metadata: BackupMetadata,
804
+ file_results: list[CleaningResult],
805
+ files_to_process: list[Path],
806
+ successful_files: int,
807
+ ) -> PackageCleaningResult:
808
+ self.console.print(
809
+ f"[green]✅ Package cleaning completed successfully![/green] "
810
+ f"({successful_files} files cleaned)"
811
+ )
812
+
813
+ self.backup_service.cleanup_backup(backup_metadata)
814
+
815
+ return PackageCleaningResult(
816
+ total_files=len(files_to_process),
817
+ successful_files=successful_files,
818
+ failed_files=0,
819
+ file_results=file_results,
820
+ backup_metadata=None,
821
+ backup_restored=False,
822
+ overall_success=True,
823
+ )
824
+
825
+ def _handle_critical_error(
826
+ self, error: Exception, backup_metadata: BackupMetadata | None
827
+ ) -> PackageCleaningResult:
828
+ self.logger.error(f"Critical error during package cleaning: {error}")
829
+ self.console.print(f"[red]💥 Critical error: {error}[/red]")
830
+
831
+ backup_restored = False
832
+
833
+ if backup_metadata:
834
+ backup_restored = self._attempt_emergency_restoration(backup_metadata)
835
+
836
+ return PackageCleaningResult(
837
+ total_files=0,
838
+ successful_files=0,
839
+ failed_files=0,
840
+ file_results=[],
841
+ backup_metadata=backup_metadata,
842
+ backup_restored=backup_restored,
843
+ overall_success=False,
844
+ )
845
+
846
+ def _attempt_emergency_restoration(self, backup_metadata: BackupMetadata) -> bool:
847
+ try:
848
+ self.console.print(
849
+ "[yellow]🔄 Attempting emergency restoration...[/yellow]"
850
+ )
851
+ self.backup_service.restore_from_backup(
852
+ backup_metadata, self.base_directory
853
+ )
854
+ self.console.print("[green]✅ Emergency restoration completed[/green]")
855
+ return True
856
+
857
+ except Exception as restore_error:
858
+ self.logger.error(f"Emergency restoration failed: {restore_error}")
859
+ self.console.print(
860
+ f"[red]💥 Emergency restoration failed: {restore_error}[/red]\n"
861
+ f"[yellow]⚠️ Manual restoration may be needed from: "
862
+ f"{backup_metadata.backup_directory}[/yellow]"
863
+ )
864
+ return False
865
+
866
+ def restore_from_backup_metadata(self, backup_metadata: BackupMetadata) -> None:
867
+ """Manually restore from backup metadata.
868
+
869
+ Args:
870
+ backup_metadata: Backup metadata containing restoration information
871
+ """
872
+ self.console.print(
873
+ f"[yellow]🔄 Manually restoring from backup: {backup_metadata.backup_id}[/yellow]"
874
+ )
875
+
876
+ self.backup_service.restore_from_backup(backup_metadata, self.base_directory)
877
+
878
+ self.console.print(
879
+ f"[green]✅ Manual restoration completed from backup: "
880
+ f"{backup_metadata.backup_id}[/green]"
881
+ )
882
+
883
+ def create_emergency_backup(self, pkg_dir: Path | None = None) -> BackupMetadata:
884
+ """Create an emergency backup before potentially risky operations.
885
+
886
+ Args:
887
+ pkg_dir: Package directory to backup (defaults to current directory)
888
+
889
+ Returns:
890
+ BackupMetadata for the created backup
891
+ """
892
+ validated_pkg_dir = self._prepare_package_directory(pkg_dir)
893
+
894
+ self.console.print(
895
+ "[cyan]🛡️ Creating emergency backup before risky operation...[/cyan]"
896
+ )
897
+
898
+ backup_metadata = self._create_backup(validated_pkg_dir)
899
+
900
+ self.console.print(
901
+ f"[green]✅ Emergency backup created: {backup_metadata.backup_id}[/green]"
902
+ )
903
+
904
+ return backup_metadata
905
+
906
+ def restore_emergency_backup(self, backup_metadata: BackupMetadata) -> bool:
907
+ """Restore from an emergency backup with enhanced error handling.
908
+
909
+ Args:
910
+ backup_metadata: Backup metadata for restoration
911
+
912
+ Returns:
913
+ True if restoration succeeded, False otherwise
914
+ """
915
+ try:
916
+ self.console.print(
917
+ f"[yellow]🔄 Restoring emergency backup: {backup_metadata.backup_id}[/yellow]"
918
+ )
919
+
920
+ self.backup_service.restore_from_backup(
921
+ backup_metadata, self.base_directory
922
+ )
923
+
924
+ self.console.print(
925
+ f"[green]✅ Emergency backup restored successfully: {backup_metadata.backup_id}[/green]"
926
+ )
927
+
928
+ return True
929
+
930
+ except Exception as e:
931
+ self.logger.error(f"Emergency backup restoration failed: {e}")
932
+ self.console.print(
933
+ f"[red]💥 Emergency backup restoration failed: {e}[/red]\n"
934
+ f"[yellow]⚠️ Manual intervention required. Backup location: "
935
+ f"{backup_metadata.backup_directory}[/yellow]"
936
+ )
937
+
938
+ return False
939
+
940
+ def verify_backup_integrity(self, backup_metadata: BackupMetadata) -> bool:
941
+ """Verify the integrity of a backup without restoring it.
942
+
943
+ Args:
944
+ backup_metadata: Backup metadata to verify
945
+
946
+ Returns:
947
+ True if backup is valid and can be restored, False otherwise
948
+ """
949
+ try:
950
+ validation_result = self.backup_service._validate_backup(backup_metadata)
951
+
952
+ if validation_result.is_valid:
953
+ self.console.print(
954
+ f"[green]✅ Backup verification passed: {backup_metadata.backup_id}[/green] "
955
+ f"({validation_result.total_validated} files verified)"
956
+ )
957
+ return True
958
+ else:
959
+ self.console.print(
960
+ f"[red]❌ Backup verification failed: {backup_metadata.backup_id}[/red]"
961
+ )
962
+
963
+ for error in validation_result.validation_errors[
964
+ :3
965
+ ]: # Show first 3 errors
966
+ self.console.print(f"[red] • {error}[/red]")
967
+
968
+ if len(validation_result.validation_errors) > 3:
969
+ remaining = len(validation_result.validation_errors) - 3
970
+ self.console.print(f"[red] ... and {remaining} more errors[/red]")
971
+
353
972
  return False
354
973
 
355
- return not (file_path.name.startswith(".") or file_path.suffix != ".py")
974
+ except Exception as e:
975
+ self.logger.error(f"Backup verification failed with exception: {e}")
976
+ self.console.print(f"[red]💥 Backup verification error: {e}[/red]")
977
+ return False
978
+
979
+ def list_available_backups(self) -> list[Path]:
980
+ """List all available backup directories.
981
+
982
+ Returns:
983
+ List of backup directory paths
984
+ """
985
+ if (
986
+ not self.backup_service.backup_root
987
+ or not self.backup_service.backup_root.exists()
988
+ ):
989
+ self.console.print("[yellow]⚠️ No backup root directory found[/yellow]")
990
+ return []
991
+
992
+ try:
993
+ backup_dirs = [
994
+ path
995
+ for path in self.backup_service.backup_root.iterdir()
996
+ if path.is_dir() and path.name.startswith("backup_")
997
+ ]
998
+
999
+ if backup_dirs:
1000
+ self.console.print(
1001
+ f"[cyan]📦 Found {len(backup_dirs)} available backups:[/cyan]"
1002
+ )
1003
+ for backup_dir in sorted(backup_dirs):
1004
+ self.console.print(f" • {backup_dir.name}")
1005
+ else:
1006
+ self.console.print("[yellow]⚠️ No backups found[/yellow]")
1007
+
1008
+ return backup_dirs
1009
+
1010
+ except Exception as e:
1011
+ self.logger.error(f"Failed to list backups: {e}")
1012
+ self.console.print(f"[red]💥 Error listing backups: {e}[/red]")
1013
+ return []
1014
+
1015
+ def should_process_file(self, file_path: Path) -> bool:
1016
+ try:
1017
+ validated_path = SecurePathValidator.validate_file_path(
1018
+ file_path, self.base_directory
1019
+ )
1020
+
1021
+ SecurePathValidator.validate_file_size(validated_path)
1022
+
1023
+ ignore_patterns = {
1024
+ "__pycache__",
1025
+ ".git",
1026
+ ".venv",
1027
+ "site-packages",
1028
+ ".pytest_cache",
1029
+ "build",
1030
+ "dist",
1031
+ "tests",
1032
+ "test",
1033
+ "examples",
1034
+ "example",
1035
+ }
1036
+
1037
+ for parent in validated_path.parents:
1038
+ if parent.name in ignore_patterns:
1039
+ return False
1040
+
1041
+ should_process = not (
1042
+ validated_path.name.startswith(".") or validated_path.suffix != ".py"
1043
+ )
1044
+
1045
+ if should_process:
1046
+ self.security_logger.log_security_event(
1047
+ SecurityEventType.FILE_CLEANED,
1048
+ SecurityEventLevel.LOW,
1049
+ f"File approved for processing: {validated_path}",
1050
+ file_path=validated_path,
1051
+ )
1052
+
1053
+ return should_process
1054
+
1055
+ except ExecutionError as e:
1056
+ self.security_logger.log_validation_failed(
1057
+ "file_processing_check",
1058
+ file_path,
1059
+ f"File failed security validation: {e}",
1060
+ )
1061
+
1062
+ return False
1063
+
1064
+ except Exception as e:
1065
+ self.logger.warning(f"Unexpected error checking file {file_path}: {e}")
1066
+ return False
356
1067
 
357
1068
  def _create_line_comment_step(self) -> CleaningStepProtocol:
358
- """Create a step for removing line comments while preserving special comments."""
359
1069
  return self._LineCommentStep()
360
1070
 
361
1071
  def _create_docstring_step(self) -> CleaningStepProtocol:
362
- """Create a step for removing docstrings."""
363
1072
  return self._DocstringStep()
364
1073
 
365
1074
  class _DocstringStep:
366
- """Step implementation for removing docstrings."""
367
-
368
1075
  name = "remove_docstrings"
369
1076
 
370
1077
  def _is_docstring_node(self, node: ast.AST) -> bool:
@@ -426,10 +1133,10 @@ class CodeCleaner(BaseModel):
426
1133
  lines_to_remove: set[int] = set()
427
1134
 
428
1135
  for node in docstring_nodes:
429
- # Most AST nodes have lineno and end_lineno attributes
430
1136
  start_line = getattr(node, "lineno", 1)
431
- end_line = getattr(node, "end_lineno", start_line + 1)
432
- lines_to_remove.update(range(start_line, end_line))
1137
+ end_line = getattr(node, "end_lineno", start_line)
1138
+
1139
+ lines_to_remove.update(range(start_line, end_line + 1))
433
1140
 
434
1141
  result_lines = [
435
1142
  line for i, line in enumerate(lines, 1) if i not in lines_to_remove
@@ -439,95 +1146,58 @@ class CodeCleaner(BaseModel):
439
1146
  return self._regex_fallback_removal(result)
440
1147
 
441
1148
  def _regex_fallback_removal(self, code: str) -> str:
442
- import re
443
-
444
- patterns = [
445
- r'^\s*""".*?"""\s*$',
446
- r"^\s*'''.*?'''\s*$",
447
- r'^\s*""".*?"""\s*$',
448
- r"^\s*'''.*?'''\s*$",
449
- ]
450
- result = code
451
- for pattern in patterns:
452
- result = re.sub(pattern, "", result, flags=re.MULTILINE | re.DOTALL)
453
- return result
1149
+ return _safe_applicator.apply_docstring_patterns(code)
454
1150
 
455
1151
  class _LineCommentStep:
456
- """Step implementation for removing line comments."""
457
-
458
1152
  name = "remove_line_comments"
459
1153
 
460
1154
  def __call__(self, code: str, file_path: Path) -> str:
461
1155
  lines = code.split("\n")
462
- # Performance: Use list comprehension instead of generator for small-to-medium files
1156
+
463
1157
  processed_lines = [self._process_line_for_comments(line) for line in lines]
464
1158
  return "\n".join(processed_lines)
465
1159
 
466
1160
  def _process_line_for_comments(self, line: str) -> str:
467
- """Process a single line to remove comments while preserving strings."""
468
1161
  if not line.strip() or self._is_preserved_comment_line(line):
469
1162
  return line
470
1163
  return self._remove_comment_from_line(line)
471
1164
 
472
1165
  def _is_preserved_comment_line(self, line: str) -> bool:
473
- """Check if this comment line should be preserved."""
474
1166
  stripped = line.strip()
475
1167
  if not stripped.startswith("#"):
476
1168
  return False
477
1169
  return self._has_preserved_pattern(stripped)
478
1170
 
479
1171
  def _has_preserved_pattern(self, stripped_line: str) -> bool:
480
- """Check if line contains preserved comment patterns."""
481
- preserved_patterns = ["coding: ", "encoding: ", "type: ", "noqa", "pragma"]
482
- return stripped_line.startswith("# !/ ") or any(
483
- pattern in stripped_line for pattern in preserved_patterns
484
- )
1172
+ return _safe_applicator.has_preserved_comment(stripped_line)
485
1173
 
486
1174
  def _remove_comment_from_line(self, line: str) -> str:
487
- """Remove comments from a line while preserving string literals."""
488
- result: list[str] = []
489
- string_state: dict[str, t.Any] = {"in_string": False, "quote_char": None}
490
- for i, char in enumerate(line):
491
- if self._should_break_at_comment(char, string_state):
492
- break
493
- self._update_string_state(char, i, line, string_state)
494
- result.append(char)
495
- return "".join(result).rstrip()
496
-
497
- def _should_break_at_comment(self, char: str, state: dict[str, t.Any]) -> bool:
498
- """Check if we should break at a comment character."""
499
- return not state["in_string"] and char == "#"
500
-
501
- def _update_string_state(
502
- self,
503
- char: str,
504
- index: int,
505
- line: str,
506
- state: dict[str, t.Any],
507
- ) -> None:
508
- """Update string parsing state based on current character."""
509
- if self._is_string_start(char, state):
510
- state["in_string"], state["quote_char"] = True, char
511
- elif self._is_string_end(char, index, line, state):
512
- state["in_string"], state["quote_char"] = False, None
513
-
514
- def _is_string_start(self, char: str, state: dict[str, t.Any]) -> bool:
515
- """Check if character starts a string."""
516
- return not state["in_string"] and char in ('"', "'")
517
-
518
- def _is_string_end(
519
- self,
520
- char: str,
521
- index: int,
522
- line: str,
523
- state: dict[str, t.Any],
524
- ) -> bool:
525
- """Check if character ends a string."""
526
- return (
527
- state["in_string"]
528
- and char == state["quote_char"]
529
- and (index == 0 or line[index - 1] != "\\")
530
- )
1175
+ if '"' not in line and "'" not in line and "#" not in line:
1176
+ return line
1177
+
1178
+ result_chars = []
1179
+ in_string = False
1180
+ quote_char = None
1181
+ i = 0
1182
+ length = len(line)
1183
+
1184
+ while i < length:
1185
+ char = line[i]
1186
+
1187
+ if not in_string:
1188
+ if char == "#":
1189
+ break
1190
+ elif char in ('"', "'"):
1191
+ in_string = True
1192
+ quote_char = char
1193
+ elif char == quote_char and (i == 0 or line[i - 1] != "\\"):
1194
+ in_string = False
1195
+ quote_char = None
1196
+
1197
+ result_chars.append(char)
1198
+ i += 1
1199
+
1200
+ return "".join(result_chars).rstrip()
531
1201
 
532
1202
  def _create_docstring_finder_class(
533
1203
  self,
@@ -570,11 +1240,8 @@ class CodeCleaner(BaseModel):
570
1240
  name = "remove_extra_whitespace"
571
1241
 
572
1242
  def __call__(self, code: str, file_path: Path) -> str:
573
- import re
574
-
575
1243
  lines = code.split("\n")
576
1244
  cleaned_lines: list[str] = []
577
-
578
1245
  empty_line_count = 0
579
1246
 
580
1247
  for line in lines:
@@ -586,13 +1253,13 @@ class CodeCleaner(BaseModel):
586
1253
  cleaned_lines.append("")
587
1254
  else:
588
1255
  empty_line_count = 0
589
-
590
1256
  leading_whitespace = len(cleaned_line) - len(
591
- cleaned_line.lstrip(),
1257
+ cleaned_line.lstrip()
592
1258
  )
593
1259
  content = cleaned_line.lstrip()
594
1260
 
595
- content = re.sub(r" {2, }", " ", content)
1261
+ # Use SAFE_PATTERNS for multiple spaces replacement
1262
+ content = SAFE_PATTERNS["multiple_spaces"].apply(content)
596
1263
 
597
1264
  cleaned_line = cleaned_line[:leading_whitespace] + content
598
1265
  cleaned_lines.append(cleaned_line)
@@ -612,33 +1279,26 @@ class CodeCleaner(BaseModel):
612
1279
  class FormattingStep:
613
1280
  name = "format_code"
614
1281
 
615
- def __call__(self, code: str, file_path: Path) -> str:
616
- import re
1282
+ def _is_preserved_comment_line(self, line: str) -> bool:
1283
+ stripped = line.strip()
1284
+ if not stripped.startswith("#"):
1285
+ return False
1286
+ return _safe_applicator.has_preserved_comment(line)
617
1287
 
1288
+ def __call__(self, code: str, file_path: Path) -> str:
618
1289
  lines = code.split("\n")
619
1290
  formatted_lines: list[str] = []
620
1291
 
621
1292
  for line in lines:
622
1293
  if line.strip():
1294
+ if self._is_preserved_comment_line(line):
1295
+ formatted_lines.append(line)
1296
+ continue
1297
+
623
1298
  leading_whitespace = len(line) - len(line.lstrip())
624
1299
  content = line.lstrip()
625
1300
 
626
- content = re.sub(
627
- r"([ =+ \ -*/%<>!&|^ ])([ ^ =+ \ -*/%<>!&|^ ])",
628
- r"\1 \2",
629
- content,
630
- )
631
- content = re.sub(
632
- r"([ ^ =+ \ -*/%<>!&|^ ])([ =+ \ -*/%<>!&|^ ])",
633
- r"\1 \2",
634
- content,
635
- )
636
-
637
- content = re.sub(r", ([ ^ \n])", r", \1", content)
638
-
639
- content = re.sub(r": ([ ^ \n: ])", r": \1", content)
640
-
641
- content = re.sub(r" {2, }", " ", content)
1301
+ content = _safe_applicator.apply_formatting_patterns(content)
642
1302
 
643
1303
  formatted_line = line[:leading_whitespace] + content
644
1304
  formatted_lines.append(formatted_line)