crackerjack 0.28.0__py3-none-any.whl → 0.30.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of crackerjack might be problematic. Click here for more details.

@@ -0,0 +1,980 @@
1
+ import asyncio
2
+ import re
3
+ import subprocess
4
+ import typing as t
5
+ from concurrent.futures import ThreadPoolExecutor, as_completed
6
+ from contextlib import suppress
7
+ from functools import lru_cache
8
+ from pathlib import Path
9
+
10
+ import aiofiles
11
+ from pydantic import BaseModel
12
+ from rich.console import Console
13
+
14
+ from .errors import ErrorCode, ExecutionError, handle_error
15
+
16
+
17
+ class CodeCleaner(BaseModel, arbitrary_types_allowed=True):
18
+ console: Console
19
+
20
+ def _analyze_workload_characteristics(self, files: list[Path]) -> dict[str, t.Any]:
21
+ if not files:
22
+ return {
23
+ "total_files": 0,
24
+ "total_size": 0,
25
+ "avg_file_size": 0,
26
+ "complexity": "low",
27
+ }
28
+ total_size = 0
29
+ large_files = 0
30
+ for file_path in files:
31
+ try:
32
+ size = file_path.stat().st_size
33
+ total_size += size
34
+ if size > 50_000:
35
+ large_files += 1
36
+ except (OSError, PermissionError):
37
+ continue
38
+ avg_file_size = total_size / len(files) if files else 0
39
+ large_file_ratio = large_files / len(files) if files else 0
40
+ if len(files) > 100 or avg_file_size > 20_000 or large_file_ratio > 0.3:
41
+ complexity = "high"
42
+ elif len(files) > 50 or avg_file_size > 10_000 or large_file_ratio > 0.1:
43
+ complexity = "medium"
44
+ else:
45
+ complexity = "low"
46
+
47
+ return {
48
+ "total_files": len(files),
49
+ "total_size": total_size,
50
+ "avg_file_size": avg_file_size,
51
+ "large_files": large_files,
52
+ "large_file_ratio": large_file_ratio,
53
+ "complexity": complexity,
54
+ }
55
+
56
+ def _calculate_optimal_workers(self, workload: dict[str, t.Any]) -> int:
57
+ import os
58
+
59
+ cpu_count = os.cpu_count() or 4
60
+ if workload["complexity"] == "high":
61
+ max_workers = min(cpu_count // 2, 3)
62
+ elif workload["complexity"] == "medium":
63
+ max_workers = min(cpu_count, 6)
64
+ else:
65
+ max_workers = min(cpu_count + 2, 8)
66
+
67
+ return min(max_workers, workload["total_files"])
68
+
69
+ def clean_files(self, pkg_dir: Path | None) -> None:
70
+ if pkg_dir is None:
71
+ return
72
+ python_files = [
73
+ file_path
74
+ for file_path in pkg_dir.rglob("*.py")
75
+ if not str(file_path.parent).startswith("__")
76
+ ]
77
+ if not python_files:
78
+ return
79
+ workload = self._analyze_workload_characteristics(python_files)
80
+ max_workers = self._calculate_optimal_workers(workload)
81
+ if len(python_files) > 10:
82
+ self.console.print(
83
+ f"[dim]Cleaning {workload['total_files']} files "
84
+ f"({workload['complexity']} complexity) with {max_workers} workers[/dim]"
85
+ )
86
+ with ThreadPoolExecutor(max_workers=max_workers) as executor:
87
+ future_to_file = {
88
+ executor.submit(self.clean_file, file_path): file_path
89
+ for file_path in python_files
90
+ }
91
+ for future in as_completed(future_to_file):
92
+ file_path = future_to_file[future]
93
+ try:
94
+ future.result()
95
+ except Exception as e:
96
+ self.console.print(
97
+ f"[bold bright_red]❌ Error cleaning {file_path}: {e}[/bold bright_red]"
98
+ )
99
+ self._cleanup_cache_directories(pkg_dir)
100
+
101
+ def _cleanup_cache_directories(self, pkg_dir: Path) -> None:
102
+ with suppress(PermissionError, OSError):
103
+ pycache_dir = pkg_dir / "__pycache__"
104
+ if pycache_dir.exists():
105
+ for cache_file in pycache_dir.iterdir():
106
+ with suppress(PermissionError, OSError):
107
+ cache_file.unlink()
108
+ pycache_dir.rmdir()
109
+ parent_pycache = pkg_dir.parent / "__pycache__"
110
+ if parent_pycache.exists():
111
+ for cache_file in parent_pycache.iterdir():
112
+ with suppress(PermissionError, OSError):
113
+ cache_file.unlink()
114
+ parent_pycache.rmdir()
115
+
116
def clean_file(self, file_path: Path) -> None:
    """Rewrite ``file_path`` in place after running the four cleaning stages.

    The stages run in order: line comments, docstrings, extra whitespace,
    reformat. A stage that raises is reported as a warning and the working
    text is reset to the file's original contents before the next stage
    runs. Read/write failures are reported through ``handle_error`` with
    ``exit_on_error=False``.
    """
    stages = (
        ("remove_line_comments_streaming", "remove line comments from"),
        ("remove_docstrings_streaming", "remove docstrings from"),
        ("remove_extra_whitespace_streaming", "remove extra whitespace from"),
        ("reformat_code", "reformat"),
    )

    def report(label: str, message: str, error_code: t.Any, recovery: str, exc: Exception) -> None:
        # Shared console line + structured error for every failure class.
        self.console.print(f"[red]Failed to clean: {file_path} ({label})[/red]")
        handle_error(
            ExecutionError(
                message=message,
                error_code=error_code,
                details=str(exc),
                recovery=recovery,
            ),
            console=self.console,
            exit_on_error=False,
        )

    try:
        original_code = file_path.read_text(encoding="utf-8")
        code = original_code
        cleaning_failed = False
        for method_name, action in stages:
            try:
                # Look the stage up lazily so a lookup failure is handled
                # like any other stage failure.
                code = getattr(self, method_name)(code)
            except Exception as e:
                self.console.print(
                    f"[bold bright_yellow]⚠️ Warning: Failed to {action} {file_path}: {e}[/bold bright_yellow]"
                )
                code = original_code
                cleaning_failed = True
        file_path.write_text(code, encoding="utf-8")
        if cleaning_failed:
            headline = "[bold yellow]⚡ Partially cleaned:[/bold yellow]"
        else:
            headline = "[bold green]✨ Cleaned:[/bold green]"
        self.console.print(
            f"{headline} [dim bright_white]{file_path}[/dim bright_white]"
        )
    except PermissionError as e:
        report(
            "Permission denied",
            f"Permission denied while cleaning {file_path}",
            ErrorCode.PERMISSION_ERROR,
            f"Check file permissions for {file_path} and ensure you have write access",
            e,
        )
    except OSError as e:
        report(
            "File system error",
            f"File system error while cleaning {file_path}",
            ErrorCode.FILE_WRITE_ERROR,
            f"Check that {file_path} exists and is not being used by another process",
            e,
        )
    except UnicodeDecodeError as e:
        report(
            "Encoding error",
            f"Encoding error while reading {file_path}",
            ErrorCode.FILE_READ_ERROR,
            f"File {file_path} contains non-UTF-8 characters. Please check the file encoding.",
            e,
        )
    except Exception as e:
        report(
            "Unexpected error",
            f"Unexpected error while cleaning {file_path}",
            ErrorCode.UNEXPECTED_ERROR,
            "This is an unexpected error. Please report this issue with the file content if possible.",
            e,
        )
218
+
219
+ def _initialize_docstring_state(self) -> dict[str, t.Any]:
220
+ return {
221
+ "in_docstring": False,
222
+ "delimiter": None,
223
+ "waiting": False,
224
+ "function_indent": 0,
225
+ "removed_docstring": False,
226
+ "in_multiline_def": False,
227
+ }
228
+
229
def _handle_function_definition(
    self, line: str, stripped: str, state: dict[str, t.Any]
) -> bool:
    """Arm the scanner when ``stripped`` opens a ``def``/``class``.

    Records the definition's indentation and whether the header continues
    on later lines (it does not end with ``:``). Returns ``True`` when the
    line was a definition header.
    """
    if not self._is_function_or_class_definition(stripped):
        return False
    indent = len(line) - len(line.lstrip())
    state.update(
        waiting=True,
        function_indent=indent,
        removed_docstring=False,
        in_multiline_def=not stripped.endswith(":"),
    )
    return True
239
+
240
+ def _handle_multiline_definition(
241
+ self, line: str, stripped: str, state: dict[str, t.Any]
242
+ ) -> bool:
243
+ if state["in_multiline_def"]:
244
+ if stripped.endswith(":"):
245
+ state["in_multiline_def"] = False
246
+ return True
247
+ return False
248
+
249
+ def _handle_waiting_docstring(
250
+ self, lines: list[str], i: int, stripped: str, state: dict[str, t.Any]
251
+ ) -> tuple[bool, str | None]:
252
+ if state["waiting"] and stripped:
253
+ if self._handle_docstring_start(stripped, state):
254
+ pass_line = None
255
+ if not state["in_docstring"]:
256
+ function_indent: int = state["function_indent"]
257
+ if self._needs_pass_statement(lines, i + 1, function_indent):
258
+ pass_line = " " * (function_indent + 4) + "pass"
259
+ state["removed_docstring"] = True
260
+ return True, pass_line
261
+ else:
262
+ state["waiting"] = False
263
+ return False, None
264
+
265
+ def _handle_docstring_content(
266
+ self, lines: list[str], i: int, stripped: str, state: dict[str, t.Any]
267
+ ) -> tuple[bool, str | None]:
268
+ if state["in_docstring"]:
269
+ if self._handle_docstring_end(stripped, state):
270
+ pass_line = None
271
+ function_indent: int = state["function_indent"]
272
+ if self._needs_pass_statement(lines, i + 1, function_indent):
273
+ pass_line = " " * (function_indent + 4) + "pass"
274
+ state["removed_docstring"] = False
275
+ return True, pass_line
276
+ else:
277
+ return True, None
278
+ return False, None
279
+
280
def _process_line(
    self, lines: list[str], i: int, line: str, state: dict[str, t.Any]
) -> tuple[bool, str | None]:
    """Run one source line through the docstring scanner.

    Returns ``(handled, text)``. When ``handled`` is true, ``text`` is what
    should be emitted in place of the line (``None`` means drop it). When
    false, the original line is returned unchanged.
    """
    stripped = line.strip()
    if self._handle_function_definition(
        line, stripped, state
    ) or self._handle_multiline_definition(line, stripped, state):
        return True, line

    for handler in (self._handle_waiting_docstring, self._handle_docstring_content):
        handled, replacement = handler(lines, i, stripped, state)
        if handled:
            return True, replacement

    if state["removed_docstring"] and stripped:
        state["removed_docstring"] = False
    return False, line
297
+
298
def remove_docstrings(self, code: str) -> str:
    """Return ``code`` with function/class docstrings stripped line by line."""
    source_lines = code.split("\n")
    state = self._initialize_docstring_state()
    kept: list[str] = []
    for index, text in enumerate(source_lines):
        handled, replacement = self._process_line(source_lines, index, text, state)
        if not handled:
            kept.append(text)
        elif replacement is not None:
            kept.append(replacement)
    return "\n".join(kept)
310
+
311
+ def _is_function_or_class_definition(self, stripped_line: str) -> bool:
312
+ return stripped_line.startswith(("def ", "class ", "async def "))
313
+
314
+ def _handle_docstring_start(self, stripped: str, state: dict[str, t.Any]) -> bool:
315
+ if not stripped.startswith(('"""', "'''", '"', "'")):
316
+ return False
317
+ if stripped.startswith(('"""', "'''")):
318
+ delimiter = stripped[:3]
319
+ else:
320
+ delimiter = stripped[0]
321
+ state["delimiter"] = delimiter
322
+ if self._is_single_line_docstring(stripped, delimiter):
323
+ state["waiting"] = False
324
+ return True
325
+ else:
326
+ state["in_docstring"] = True
327
+ state["waiting"] = False
328
+ return True
329
+
330
+ def _is_single_line_docstring(self, stripped: str, delimiter: str) -> bool:
331
+ return stripped.endswith(delimiter) and len(stripped) > len(delimiter)
332
+
333
+ def _handle_docstring_end(self, stripped: str, state: dict[str, t.Any]) -> bool:
334
+ if state["delimiter"] and stripped.endswith(state["delimiter"]):
335
+ state["in_docstring"] = False
336
+ state["delimiter"] = None
337
+ return True
338
+ return False
339
+
340
+ def _needs_pass_statement(
341
+ self, lines: list[str], start_index: int, function_indent: int
342
+ ) -> bool:
343
+ for i in range(start_index, len(lines)):
344
+ line = lines[i]
345
+ stripped = line.strip()
346
+ if not stripped:
347
+ continue
348
+ line_indent = len(line) - len(line.lstrip())
349
+ if line_indent <= function_indent:
350
+ return True
351
+ if line_indent > function_indent:
352
+ return False
353
+ return True
354
+
355
def remove_line_comments(self, code: str) -> str:
    """Strip ``#`` comments from ``code``.

    Blank lines are kept untouched; a line that becomes empty once its
    comment is removed is dropped entirely.
    """
    kept: list[str] = []
    for raw in code.split("\n"):
        if not raw.strip():
            kept.append(raw)
            continue
        without_comment = self._process_line_for_comments(raw)
        if without_comment:
            kept.append(without_comment)
    return "\n".join(kept)
366
+
367
+ def _process_line_for_comments(self, line: str) -> str:
368
+ result: list[str] = []
369
+ string_state = {"in_string": None}
370
+ for i, char in enumerate(line):
371
+ if self._handle_string_character(char, i, line, string_state, result):
372
+ continue
373
+ elif self._handle_comment_character(char, i, line, string_state, result):
374
+ break
375
+ else:
376
+ result.append(char)
377
+ return "".join(result).rstrip()
378
+
379
+ def _handle_string_character(
380
+ self,
381
+ char: str,
382
+ index: int,
383
+ line: str,
384
+ string_state: dict[str, t.Any],
385
+ result: list[str],
386
+ ) -> bool:
387
+ if char not in ("'", '"'):
388
+ return False
389
+ if index > 0 and line[index - 1] == "\\":
390
+ return False
391
+ if string_state["in_string"] is None:
392
+ string_state["in_string"] = char
393
+ elif string_state["in_string"] == char:
394
+ string_state["in_string"] = None
395
+ result.append(char)
396
+ return True
397
+
398
+ def _handle_comment_character(
399
+ self,
400
+ char: str,
401
+ index: int,
402
+ line: str,
403
+ string_state: dict[str, t.Any],
404
+ result: list[str],
405
+ ) -> bool:
406
+ if char != "#" or string_state["in_string"] is not None:
407
+ return False
408
+ comment = line[index:].strip()
409
+ if self._is_special_comment_line(comment):
410
+ result.append(line[index:])
411
+ return True
412
+
413
+ def _is_special_comment_line(self, comment: str) -> bool:
414
+ special_comment_pattern = (
415
+ r"^#\s*(?:type:\s*ignore(?:\[.*?\])?|noqa|nosec|pragma:\s*no\s*cover"
416
+ r"|pylint:\s*disable|mypy:\s*ignore)"
417
+ )
418
+ return bool(re.match(special_comment_pattern, comment))
419
+
420
def remove_extra_whitespace(self, code: str) -> str:
    """Right-strip every line and drop blank lines the skip rules reject.

    Trailing blank lines are removed from the result.
    """
    source_lines = code.split("\n")
    kept: list[str] = []
    fn_state = {"in_function": False, "function_indent": 0}
    imp_state = {"in_imports": False, "last_import_type": None}
    for index, raw in enumerate(source_lines):
        text = raw.rstrip()
        indentless = text.lstrip()
        self._update_function_state(text, indentless, fn_state)
        self._update_import_state(text, indentless, imp_state)
        # Only blank lines are ever candidates for removal.
        if text or not self._should_skip_empty_line(
            index, source_lines, kept, fn_state, imp_state
        ):
            kept.append(text)
    return "\n".join(self._remove_trailing_empty_lines(kept))
437
+
438
def remove_docstrings_streaming(self, code: str) -> str:
    """Strip docstrings; inputs of 10000+ characters go through a generator.

    Shorter inputs are delegated to ``remove_docstrings``. The generator
    path still splits the whole input into a list of lines up front.
    """
    if len(code) < 10000:
        return self.remove_docstrings(code)

    def surviving_lines():
        source_lines = code.split("\n")
        state = self._initialize_docstring_state()
        for index, text in enumerate(source_lines):
            handled, replacement = self._process_line(
                source_lines, index, text, state
            )
            if not handled:
                yield text
            elif replacement is not None:
                yield replacement

    return "\n".join(surviving_lines())
456
+
457
def remove_line_comments_streaming(self, code: str) -> str:
    """Strip comments; inputs of 10000+ characters go through a generator.

    Shorter inputs are delegated to ``remove_line_comments``. Blank lines
    are kept; lines left empty after comment removal are dropped.
    """
    if len(code) < 10000:
        return self.remove_line_comments(code)

    def surviving_lines():
        for raw in code.split("\n"):
            if not raw.strip():
                yield raw
                continue
            without_comment = self._process_line_for_comments(raw)
            if without_comment:
                yield without_comment

    return "\n".join(surviving_lines())
471
+
472
def remove_extra_whitespace_streaming(self, code: str) -> str:
    """Normalise blank lines; inputs of 10000+ characters use a generator.

    Shorter inputs are delegated to ``remove_extra_whitespace``. Trailing
    blank lines are removed from the result.
    """
    if len(code) < 10000:
        return self.remove_extra_whitespace(code)

    def surviving_lines():
        source_lines = code.split("\n")
        fn_state: dict[str, t.Any] = {"in_function": False, "function_indent": 0}
        imp_state: dict[str, t.Any] = {"in_imports": False, "last_import_type": None}
        # Mirror of what has been yielded so far; the skip rule inspects it.
        emitted: list[str] = []
        for index, raw in enumerate(source_lines):
            text = raw.rstrip()
            indentless = text.lstrip()
            self._update_function_state(text, indentless, fn_state)
            self._update_import_state(text, indentless, imp_state)
            if not text and self._should_skip_empty_line(
                index, source_lines, emitted, fn_state, imp_state
            ):
                continue
            emitted.append(text)
            yield text

    collected = list(surviving_lines())
    return "\n".join(self._remove_trailing_empty_lines(collected))
502
+
503
+ def _update_function_state(
504
+ self, line: str, stripped_line: str, function_tracker: dict[str, t.Any]
505
+ ) -> None:
506
+ if stripped_line.startswith(("def ", "async def ")):
507
+ function_tracker["in_function"] = True
508
+ function_tracker["function_indent"] = len(line) - len(stripped_line)
509
+ elif self._is_function_end(line, stripped_line, function_tracker):
510
+ function_tracker["in_function"] = False
511
+ function_tracker["function_indent"] = 0
512
+
513
+ def _update_import_state(
514
+ self, line: str, stripped_line: str, import_tracker: dict[str, t.Any]
515
+ ) -> None:
516
+ if stripped_line.startswith(("import ", "from ")):
517
+ import_tracker["in_imports"] = True
518
+ if self._is_stdlib_import(stripped_line):
519
+ current_type = "stdlib"
520
+ elif self._is_local_import(stripped_line):
521
+ current_type = "local"
522
+ else:
523
+ current_type = "third_party"
524
+ import_tracker["last_import_type"] = current_type
525
+ elif stripped_line and not stripped_line.startswith("#"):
526
+ import_tracker["in_imports"] = False
527
+ import_tracker["last_import_type"] = None
528
+
529
+ @staticmethod
530
+ @lru_cache(maxsize=256)
531
+ def _is_stdlib_module(module: str) -> bool:
532
+ stdlib_modules = {
533
+ "os",
534
+ "sys",
535
+ "re",
536
+ "json",
537
+ "datetime",
538
+ "time",
539
+ "pathlib",
540
+ "typing",
541
+ "collections",
542
+ "itertools",
543
+ "functools",
544
+ "operator",
545
+ "math",
546
+ "random",
547
+ "uuid",
548
+ "urllib",
549
+ "http",
550
+ "html",
551
+ "xml",
552
+ "email",
553
+ "csv",
554
+ "sqlite3",
555
+ "subprocess",
556
+ "threading",
557
+ "multiprocessing",
558
+ "asyncio",
559
+ "contextlib",
560
+ "dataclasses",
561
+ "enum",
562
+ "abc",
563
+ "io",
564
+ "tempfile",
565
+ "shutil",
566
+ "glob",
567
+ "pickle",
568
+ "copy",
569
+ "heapq",
570
+ "bisect",
571
+ "array",
572
+ "struct",
573
+ "zlib",
574
+ "hashlib",
575
+ "hmac",
576
+ "secrets",
577
+ "base64",
578
+ "binascii",
579
+ "codecs",
580
+ "locale",
581
+ "platform",
582
+ "socket",
583
+ "ssl",
584
+ "ipaddress",
585
+ "logging",
586
+ "warnings",
587
+ "inspect",
588
+ "ast",
589
+ "dis",
590
+ "tokenize",
591
+ "keyword",
592
+ "linecache",
593
+ "traceback",
594
+ "weakref",
595
+ "gc",
596
+ "ctypes",
597
+ "unittest",
598
+ "doctest",
599
+ "pdb",
600
+ "profile",
601
+ "cProfile",
602
+ "timeit",
603
+ "trace",
604
+ "calendar",
605
+ "decimal",
606
+ "fractions",
607
+ "statistics",
608
+ "tomllib",
609
+ }
610
+ return module in stdlib_modules
611
+
612
+ def _is_stdlib_import(self, stripped_line: str) -> bool:
613
+ try:
614
+ if stripped_line.startswith("from "):
615
+ module = stripped_line.split()[1].split(".")[0]
616
+ else:
617
+ module = stripped_line.split()[1].split(".")[0]
618
+ except IndexError:
619
+ return False
620
+ return CodeCleaner._is_stdlib_module(module)
621
+
622
+ def _is_local_import(self, stripped_line: str) -> bool:
623
+ return stripped_line.startswith("from .") or " . " in stripped_line
624
+
625
+ def _is_function_end(
626
+ self, line: str, stripped_line: str, function_tracker: dict[str, t.Any]
627
+ ) -> bool:
628
+ return (
629
+ function_tracker["in_function"]
630
+ and bool(line)
631
+ and (len(line) - len(stripped_line) <= function_tracker["function_indent"])
632
+ and (not stripped_line.startswith(("@", "#")))
633
+ )
634
+
635
+ def _should_skip_empty_line(
636
+ self,
637
+ line_idx: int,
638
+ lines: list[str],
639
+ cleaned_lines: list[str],
640
+ function_tracker: dict[str, t.Any],
641
+ import_tracker: dict[str, t.Any],
642
+ ) -> bool:
643
+ if line_idx > 0 and cleaned_lines and (not cleaned_lines[-1]):
644
+ return True
645
+
646
+ if self._is_import_section_separator(line_idx, lines, import_tracker):
647
+ return False
648
+
649
+ if function_tracker["in_function"]:
650
+ return self._should_skip_function_empty_line(line_idx, lines)
651
+ return False
652
+
653
+ def _is_import_section_separator(
654
+ self, line_idx: int, lines: list[str], import_tracker: dict[str, t.Any]
655
+ ) -> bool:
656
+ if not import_tracker["in_imports"]:
657
+ return False
658
+
659
+ next_line_idx = line_idx + 1
660
+ while next_line_idx < len(lines) and not lines[next_line_idx].strip():
661
+ next_line_idx += 1
662
+
663
+ if next_line_idx >= len(lines):
664
+ return False
665
+
666
+ next_line = lines[next_line_idx].strip()
667
+ if not next_line.startswith(("import ", "from ")):
668
+ return False
669
+
670
+ if self._is_stdlib_import(next_line):
671
+ next_type = "stdlib"
672
+ elif self._is_local_import(next_line):
673
+ next_type = "local"
674
+ else:
675
+ next_type = "third_party"
676
+
677
+ return import_tracker["last_import_type"] != next_type
678
+
679
+ def _should_skip_function_empty_line(self, line_idx: int, lines: list[str]) -> bool:
680
+ next_line_idx = line_idx + 1
681
+ if next_line_idx >= len(lines):
682
+ return False
683
+ next_line = lines[next_line_idx].strip()
684
+ return not self._is_significant_next_line(next_line)
685
+
686
+ def _is_significant_next_line(self, next_line: str) -> bool:
687
+ if next_line.startswith(("return", "class ", "def ", "async def ", "@")):
688
+ return True
689
+ if next_line in ("pass", "break", "continue", "raise"):
690
+ return True
691
+ return self._is_special_comment(next_line)
692
+
693
+ def _is_special_comment(self, line: str) -> bool:
694
+ if not line.startswith("#"):
695
+ return False
696
+ special_patterns = ("type:", "noqa", "nosec", "pragma:", "pylint:", "mypy:")
697
+ return any(pattern in line for pattern in special_patterns)
698
+
699
+ def _remove_trailing_empty_lines(self, lines: list[str]) -> list[str]:
700
+ while lines and (not lines[-1]):
701
+ lines.pop()
702
+ return lines
703
+
704
def reformat_code(self, code: str) -> str:
    """Format ``code`` by running ``uv run ruff format`` on a temporary file.

    Args:
        code: Python source text.

    Returns:
        The formatted source, or ``code`` unchanged when Ruff exits
        non-zero, cannot be run, or the temporary file cannot be prepared.
        Failures are reported via the console and ``handle_error``.
    """
    try:
        import tempfile

        with tempfile.NamedTemporaryFile(
            suffix=".py", mode="w+", delete=False
        ) as temp:
            temp_path = Path(temp.name)
        # Explicit UTF-8: the callers read and write sources as UTF-8, and
        # the platform default encoding may not be able to represent them.
        temp_path.write_text(code, encoding="utf-8")
        try:
            result = subprocess.run(
                ["uv", "run", "ruff", "format", str(temp_path)],
                check=False,
                capture_output=True,
                text=True,
            )
            if result.returncode == 0:
                formatted_code = temp_path.read_text(encoding="utf-8")
            else:
                self.console.print(
                    f"[bold bright_yellow]⚠️ Ruff formatting failed: {result.stderr}[/bold bright_yellow]"
                )
                handle_error(
                    ExecutionError(
                        message="Code formatting failed",
                        error_code=ErrorCode.FORMATTING_ERROR,
                        details=result.stderr,
                        recovery="Check Ruff configuration and formatting rules",
                    ),
                    console=self.console,
                    exit_on_error=False,
                )
                formatted_code = code
        except Exception as e:
            self.console.print(
                f"[bold bright_red]❌ Error running Ruff: {e}[/bold bright_red]"
            )
            handle_error(
                ExecutionError(
                    message="Error running Ruff",
                    error_code=ErrorCode.FORMATTING_ERROR,
                    details=str(e),
                    recovery="Verify Ruff is installed and configured correctly",
                ),
                console=self.console,
                exit_on_error=False,
            )
            formatted_code = code
        finally:
            # The file was created with delete=False, so remove it here.
            with suppress(FileNotFoundError):
                temp_path.unlink()
        return formatted_code
    except Exception as e:
        self.console.print(
            f"[bold bright_red]❌ Error during reformatting: {e}[/bold bright_red]"
        )
        handle_error(
            ExecutionError(
                message="Error during reformatting",
                error_code=ErrorCode.FORMATTING_ERROR,
                details=str(e),
                recovery="Check file permissions and disk space",
            ),
            console=self.console,
            # Match every other call in this class so the fallback return
            # below is reached. NOTE(review): handle_error's default for
            # exit_on_error is not visible here — confirm.
            exit_on_error=False,
        )
        return code
770
+
771
+ async def clean_files_async(self, pkg_dir: Path | None) -> None:
772
+ if pkg_dir is None:
773
+ return
774
+ python_files = [
775
+ file_path
776
+ for file_path in pkg_dir.rglob("*.py")
777
+ if not str(file_path.parent).startswith("__")
778
+ ]
779
+ if not python_files:
780
+ return
781
+ max_concurrent = min(len(python_files), 8)
782
+ semaphore = asyncio.Semaphore(max_concurrent)
783
+
784
+ async def clean_with_semaphore(file_path: Path) -> None:
785
+ async with semaphore:
786
+ await self.clean_file_async(file_path)
787
+
788
+ tasks = [clean_with_semaphore(file_path) for file_path in python_files]
789
+ await asyncio.gather(*tasks, return_exceptions=True)
790
+
791
+ await self._cleanup_cache_directories_async(pkg_dir)
792
+
793
async def clean_file_async(self, file_path: Path) -> None:
    """Async variant of the per-file clean: read, run four stages, write back.

    The three text stages run synchronously, then the reformat stage is
    awaited. A stage that raises is reported as a warning and the working
    text is reset to the file's original contents. Read/write failures are
    reported through ``handle_error`` with ``exit_on_error=False``.
    """
    text_stages = (
        ("remove_line_comments_streaming", "remove line comments from"),
        ("remove_docstrings_streaming", "remove docstrings from"),
        ("remove_extra_whitespace_streaming", "remove extra whitespace from"),
    )

    def warn(action: str, exc: Exception) -> None:
        self.console.print(
            f"[bold bright_yellow]⚠️ Warning: Failed to {action} {file_path}: {exc}[/bold bright_yellow]"
        )

    def escalate(message: str, error_code: t.Any, recovery: str, exc: Exception) -> None:
        handle_error(
            ExecutionError(
                message=message,
                error_code=error_code,
                details=str(exc),
                recovery=recovery,
            ),
            console=self.console,
            exit_on_error=False,
        )

    try:
        async with aiofiles.open(file_path, encoding="utf-8") as f:  # type: ignore[misc]
            original_code: str = await f.read()  # type: ignore[misc]
        code: str = original_code
        cleaning_failed = False
        for method_name, action in text_stages:
            try:
                # Lazy lookup keeps a lookup failure inside the stage's handler.
                code = getattr(self, method_name)(code)
            except Exception as e:
                warn(action, e)
                code = original_code
                cleaning_failed = True
        try:
            code = await self.reformat_code_async(code)
        except Exception as e:
            warn("reformat", e)
            code = original_code
            cleaning_failed = True
        async with aiofiles.open(file_path, "w", encoding="utf-8") as f:  # type: ignore[misc]
            await f.write(code)  # type: ignore[misc]
        if cleaning_failed:
            headline = "[bold yellow]⚡ Partially cleaned:[/bold yellow]"
        else:
            headline = "[bold green]✨ Cleaned:[/bold green]"
        self.console.print(
            f"{headline} [dim bright_white]{file_path}[/dim bright_white]"
        )
    except PermissionError as e:
        self.console.print(
            f"[red]Failed to clean: {file_path} (Permission denied)[/red]"
        )
        escalate(
            f"Permission denied while cleaning {file_path}",
            ErrorCode.PERMISSION_ERROR,
            f"Check file permissions for {file_path} and ensure you have write access",
            e,
        )
    except OSError as e:
        self.console.print(
            f"[red]Failed to clean: {file_path} (File system error)[/red]"
        )
        escalate(
            f"File system error while cleaning {file_path}",
            ErrorCode.FILE_WRITE_ERROR,
            f"Check that {file_path} exists and is not being used by another process",
            e,
        )
    except UnicodeDecodeError as e:
        self.console.print(
            f"[red]Failed to clean: {file_path} (Encoding error)[/red]"
        )
        escalate(
            f"Encoding error while cleaning {file_path}",
            ErrorCode.FILE_READ_ERROR,
            f"Check the file encoding of {file_path} - it may not be UTF-8",
            e,
        )
    except Exception as e:
        self.console.print(f"[red]Unexpected error cleaning {file_path}: {e}[/red]")
        escalate(
            f"Unexpected error while cleaning {file_path}",
            ErrorCode.UNEXPECTED_ERROR,
            "Please report this issue with the full error details",
            e,
        )
895
+
896
async def reformat_code_async(self, code: str) -> str:
    """Format ``code`` with ``uv run ruff format`` without blocking the loop.

    The text is written to a temporary ``.py`` file, Ruff is run as a
    subprocess, and the file is read back on success. On a non-zero exit or
    any error the original ``code`` is returned. The temporary file is
    removed afterwards.
    """
    try:
        import tempfile

        with tempfile.NamedTemporaryFile(
            suffix=".py", mode="w+", delete=False
        ) as handle:
            scratch = Path(handle.name)
        async with aiofiles.open(scratch, "w", encoding="utf-8") as f:  # type: ignore[misc]
            await f.write(code)  # type: ignore[misc]

        # Falls back to the input unless Ruff succeeds and the read works.
        formatted_code = code
        try:
            proc = await asyncio.create_subprocess_exec(
                "uv",
                "run",
                "ruff",
                "format",
                str(scratch),
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
            )
            _, stderr = await proc.communicate()
            if proc.returncode != 0:
                self.console.print(
                    f"[bold bright_yellow]⚠️ Warning: Ruff format failed with return code {proc.returncode}[/bold bright_yellow]"
                )
                if stderr:
                    self.console.print(f"[dim]Ruff stderr: {stderr.decode()}[/dim]")
            else:
                async with aiofiles.open(scratch, encoding="utf-8") as f:  # type: ignore[misc]
                    formatted_code = await f.read()  # type: ignore[misc]
        except Exception as e:
            self.console.print(
                f"[bold bright_red]❌ Error running Ruff: {e}[/bold bright_red]"
            )
            handle_error(
                ExecutionError(
                    message="Error running Ruff",
                    error_code=ErrorCode.FORMATTING_ERROR,
                    details=str(e),
                    recovery="Verify Ruff is installed and configured correctly",
                ),
                console=self.console,
                exit_on_error=False,
            )
            formatted_code = code
        finally:
            with suppress(FileNotFoundError):
                scratch.unlink()

        return formatted_code
    except Exception as e:
        self.console.print(
            f"[bold bright_red]❌ Error during reformatting: {e}[/bold bright_red]"
        )
        handle_error(
            ExecutionError(
                message="Error during reformatting",
                error_code=ErrorCode.FORMATTING_ERROR,
                details=str(e),
                recovery="Check file permissions and disk space",
            ),
            console=self.console,
            exit_on_error=False,
        )
        return code
962
+
963
+ async def _cleanup_cache_directories_async(self, pkg_dir: Path) -> None:
964
+ def cleanup_sync() -> None:
965
+ with suppress(PermissionError, OSError):
966
+ pycache_dir = pkg_dir / "__pycache__"
967
+ if pycache_dir.exists():
968
+ for cache_file in pycache_dir.iterdir():
969
+ with suppress(PermissionError, OSError):
970
+ cache_file.unlink()
971
+ pycache_dir.rmdir()
972
+ parent_pycache = pkg_dir.parent / "__pycache__"
973
+ if parent_pycache.exists():
974
+ for cache_file in parent_pycache.iterdir():
975
+ with suppress(PermissionError, OSError):
976
+ cache_file.unlink()
977
+ parent_pycache.rmdir()
978
+
979
+ loop = asyncio.get_event_loop()
980
+ await loop.run_in_executor(None, cleanup_sync)