code-normalizer-pro 3.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- code_normalizer_pro/__init__.py +5 -0
- code_normalizer_pro/cli.py +42 -0
- code_normalizer_pro/code_normalize_pro.py +989 -0
- code_normalizer_pro-3.0.1.dist-info/METADATA +304 -0
- code_normalizer_pro-3.0.1.dist-info/RECORD +9 -0
- code_normalizer_pro-3.0.1.dist-info/WHEEL +5 -0
- code_normalizer_pro-3.0.1.dist-info/entry_points.txt +2 -0
- code_normalizer_pro-3.0.1.dist-info/licenses/LICENSE +22 -0
- code_normalizer_pro-3.0.1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
"""Console entry point for the code-normalizer-pro installed package.
|
|
2
|
+
|
|
3
|
+
Uses runpy so the core script (src/code_normalize_pro.py) does not need
|
|
4
|
+
to be a proper importable module. Works correctly under editable installs
|
|
5
|
+
(pip install -e .). Before building a wheel for PyPI distribution, copy
|
|
6
|
+
src/code_normalize_pro.py into this package directory and update
|
|
7
|
+
_find_script() to look for it there.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import runpy
|
|
13
|
+
import sys
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def _find_script() -> Path:
|
|
18
|
+
"""Return the path to code_normalize_pro.py.
|
|
19
|
+
|
|
20
|
+
Search order:
|
|
21
|
+
1. src/ next to package root -- editable / dev install
|
|
22
|
+
2. Inside this package dir -- future bundled layout
|
|
23
|
+
"""
|
|
24
|
+
pkg_dir = Path(__file__).resolve().parent # code_normalizer_pro/
|
|
25
|
+
root = pkg_dir.parent
|
|
26
|
+
candidates = [
|
|
27
|
+
root / "src" / "code_normalize_pro.py", # editable install
|
|
28
|
+
pkg_dir / "code_normalize_pro.py", # future bundled
|
|
29
|
+
]
|
|
30
|
+
for p in candidates:
|
|
31
|
+
if p.is_file():
|
|
32
|
+
return p
|
|
33
|
+
checked = ", ".join(str(c) for c in candidates)
|
|
34
|
+
raise FileNotFoundError(
|
|
35
|
+
f"code_normalize_pro.py not found. Checked: {checked}"
|
|
36
|
+
)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def main() -> None:
    """Resolve the core script and execute it as ``__main__``."""
    script_path = str(_find_script())
    # Point argv[0] at the executed script so its argparse/help output
    # reports the right program name.
    sys.argv[0] = script_path
    runpy.run_path(script_path, run_name="__main__")
|
|
@@ -0,0 +1,989 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Code Normalizer Pro - Production-Grade Code Normalization Tool
|
|
5
|
+
================================================================
|
|
6
|
+
|
|
7
|
+
High-Impact Features:
|
|
8
|
+
- Parallel Processing (multi-core performance)
|
|
9
|
+
- Pre-Commit Hook Generation
|
|
10
|
+
- Incremental Processing (hash-based caching)
|
|
11
|
+
- Multi-Language Syntax Checking
|
|
12
|
+
- Interactive Mode (file-by-file approval)
|
|
13
|
+
|
|
14
|
+
Plus all v2.0 features:
|
|
15
|
+
- Dry-run mode, In-place editing, Automatic backups
|
|
16
|
+
- Progress tracking, Detailed statistics, Error handling
|
|
17
|
+
|
|
18
|
+
Author: MR
|
|
19
|
+
Date: 2026-02-09
|
|
20
|
+
Version: 3.0 Pro
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
import argparse
|
|
24
|
+
import subprocess
|
|
25
|
+
import sys
|
|
26
|
+
import os
|
|
27
|
+
import hashlib
|
|
28
|
+
import json
|
|
29
|
+
import shutil
|
|
30
|
+
from pathlib import Path
|
|
31
|
+
from typing import Optional, Tuple, List, Dict, Set
|
|
32
|
+
from dataclasses import dataclass, asdict
|
|
33
|
+
from datetime import datetime
|
|
34
|
+
from concurrent.futures import ProcessPoolExecutor, as_completed
|
|
35
|
+
from multiprocessing import cpu_count
|
|
36
|
+
|
|
37
|
+
# Fix Windows console encoding so the unicode status glyphs printed by this
# tool (✓, ⊗, ⊙, →, emoji) do not raise UnicodeEncodeError on legacy code pages.
if sys.platform == 'win32':
    try:
        sys.stdout.reconfigure(encoding='utf-8')
        sys.stderr.reconfigure(encoding='utf-8')
    except AttributeError:
        # reconfigure() appeared in Python 3.7; rewrap the raw buffers instead.
        import io
        sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
        sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8')
    os.environ['PYTHONIOENCODING'] = 'utf-8'

# Optional dependencies: tqdm is used for progress bars when available.
try:
    from tqdm import tqdm
    HAS_TQDM = True
except ImportError:
    HAS_TQDM = False

# Encodings tried in order when decoding a file. Note that latin-1 accepts
# any byte sequence, so it acts as a last-resort fallback (iso-8859-1 is the
# same codec and is kept only for explicitness; it is effectively unreachable).
COMMON_ENCODINGS = [
    "utf-8",
    "utf-8-sig",
    "utf-16",
    "utf-16-le",
    "utf-16-be",
    "windows-1252",
    "latin-1",
    "iso-8859-1",
]

# Multi-language syntax checkers.
# Each entry: "command" to run, whether source is fed via "stdin", and
# whether the file path is appended as an argument ("file_arg").
SYNTAX_CHECKERS = {
    ".py": {
        # Use the interpreter running this tool rather than a bare "python":
        # many systems (e.g. Linux distros shipping only python3) have no
        # "python" on PATH, which made the .py check silently unavailable.
        "command": [sys.executable, "-m", "py_compile"],
        "stdin": False,
        "file_arg": True,
    },
    ".js": {
        "command": ["node", "--check"],
        "stdin": False,
        "file_arg": True,
    },
    ".ts": {
        "command": ["tsc", "--noEmit"],
        "stdin": False,
        "file_arg": True,
    },
    ".go": {
        "command": ["gofmt", "-e"],
        "stdin": True,
        "file_arg": False,
    },
    ".rs": {
        "command": ["rustc", "--crate-type", "lib", "-"],
        "stdin": True,
        "file_arg": False,
    },
    ".c": {
        "command": ["gcc", "-fsyntax-only", "-x", "c"],
        "stdin": False,
        "file_arg": True,
    },
    ".cpp": {
        "command": ["g++", "-fsyntax-only", "-x", "c++"],
        "stdin": False,
        "file_arg": True,
    },
    ".java": {
        # NOTE(review): javac -Xstdout normally expects a filename operand;
        # verify this invocation against the installed JDK before relying on it.
        "command": ["javac", "-Xstdout"],
        "stdin": False,
        "file_arg": True,
    },
}

# Name of the incremental-processing hash cache written next to the CWD.
CACHE_FILE = ".normalize-cache.json"
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
@dataclass
class FileCache:
    """Cache entry for a file"""
    # Path string (as passed in) used as the cache key.
    path: str
    # SHA256 hex digest of the file contents at normalization time.
    hash: str
    # ISO-8601 timestamp of the last successful normalization.
    last_normalized: str
    # File size in bytes; used as a cheap pre-check before rehashing.
    size: int
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
@dataclass
class ProcessStats:
    """Statistics for processing session"""
    # Number of files seen (including skipped/cached/errored).
    total_files: int = 0
    # Files actually normalized (or that would be, in dry-run mode).
    processed: int = 0
    # Files skipped: already normalized, user-declined, or cache hits.
    skipped: int = 0
    # Subset of skipped that were skipped via the hash cache.
    cached: int = 0
    # Files that raised during processing.
    errors: int = 0
    # Files whose encoding was converted to UTF-8.
    encoding_changes: int = 0
    # Files (not occurrences) that had CR/CRLF line endings fixed.
    newline_fixes: int = 0
    # Files (not characters) that had trailing whitespace stripped.
    whitespace_fixes: int = 0
    syntax_checks_passed: int = 0
    syntax_checks_failed: int = 0
    # Net byte shrinkage across all processed files (UTF-8 sizes).
    bytes_removed: int = 0
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
class CacheManager:
    """Manages file hash cache for incremental processing"""

    def __init__(self, cache_path: Optional[Path] = None):
        self.cache_path = cache_path if cache_path is not None else Path(CACHE_FILE)
        self.cache: Dict[str, FileCache] = {}
        self.load()

    def load(self):
        """Load cache from disk"""
        if not self.cache_path.exists():
            return
        try:
            with open(self.cache_path, 'r', encoding='utf-8') as handle:
                raw = json.load(handle)
            self.cache = {key: FileCache(**entry) for key, entry in raw.items()}
        except Exception as e:
            # A corrupt or unreadable cache is non-fatal: start fresh.
            print(f"Warning: Could not load cache: {e}")
            self.cache = {}

    def save(self):
        """Save cache to disk"""
        try:
            with open(self.cache_path, 'w', encoding='utf-8') as handle:
                json.dump({key: asdict(entry) for key, entry in self.cache.items()},
                          handle, indent=2)
        except Exception as e:
            print(f"Warning: Could not save cache: {e}")

    def get_file_hash(self, path: Path) -> str:
        """Calculate SHA256 hash of file"""
        digest = hashlib.sha256()
        with open(path, 'rb') as handle:
            while True:
                chunk = handle.read(8192)
                if not chunk:
                    break
                digest.update(chunk)
        return digest.hexdigest()

    def is_cached(self, path: Path) -> bool:
        """Check if file is in cache and unchanged"""
        entry = self.cache.get(str(path))
        if entry is None:
            return False

        # A vanished file can never be a cache hit.
        if not path.exists():
            return False

        # Size comparison is a cheap early-out before the full hash.
        if path.stat().st_size != entry.size:
            return False

        # Hash comparison (slower but accurate).
        return self.get_file_hash(path) == entry.hash

    def update(self, path: Path):
        """Update cache entry for file"""
        key = str(path)
        self.cache[key] = FileCache(
            path=key,
            hash=self.get_file_hash(path),
            last_normalized=datetime.now().isoformat(),
            size=path.stat().st_size,
        )
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
class CodeNormalizer:
    """Production-grade code normalizer with advanced features.

    Normalizes text files to UTF-8, LF line endings, stripped trailing
    whitespace, and a single final newline. Supports dry-run previews,
    in-place edits with backups, hash-based incremental caching,
    interactive approval, and parallel processing.
    """

    def __init__(self,
                 dry_run: bool = False,
                 verbose: bool = False,
                 in_place: bool = False,
                 create_backup: bool = True,
                 use_cache: bool = True,
                 interactive: bool = False,
                 parallel: bool = False,
                 max_workers: Optional[int] = None):
        self.dry_run = dry_run
        self.verbose = verbose
        self.in_place = in_place
        self.create_backup = create_backup
        self.use_cache = use_cache
        self.interactive = interactive
        self.parallel = parallel
        # Default worker count leaves one core free for the main process.
        self.max_workers = max_workers or max(1, cpu_count() - 1)
        self.stats = ProcessStats()
        self.errors: List[Tuple[Path, str]] = []
        self.cache = CacheManager() if use_cache else None

    def _looks_like_utf16_text(self, data: bytes) -> bool:
        """Best-effort check for UTF-16 text before binary rejection."""
        if not data:
            return False

        # BOM signatures
        if data.startswith((b"\xff\xfe", b"\xfe\xff")):
            return True

        sample = data[:256]
        if len(sample) < 4:
            return False

        for enc in ("utf-16-le", "utf-16-be"):
            try:
                decoded = sample.decode(enc)
            except UnicodeDecodeError:
                continue

            if not decoded:
                continue

            printable = sum(1 for ch in decoded if ch.isprintable() or ch in "\r\n\t")
            alpha = sum(1 for ch in decoded if ch.isalpha())
            printable_ratio = printable / len(decoded)

            # Require mostly printable content and at least some alphabetic text.
            if printable_ratio >= 0.85 and alpha >= max(1, len(decoded) // 20):
                return True

        return False

    def guess_and_read(self, path: Path) -> Tuple[str, str]:
        """Detect encoding and read file.

        Returns (encoding_name, decoded_text). Raises ValueError for files
        that look binary, UnicodeError if no common encoding decodes them.
        """
        data = path.read_bytes()

        # NUL bytes usually mean binary -- unless the data is UTF-16 text.
        if b"\x00" in data and not self._looks_like_utf16_text(data):
            raise ValueError(f"File appears to be binary")

        last_error = None
        for enc in COMMON_ENCODINGS:
            try:
                text = data.decode(enc)
                return enc, text
            except UnicodeDecodeError as e:
                last_error = e
                continue

        raise UnicodeError(
            f"Could not decode with common encodings"
        ) from last_error

    def normalize_text(self, text: str) -> Tuple[str, dict]:
        """Normalize text and track changes.

        Returns (normalized_text, changes) where changes records counts of
        newline fixes, stripped whitespace chars, net bytes removed, and
        whether a final newline was appended.
        """
        changes = {
            'newline_fixes': 0,
            'whitespace_fixes': 0,
            'bytes_removed': 0,
            'final_newline_added': False
        }

        original = text
        original_size = len(text.encode('utf-8'))

        # Normalize newlines (CRLF first, then stray CR).
        if '\r\n' in text or '\r' in text:
            text = text.replace("\r\n", "\n").replace("\r", "\n")
            changes['newline_fixes'] = original.count('\r\n') + original.count('\r')

        # Strip trailing whitespace
        lines = text.split("\n")
        stripped_lines = [line.rstrip() for line in lines]

        whitespace_removed = sum(
            len(orig) - len(stripped)
            for orig, stripped in zip(lines, stripped_lines)
        )
        changes['whitespace_fixes'] = whitespace_removed

        text = "\n".join(stripped_lines)

        # Ensure final newline
        if not text.endswith("\n"):
            text += "\n"
            changes['final_newline_added'] = True

        # Calculate bytes removed (can be negative if a newline was added).
        new_size = len(text.encode('utf-8'))
        changes['bytes_removed'] = original_size - new_size

        return text, changes

    def syntax_check(self, path: Path, language: Optional[str] = None) -> Tuple[bool, str]:
        """Run syntax check on file - multi-language support.

        Returns (ok, reason). A missing checker binary counts as a pass so
        that optional toolchains do not fail the run.
        NOTE(review): the `language` parameter is currently unused; the
        checker is selected from the file extension only.
        """
        ext = path.suffix.lower()

        if ext not in SYNTAX_CHECKERS:
            return True, "No checker available"

        checker = SYNTAX_CHECKERS[ext]
        cmd = checker['command'].copy()

        try:
            if checker['file_arg']:
                cmd.append(str(path))
                result = subprocess.run(
                    cmd,
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.PIPE,
                    timeout=10,
                    text=True
                )
            else:
                # Read file and pass via stdin
                with open(path, 'r', encoding='utf-8') as f:
                    content = f.read()
                result = subprocess.run(
                    cmd,
                    input=content,
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.PIPE,
                    timeout=10,
                    text=True
                )

            if result.returncode == 0:
                return True, "OK"
            else:
                # Truncate tool stderr to keep console output compact.
                return False, result.stderr.strip()[:100]

        except FileNotFoundError:
            # Checker binary not installed: treat as pass, not failure.
            return True, f"{checker['command'][0]} not installed"
        except subprocess.TimeoutExpired:
            return False, "Timeout"
        except Exception as e:
            return False, str(e)[:100]

    def create_backup_file(self, path: Path) -> Path:
        """Create timestamped backup next to the original and return its path."""
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        backup_path = path.with_suffix(f".backup_{timestamp}{path.suffix}")
        backup_path.write_bytes(path.read_bytes())
        return backup_path

    def get_output_path(self, input_path: Path, output_path: Optional[Path]) -> Path:
        """Determine output path: explicit output > in-place > "_clean" copy."""
        if output_path:
            return output_path
        if self.in_place:
            return input_path
        return input_path.with_name(input_path.stem + "_clean" + input_path.suffix)

    def show_diff(self, path: Path, original: str, normalized: str) -> bool:
        """Show diff and get user approval (interactive mode).

        Returns True to apply changes, False to skip this file.
        May exit the whole program if the user chooses quit.
        """
        print(f"\n{'='*70}")
        print(f"File: {path}")
        print(f"{'='*70}")

        # Simple line-by-line diff (does not detect inserted/deleted lines,
        # only in-place line changes -- sufficient for whitespace fixes).
        orig_lines = original.split('\n')
        norm_lines = normalized.split('\n')

        changes = []
        for i, (orig, norm) in enumerate(zip(orig_lines, norm_lines), 1):
            if orig != norm:
                changes.append((i, orig, norm))

        # Show first 10 changes
        for line_num, orig, norm in changes[:10]:
            print(f"\nLine {line_num}:")
            print(f" - {repr(orig)}")
            print(f" + {repr(norm)}")

        if len(changes) > 10:
            print(f"\n... and {len(changes) - 10} more changes")

        print(f"\n{'='*70}")

        # Get user input
        while True:
            choice = input("Apply changes? [y]es / [n]o / [d]iff all / [q]uit: ").lower()

            if choice in ('y', 'yes'):
                return True
            elif choice in ('n', 'no'):
                return False
            elif choice in ('d', 'diff'):
                # Show all changes
                for line_num, orig, norm in changes:
                    print(f"\nLine {line_num}:")
                    print(f" - {repr(orig)}")
                    print(f" + {repr(norm)}")
            elif choice in ('q', 'quit'):
                print("Quitting...")
                sys.exit(0)
            else:
                print("Invalid choice. Please enter y, n, d, or q.")

    def process_file(self, path: Path, output_path: Optional[Path] = None,
                     check_syntax: bool = False) -> bool:
        """Process a single file.

        Returns True unless an exception occurred (skips also return True).
        Updates self.stats and, unless in dry-run, writes the output file.
        """
        self.stats.total_files += 1

        try:
            # Check cache first (incremental processing)
            if self.use_cache and self.cache and self.cache.is_cached(path):
                if self.verbose:
                    print(f"⊙ CACHED {path.name} - unchanged since last run")
                self.stats.cached += 1
                self.stats.skipped += 1
                return True

            # Read and detect encoding
            enc, text = self.guess_and_read(path)

            # Normalize
            normalized, changes = self.normalize_text(text)

            # Determine output
            out_path = self.get_output_path(path, output_path)

            # Check if changes needed
            if text == normalized:
                if self.verbose:
                    print(f"⊗ SKIP {path.name} - already normalized")
                self.stats.skipped += 1

                # Update cache even for unchanged files
                if self.use_cache and self.cache:
                    self.cache.update(path)

                return True

            # Interactive mode
            if self.interactive and not self.dry_run:
                if not self.show_diff(path, text, normalized):
                    print(f"⊗ SKIP {path.name} - user declined")
                    self.stats.skipped += 1
                    return True

            # Dry run mode: report would-be changes and count them as processed.
            if self.dry_run:
                print(f"[DRY RUN] Would normalize: {path}")
                if enc != "utf-8":
                    print(f" Encoding: {enc} → utf-8")
                    self.stats.encoding_changes += 1
                if changes['newline_fixes'] > 0:
                    print(f" Newlines: {changes['newline_fixes']} fixes")
                    self.stats.newline_fixes += 1
                if changes['whitespace_fixes'] > 0:
                    print(f" Whitespace: {changes['whitespace_fixes']} chars removed")
                    self.stats.whitespace_fixes += 1
                if changes['final_newline_added']:
                    print(f" Final newline: added")

                self.stats.bytes_removed += changes['bytes_removed']
                self.stats.processed += 1
                return True

            # Create backup if needed (only when overwriting the original).
            backup_created = None
            if self.in_place and self.create_backup:
                backup_created = self.create_backup_file(path)

            # Write normalized version (always UTF-8 with LF endings).
            out_path.write_text(normalized, encoding="utf-8", newline="\n")

            # Update stats
            self.stats.processed += 1
            self.stats.bytes_removed += changes['bytes_removed']
            if enc != "utf-8":
                self.stats.encoding_changes += 1
            if changes['newline_fixes'] > 0:
                self.stats.newline_fixes += 1
            if changes['whitespace_fixes'] > 0:
                self.stats.whitespace_fixes += 1

            # Report
            if self.in_place:
                msg = f"✓ {path.name} (in-place)"
            else:
                msg = f"✓ {path.name} → {out_path.name}"

            if enc != "utf-8":
                msg += f" [{enc}→utf-8]"

            print(msg)

            if backup_created:
                print(f" Backup: {backup_created.name}")

            # Syntax check (runs against the written output, not the input).
            if check_syntax:
                ok, reason = self.syntax_check(out_path)
                status = "✓ OK" if ok else f"✗ {reason}"
                print(f" Syntax: {status}")

                if ok:
                    self.stats.syntax_checks_passed += 1
                else:
                    self.stats.syntax_checks_failed += 1

            # Update cache
            if self.use_cache and self.cache:
                self.cache.update(path)

            return True

        except Exception as e:
            self.stats.errors += 1
            self.errors.append((path, str(e)))
            print(f"✗ ERROR {path.name}: {e}")
            return False

    def walk_and_process(self, root: Path, exts: List[str],
                         check_syntax: bool = False) -> None:
        """Process all files in directory tree.

        Collects files matching the given extensions, prefilters via the
        cache, asks for confirmation before bulk in-place edits, and then
        processes sequentially or in parallel.
        """
        # Collect files
        files = []
        for ext in exts:
            files.extend(root.rglob(f"*{ext}"))

        files = [f for f in files if f.is_file()]

        if not files:
            print(f"No files with extensions {exts} found in {root}")
            return

        # Cache prefilter: account for hits up front so parallel workers
        # never see unchanged files.
        files_to_process = files
        if self.use_cache and self.cache:
            uncached_files = []
            cached_hits = 0
            for file_path in files:
                if self.cache.is_cached(file_path):
                    cached_hits += 1
                    self.stats.cached += 1
                    self.stats.skipped += 1
                    self.stats.total_files += 1
                    if self.verbose:
                        print(f"⊙ CACHED {file_path.name} - unchanged since last run")
                else:
                    uncached_files.append(file_path)
            files_to_process = uncached_files

            if cached_hits and self.verbose:
                print(f"⊙ Cache prefilter skipped {cached_hits} unchanged file(s)")

        print(f"\n📁 Found {len(files)} file(s) to process")
        print(f" Extensions: {', '.join(exts)}")
        mode_desc = "DRY RUN" if self.dry_run else "IN-PLACE" if self.in_place else "CLEAN COPY"
        if self.parallel:
            mode_desc += f" (PARALLEL {self.max_workers} workers)"
        if self.use_cache:
            mode_desc += " (CACHED)"
        if self.interactive:
            mode_desc += " (INTERACTIVE)"
        print(f" Mode: {mode_desc}")

        if not files_to_process:
            print("All discovered files were unchanged and skipped by cache.")
            return

        # Confirmation (interactive mode confirms per-file instead).
        if not self.dry_run and self.in_place and not self.interactive:
            response = input(f"\n⚠️ In-place editing will modify {len(files_to_process)} files. Continue? (y/N): ")
            if response.lower() != 'y':
                print("Cancelled")
                return

        # Process files

        # Interactive mode needs stdin, so it always runs sequentially.
        if self.parallel and not self.interactive:
            self._process_parallel(files_to_process, check_syntax)
        else:
            self._process_sequential(files_to_process, check_syntax)

        # Save cache
        if self.use_cache and self.cache and not self.dry_run:
            self.cache.save()

    def _process_sequential(self, files: List[Path], check_syntax: bool):
        """Process files sequentially (with a tqdm bar when available)."""
        iterator = tqdm(files, desc="Processing") if HAS_TQDM and not self.interactive else files

        for file_path in iterator:
            self.process_file(file_path, check_syntax=check_syntax)

    def _process_parallel(self, files: List[Path], check_syntax: bool):
        """Process files in parallel.

        Workers run module-level process_file_worker (must be picklable);
        per-file stat deltas are merged back into self.stats here, and the
        cache is updated only in this main process.
        """
        print(f"\n🚀 Parallel processing with {self.max_workers} workers...\n")

        with ProcessPoolExecutor(max_workers=self.max_workers) as executor:
            # Submit all tasks
            futures = {
                executor.submit(
                    process_file_worker,
                    file_path,
                    self.dry_run,
                    self.in_place,
                    self.create_backup,
                    check_syntax
                ): file_path
                for file_path in files
            }

            # Progress tracking
            iterator = as_completed(futures)
            if HAS_TQDM:
                iterator = tqdm(iterator, total=len(files), desc="Processing")

            # Collect results
            for future in iterator:
                file_path = futures[future]
                try:
                    success, stats_update, error = future.result()

                    # Update stats
                    self.stats.total_files += 1
                    if success:
                        self.stats.processed += stats_update['processed']
                        self.stats.skipped += stats_update['skipped']
                        self.stats.encoding_changes += stats_update['encoding_changes']
                        self.stats.newline_fixes += stats_update['newline_fixes']
                        self.stats.whitespace_fixes += stats_update['whitespace_fixes']
                        self.stats.bytes_removed += stats_update['bytes_removed']
                        self.stats.syntax_checks_passed += stats_update['syntax_checks_passed']
                        self.stats.syntax_checks_failed += stats_update['syntax_checks_failed']
                        if self.use_cache and self.cache and not self.dry_run:
                            self.cache.update(file_path)
                    else:
                        self.stats.errors += 1
                        self.errors.append((file_path, error))

                except Exception as e:
                    self.stats.errors += 1
                    self.errors.append((file_path, str(e)))

    def print_summary(self):
        """Print processing summary (counts, fixes, and up to 10 errors)."""
        print("\n" + "="*70)
        print("PROCESSING SUMMARY")
        print("="*70)
        print(f" Total files: {self.stats.total_files}")
        print(f" ✓ Processed: {self.stats.processed}")
        print(f" ⊗ Skipped: {self.stats.skipped}")
        if self.use_cache:
            print(f" ⊙ Cached hits: {self.stats.cached}")
        print(f" ✗ Errors: {self.stats.errors}")
        print()
        print(f" Encoding changes: {self.stats.encoding_changes}")
        print(f" Newline fixes: {self.stats.newline_fixes}")
        print(f" Whitespace fixes: {self.stats.whitespace_fixes}")
        print(f" Bytes removed: {self.stats.bytes_removed:,}")

        if self.stats.syntax_checks_passed > 0 or self.stats.syntax_checks_failed > 0:
            print()
            print(f" Syntax checks passed: {self.stats.syntax_checks_passed}")
            print(f" Syntax checks failed: {self.stats.syntax_checks_failed}")

        if self.errors:
            print("\n❌ ERRORS:")
            for path, error in self.errors[:10]:
                print(f" {path.name}: {error}")
            if len(self.errors) > 10:
                print(f" ... and {len(self.errors) - 10} more")

        print("="*70)
|
|
698
|
+
|
|
699
|
+
|
|
700
|
+
def process_file_worker(file_path: Path, dry_run: bool, in_place: bool,
                        create_backup: bool, check_syntax: bool) -> Tuple[bool, dict, str]:
    """Run one file through a throwaway CodeNormalizer in a child process.

    Returns (success, per-file stat deltas, first error message or "").
    The cache is deliberately disabled here: it is managed by the main process.
    """
    worker = CodeNormalizer(
        dry_run=dry_run,
        in_place=in_place,
        create_backup=create_backup,
        use_cache=False,  # Cache managed by main process
        interactive=False,
        parallel=False
    )

    ok = worker.process_file(file_path, check_syntax=check_syntax)

    # Keys must match what CodeNormalizer._process_parallel merges back.
    counter_names = (
        'processed',
        'skipped',
        'encoding_changes',
        'newline_fixes',
        'whitespace_fixes',
        'bytes_removed',
        'syntax_checks_passed',
        'syntax_checks_failed',
    )
    stats_update = {name: getattr(worker.stats, name) for name in counter_names}

    first_error = worker.errors[0][1] if worker.errors else ""

    return ok, stats_update, first_error
|
|
728
|
+
|
|
729
|
+
|
|
730
|
+
def install_git_hook(hook_type: str = "pre-commit") -> bool:
    """Install pre-commit hook for automatic normalization.

    Writes a self-contained Python hook into .git/hooks/<hook_type> that
    dry-runs this script against each staged .py file and blocks the commit
    if any file still needs normalization. Returns False when not run from
    a git repository root, True on success.
    """
    git_dir = Path(".git")

    # Requires running from the repository root (where .git lives).
    if not git_dir.exists():
        print("❌ Not a git repository")
        return False

    hooks_dir = git_dir / "hooks"
    hooks_dir.mkdir(exist_ok=True)

    hook_path = hooks_dir / hook_type

    # Absolute path to this script is baked into the generated hook.
    normalizer_script = Path(__file__).resolve()

    # Create hook script. Double braces ({{...}}) are literal braces in the
    # generated file; {normalizer_script} is substituted now.
    hook_script = f"""#!/usr/bin/env python3
# Auto-generated by code_normalize_pro.py
import subprocess
import sys
from pathlib import Path

def main():
    # Get staged Python files
    result = subprocess.run(
        ["git", "diff", "--cached", "--name-only", "--diff-filter=ACM"],
        capture_output=True,
        text=True
    )

    files = [
        f for f in result.stdout.strip().split('\\n')
        if f.endswith('.py') and Path(f).exists()
    ]

    if not files:
        sys.exit(0)

    print(f"🔍 Checking {{len(files)}} Python file(s)...")

    # Run normalizer in check mode, one file at a time. The CLI accepts a
    # single positional path, so passing all files at once breaks argparse.
    needs_normalization = []
    for file_path in files:
        result = subprocess.run(
            [sys.executable, r"{normalizer_script}", file_path, "--dry-run"],
            capture_output=True,
            text=True
        )

        if result.returncode != 0:
            print("\\n❌ Normalizer execution failed")
            if result.stderr.strip():
                print(result.stderr.strip())
            sys.exit(result.returncode)

        if "Would normalize:" in result.stdout:
            needs_normalization.append(file_path)

    if needs_normalization:
        print("\\n⚠️ Some files need normalization:")
        for file_path in needs_normalization:
            print(f" - {{file_path}}")
        print("\\nRun: python src/code_normalize_pro.py <file> --in-place")
        print("Or add --no-verify to skip this check")
        sys.exit(1)

    print("✅ All files are normalized")
    sys.exit(0)

if __name__ == "__main__":
    main()
"""

    # Write hook with LF endings and mark it executable (no-op on Windows).
    hook_path.write_text(hook_script, encoding="utf-8", newline="\n")
    hook_path.chmod(0o755)

    print(f"✅ Installed {hook_type} hook at {hook_path}")
    print(f" Hook will check Python files before commit")
    print(f" Use 'git commit --no-verify' to skip check")

    return True
|
|
813
|
+
|
|
814
|
+
|
|
815
|
+
def main():
    """CLI entry point for Code Normalizer Pro.

    Parses arguments, validates flag combinations, then dispatches to one of
    three modes: git-hook installation (--install-hook), directory walking
    (path is a dir), or single-file processing.

    Exit codes: 0 on success, 1 on error or validation failure, 130 on Ctrl-C.
    """
    ap = argparse.ArgumentParser(
        description="Code Normalizer Pro - Production-grade normalization tool",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Dry run with parallel processing
  python code_normalize_pro.py /path/to/dir --dry-run --parallel

  # Interactive mode (file-by-file approval)
  python code_normalize_pro.py /path/to/dir --interactive

  # In-place with incremental processing (uses cache)
  python code_normalize_pro.py /path/to/dir -e .py --in-place --cache

  # Multi-language syntax checking
  python code_normalize_pro.py /path/to/dir -e .py -e .js -e .go --check

  # Install git pre-commit hook
  python code_normalize_pro.py --install-hook

  # Parallel processing (all cores)
  python code_normalize_pro.py /path/to/dir --parallel --in-place
"""
    )

    ap.add_argument(
        "path",
        type=Path,
        nargs="?",
        help="File or directory to process"
    )
    ap.add_argument(
        "-e", "--ext",
        action="append",
        help="File extensions (e.g. -e .py -e .js)"
    )
    ap.add_argument(
        "-o", "--output",
        type=Path,
        help="Output file (single file mode only)"
    )
    ap.add_argument(
        "--check",
        action="store_true",
        help="Run syntax check after normalization"
    )
    ap.add_argument(
        "--dry-run",
        action="store_true",
        help="Preview changes without modifying files"
    )
    ap.add_argument(
        "--in-place",
        action="store_true",
        help="Edit files in-place"
    )
    ap.add_argument(
        "--no-backup",
        action="store_true",
        help="Don't create backups (dangerous!)"
    )
    # Caching is ON by default; the help strings now say so explicitly to
    # resolve the flag/default contradiction (--cache previously read as
    # opt-in even though it was already the default).
    ap.add_argument(
        "--cache",
        action="store_true",
        help="Use incremental processing, skipping unchanged files "
             "(default: enabled; flag kept for explicitness)"
    )
    ap.add_argument(
        "--no-cache",
        action="store_true",
        help="Disable cache and process all files (overrides --cache)"
    )
    ap.add_argument(
        "--interactive",
        action="store_true",
        help="Interactive mode (approve each file)"
    )
    ap.add_argument(
        "--parallel",
        action="store_true",
        help="Parallel processing (multi-core)"
    )
    ap.add_argument(
        "--workers",
        type=int,
        help=f"Number of parallel workers (default: {max(1, cpu_count() - 1)})"
    )
    ap.add_argument(
        "--install-hook",
        action="store_true",
        help="Install git pre-commit hook"
    )
    ap.add_argument(
        "-v", "--verbose",
        action="store_true",
        help="Verbose output"
    )

    args = ap.parse_args()

    # Hook installation runs before path validation: --install-hook takes no path.
    if args.install_hook:
        success = install_git_hook()
        sys.exit(0 if success else 1)

    # Validate argument combinations.
    if not args.path:
        ap.print_help()
        sys.exit(1)

    if args.output and args.path.is_dir():
        print("Error: --output only works with single file")
        sys.exit(1)

    if args.no_backup and not args.in_place:
        print("Warning: --no-backup has no effect without --in-place")

    if args.interactive and args.parallel:
        # Interactive approval is inherently sequential.
        print("Warning: --interactive disables --parallel")
        args.parallel = False

    # Cache is enabled by default; --no-cache always wins, even if --cache
    # is also given. (Equivalent to the old True/no_cache/cache ladder.)
    use_cache = not args.no_cache

    normalizer = CodeNormalizer(
        dry_run=args.dry_run,
        verbose=args.verbose,
        in_place=args.in_place,
        create_backup=not args.no_backup,
        use_cache=use_cache,
        interactive=args.interactive,
        parallel=args.parallel,
        max_workers=args.workers
    )

    print("="*70)
    print("CODE NORMALIZER PRO v3.0")
    print("="*70)

    try:
        if args.path.is_dir():
            # Default to Python files when no -e/--ext was supplied.
            exts = args.ext or [".py"]
            normalizer.walk_and_process(args.path, exts, check_syntax=args.check)
        else:
            if not args.path.exists():
                print(f"Error: File not found: {args.path}")
                sys.exit(1)

            normalizer.process_file(args.path, args.output, check_syntax=args.check)

        # Summary
        normalizer.print_summary()

        # Non-zero exit when any file errored, so CI can fail the build.
        sys.exit(0 if normalizer.stats.errors == 0 else 1)

    except KeyboardInterrupt:
        # 130 = conventional exit status for SIGINT.
        print("\n\n⚠️  Interrupted by user")
        normalizer.print_summary()
        sys.exit(130)
    except Exception as e:
        print(f"\n❌ Fatal error: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,304 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: code-normalizer-pro
|
|
3
|
+
Version: 3.0.1
|
|
4
|
+
Summary: Production-grade code normalization tool for encoding, newlines, and whitespace hygiene.
|
|
5
|
+
Author: MR
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/MRJR0101/code-normalizer-pro
|
|
8
|
+
Project-URL: Documentation, https://github.com/MRJR0101/code-normalizer-pro/blob/main/README.md
|
|
9
|
+
Project-URL: Issues, https://github.com/MRJR0101/code-normalizer-pro/issues
|
|
10
|
+
Keywords: code-quality,formatter,normalization,cli,python
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Environment :: Console
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Topic :: Software Development :: Quality Assurance
|
|
19
|
+
Classifier: Topic :: Software Development :: Build Tools
|
|
20
|
+
Requires-Python: >=3.10
|
|
21
|
+
Description-Content-Type: text/markdown
|
|
22
|
+
License-File: LICENSE
|
|
23
|
+
Requires-Dist: tqdm
|
|
24
|
+
Provides-Extra: dev
|
|
25
|
+
Requires-Dist: pytest>=7; extra == "dev"
|
|
26
|
+
Dynamic: license-file
|
|
27
|
+
|
|
28
|
+
# CODE - Code Normalization Tool
|
|
29
|
+
|
|
30
|
+
**Python CLI that cleans up source code encoding, line endings, and whitespace
|
|
31
|
+
across entire codebases -- with parallel processing, SHA256 caching, and pre-commit hook support.**
|
|
32
|
+
|
|
33
|
+
- Location: `C:\Dev\PROJECTS\CODE`
|
|
34
|
+
- Status: v3.0 code complete. Package stub ready. No pyproject.toml = blocked from PyPI.
|
|
35
|
+
- Updated: 2026-03-10
|
|
36
|
+
|
|
37
|
+
---
|
|
38
|
+
|
|
39
|
+
## What It Does
|
|
40
|
+
|
|
41
|
+
Run it against any directory and it will:
|
|
42
|
+
|
|
43
|
+
1. Detect and convert file encoding to UTF-8
|
|
44
|
+
(handles utf-8, utf-8-sig, utf-16, utf-16-le, utf-16-be, windows-1252, latin-1, iso-8859-1)
|
|
45
|
+
2. Fix line endings -- CRLF to LF
|
|
46
|
+
3. Strip trailing whitespace from every line
|
|
47
|
+
4. Ensure a single newline at end of file
|
|
48
|
+
5. Optionally validate syntax for Python, JS, TS, Go, Rust, C, C++, Java
|
|
49
|
+
|
|
50
|
+
Files already clean are skipped. SHA256 caching means repeat runs on unchanged files
|
|
51
|
+
are near-instant. Multi-core parallel mode handles large codebases at 80-200 files/sec.
|
|
52
|
+
|
|
53
|
+
---
|
|
54
|
+
|
|
55
|
+
## Quick Start
|
|
56
|
+
|
|
57
|
+
```powershell
|
|
58
|
+
Set-Location C:\Dev\PROJECTS\CODE
|
|
59
|
+
|
|
60
|
+
# See what would change without touching anything
|
|
61
|
+
python main.py C:\path\to\project --dry-run
|
|
62
|
+
|
|
63
|
+
# Normalize everything in-place using all CPU cores
|
|
64
|
+
python main.py C:\path\to\project --parallel --in-place
|
|
65
|
+
|
|
66
|
+
# Normalize only Python and JavaScript files
|
|
67
|
+
python main.py C:\path\to\project -e .py -e .js --in-place
|
|
68
|
+
|
|
69
|
+
# Review and approve each file before it's written
|
|
70
|
+
python main.py C:\path\to\project --interactive
|
|
71
|
+
|
|
72
|
+
# Run syntax validation after normalizing
|
|
73
|
+
python main.py C:\path\to\project --in-place --check
|
|
74
|
+
|
|
75
|
+
# Install a pre-commit hook into a git repo
|
|
76
|
+
cd C:\your-repo
|
|
77
|
+
python C:\Dev\PROJECTS\CODE\main.py --install-hook
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
`main.py` at root is a thin wrapper that delegates to `src/code_normalize_pro.py`.
|
|
81
|
+
Call either one -- same result.
|
|
82
|
+
|
|
83
|
+
---
|
|
84
|
+
|
|
85
|
+
## Pre-Commit Hook
|
|
86
|
+
|
|
87
|
+
Checks only staged files before each commit. Blocks commit if any need normalization
|
|
88
|
+
and prints the fix command.
|
|
89
|
+
|
|
90
|
+
```powershell
|
|
91
|
+
# One-time install per repo
|
|
92
|
+
cd C:\your-repo
|
|
93
|
+
python C:\Dev\PROJECTS\CODE\main.py --install-hook
|
|
94
|
+
|
|
95
|
+
# Commit as normal -- hook fires automatically
|
|
96
|
+
git commit -m "your message"
|
|
97
|
+
|
|
98
|
+
# Skip hook for one commit
|
|
99
|
+
git commit --no-verify -m "your message"
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
---
|
|
103
|
+
|
|
104
|
+
## Performance
|
|
105
|
+
|
|
106
|
+
| Files | Sequential | Parallel 4-core | Speedup |
|
|
107
|
+
|-------|------------|-----------------|---------|
|
|
108
|
+
| 100 | 3.2s | 1.1s | 2.9x |
|
|
109
|
+
| 500 | 16.8s | 4.3s | 3.9x |
|
|
110
|
+
| 1000 | 33.5s | 7.1s | 4.7x |
|
|
111
|
+
|
|
112
|
+
8 cores: 150-200 files/sec. SHA256 cache on unchanged files: 500-1000 files/sec.
|
|
113
|
+
Workers default to one less than the CPU count (minimum 1). Override with `--workers N`.
|
|
114
|
+
|
|
115
|
+
---
|
|
116
|
+
|
|
117
|
+
## Testing
|
|
118
|
+
|
|
119
|
+
```powershell
|
|
120
|
+
Set-Location C:\Dev\PROJECTS\CODE
|
|
121
|
+
.\.venv\Scripts\Activate.ps1
|
|
122
|
+
|
|
123
|
+
python -m pytest -q
|
|
124
|
+
python main.py --help
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
Test files in `tests/` cover the main tool plus all four launch/sales scripts.
|
|
128
|
+
All 5 features tested on 2026-02-09 (see docs/TEST_REPORT.md). Manual confirmation
|
|
129
|
+
of interactive mode still pending.
|
|
130
|
+
|
|
131
|
+
---
|
|
132
|
+
|
|
133
|
+
## Project Layout
|
|
134
|
+
|
|
135
|
+
```
|
|
136
|
+
CODE/
|
|
137
|
+
main.py -- Root entrypoint. Delegates to src/code_normalize_pro.py
|
|
138
|
+
src/
|
|
139
|
+
code_normalize_pro.py -- v3.0 Pro. 917 lines. The active tool.
|
|
140
|
+
code_normalize_v2.py -- v2.0. Kept for reference.
|
|
141
|
+
code_normalizer_pro/ -- PyPI package stub
|
|
142
|
+
__init__.py -- Exposes __version__ = "3.0.1"
|
|
143
|
+
cli.py -- Console entry point (calls src/code_normalize_pro.py)
|
|
144
|
+
README.md
|
|
145
|
+
config/
|
|
146
|
+
settings.py -- Env-var settings loader (not wired up yet)
|
|
147
|
+
docs/
|
|
148
|
+
README.md -- Full feature reference docs
|
|
149
|
+
TEST_REPORT.md -- Test results from 2026-02-09
|
|
150
|
+
ARCHITECTURE.md -- Stub
|
|
151
|
+
launch/ -- Outreach templates, user tracking CSV, metrics JSON
|
|
152
|
+
sales/ -- Pricing, pipeline CSV, customer offer template
|
|
153
|
+
release/
|
|
154
|
+
alpha_release_checklist.md -- Step-by-step PyPI publish checklist
|
|
155
|
+
release_readiness.json -- Says ready=true, wheel+sdist listed
|
|
156
|
+
roadmaps/
|
|
157
|
+
README.md -- Overview of all 6 paths
|
|
158
|
+
01_solo_dev_tool.md -- CHOSEN: bootstrap to PyPI
|
|
159
|
+
02_dev_tool_saas.md
|
|
160
|
+
03_enterprise_platform.md
|
|
161
|
+
04_open_source_support.md
|
|
162
|
+
05_grammarly_for_code.md
|
|
163
|
+
06_ai_transformation_engine.md
|
|
164
|
+
scripts/
|
|
165
|
+
launch_metrics.py
|
|
166
|
+
feedback_prioritizer.py
|
|
167
|
+
sales_pipeline_metrics.py
|
|
168
|
+
release_prep.py
|
|
169
|
+
tests/
|
|
170
|
+
test_code_normalize_pro.py
|
|
171
|
+
test_feedback_prioritizer.py
|
|
172
|
+
test_launch_metrics.py
|
|
173
|
+
test_release_prep.py
|
|
174
|
+
test_sales_pipeline_metrics.py
|
|
175
|
+
site/
|
|
176
|
+
index.html -- Static landing page
|
|
177
|
+
styles.css
|
|
178
|
+
.github/
|
|
179
|
+
workflows/ci.yml -- CI: install, smoke check, pytest, build
|
|
180
|
+
ISSUE_TEMPLATE/
|
|
181
|
+
pull_request_template.md
|
|
182
|
+
files/
|
|
183
|
+
cache_sandbox/ -- Test fixtures (a.py, b.py)
|
|
184
|
+
smoke_case.py
|
|
185
|
+
EXECUTION_PLAN.md -- 7-day launch plan (all tasks pending)
|
|
186
|
+
VERIFY.md -- Verification runbook
|
|
187
|
+
MISSINGMORE.txt -- Gap tracking
|
|
188
|
+
QUICK_REFERENCE.md -- Command cheat sheet
|
|
189
|
+
CHANGELOG.md -- Stub (unreleased only)
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
---
|
|
193
|
+
|
|
194
|
+
## Dependencies
|
|
195
|
+
|
|
196
|
+
Core: zero. Python 3.10+ only.
|
|
197
|
+
|
|
198
|
+
Optional:
|
|
199
|
+
- `tqdm` -- progress bars
|
|
200
|
+
- Syntax checkers (only needed with `--check`):
|
|
201
|
+
Python: built-in (py_compile) | JS: node | TS: tsc | Go: gofmt
|
|
202
|
+
Rust: rustc | C: gcc | C++: g++ | Java: javac
|
|
203
|
+
|
|
204
|
+
Dev/test: pytest (see requirements.txt)
|
|
205
|
+
|
|
206
|
+
---
|
|
207
|
+
|
|
208
|
+
## Known Issues (fix before PyPI launch)
|
|
209
|
+
|
|
210
|
+
**Critical -- blocks shipping:**
|
|
211
|
+
|
|
212
|
+
1. No `pyproject.toml` -- CI runs `python -m build` which will fail without it.
|
|
213
|
+
The `code_normalizer_pro.egg-info/` dir shows packaging was attempted but no
|
|
214
|
+
config file exists in the tree. Create `pyproject.toml` with src layout and
|
|
215
|
+
console_scripts entry point before running Day 1 tasks.
|
|
216
|
+
|
|
217
|
+
2. `code_normalizer_pro/cli.py` has a broken import:
|
|
218
|
+
`from code_normalize_pro import main`
|
|
219
|
+
After `pip install`, Python looks for a module named `code_normalize_pro` in
|
|
220
|
+
site-packages, not in `src/`. Without a proper src layout in pyproject.toml,
|
|
221
|
+
the installed CLI command will fail on launch.
|
|
222
|
+
|
|
223
|
+
**Code bugs worth fixing:**
|
|
224
|
+
|
|
225
|
+
3. Cache default is on in `__init__` but `--cache` flag implies opt-in and
|
|
226
|
+
`--no-cache` implies opt-out. The flags and the default contradict each other.
|
|
227
|
+
Pick one direction and make the help text match.
|
|
228
|
+
|
|
229
|
+
4. `--parallel --in-place` silently disables backups. `process_file_worker`
|
|
230
|
+
passes `create_backup=False` but backup logic only lives inside `process_file`.
|
|
231
|
+
Users running parallel mode have no backups. Either warn loudly or fix it.
|
|
232
|
+
|
|
233
|
+
5. `walk_and_process` and `process_file` both increment `total_files` for the
|
|
234
|
+
same files. Summary stats will show inflated counts.
|
|
235
|
+
|
|
236
|
+
6. `.normalize-cache.json` lands in CWD, not the target directory. Running the
|
|
237
|
+
tool against three different projects from the same shell session corrupts the
|
|
238
|
+
cache. Pass `root / CACHE_FILE` to CacheManager in `walk_and_process`.
|
|
239
|
+
|
|
240
|
+
7. `--dry-run` always exits 0 even when it finds files needing normalization.
|
|
241
|
+
CI pipelines need a non-zero exit to catch violations. Add `--fail-on-changes`
|
|
242
|
+
or make dry-run exit 1 when changes are detected.
|
|
243
|
+
|
|
244
|
+
**Cleanup:**
|
|
245
|
+
|
|
246
|
+
8. `code_normalize_pro.py` at root -- stale copy. Real file is `src/`. Delete it.
|
|
247
|
+
9. `roadmaps/New Text Document.txt` -- empty temp file. Delete it.
|
|
248
|
+
10. `roadmaps/talking about code.txt` -- saved AI chat session. Delete or move to docs/.
|
|
249
|
+
11. All `README_20260220_*.md.bak` files throughout the tree -- ReadmeForge backups.
|
|
250
|
+
12. `config/settings.py` is a clean env-var loader but nothing imports it.
|
|
251
|
+
Either wire it into `code_normalize_pro.py` or remove it.
|
|
252
|
+
13. `README_PRO.md` at root duplicates `docs/README.md`. Consolidate.
|
|
253
|
+
14. `restore_report.json` and `smoke_report.json` at root -- generated artifacts,
|
|
254
|
+
add to `.gitignore`.
|
|
255
|
+
15. `PROJECT_STATUS.md` says roadmap docs are "coming soon" -- all 6 exist. Stale.
|
|
256
|
+
|
|
257
|
+
---
|
|
258
|
+
|
|
259
|
+
## Launch Status (Path 1 - Solo Dev Tool)
|
|
260
|
+
|
|
261
|
+
EXECUTION_PLAN.md has a 7-day checklist. As of 2026-03-10, nothing started.
|
|
262
|
+
|
|
263
|
+
Before Day 1 tasks will work, pyproject.toml needs to exist (see issue #1 above).
|
|
264
|
+
|
|
265
|
+
Day 1 after pyproject.toml is in place:
|
|
266
|
+
|
|
267
|
+
```powershell
|
|
268
|
+
Set-Location C:\Dev\PROJECTS\CODE
|
|
269
|
+
.\.venv\Scripts\Activate.ps1
|
|
270
|
+
python -m pytest -q
|
|
271
|
+
pip install -e .
|
|
272
|
+
code-normalizer-pro --help
|
|
273
|
+
```
|
|
274
|
+
|
|
275
|
+
Full release steps: see `docs/release/alpha_release_checklist.md`
|
|
276
|
+
|
|
277
|
+
---
|
|
278
|
+
|
|
279
|
+
## CI
|
|
280
|
+
|
|
281
|
+
`.github/workflows/ci.yml` runs on push to main/master and on PRs:
|
|
282
|
+
- Python 3.11
|
|
283
|
+
- pip install from requirements.txt
|
|
284
|
+
- CLI smoke check (main.py and src/code_normalize_pro.py --help)
|
|
285
|
+
- pytest -q
|
|
286
|
+
- python -m build (sdist + wheel)
|
|
287
|
+
|
|
288
|
+
Note: `python -m build` requires `pyproject.toml`. CI will fail until that exists.
|
|
289
|
+
|
|
290
|
+
---
|
|
291
|
+
|
|
292
|
+
## Version History
|
|
293
|
+
|
|
294
|
+
| Version | Date | Changes |
|
|
295
|
+
|---------|------------|---------|
|
|
296
|
+
| v3.0 | 2026-02-09 | Parallel processing, SHA256 caching, pre-commit hooks, multi-language syntax, interactive mode |
|
|
297
|
+
| v2.0 | 2026-02-09 | Dry-run, in-place editing, backups, tqdm, detailed stats |
|
|
298
|
+
| v1.0 | -- | Basic encoding fix, CRLF, whitespace |
|
|
299
|
+
|
|
300
|
+
Package version: 3.0.1 (set in `code_normalizer_pro/__init__.py`)
|
|
301
|
+
|
|
302
|
+
---
|
|
303
|
+
|
|
304
|
+
Developer: MR (Michael Rawls Jr.) -- Houston, TX -- GitHub: MRJR0101
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
code_normalizer_pro/__init__.py,sha256=d0Ji7B4QXO6YY6oMre61-nwRoX6gXK22drzGjek0NqE,98
|
|
2
|
+
code_normalizer_pro/cli.py,sha256=dtpngEplOPZTuXHnPZhQr3H3U1SoEku2xNIF2lp9t2A,1322
|
|
3
|
+
code_normalizer_pro/code_normalize_pro.py,sha256=VlKw_Om_HvJJlphpgZnYsyPAK3hPNnVKhUQFwz6IU3U,31749
|
|
4
|
+
code_normalizer_pro-3.0.1.dist-info/licenses/LICENSE,sha256=dQVLagjhAIyGtgRauZrdLI-lcZh4gJUamSnxHuVqfWQ,1057
|
|
5
|
+
code_normalizer_pro-3.0.1.dist-info/METADATA,sha256=wnk0GLXCmfNjO50bjOoj4KohIXxwFT66LgMmG4KzgHg,10701
|
|
6
|
+
code_normalizer_pro-3.0.1.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
7
|
+
code_normalizer_pro-3.0.1.dist-info/entry_points.txt,sha256=gFQf6V2-Rbe-1ZpOcR2O1TCCxxwcYN6zblHuQ7W7JIE,69
|
|
8
|
+
code_normalizer_pro-3.0.1.dist-info/top_level.txt,sha256=UYaCi8KON0Ez5W78NKGNbnyHHpAs_OH4t2MR5HBlOzE,20
|
|
9
|
+
code_normalizer_pro-3.0.1.dist-info/RECORD,,
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
22
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
code_normalizer_pro
|