encoding-doctor 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- encoding_doctor-0.2.0/PKG-INFO +108 -0
- encoding_doctor-0.2.0/README.md +84 -0
- encoding_doctor-0.2.0/encoding_doctor/__init__.py +19 -0
- encoding_doctor-0.2.0/encoding_doctor/cli.py +244 -0
- encoding_doctor-0.2.0/encoding_doctor/fixer.py +85 -0
- encoding_doctor-0.2.0/encoding_doctor/license.py +193 -0
- encoding_doctor-0.2.0/encoding_doctor/reporter.py +107 -0
- encoding_doctor-0.2.0/encoding_doctor/scanner.py +138 -0
- encoding_doctor-0.2.0/encoding_doctor/verifier.py +67 -0
- encoding_doctor-0.2.0/encoding_doctor.egg-info/PKG-INFO +108 -0
- encoding_doctor-0.2.0/encoding_doctor.egg-info/SOURCES.txt +17 -0
- encoding_doctor-0.2.0/encoding_doctor.egg-info/dependency_links.txt +1 -0
- encoding_doctor-0.2.0/encoding_doctor.egg-info/entry_points.txt +2 -0
- encoding_doctor-0.2.0/encoding_doctor.egg-info/top_level.txt +1 -0
- encoding_doctor-0.2.0/pyproject.toml +46 -0
- encoding_doctor-0.2.0/setup.cfg +4 -0
- encoding_doctor-0.2.0/tests/test_fixer.py +90 -0
- encoding_doctor-0.2.0/tests/test_scanner.py +107 -0
- encoding_doctor-0.2.0/tests/test_verifier.py +61 -0
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: encoding-doctor
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: Scan, fix, and verify file encoding issues. Mojibake, BOM, CRLF, null bytes — fixed in one command.
|
|
5
|
+
Author-email: Stateflow Labs <stateflow.labs@gmail.com>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://stateflow-dev.github.io/stateflowlabs/
|
|
8
|
+
Project-URL: Repository, https://github.com/stateflow-dev/encoding-doctor
|
|
9
|
+
Keywords: encoding,utf-8,mojibake,bom,crlf,file encoding,encoding fix,encoding repair,python encoding,encoding tool,developer tools
|
|
10
|
+
Classifier: Development Status :: 3 - Alpha
|
|
11
|
+
Classifier: Environment :: Console
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: Operating System :: OS Independent
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
20
|
+
Classifier: Topic :: Text Processing
|
|
21
|
+
Classifier: Topic :: Utilities
|
|
22
|
+
Requires-Python: >=3.9
|
|
23
|
+
Description-Content-Type: text/markdown
|
|
24
|
+
|
|
25
|
+
# encoding-doctor
|
|
26
|
+
|
|
27
|
+
**Scan, fix, and verify file encoding issues across your project in one command.**
|
|
28
|
+
|
|
29
|
+
Fixes mojibake, BOM, CRLF line endings, and null bytes (non-UTF-8 files are flagged for manual conversion) —
|
|
30
|
+
automatically detected and repaired, with backups created before every change.
|
|
31
|
+
|
|
32
|
+
Built from real encoding bugs found in production Python projects on Windows.
|
|
33
|
+
|
|
34
|
+
---
|
|
35
|
+
|
|
36
|
+
## Install
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
pip install encoding-doctor
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
---
|
|
43
|
+
|
|
44
|
+
## Usage
|
|
45
|
+
|
|
46
|
+
```bash
|
|
47
|
+
# Step 1 — scan first, always
|
|
48
|
+
enc-doctor scan ./my_project
|
|
49
|
+
|
|
50
|
+
# Step 2 — preview changes without writing
|
|
51
|
+
enc-doctor fix ./my_project --dry-run
|
|
52
|
+
|
|
53
|
+
# Step 3 — fix (backups created automatically as .bak)
|
|
54
|
+
enc-doctor fix ./my_project
|
|
55
|
+
|
|
56
|
+
# Step 4 — verify everything is clean
|
|
57
|
+
enc-doctor verify ./my_project
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
---
|
|
61
|
+
|
|
62
|
+
## What it fixes
|
|
63
|
+
|
|
64
|
+
| Problem | Description |
|
|
65
|
+
|---|---|
|
|
66
|
+
| **Mojibake** | UTF-8 bytes mis-read as cp1252 and saved as garbage |
|
|
67
|
+
| **BOM** | `\xef\xbb\xbf` prefix added by Notepad/Excel that breaks parsers |
|
|
68
|
+
| **CRLF** | Windows `\r\n` mixed with Unix `\n` — causes Git diff noise |
|
|
69
|
+
| **Null bytes** | Binary corruption from FTP or terminal copy-paste |
|
|
70
|
+
| **Non-UTF-8** | Detected and flagged for manual conversion |
|
|
71
|
+
|
|
72
|
+
---
|
|
73
|
+
|
|
74
|
+
## Warning
|
|
75
|
+
|
|
76
|
+
> **encoding-doctor modifies files in-place.**
|
|
77
|
+
>
|
|
78
|
+
> - Always run `scan` first and review the report before running `fix`.
|
|
79
|
+
> - Backups are created automatically as `.bak` files.
|
|
80
|
+
> - Run on a Git-tracked project so you can always revert with `git checkout .`
|
|
81
|
+
> - Do not run `fix` on production files without testing first.
|
|
82
|
+
> - `verify` after every fix before committing.
|
|
83
|
+
|
|
84
|
+
---
|
|
85
|
+
|
|
86
|
+
## Options
|
|
87
|
+
|
|
88
|
+
```bash
|
|
89
|
+
enc-doctor scan <path> [--all] # --all shows clean files too
|
|
90
|
+
enc-doctor fix <path> [--dry-run] # --dry-run previews without writing
|
|
91
|
+
enc-doctor verify <path>
|
|
92
|
+
enc-doctor restore <file> # restore single file from .bak
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
---
|
|
96
|
+
|
|
97
|
+
## Run tests
|
|
98
|
+
|
|
99
|
+
```bash
|
|
100
|
+
pip install pytest
|
|
101
|
+
pytest tests/ -v
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
---
|
|
105
|
+
|
|
106
|
+
## License
|
|
107
|
+
|
|
108
|
+
MIT © [Stateflow Labs](https://github.com/stateflow-dev)
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
# encoding-doctor
|
|
2
|
+
|
|
3
|
+
**Scan, fix, and verify file encoding issues across your project in one command.**
|
|
4
|
+
|
|
5
|
+
Fixes mojibake, BOM, CRLF line endings, and null bytes (non-UTF-8 files are flagged for manual conversion) —
|
|
6
|
+
automatically detected and repaired, with backups created before every change.
|
|
7
|
+
|
|
8
|
+
Built from real encoding bugs found in production Python projects on Windows.
|
|
9
|
+
|
|
10
|
+
---
|
|
11
|
+
|
|
12
|
+
## Install
|
|
13
|
+
|
|
14
|
+
```bash
|
|
15
|
+
pip install encoding-doctor
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
---
|
|
19
|
+
|
|
20
|
+
## Usage
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
# Step 1 — scan first, always
|
|
24
|
+
enc-doctor scan ./my_project
|
|
25
|
+
|
|
26
|
+
# Step 2 — preview changes without writing
|
|
27
|
+
enc-doctor fix ./my_project --dry-run
|
|
28
|
+
|
|
29
|
+
# Step 3 — fix (backups created automatically as .bak)
|
|
30
|
+
enc-doctor fix ./my_project
|
|
31
|
+
|
|
32
|
+
# Step 4 — verify everything is clean
|
|
33
|
+
enc-doctor verify ./my_project
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
---
|
|
37
|
+
|
|
38
|
+
## What it fixes
|
|
39
|
+
|
|
40
|
+
| Problem | Description |
|
|
41
|
+
|---|---|
|
|
42
|
+
| **Mojibake** | UTF-8 bytes mis-read as cp1252 and saved as garbage |
|
|
43
|
+
| **BOM** | `\xef\xbb\xbf` prefix added by Notepad/Excel that breaks parsers |
|
|
44
|
+
| **CRLF** | Windows `\r\n` mixed with Unix `\n` — causes Git diff noise |
|
|
45
|
+
| **Null bytes** | Binary corruption from FTP or terminal copy-paste |
|
|
46
|
+
| **Non-UTF-8** | Detected and flagged for manual conversion |
|
|
47
|
+
|
|
48
|
+
---
|
|
49
|
+
|
|
50
|
+
## Warning
|
|
51
|
+
|
|
52
|
+
> **encoding-doctor modifies files in-place.**
|
|
53
|
+
>
|
|
54
|
+
> - Always run `scan` first and review the report before running `fix`.
|
|
55
|
+
> - Backups are created automatically as `.bak` files.
|
|
56
|
+
> - Run on a Git-tracked project so you can always revert with `git checkout .`
|
|
57
|
+
> - Do not run `fix` on production files without testing first.
|
|
58
|
+
> - `verify` after every fix before committing.
|
|
59
|
+
|
|
60
|
+
---
|
|
61
|
+
|
|
62
|
+
## Options
|
|
63
|
+
|
|
64
|
+
```bash
|
|
65
|
+
enc-doctor scan <path> [--all] # --all shows clean files too
|
|
66
|
+
enc-doctor fix <path> [--dry-run] # --dry-run previews without writing
|
|
67
|
+
enc-doctor verify <path>
|
|
68
|
+
enc-doctor restore <file> # restore single file from .bak
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
---
|
|
72
|
+
|
|
73
|
+
## Run tests
|
|
74
|
+
|
|
75
|
+
```bash
|
|
76
|
+
pip install pytest
|
|
77
|
+
pytest tests/ -v
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
---
|
|
81
|
+
|
|
82
|
+
## License
|
|
83
|
+
|
|
84
|
+
MIT © [Stateflow Labs](https://github.com/stateflow-dev)
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
"""
|
|
2
|
+
encoding-doctor
|
|
3
|
+
---------------
|
|
4
|
+
Scan, fix, and verify file encoding issues.
|
|
5
|
+
|
|
6
|
+
Usage:
|
|
7
|
+
from encoding_doctor.scanner import scan_directory
|
|
8
|
+
from encoding_doctor.fixer import fix_directory
|
|
9
|
+
from encoding_doctor.verifier import verify_directory
|
|
10
|
+
|
|
11
|
+
Or via CLI:
|
|
12
|
+
enc-doctor scan ./my_project
|
|
13
|
+
enc-doctor fix ./my_project
|
|
14
|
+
enc-doctor verify ./my_project
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
# Must match the distribution version in the package metadata (0.2.0) and the
# version string printed by the CLI help text.
__version__ = "0.2.0"
__author__ = "Stateflow Labs"
__license__ = "MIT"
|
|
@@ -0,0 +1,244 @@
|
|
|
1
|
+
"""
|
|
2
|
+
cli.py
|
|
3
|
+
------
|
|
4
|
+
Command-line interface for encoding-doctor.
|
|
5
|
+
|
|
6
|
+
Commands:
|
|
7
|
+
enc-doctor scan <path> -- detect encoding issues (FREE)
|
|
8
|
+
enc-doctor fix <path> -- fix detected issues (LICENSE)
|
|
9
|
+
enc-doctor verify <path> -- verify files are clean (LICENSE)
|
|
10
|
+
enc-doctor restore <file> -- restore a .bak file (LICENSE)
|
|
11
|
+
enc-doctor activate <key> -- activate license key
|
|
12
|
+
enc-doctor deactivate -- deactivate license (free up seat)
|
|
13
|
+
enc-doctor license -- show license status
|
|
14
|
+
|
|
15
|
+
Flags:
|
|
16
|
+
--dry-run (fix only) show what would change without writing
|
|
17
|
+
--all (scan only) show clean files too
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
import sys
|
|
21
|
+
import os
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _check_path(path: str):
|
|
25
|
+
if not os.path.exists(path):
|
|
26
|
+
print(f"ERROR: path not found: {path}")
|
|
27
|
+
sys.exit(1)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def cmd_scan(path: str, show_all: bool = False):
    """Scan *path*, print the scan report, and exit.

    A regular file is scanned with ``scan_file`` (a falsy result yields an
    empty report list); anything else is handed to ``scan_directory``.
    ``show_all`` is forwarded to the reporter as ``show_clean``.

    Exits with status 1 when any report has issues, otherwise 0.
    """
    from .scanner import scan_directory, scan_file
    from .reporter import print_scan_report

    if not os.path.isfile(path):
        reports = scan_directory(path)
    else:
        single = scan_file(path)
        reports = [] if not single else [single]

    print_scan_report(reports, show_clean=show_all)

    found_problem = any(entry.has_issues for entry in reports)
    exit_status = 1 if found_problem else 0
    sys.exit(exit_status)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def cmd_fix(path: str, dry_run: bool = False):
    """Scan *path*, fix what the scan found, and print the fix report.

    ``require_license("fix")`` is called before anything else. A warning
    banner is printed, then the path is scanned (single file or directory)
    and the reports are passed to ``fix_directory`` together with
    ``dry_run``.
    """
    from .license import require_license
    require_license("fix")

    from .scanner import scan_directory, scan_file
    from .fixer import fix_directory
    from .reporter import print_fix_report

    banner = (
        "\033[93mWARNING: enc-doctor fix modifies files in-place.\033[0m",
        "\033[2mBackups will be created as .bak files.\033[0m",
        "\033[2mRun 'enc-doctor scan' first if you have not already.\033[0m\n",
    )
    for line in banner:
        print(line)

    if not os.path.isfile(path):
        reports = scan_directory(path)
    else:
        single = scan_file(path)
        reports = [] if not single else [single]

    outcomes = fix_directory(reports, dry_run=dry_run)
    print_fix_report(outcomes, dry_run=dry_run)
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def cmd_verify(path: str):
    """Verify *path*, print the verification report, and exit.

    ``require_license("verify")`` is called first. A regular file is checked
    with ``verify_file``; anything else goes to ``verify_directory``.

    Exits with status 0 when every result is ``ok``, otherwise 1.
    """
    from .license import require_license
    require_license("verify")

    from .verifier import verify_directory, verify_file
    from .reporter import print_verify_report

    outcomes = [verify_file(path)] if os.path.isfile(path) else verify_directory(path)

    print_verify_report(outcomes)

    everything_clean = all(item.ok for item in outcomes)
    sys.exit(0 if everything_clean else 1)
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def cmd_restore(path: str):
    """Restore *path* from its ``<path>.bak`` backup.

    ``require_license("restore")`` is called first. Exits with status 1 when
    the backup file does not exist or when ``restore_backup`` returns a falsy
    value; otherwise prints the restored path and the backup it came from.
    """
    from .license import require_license
    require_license("restore")

    from .verifier import restore_backup

    backup_path = path + ".bak"
    if not os.path.exists(backup_path):
        print(f"ERROR: no backup found at {backup_path}")
        sys.exit(1)

    if not restore_backup(path):
        print("ERROR: restore failed")
        sys.exit(1)
    else:
        print(f"\033[92mRestored: {path}\033[0m")
        print(f"\033[2mFrom: {backup_path}\033[0m")
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def cmd_activate(key: str):
    """Activate license *key* and print the outcome.

    ``activate`` returns an ``(ok, message)`` pair. On success the message
    and the list of unlocked commands are printed; on failure the message is
    printed and the process exits with status 1.
    """
    from .license import activate

    succeeded, message = activate(key)
    if not succeeded:
        print(f"\n \033[91m✗ {message}\033[0m\n")
        sys.exit(1)
    else:
        summary = (
            f"\n \033[92m✓ {message}\033[0m",
            "",
            " \033[2mLicense saved to ~/.encoding-doctor/license.json\033[0m",
            "",
            " Unlocked commands:",
            " scan \033[92m✓ free\033[0m",
            " fix \033[92m✓ licensed\033[0m",
            " verify \033[92m✓ licensed\033[0m",
            " restore \033[92m✓ licensed\033[0m",
            "",
        )
        for line in summary:
            print(line)
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def cmd_deactivate():
    """Deactivate the current license and print the outcome.

    ``deactivate`` returns an ``(ok, message)`` pair. The message is printed
    in either case; on failure the process exits with status 1.
    """
    from .license import deactivate

    succeeded, message = deactivate()
    if not succeeded:
        print(f"\n \033[91m✗ {message}\033[0m\n")
        sys.exit(1)
    else:
        print(f"\n \033[92m✓ {message}\033[0m\n")
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def cmd_license_status():
    """Print the current license status.

    Reads the mapping returned by ``status()``. When ``info["active"]`` is
    truthy, the customer, key preview and activation date (first 10
    characters of ``activated_at``) are printed when present. Otherwise the
    free/paid command split and the activation instructions are printed.
    """
    from .license import status

    info = status()
    print()
    if not info["active"]:
        print(" \033[93mLicense: NOT ACTIVATED\033[0m")
        print()
        print(" Free: scan")
        print(" Paid: fix, verify, restore")
        print()
        print(" Get license → https://stateflow.dev/encoding-doctor")
        print(" Then run: enc-doctor activate <YOUR-KEY>")
    else:
        print(" \033[92mLicense: ACTIVE\033[0m")
        customer = info.get("customer")
        if customer:
            print(f" Customer : {customer}")
        key_preview = info.get("key_preview")
        if key_preview:
            print(f" Key : {key_preview}")
        activated_at = info.get("activated_at")
        if activated_at:
            print(f" Since : {activated_at[:10]}")
        print()
        print(" All commands unlocked.")
    print()
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def main():
    """Entry point for the ``enc-doctor`` command line.

    Reads the command and its arguments from ``sys.argv`` and dispatches to
    the matching ``cmd_*`` function. With no arguments, ``-h`` or ``--help``
    the help text is printed and the process exits with status 0. An unknown
    command or a missing required argument prints an error and exits with
    status 1.

    For ``scan`` and ``fix`` the path is the first argument that is not that
    command's flag (``--all`` / ``--dry-run``), so the flag may be given
    either before or after the path.
    """
    args = sys.argv[1:]

    if not args or args[0] in ("-h", "--help"):
        print("""
encoding-doctor v0.2.0

Usage:
enc-doctor scan <path> [--all]
enc-doctor fix <path> [--dry-run]
enc-doctor verify <path>
enc-doctor restore <file>
enc-doctor activate <license-key>
enc-doctor deactivate
enc-doctor license

Commands:
scan Detect encoding issues — free forever
fix Fix detected issues — requires license
verify Confirm all files are valid UTF-8 — requires license
restore Restore a file from .bak backup — requires license
activate Activate your license key
deactivate Free up your license seat (e.g. when switching machines)
license Show current license status

Flags:
--all (scan) also show clean files
--dry-run (fix) show what would change without writing files

Get a license → https://stateflow.dev/encoding-doctor
""")
        sys.exit(0)

    command = args[0]
    rest = args[1:]

    if command == "scan":
        # Skip the flag when locating the path: taking rest[0] blindly would
        # treat "--all" as the path in `enc-doctor scan --all <path>`.
        path = next((arg for arg in rest if arg != "--all"), None)
        if path is None:
            print("ERROR: provide a path. Usage: enc-doctor scan <path>")
            sys.exit(1)
        _check_path(path)
        show_all = "--all" in rest
        cmd_scan(path, show_all=show_all)

    elif command == "fix":
        # Same reasoning as for scan: the flag may precede the path.
        path = next((arg for arg in rest if arg != "--dry-run"), None)
        if path is None:
            print("ERROR: provide a path. Usage: enc-doctor fix <path>")
            sys.exit(1)
        _check_path(path)
        dry_run = "--dry-run" in rest
        cmd_fix(path, dry_run=dry_run)

    elif command == "verify":
        if not rest:
            print("ERROR: provide a path. Usage: enc-doctor verify <path>")
            sys.exit(1)
        path = rest[0]
        _check_path(path)
        cmd_verify(path)

    elif command == "restore":
        # No _check_path here: cmd_restore checks for the .bak file itself.
        if not rest:
            print("ERROR: provide a file path. Usage: enc-doctor restore <file>")
            sys.exit(1)
        cmd_restore(rest[0])

    elif command == "activate":
        if not rest:
            print("ERROR: provide a license key. Usage: enc-doctor activate <key>")
            sys.exit(1)
        cmd_activate(rest[0])

    elif command == "deactivate":
        cmd_deactivate()

    elif command == "license":
        cmd_license_status()

    else:
        print(f"ERROR: unknown command '{command}'. Run 'enc-doctor --help'")
        sys.exit(1)
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
if __name__ == "__main__":
|
|
244
|
+
main()
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
"""
|
|
2
|
+
fixer.py
|
|
3
|
+
--------
|
|
4
|
+
Fixes encoding issues detected by scanner.py.
|
|
5
|
+
Always creates .bak backup before modifying any file.
|
|
6
|
+
|
|
7
|
+
Based on real fix operations performed on adaptive_runtime source files.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import os
|
|
11
|
+
import shutil
|
|
12
|
+
from .scanner import MOJIBAKE_PATTERNS, BOM, FileReport
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _backup(path: str) -> str:
|
|
16
|
+
bak = path + ".bak"
|
|
17
|
+
shutil.copy2(path, bak)
|
|
18
|
+
return bak
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def fix_file(report: FileReport, dry_run: bool = False) -> dict:
    """
    Apply every byte-level repair to the file at ``report.path``.

    Repairs run in this order: strip a leading BOM, replace each entry of
    MOJIBAKE_PATTERNS, convert CRLF to LF, remove null bytes. Each repair
    that applies appends one label to ``fixed``.

    Returns a dict with keys:
        path    -- report.path
        fixed   -- labels of the repairs that applied
        skipped -- labels of non-fixable issues; only filled when
                   report.fixable is falsy, in which case the file is
                   not read at all
        backup  -- path of the .bak copy, or None when nothing was written
        changes -- number of entries in ``fixed``; stays 0 when the bytes
                   are unchanged

    With dry_run=True the file is read but never backed up or rewritten.
    """
    outcome = {"path": report.path, "fixed": [], "skipped": [], "backup": None, "changes": 0}

    if not report.fixable:
        outcome["skipped"] = [issue.label for issue in report.issues if not issue.fixable]
        return outcome

    with open(report.path, "rb") as handle:
        before = handle.read()

    data = before
    applied = outcome["fixed"]

    # BOM
    if data.startswith(BOM):
        data = data[3:]
        applied.append("BOM stripped")

    # Mojibake (byte-level replacement)
    for garbled, intended, label in MOJIBAKE_PATTERNS:
        hits = data.count(garbled)
        if hits:
            data = data.replace(garbled, intended)
            applied.append(f"mojibake fixed: {label} ({hits}x)")

    # CRLF -> LF
    crlf_total = data.count(b"\r\n")
    if crlf_total:
        data = data.replace(b"\r\n", b"\n")
        applied.append(f"CRLF -> LF ({crlf_total} lines)")

    # Null bytes
    null_total = data.count(b"\x00")
    if null_total:
        data = data.replace(b"\x00", b"")
        applied.append(f"null bytes removed ({null_total})")

    if data == before:
        return outcome

    outcome["changes"] = len(applied)

    if dry_run:
        return outcome

    # Back up first so the original bytes survive the in-place rewrite.
    outcome["backup"] = _backup(report.path)
    with open(report.path, "wb") as handle:
        handle.write(data)

    return outcome
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def fix_directory(reports: list, dry_run: bool = False) -> list:
    """
    Run fix_file on every report that has issues and is fixable.

    Returns the fix result dicts that recorded at least one change or a
    non-empty ``skipped`` list; ``dry_run`` is forwarded to fix_file.
    """
    kept = []
    for report in reports:
        if not (report.has_issues and report.fixable):
            continue
        outcome = fix_file(report, dry_run=dry_run)
        # NOTE(review): fix_file only fills "skipped" for non-fixable reports,
        # which are filtered out above, so the second condition looks
        # unreachable from here -- confirm whether unfixable files should be
        # reported.
        if outcome["changes"] > 0 or outcome["skipped"]:
            kept.append(outcome)
    return kept
|