cutf 0.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cutf/__init__.py +0 -0
- cutf/app.py +201 -0
- cutf/controller/__init__.py +0 -0
- cutf/controller/fileChecker.py +86 -0
- cutf/controller/fileController.py +119 -0
- cutf/controller/resultHandler.py +174 -0
- cutf/model/AppSetting.py +29 -0
- cutf/model/FileScanResult.py +31 -0
- cutf/model/MissingCharResult.py +23 -0
- cutf/model/__init__.py +8 -0
- cutf/util/__init__.py +0 -0
- cutf/util/code.py +43 -0
- cutf/util/iconv.py +51 -0
- cutf/util/log.py +35 -0
- cutf/util/path.py +38 -0
- cutf-0.0.8.dist-info/METADATA +190 -0
- cutf-0.0.8.dist-info/RECORD +20 -0
- cutf-0.0.8.dist-info/WHEEL +4 -0
- cutf-0.0.8.dist-info/entry_points.txt +2 -0
- cutf-0.0.8.dist-info/licenses/LICENSE +21 -0
cutf/__init__.py
ADDED
|
File without changes
|
cutf/app.py
ADDED
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import os
|
|
3
|
+
from shutil import which
|
|
4
|
+
|
|
5
|
+
import rich
|
|
6
|
+
|
|
7
|
+
from cutf.controller.fileController import handle_file
|
|
8
|
+
from cutf.controller.resultHandler import print_results
|
|
9
|
+
from cutf.model.AppSetting import AppSetting
|
|
10
|
+
from cutf.util.log import format_log_error, format_log_path
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def check_path_file(path: str) -> None:
    """Ensure ``path`` refers to an existing regular file.

    Args:
        path: Location that is expected to be a file.

    Raises:
        FileNotFoundError: When ``os.path.isfile`` rejects the path.
    """
    if os.path.isfile(path):
        return
    raise FileNotFoundError(f"File not found: {path}")
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def check_path_dir(path: str) -> None:
    """Ensure ``path`` refers to an existing directory.

    Args:
        path: Location that is expected to be a directory.

    Raises:
        NotADirectoryError: When ``os.path.isdir`` rejects the path.
    """
    if os.path.isdir(path):
        return
    raise NotADirectoryError(f"Directory not found: {path}")
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def is_command_available(command: str) -> bool:
    """Tell whether ``command`` can be located by ``shutil.which``.

    Args:
        command: Name of the executable to look up.

    Returns:
        bool: ``True`` when ``which`` resolves the command, ``False`` otherwise.
    """
    resolved = which(command)
    return resolved is not None
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def build_parser() -> argparse.ArgumentParser:
    """Create the argument parser for the command-line interface.

    Returns:
        argparse.ArgumentParser: Parser exposing ``--path``, the boolean
        switches listed below, and the multi-valued ``--extensions`` option.
    """
    cli = argparse.ArgumentParser(
        description="Convert source files from legacy encodings to UTF-8 with BOM."
    )
    cli.add_argument("--path", type=str, required=True, help="Path of the file/directory to scan/convert.")

    # Every switch is a plain ``store_true`` flag; registration order is kept
    # so the generated help text lists them in the same sequence.
    switches = (
        ("--checks", "Enable checks for the file"),
        ("--convert", "Enable conversion from current encoding to UTF-8"),
        ("--copyOld", "Copy old encoded file before converting"),
        ("--printMissingCharString", "Print the string where the missing char has been found"),
        ("--printAllSkippedFile", "Print all skipped files where no action was required"),
        ("--all", "Enable both conversion and checks"),
        ("--verbose", "Enable extended logging"),
        ("--only-relevant", "Print only relevant results (hides less relevant missing-char entries)"),
    )
    for flag, description in switches:
        cli.add_argument(flag, action="store_true", help=description)

    # ``nargs="+"`` lets the user pass several extensions separated by spaces.
    cli.add_argument(
        "--extensions",
        type=str,
        nargs="+",
        help="List of extensions to scan, for example: .cpp .h .cs .ini",
    )
    return cli
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def main(argv: list[str] | None = None, confirm_fn=input) -> int:
    """Run the command-line application.

    Args:
        argv: Optional CLI argument list. When ``None``, ``argparse`` reads
            the arguments from ``sys.argv``.
        confirm_fn: Callable invoked (with no arguments) to let the user
            confirm before any file is processed.

    Returns:
        int: Process exit code (``0`` on success).

    Raises:
        SystemExit: If no operation flag or no extension is given, or if the
            ``iconv`` executable is not on ``PATH``.
        NotADirectoryError: If ``--path`` is neither an existing file nor an
            existing directory.
    """
    parser = build_parser()
    args = parser.parse_args(argv)

    # Validate CLI params.
    if not (args.checks or args.convert or args.all):
        rich.print(format_log_error("At least one of --checks or --convert must be set."))
        raise SystemExit(1)
    if not args.extensions:
        rich.print(format_log_error("At least one file extension must be provided with --extensions."))
        raise SystemExit(1)

    # Resolve and echo the effective options.
    path = args.path
    enable_checks = bool(args.checks or args.all)
    enable_convert = bool(args.convert or args.all)

    rich.print(f"Path to scan: {format_log_path(path)}")
    rich.print(f"Checks enabled: {enable_checks}")
    rich.print(f"Conversion enabled: {enable_convert}")
    rich.print(f"Extensions to scan: {args.extensions}")
    rich.print(f"Copy old encoded: {args.copyOld}")
    rich.print("\n")

    # Anything that is not an existing file must be an existing directory.
    # (Re-validating a path that already passed ``isfile`` would be a no-op.)
    is_file = os.path.isfile(path)
    if not is_file:
        check_path_dir(path)

    # Check iconv in path.
    if not is_command_available("iconv"):
        rich.print(format_log_error("Iconv executable not found on your system PATH."))
        raise SystemExit(1)

    # Ask user confirmation. Only announce a conversion when one can actually
    # happen, so a checks-only run does not claim it will rewrite files.
    action = "checked and converted to UTF-8 (with BOM)" if enable_convert else "checked"
    if is_file:
        target = f'File "{format_log_path(path)}"'
    else:
        target = f'All files inside "{format_log_path(path)}"'
    rich.print(f"{target} will be {action}. Proceed? (Enter to continue or CTRL-C to exit)")
    confirm_fn()

    # Create setting object.
    setting = AppSetting(
        input_path=path,
        is_file=is_file,
        extensions=args.extensions,
        checks=enable_checks,
        convert=enable_convert,
        copy_old_encoded=args.copyOld,
        print_missing_char_str=args.printMissingCharString,
        verbose=args.verbose,
        print_skipped_file_no_action=args.printAllSkippedFile,
        print_result_only_relevant=args.only_relevant,
    )

    # Handle file/dir and collect one result per visited file.
    results = []
    rich.print(f'Scanning "{format_log_path(path)}"...')
    if is_file:
        results.append(handle_file(setting.input_path, setting))
    else:
        for root, _, files in os.walk(path):
            for file_name in files:
                results.append(handle_file(os.path.join(root, file_name), setting))

    rich.print("-------------------------------")

    # Handle results.
    print_results(results, setting)

    # Print count result.
    if setting.is_file:
        rich.print(f"\nThis software scanned file {format_log_path(path)}.\n")
    else:
        # Every visited file yields a result, so ``len(results)`` is the total.
        # The numerator counts only files that were not skipped; previously
        # both numbers were always identical.
        total_files = len(results)
        scanned_files = sum(1 for result in results if not result.skipped)
        rich.print(
            f"\nThis software scanned {scanned_files}/{total_files} files inside "
            f"{format_log_path(path)}.\n"
        )

    return 0
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
if __name__ == "__main__":
    # Propagate main()'s return value as the process exit status instead of
    # discarding it.
    raise SystemExit(main())
|
|
File without changes
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
import os
|
|
2
|
+
|
|
3
|
+
import rich
|
|
4
|
+
|
|
5
|
+
from cutf.model.MissingCharResult import MissingCharResult
|
|
6
|
+
from cutf.util.code import is_line_commented
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def check_illegal_chars(file_path: str, source_encoding: str) -> list[MissingCharResult]:
    """Find replacement-character byte sequences in a text file.

    The raw bytes are searched for ``EF BF BD`` (the UTF-8 encoding of U+FFFD,
    the replacement character). Line boundaries and line numbers are computed
    on the raw bytes, so they stay correct when the file contains multi-byte
    characters; only the affected line is decoded.

    Args:
        file_path: Absolute or relative path of the file to inspect.
        source_encoding: Encoding used to decode the matching line for display.

    Returns:
        list[MissingCharResult]: One item for each detected byte sequence.
        ``byte_sequence_file_pos`` is the offset in the file as stored on disk
        (a leading UTF-8 BOM is counted). ``char_position`` is the index of
        this occurrence inside the decoded line, or ``-1`` when the decoded
        text does not show U+FFFD at that spot.

    Raises:
        RuntimeError: If a matching line cannot be decoded with
            ``source_encoding`` (for example, an unknown codec name).
    """
    bom = b"\xef\xbb\xbf"
    marker = b"\xef\xbf\xbd"
    base_name = os.path.basename(file_path)

    with open(file_path, "rb") as f:
        raw_data = f.read()

    # A leading UTF-8 BOM is skipped for searching and line extraction but
    # kept in the buffer so reported offsets refer to the real file.
    bom_offset = len(bom) if raw_data.startswith(bom) else 0

    results = []
    idx = raw_data.find(marker, bom_offset)
    while idx != -1:
        # Line boundaries in BYTES: mixing a byte offset with indexes into the
        # decoded text gives wrong lines as soon as a multi-byte char appears.
        line_start = max(raw_data.rfind(b"\n", 0, idx) + 1, bom_offset)
        line_end = raw_data.find(b"\n", idx)
        if line_end == -1:
            line_end = len(raw_data)

        try:
            line = raw_data[line_start:line_end].decode(source_encoding, errors="replace")
            # Characters preceding this occurrence on the same line.
            prefix_len = len(raw_data[line_start:idx].decode(source_encoding, errors="replace"))
        except (UnicodeDecodeError, LookupError) as err:
            rich.print(
                f"\t[bold red]Error decoding file {base_name} "
                f"with encoding {source_encoding}.[/bold red]"
            )
            raise RuntimeError(
                f"Error decoding file {base_name} during check_illegal_chars()."
            ) from err

        # Position of THIS occurrence (not merely the first one on the line).
        is_visible = line[prefix_len:prefix_len + 1] == "\ufffd"
        char_pos_in_line = prefix_len if is_visible else -1

        line_number = raw_data.count(b"\n", 0, idx) + 1

        rich.print(
            f"\t[bold yellow]Found illegal character at position {idx}, "
            f"line {line_number} in file {base_name}[/bold yellow]"
        )

        results.append(
            MissingCharResult(
                is_commented=is_line_commented(file_path, line_number),
                string=line.lstrip(" \t"),
                line=line_number,
                file_name=base_name,
                char_position=char_pos_in_line,
                char_found=char_pos_in_line != -1,
                byte_sequence_file_pos=idx,
            )
        )

        idx = raw_data.find(marker, idx + len(marker))

    return results
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
import os
|
|
2
|
+
|
|
3
|
+
import chardet
|
|
4
|
+
import rich
|
|
5
|
+
|
|
6
|
+
from cutf.controller.fileChecker import check_illegal_chars
|
|
7
|
+
from cutf.model.AppSetting import AppSetting
|
|
8
|
+
from cutf.model.FileScanResult import FileScanResult
|
|
9
|
+
from cutf.util.iconv import convert_to_utf8_with_iconv
|
|
10
|
+
from cutf.util.log import format_log_path
|
|
11
|
+
from cutf.util.path import copy_old_encoded_file
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def handle_file(file_path: str, setting: AppSetting) -> FileScanResult:
    """Process one file: filter by extension, detect encoding, convert/check.

    Args:
        file_path: Full path of the file to process.
        setting: Runtime options selecting extensions, checks, conversion,
            backup copies and verbosity.

    Returns:
        FileScanResult: Outcome for this file. ``skipped`` is set when the
        extension is not selected or nothing had to be done; ``error_skipped``
        is set when a ``RuntimeError`` interrupted the processing.
    """
    name = os.path.basename(file_path)
    detected = None

    try:
        # Extension filter (case-insensitive).
        suffix = os.path.splitext(file_path)[1]
        allowed = {item.lower() for item in setting.extensions}
        if suffix.lower() not in allowed:
            if setting.verbose:
                rich.print(f"File {name} has no supported extension ({suffix}). Skipping...")
            return FileScanResult(file_path=file_path, file_name=name, skipped=True)

        # Read the raw bytes and let chardet guess the encoding.
        if setting.verbose:
            rich.print(f'## Checking file "{format_log_path(file_path)}"...')
            rich.print(f'Opening file "{file_path}"...')
        with open(file_path, "rb") as handle:
            payload = handle.read()
        detected = chardet.detect(payload)["encoding"]

        if detected is None:
            raise RuntimeError(f"Cannot detect encoding of {name}")

        # Files detected as one of these encodings are never converted.
        unicode_names = {"utf-8", "utf-8-sig", "utf-16", "utf-16le", "utf-16be"}
        must_convert = detected.lower() not in unicode_names and setting.convert

        if must_convert:
            # Optional backup of the legacy-encoded file before rewriting it.
            if setting.copy_old_encoded:
                backup_path = copy_old_encoded_file(file_path)
                rich.print(f"Copied old encoded file to {format_log_path(backup_path)}")

            rich.print(f'File "{name}" has encoding {detected}. Proceeding to convert and check...')
            target = "utf-8"
            convert_to_utf8_with_iconv(file_path, detected, target)
            findings = check_illegal_chars(file_path, target)
            if setting.verbose:
                rich.print(f'Finished checking and converting file "{name}"!')
            return FileScanResult(
                file_path=file_path,
                file_name=name,
                encoding_before=detected,
                encoding_after=f"{target}(BOM)",
                check_missing_char=findings,
                converted=True,
            )

        if setting.checks:
            rich.print(f'File "{name}" has encoding {detected}. Proceeding to check...')
            findings = check_illegal_chars(file_path, detected)
            if setting.verbose:
                rich.print(f'Finished checking file "{name}"!')
            return FileScanResult(
                file_path=file_path,
                file_name=name,
                encoding_before=detected,
                check_missing_char=findings,
            )

        rich.print(f"No operation to do on {name}")
        return FileScanResult(file_path=file_path, file_name=name, skipped=True)

    except RuntimeError as exc:
        rich.print(f"[bold red]Conversion/checking of {name} interrupted because of an error: {exc}[/bold red]")
        return FileScanResult(
            file_path=file_path,
            file_name=name,
            encoding_before=detected,
            error_skipped=True,
            error_description=str(exc),
        )
|
|
111
|
+
# except FileNotFoundError as e:
|
|
112
|
+
# rich.print(f"[bold red]Conversion/checking of {file_name} interrupted because of an error: {e}[/bold red]")
|
|
113
|
+
# return FileScanResult(
|
|
114
|
+
# file_path=file_path,
|
|
115
|
+
# file_name=file_name,
|
|
116
|
+
# encoding_before=encoding,
|
|
117
|
+
# error_skipped=True,
|
|
118
|
+
# error_description=str(e),
|
|
119
|
+
# )
|
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
from collections import Counter
|
|
2
|
+
|
|
3
|
+
import rich
|
|
4
|
+
|
|
5
|
+
from cutf.model import AppSetting
|
|
6
|
+
from cutf.model.FileScanResult import FileScanResult
|
|
7
|
+
from cutf.util.log import format_log_error
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def __print_encoding_before(results: list[FileScanResult]):
    """Print how many files were found for each detected source encoding.

    Args:
        results: Collection of per-file scan outcomes; entries without an
            ``encoding_before`` value are ignored.
    """
    tally = Counter(item.encoding_before for item in results if item.encoding_before)

    rich.print(f"@ List of encodings found during scanning ({len(tally)}):")
    for name, occurrences in tally.items():
        rich.print(f"{name}: {occurrences}")
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def __print_converted_files(results: list[FileScanResult]):
    """Print one line per converted file, or a notice when there are none.

    Args:
        results: Collection of per-file scan outcomes.
    """
    rich.print("@ List of converted files:")
    converted = [item for item in results if item.converted]
    for item in converted:
        rich.print(f"Converted file {item.file_name} from encoding {item.encoding_before} to encoding {item.encoding_after}.")
    if not converted:
        rich.print("0 Files converted.")
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def __print_skipped_files(results: list[FileScanResult], print_all: bool):
    """Report the files that were skipped because nothing had to be done.

    Args:
        results: Collection of per-file scan outcomes.
        print_all: If ``True``, print one row per skipped file; otherwise
            print only the total.
    """
    rich.print("@ List of skipped files:")
    skipped = [item for item in results if item.skipped]

    if print_all:
        for item in skipped:
            rich.print(f"File {item.file_name} skipped because no action is required.")
    elif skipped:
        rich.print(f"{len(skipped)} file skipped because no action was required.")

    if not skipped:
        rich.print("0 skipped file founds.")
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def __print_skipped_error_files(results: list[FileScanResult]):
    """Report the files whose processing was interrupted by an error.

    Args:
        results: Collection of per-file scan outcomes.
    """
    rich.print("@ List of skipped files (from errors):")
    failed = [item for item in results if item.error_skipped]
    for item in failed:
        rich.print(format_log_error(f"File {item.file_path} skipped because of an error: {item.error_description}"))
    if not failed:
        rich.print("0 errors founds.")
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def __print_missing_chars_on_comments(results: list[FileScanResult], print_mis_char_string: bool):
    """Print the missing-character findings located on commented lines.

    Args:
        results: Collection of per-file scan outcomes.
        print_mis_char_string: If ``True``, also print the line text of each
            finding.
    """
    rich.print("@ List of missing chars found on comments:")
    for scan in results:
        # ``check_missing_char`` is ``None`` when no check ran for the file.
        for hit in scan.check_missing_char or ():
            if not hit.is_commented:
                continue
            rich.print(f"File = {hit.file_name} | Missing char Visibile = {hit.char_found} | Line = {hit.line} | Line Pos = {hit.char_position} | File pos = {hit.byte_sequence_file_pos}")
            if print_mis_char_string:
                rich.print(f"String = {hit.string}")
            rich.print("-------------------")
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def __print_missing_chars_on_code(results: list[FileScanResult], print_mis_char_string: bool, only_relevant: bool):
    """Print the missing-character findings located on code (non-comment) lines.

    A finding hidden by ``only_relevant`` prints nothing at all: neither its
    row nor its "String =" line nor its separator.

    Args:
        results: Collection of per-file scan outcomes.
        print_mis_char_string: If ``True``, also print the line text of each
            displayed finding.
        only_relevant: If ``True``, hide findings whose replacement character
            is not visible (``char_found`` is ``False``).
    """
    rich.print("@ List of missing chars found on code:")
    count = 0
    for scan in results:
        # ``check_missing_char`` is ``None`` when no check ran for the file.
        for hit in scan.check_missing_char or ():
            if hit.is_commented:
                continue
            # Hidden findings still count, so the "0 ... founds" notice is
            # printed only when there is truly nothing on code lines.
            count += 1
            if only_relevant and not hit.char_found:
                continue
            rich.print(f"File = {hit.file_name} | Missing char Visibile = {hit.char_found} | Line = {hit.line} | Line Pos = {hit.char_position} | File pos = {hit.byte_sequence_file_pos}")
            if print_mis_char_string:
                rich.print(f"String = {hit.string}")
            rich.print("-------------------")
    if count == 0:
        rich.print("0 missing chars on code founds.")
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def print_results(results: list[FileScanResult], setting: AppSetting):
    """Write the full scan report to the console, section by section.

    Args:
        results: Collection of per-file scan outcomes.
        setting: Runtime settings; ``verbose`` adds banner rules, and the
            other print flags select which details each section shows.
    """
    rule = "########################################################"

    def _rule_if_verbose():
        # Decorative rule shown only in verbose mode.
        if setting.verbose:
            rich.print(rule)

    rich.print("\n\n")

    _rule_if_verbose()
    rich.print("### START OF RESULTS ###################################")
    _rule_if_verbose()

    # Encodings detected before any operation.
    __print_encoding_before(results)
    rich.print("\n")

    # Converted files.
    __print_converted_files(results)
    rich.print("\n")

    # Files skipped because nothing had to be done.
    __print_skipped_files(results, setting.print_skipped_file_no_action)
    rich.print("\n")

    # Files skipped because of an error.
    __print_skipped_error_files(results)
    rich.print("\n")

    # Missing chars on comments (omitted in "only relevant" mode).
    if not setting.print_result_only_relevant:
        __print_missing_chars_on_comments(results, setting.print_missing_char_str)
        rich.print("\n")

    # Missing chars on code.
    __print_missing_chars_on_code(results, setting.print_missing_char_str, setting.print_result_only_relevant)

    rich.print("\n\n")
    _rule_if_verbose()
    rich.print("### END OF RESULTS #####################################")
    _rule_if_verbose()
|
cutf/model/AppSetting.py
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
@dataclass
class AppSetting:
    """Options for one run, built from the parsed command-line flags.

    The first three fields are required; every switch defaults to ``False``.
    """

    # Path of the input file or directory selected by the user.
    input_path: str
    # True when ``input_path`` points to a file, False for a directory.
    is_file: bool
    # File extensions to include in the scan.
    extensions: list[str]
    # Enables illegal-character checks.
    checks: bool = False
    # Enables conversion to UTF-8 with BOM when needed.
    convert: bool = False
    # Saves a copy of the legacy-encoded file before conversion.
    copy_old_encoded: bool = False
    # Prints the line text where a missing character is detected.
    print_missing_char_str: bool = False
    # Prints every skipped file instead of just a count.
    print_skipped_file_no_action: bool = False
    # Hides the less relevant missing-char entries.
    print_result_only_relevant: bool = False
    # Enables detailed progress logs.
    verbose: bool = False
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
|
|
3
|
+
from cutf.model.MissingCharResult import MissingCharResult
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
@dataclass
class FileScanResult:
    """Outcome of processing a single file.

    Only the path and name are required; the remaining fields default to
    ``None`` / ``False`` and are filled in according to what happened.
    """

    # Absolute or relative path of the scanned file.
    file_path: str
    # File name extracted from ``file_path``.
    file_name: str
    # Encoding detected before any operation.
    encoding_before: str | None = None
    # Target encoding after conversion, if a conversion happened.
    encoding_after: str | None = None
    # True when the file was converted.
    converted: bool = False
    # Missing-character findings, if checks were run.
    check_missing_char: list[MissingCharResult] | None = None
    # True when the file was skipped because of an error.
    error_skipped: bool = False
    # Optional short error name (reserved for future use).
    error_name: str | None = None
    # Human-readable error details.
    error_description: str | None = None
    # True when no operation was needed for the file.
    skipped: bool = False
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
@dataclass
class MissingCharResult:
    """One occurrence of the replacement character (U+FFFD) found in a file.

    All fields are required and positional, in the order declared below.
    """

    # True if the line is considered part of a comment.
    is_commented: bool
    # Line content where the issue was found.
    string: str
    # 1-based line number in the file.
    line: int
    # Name of the file that contains the issue.
    file_name: str
    # Character index of U+FFFD inside the line, or -1 when not visible.
    char_position: int
    # True when the replacement character is visible in the decoded text.
    char_found: bool
    # Byte index in the raw file where ``EF BF BD`` starts.
    byte_sequence_file_pos: int
|
cutf/model/__init__.py
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
"""Data models used by CUFT."""
|
|
2
|
+
|
|
3
|
+
from cutf.model.AppSetting import AppSetting
|
|
4
|
+
from cutf.model.FileScanResult import FileScanResult
|
|
5
|
+
from cutf.model.MissingCharResult import MissingCharResult
|
|
6
|
+
|
|
7
|
+
__all__ = ["AppSetting", "FileScanResult", "MissingCharResult"]
|
|
8
|
+
|
cutf/util/__init__.py
ADDED
|
File without changes
|
cutf/util/code.py
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
def _in_block_comment_after(line: str, in_block_comment: bool) -> bool:
    """Return the block-comment state at the end of ``line``.

    Walks the ``/*`` and ``*/`` delimiters from left to right, so a comment
    opened and closed on the same line leaves the state unchanged.
    """
    pos = 0
    while True:
        delimiter = "*/" if in_block_comment else "/*"
        found_at = line.find(delimiter, pos)
        if found_at == -1:
            return in_block_comment
        in_block_comment = not in_block_comment
        pos = found_at + len(delimiter)


def is_line_commented(file_path: str, line_number: int) -> bool:
    """Determine whether a specific line is commented.

    Supports C-like single-line comments (``//``) and block comments
    delimited by ``/*`` and ``*/``. The target line counts as commented when
    a block comment opened on an earlier line is still open, or when the
    line itself starts with ``//``. String literals are not parsed.

    Args:
        file_path: Path to the source file (read as UTF-8, undecodable bytes
            replaced).
        line_number: 1-based target line index.

    Returns:
        bool: ``True`` if the line is recognized as commented, otherwise
        ``False`` (also when the file has fewer than ``line_number`` lines).
    """
    in_block_comment = False
    with open(file_path, "r", encoding="utf-8", errors="replace") as file:
        for current_line_number, line in enumerate(file, start=1):
            if current_line_number == line_number:
                if in_block_comment:
                    return True
                # Ignore surrounding whitespace and a leading BOM (the file
                # is opened as plain "utf-8", so a BOM shows up as U+FFEF).
                return line.strip().lstrip("\ufeff").startswith("//")
            # Track the state through the whole line. Checking only for "/*"
            # left the flag stuck on after a one-line "/* ... */" comment.
            in_block_comment = _in_block_comment_after(line, in_block_comment)

    # The requested line was never reached: treat it as not commented.
    return False
|
cutf/util/iconv.py
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import subprocess
|
|
3
|
+
|
|
4
|
+
import rich
|
|
5
|
+
|
|
6
|
+
from cutf.util.log import format_log_path
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def convert_to_utf8_with_iconv(path: str, source_encoding: str, target_encoding: str) -> None:
    """Convert a file encoding with iconv and write UTF-8 BOM output in place.

    The function invokes the ``iconv`` executable, captures its output in
    ``<path>.tmp``, writes the UTF-8 BOM bytes followed by that output to
    ``<path>.bom``, and finally moves the ``.bom`` file over the original with
    ``os.replace``. Both temporary files are removed before returning.

    A failing iconv run (non-zero exit status) is reported on the console and
    the original file is left untouched; any other exception propagates after
    the temporary files have been cleaned up.

    Args:
        path: File path to convert in place.
        source_encoding: Input encoding used by iconv (``-f``).
        target_encoding: Output encoding used by iconv (``-t``), usually ``utf-8``.
            The prepended BOM is always the UTF-8 one, whatever this value is.
    """
    file_name = os.path.basename(path)
    rich.print(f"Converting {file_name} to {target_encoding} with iconv...")

    temp_file_path = path + ".tmp"
    temp_bom_file_path = path + ".bom"

    try:
        command = ["iconv", "-f", source_encoding, "-t", target_encoding, path]

        # Binary mode: iconv writes raw bytes straight to the descriptor, and a
        # text-mode open would reject iconv-only encoding names (for example
        # "UTF-8//TRANSLIT") with LookupError before iconv even runs.
        with open(temp_file_path, "wb") as temp_file:
            subprocess.run(command, stdout=temp_file, stderr=subprocess.PIPE, check=True)

        # Prepend the UTF-8 BOM to the converted content.
        with open(temp_bom_file_path, "wb") as bom_file:
            bom_file.write(b"\xef\xbb\xbf")
            with open(temp_file_path, "rb") as temp_file:
                bom_file.write(temp_file.read())

        # Swap the BOM-prefixed file in for the original.
        os.replace(temp_bom_file_path, path)
        rich.print(f"Conversion completed for {format_log_path(os.path.basename(path))}")

    except subprocess.CalledProcessError as e:
        rich.print(f"Errore nella conversione di {path}: {e}")
    finally:
        # Remove whatever temporary files are still around, on success or failure.
        for leftover_path in (temp_file_path, temp_bom_file_path):
            if os.path.exists(leftover_path):
                os.remove(leftover_path)
|
cutf/util/log.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
|
|
2
|
+
def format_log_path(path: str) -> str:
    """Wrap a path in rich ``bold magenta`` markup tags.

    Args:
        path: Path text to wrap.

    Returns:
        str: ``path`` enclosed in opening and closing ``bold magenta`` tags.
    """
    style = "bold magenta"
    return f"[{style}]{path}[/{style}]"
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def format_log_warning(string: str) -> str:
    """Wrap a warning message in rich ``bold yellow`` markup tags.

    Args:
        string: Warning text to wrap.

    Returns:
        str: ``string`` enclosed in opening and closing ``bold yellow`` tags.
    """
    style = "bold yellow"
    return f"[{style}]{string}[/{style}]"
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def format_log_error(string: str) -> str:
    """Wrap an error message in rich ``bold red`` markup tags.

    Args:
        string: Error text to wrap.

    Returns:
        str: ``string`` enclosed in opening and closing ``bold red`` tags.
    """
    style = "bold red"
    return f"[{style}]{string}[/{style}]"
|
cutf/util/path.py
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import shutil
|
|
3
|
+
import tempfile
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def copy_old_encoded_file(file_path: str) -> str:
    """Copy a source file to the system temp backup folder.

    The destination folder is ``<tempdir>/SrcChE`` and is created on demand.
    The copy keeps the source file's base name and is made with
    ``shutil.copy2``.

    Args:
        file_path: Path of the file that should be copied.

    Returns:
        str: Full path of the copied backup file.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File {file_path} does not exist.")

    # tempfile.gettempdir() resolves the platform's temp directory.
    backup_dir = os.path.join(tempfile.gettempdir(), "SrcChE")

    # exist_ok avoids the check-then-create race when two runs start together.
    os.makedirs(backup_dir, exist_ok=True)

    # NOTE(review): files sharing a base name (e.g. a/util.h and b/util.h) land
    # on the same backup path, so the later copy overwrites the earlier one.
    return shutil.copy2(file_path, backup_dir)
|
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: cutf
|
|
3
|
+
Version: 0.0.8
|
|
4
|
+
Summary: CLI tool to scan and convert source files to UTF-8 with BOM.
|
|
5
|
+
Author: Gabliz
|
|
6
|
+
License: MIT License
|
|
7
|
+
|
|
8
|
+
Copyright (c) 2025 GaaabLiz
|
|
9
|
+
|
|
10
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
11
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
12
|
+
in the Software without restriction, including without limitation the rights
|
|
13
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
14
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
15
|
+
furnished to do so, subject to the following conditions:
|
|
16
|
+
|
|
17
|
+
The above copyright notice and this permission notice shall be included in all
|
|
18
|
+
copies or substantial portions of the Software.
|
|
19
|
+
|
|
20
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
21
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
22
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
23
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
24
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
25
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
26
|
+
SOFTWARE.
|
|
27
|
+
License-File: LICENSE
|
|
28
|
+
Keywords: bom,cli,encoding,iconv,utf-8
|
|
29
|
+
Classifier: Intended Audience :: Developers
|
|
30
|
+
Classifier: Operating System :: OS Independent
|
|
31
|
+
Classifier: Programming Language :: Python :: 3
|
|
32
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
33
|
+
Requires-Python: >=3.10
|
|
34
|
+
Requires-Dist: chardet>=5.2.0
|
|
35
|
+
Requires-Dist: rich>=13.9.4
|
|
36
|
+
Description-Content-Type: text/markdown
|
|
37
|
+
|
|
38
|
+
# CUFT
|
|
39
|
+
|
|
40
|
+
CUFT is a CLI tool that scans source files, detects legacy encodings, and converts them to **UTF-8 with BOM**.
|
|
41
|
+
|
|
42
|
+
It can also report replacement characters (`�`) introduced by decoding issues.
|
|
43
|
+
|
|
44
|
+
## Features
|
|
45
|
+
|
|
46
|
+
- Scan one file or an entire directory tree.
|
|
47
|
+
- Filter files by extension.
|
|
48
|
+
- Detect source encoding with `chardet`.
|
|
49
|
+
- Convert files to UTF-8 with BOM through `iconv`.
|
|
50
|
+
- Optional backup copy of original files.
|
|
51
|
+
- Detailed report for converted, skipped, and problematic files.
|
|
52
|
+
|
|
53
|
+
## Requirements
|
|
54
|
+
|
|
55
|
+
- Python 3.10+
|
|
56
|
+
- [uv](https://docs.astral.sh/uv/) *(only for local development)*
|
|
57
|
+
- `iconv` available in your system `PATH`
|
|
58
|
+
|
|
59
|
+
### Install `iconv`
|
|
60
|
+
|
|
61
|
+
- macOS: usually preinstalled (`iconv --version`)
|
|
62
|
+
- Linux: install from system package manager (for example `libc-bin` / `glibc` tools)
|
|
63
|
+
- Windows: download and install [GNU iconv for Windows (GnuWin32)](https://gnuwin32.sourceforge.net/packages/libiconv.htm) and make sure `iconv.exe` is in `PATH`
|
|
64
|
+
|
|
65
|
+
## Installation
|
|
66
|
+
|
|
67
|
+
### Option A – Install from PyPI (recommended)
|
|
68
|
+
|
|
69
|
+
No need to clone the repository. Just run:
|
|
70
|
+
|
|
71
|
+
```bash
|
|
72
|
+
pip install cutf
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
or with `uv`:
|
|
76
|
+
|
|
77
|
+
```bash
|
|
78
|
+
uv tool install cutf
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
Then use it directly:
|
|
82
|
+
|
|
83
|
+
```bash
|
|
84
|
+
cutf --path ./src --all --extensions .py .txt
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
### Option B – Clone and run locally
|
|
88
|
+
|
|
89
|
+
#### 1) Clone the repository
|
|
90
|
+
|
|
91
|
+
```bash
|
|
92
|
+
git clone https://github.com/<your-org>/cutf.git
|
|
93
|
+
cd cutf
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
#### 2) Create environment and install dependencies with uv
|
|
97
|
+
|
|
98
|
+
```bash
|
|
99
|
+
uv sync --all-groups
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
#### 3) Run CUFT
|
|
103
|
+
|
|
104
|
+
```bash
|
|
105
|
+
uv run cutf --path ./src --all --extensions .py .txt
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
## Usage
|
|
109
|
+
|
|
110
|
+
```text
|
|
111
|
+
usage: cutf --path PATH [--checks] [--convert] [--copyOld]
|
|
112
|
+
[--printMissingCharString] [--printAllSkippedFile]
|
|
113
|
+
[--all] [--verbose] [--only-relevant]
|
|
114
|
+
[--extensions EXT [EXT ...]]
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
### Main options
|
|
118
|
+
|
|
119
|
+
- `--path`: file or directory to process.
|
|
120
|
+
- `--checks`: run missing-character checks.
|
|
121
|
+
- `--convert`: convert non-UTF files to UTF-8 with BOM.
|
|
122
|
+
- `--all`: enable both `--checks` and `--convert`.
|
|
123
|
+
- `--extensions`: list of extensions to scan (required), for example `.cpp .h .cs .ini`.
|
|
124
|
+
- `--copyOld`: copy original file before conversion into temp folder.
|
|
125
|
+
- `--printMissingCharString`: print the line content for each missing-character finding.
|
|
126
|
+
- `--printAllSkippedFile`: print every skipped file instead of only the count.
|
|
127
|
+
- `--only-relevant`: hide less relevant missing-character entries.
|
|
128
|
+
- `--verbose`: print extra execution logs.
|
|
129
|
+
|
|
130
|
+
## Typical Commands
|
|
131
|
+
|
|
132
|
+
Run checks only:
|
|
133
|
+
|
|
134
|
+
```bash
|
|
135
|
+
uv run cutf --path ./project --checks --extensions .py .js .ts
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
Run conversion + checks:
|
|
139
|
+
|
|
140
|
+
```bash
|
|
141
|
+
uv run cutf --path ./project --all --extensions .cpp .h --copyOld
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
Process one file:
|
|
145
|
+
|
|
146
|
+
```bash
|
|
147
|
+
uv run cutf --path ./src/main.cpp --all --extensions .cpp
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
## Development
|
|
151
|
+
|
|
152
|
+
Run tests:
|
|
153
|
+
|
|
154
|
+
```bash
|
|
155
|
+
uv run pytest
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
Run linter:
|
|
159
|
+
|
|
160
|
+
```bash
|
|
161
|
+
uv run ruff check .
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
Format code:
|
|
165
|
+
|
|
166
|
+
```bash
|
|
167
|
+
uv run ruff format .
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
## FAQ
|
|
171
|
+
|
|
172
|
+
### Why does CUFT require `--extensions`?
|
|
173
|
+
It prevents accidental processing of unrelated files and keeps scans predictable.
|
|
174
|
+
|
|
175
|
+
### Why UTF-8 **with BOM**?
|
|
176
|
+
Some tools and Windows-oriented workflows require BOM for UTF-8 detection.
|
|
177
|
+
|
|
178
|
+
### What happens if `iconv` is missing?
|
|
179
|
+
CUFT stops before processing and prints an error. Install `iconv` and retry.
|
|
180
|
+
|
|
181
|
+
### Where are original files copied when `--copyOld` is enabled?
|
|
182
|
+
They are copied to `<system-temp>/SrcChE`.
|
|
183
|
+
|
|
184
|
+
### Does CUFT modify UTF-8 files?
|
|
185
|
+
No. Files are rewritten only when conversion is requested and the file is detected as non-UTF; files already detected as UTF-8 are skipped.
|
|
186
|
+
|
|
187
|
+
## License
|
|
188
|
+
|
|
189
|
+
This project is distributed under the license in `LICENSE`.
|
|
190
|
+
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
cutf/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
+
cutf/app.py,sha256=Yze6qmTMh7ORMHUPPRjgPTeIUV3hQnXQrdECFMy5yfk,6595
|
|
3
|
+
cutf/controller/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
|
+
cutf/controller/fileChecker.py,sha256=3OzoyAVA0P4J7y97EgAMgFxUJZtRTLYCKAUJuviXGO8,3322
|
|
5
|
+
cutf/controller/fileController.py,sha256=84gB-wp8azGfu-kOf7fSDtEQGcUDgpZQA9mZVamHg88,4666
|
|
6
|
+
cutf/controller/resultHandler.py,sha256=UfxQ9iz3SU1C9VGuL9NY6jJESzJCBVOwHP0eRv6wz-M,6770
|
|
7
|
+
cutf/model/AppSetting.py,sha256=Iu-7Sy9mYsoy6-G7n6JtagOw2iP11RM5fBRvWTxlFmo,1193
|
|
8
|
+
cutf/model/FileScanResult.py,sha256=So3R7sOGro03YV9Yw1ABYspX9Fj-uEXTWXLvRodt8Gs,1267
|
|
9
|
+
cutf/model/MissingCharResult.py,sha256=BCdzZRDfPHNaMhIRTrJ29Cv6PyP9hJgUGyCmiUUCJXI,831
|
|
10
|
+
cutf/model/__init__.py,sha256=1_JoZQ01gT5-Xs2l2m_bnxjWdtaRYT8MP8P2cpIka4M,256
|
|
11
|
+
cutf/util/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
12
|
+
cutf/util/code.py,sha256=HtjY6XEn67Rn1mxBXtPY1_zSxaZTAO3xvJdft4NpwYQ,1767
|
|
13
|
+
cutf/util/iconv.py,sha256=jdnum1Nqu-IfQE8NUtJ0V-nViuClbSm01C8lesCFPQY,2030
|
|
14
|
+
cutf/util/log.py,sha256=TNgP8oH1C5WVR8NvP1g9agUSsUc6-r9ZuXwUrCnKej4,807
|
|
15
|
+
cutf/util/path.py,sha256=mnSvhj_oREcqUXZ-5lKRJh1Mw0KH8D7ybeEyMJRwTIk,1095
|
|
16
|
+
cutf-0.0.8.dist-info/METADATA,sha256=VxYDDBWgsyGjfCa3Q6KkwotX-FJIZCph-wdW7UY1NyQ,5303
|
|
17
|
+
cutf-0.0.8.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
18
|
+
cutf-0.0.8.dist-info/entry_points.txt,sha256=S-ygTSgHduL0RwSA24G2aCcvODm0EWIuHkF2hAXH_TA,39
|
|
19
|
+
cutf-0.0.8.dist-info/licenses/LICENSE,sha256=vo-PhwUi9tU24GL_1jckQKXonOeMIXigqe69KiDGuxM,1065
|
|
20
|
+
cutf-0.0.8.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 GaaabLiz
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|