cutf 0.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cutf/__init__.py ADDED
File without changes
cutf/app.py ADDED
@@ -0,0 +1,201 @@
1
+ import argparse
2
+ import os
3
+ from shutil import which
4
+
5
+ import rich
6
+
7
+ from cutf.controller.fileController import handle_file
8
+ from cutf.controller.resultHandler import print_results
9
+ from cutf.model.AppSetting import AppSetting
10
+ from cutf.util.log import format_log_error, format_log_path
11
+
12
+
13
def check_path_file(path: str) -> None:
    """Ensure ``path`` refers to an existing regular file.

    Args:
        path: File path to validate.

    Raises:
        FileNotFoundError: When ``path`` is missing or is not a file.
    """
    if os.path.isfile(path):
        return
    raise FileNotFoundError(f"File not found: {path}")
24
+
25
+
26
def check_path_dir(path: str) -> None:
    """Ensure ``path`` refers to an existing directory.

    Args:
        path: Directory path to validate.

    Raises:
        NotADirectoryError: When ``path`` is missing or is not a directory.
    """
    if os.path.isdir(path):
        return
    raise NotADirectoryError(f"Directory not found: {path}")
37
+
38
+
39
def is_command_available(command: str) -> bool:
    """Tell whether ``command`` resolves to an executable on ``PATH``.

    Args:
        command: Name of the executable to look up.

    Returns:
        bool: ``True`` when ``shutil.which`` finds the command, else ``False``.
    """
    resolved_location = which(command)
    return resolved_location is not None
49
+
50
+
51
def build_parser() -> argparse.ArgumentParser:
    """Create the argument parser for the command-line interface.

    Returns:
        argparse.ArgumentParser: Parser exposing ``--path``, the boolean
        switches and the ``--extensions`` list.
    """
    cli = argparse.ArgumentParser(
        description="Convert source files from legacy encodings to UTF-8 with BOM."
    )
    cli.add_argument("--path", type=str, required=True, help="Path of the file/directory to scan/convert.")

    # Boolean switches, registered in the order they appear in --help.
    switches = (
        ("--checks", "Enable checks for the file"),
        ("--convert", "Enable conversion from current encoding to UTF-8"),
        ("--copyOld", "Copy old encoded file before converting"),
        ("--printMissingCharString", "Print the string where the missing char has been found"),
        ("--printAllSkippedFile", "Print all skipped files where no action was required"),
        ("--all", "Enable both conversion and checks"),
        ("--verbose", "Enable extended logging"),
        ("--only-relevant", "Print only relevant results (hides less relevant missing-char entries)"),
    )
    for flag, description in switches:
        cli.add_argument(flag, action="store_true", help=description)

    # nargs="+" lets the user pass several extensions separated by spaces.
    cli.add_argument(
        "--extensions",
        type=str,
        nargs="+",
        help="List of extensions to scan, for example: .cpp .h .cs .ini",
    )
    return cli
92
+
93
+
94
def main(argv: list[str] | None = None, confirm_fn=input) -> int:
    """Run the command-line application.

    Args:
        argv: Optional CLI argument list. When ``None``, arguments are read from ``sys.argv``.
        confirm_fn: Callable invoked (without arguments) to wait for the user's
            confirmation before any file is processed.

    Returns:
        int: Process exit code (``0`` on success).

    Raises:
        SystemExit: If no operation or no extension is selected, or if
            conversion is requested and ``iconv`` is not available.
        FileNotFoundError: Propagated from ``check_path_file``.
        NotADirectoryError: If ``--path`` is neither a file nor a directory.
    """
    parser = build_parser()
    args = parser.parse_args(argv)

    # Validate CLI params
    if not (args.checks or args.convert or args.all):
        rich.print(format_log_error("At least one of --checks or --convert must be set."))
        raise SystemExit(1)
    if not args.extensions:
        rich.print(format_log_error("At least one file extension must be provided with --extensions."))
        raise SystemExit(1)

    # Resolve and echo the effective options
    path = args.path
    enable_checks = bool(args.checks or args.all)
    enable_convert = bool(args.convert or args.all)

    rich.print(f"Path to scan: {format_log_path(path)}")
    rich.print(f"Checks enabled: {enable_checks}")
    rich.print(f"Conversion enabled: {enable_convert}")
    rich.print(f"Extensions to scan: {args.extensions}")
    rich.print(f"Copy old encoded: {args.copyOld}")
    rich.print("\n")

    # The path must be an existing file or an existing directory
    is_file = os.path.isfile(path)
    if is_file:
        check_path_file(path)
    else:
        check_path_dir(path)

    # iconv is only invoked when converting, so a check-only run does not need it
    if enable_convert and not is_command_available("iconv"):
        rich.print(format_log_error("Iconv executable not found on your system PATH."))
        raise SystemExit(1)

    # Ask user confirmation
    if is_file:
        rich.print(
            f"File \"{format_log_path(path)}\" will be checked and converted to UTF-8 "
            "(with BOM). Proceed? (Enter to continue or CTRL-C to exit)"
        )
    else:
        rich.print(
            f"All files inside \"{format_log_path(path)}\" will be checked and converted "
            "to UTF-8 (with BOM). Proceed? (Enter to continue or CTRL-C to exit)"
        )
    confirm_fn()

    setting = AppSetting(
        input_path=path,
        is_file=is_file,
        extensions=args.extensions,
        checks=enable_checks,
        convert=enable_convert,
        copy_old_encoded=args.copyOld,
        print_missing_char_str=args.printMissingCharString,
        verbose=args.verbose,
        print_skipped_file_no_action=args.printAllSkippedFile,
        print_result_only_relevant=args.only_relevant,
    )

    # Process the single file, or every file found under the directory
    rich.print(f"Scanning \"{format_log_path(path)}\"...")
    if is_file:
        results = [handle_file(setting.input_path, setting)]
    else:
        results = [
            handle_file(os.path.join(root, file_name), setting)
            for root, _, files in os.walk(path)
            for file_name in files
        ]

    rich.print("-------------------------------")

    print_results(results, setting)

    # Final count: files actually processed versus files encountered
    if setting.is_file:
        rich.print(f"\nThis software scanned file {format_log_path(path)}.\n")
    else:
        scanned_count = sum(1 for result in results if not result.skipped)
        rich.print(
            f"\nThis software scanned {scanned_count}/{len(results)} files inside "
            f"{format_log_path(path)}.\n"
        )

    return 0
198
+
199
+
200
if __name__ == "__main__":
    # Propagate main()'s return value as the process exit status.
    raise SystemExit(main())
File without changes
@@ -0,0 +1,86 @@
1
+ import os
2
+
3
+ import rich
4
+
5
+ from cutf.model.MissingCharResult import MissingCharResult
6
+ from cutf.util.code import is_line_commented
7
+
8
+
9
def check_illegal_chars(file_path: str, source_encoding: str) -> list[MissingCharResult]:
    """Find replacement-character byte sequences in a text file.

    The raw bytes of the file (after dropping a leading UTF-8 BOM) are searched
    for the sequence ``EF BF BD`` (U+FFFD encoded as UTF-8). Line boundaries and
    line numbers are computed on the raw bytes, so they stay correct when the
    file contains multi-byte characters; only the affected line is decoded.

    Args:
        file_path: Absolute or relative path of the file to inspect.
        source_encoding: Encoding used to decode the line that contains a match.

    Returns:
        list[MissingCharResult]: One item for each detected byte sequence.
        ``byte_sequence_file_pos`` is the offset counted after the BOM, if any.

    Raises:
        RuntimeError: If ``source_encoding`` is not a codec known to Python.
    """
    replacement_bytes = b"\xef\xbf\xbd"
    bom = b"\xef\xbb\xbf"
    file_name = os.path.basename(file_path)

    with open(file_path, "rb") as f:
        raw_data = f.read()

    # Drop the UTF-8 BOM so it is not counted as part of the first line
    if raw_data.startswith(bom):
        raw_data = raw_data[len(bom):]

    # errors="replace" never raises UnicodeDecodeError; the failure that can
    # happen is an unknown codec name, so validate it once up front.
    try:
        b"".decode(source_encoding, errors="replace")
    except (LookupError, UnicodeDecodeError) as err:
        rich.print(
            f"\t[bold red]Error decoding file {os.path.basename(file_path)} "
            f"with encoding {source_encoding}.[/bold red]"
        )
        raise RuntimeError(
            f"Error decoding file {os.path.basename(file_path)} during check_illegal_chars()."
        ) from err

    results = []
    idx = raw_data.find(replacement_bytes)
    while idx != -1:
        # Byte range of the line that contains the match
        line_start = raw_data.rfind(b"\n", 0, idx) + 1
        line_end = raw_data.find(b"\n", idx)
        if line_end == -1:
            line_end = len(raw_data)

        line = raw_data[line_start:line_end].decode(source_encoding, errors="replace")
        # Position of the first visible U+FFFD in the decoded line (-1 if none)
        char_pos_in_line = line.find("\ufffd")
        line_number = raw_data.count(b"\n", 0, idx) + 1

        rich.print(
            f"\t[bold yellow]Found illegal character at position {idx}, "
            f"line {line_number} in file {os.path.basename(file_path)}[/bold yellow]"
        )

        results.append(
            MissingCharResult(
                is_commented=is_line_commented(file_path, line_number),
                string=line.lstrip(" \t"),
                line=line_number,
                file_name=file_name,
                char_position=char_pos_in_line,
                char_found=char_pos_in_line != -1,
                byte_sequence_file_pos=idx,
            )
        )
        # The pattern cannot overlap itself, so resuming one byte later is enough
        idx = raw_data.find(replacement_bytes, idx + 1)

    return results
@@ -0,0 +1,119 @@
1
+ import os
2
+
3
+ import chardet
4
+ import rich
5
+
6
+ from cutf.controller.fileChecker import check_illegal_chars
7
+ from cutf.model.AppSetting import AppSetting
8
+ from cutf.model.FileScanResult import FileScanResult
9
+ from cutf.util.iconv import convert_to_utf8_with_iconv
10
+ from cutf.util.log import format_log_path
11
+ from cutf.util.path import copy_old_encoded_file
12
+
13
+
14
def handle_file(file_path: str, setting: AppSetting) -> FileScanResult:
    """Scan and optionally convert a single file according to current settings.

    Files whose extension is not listed in ``setting.extensions`` are skipped.
    Otherwise the encoding is detected with ``chardet``; the file is converted
    (when conversion is enabled and the detected encoding is not one of the
    UTF-8/UTF-16 names) and/or checked for replacement characters.

    Any ``RuntimeError``, ``OSError`` (unreadable or vanished file, failed
    backup copy) or ``LookupError`` (codec unknown to Python) is recorded in the
    returned result instead of aborting the scan of the remaining files.

    Args:
        file_path: Full path of the file to process.
        setting: Runtime application options that control checks/conversion behavior.

    Returns:
        FileScanResult: Structured outcome including conversion, checks, and errors.
    """
    file_name = os.path.basename(file_path)
    encoding = None

    try:
        # Extension filter (case-insensitive)
        _, extension = os.path.splitext(file_path)
        normalized_extensions = {value.lower() for value in setting.extensions}
        if extension.lower() not in normalized_extensions:
            if setting.verbose:
                rich.print(f"File {file_name} has no supported extension ({extension}). Skipping...")
            return FileScanResult(
                file_path=file_path,
                file_name=file_name,
                skipped=True,
            )

        # Detect the current encoding from the raw bytes
        if setting.verbose:
            rich.print(f"## Checking file \"{format_log_path(file_path)}\"...")
            rich.print(f"Opening file \"{file_path}\"...")
        with open(file_path, "rb") as f:
            raw_data = f.read()
        encoding = chardet.detect(raw_data)["encoding"]

        if encoding is None:
            raise RuntimeError(f"Cannot detect encoding of {file_name}")
        # UTF-16 variants are deliberately treated like UTF-8: no conversion
        is_already_utf8 = (
            encoding.lower() in {"utf-8", "utf-8-sig", "utf-16", "utf-16le", "utf-16be"}
        )
        needs_convert = (not is_already_utf8) and setting.convert

        # Back up the legacy-encoded file before touching it
        if needs_convert and setting.copy_old_encoded:
            old_copy_path = copy_old_encoded_file(file_path)
            rich.print(f"Copied old encoded file to {format_log_path(old_copy_path)}")

        if needs_convert:
            rich.print(f"File \"{file_name}\" has encoding {encoding}. Proceeding to convert and check...")
            output_encoding = "utf-8"
            convert_to_utf8_with_iconv(file_path, encoding, output_encoding)
            missing_chars = check_illegal_chars(file_path, output_encoding)
            if setting.verbose:
                rich.print(f"Finished checking and converting file \"{file_name}\"!")
            return FileScanResult(
                file_path=file_path,
                file_name=file_name,
                encoding_before=encoding,
                encoding_after=f"{output_encoding}(BOM)",
                check_missing_char=missing_chars,
                converted=True,
            )

        if setting.checks:
            rich.print(f"File \"{file_name}\" has encoding {encoding}. Proceeding to check...")
            missing_chars = check_illegal_chars(file_path, encoding)
            if setting.verbose:
                rich.print(f"Finished checking file \"{file_name}\"!")
            return FileScanResult(
                file_path=file_path,
                file_name=file_name,
                encoding_before=encoding,
                check_missing_char=missing_chars,
            )

        rich.print(f"No operation to do on {file_name}")
        return FileScanResult(
            file_path=file_path,
            file_name=file_name,
            skipped=True,
        )

    except (RuntimeError, OSError, LookupError) as e:
        rich.print(f"[bold red]Conversion/checking of {file_name} interrupted because of an error: {e}[/bold red]")
        return FileScanResult(
            file_path=file_path,
            file_name=file_name,
            encoding_before=encoding,
            error_skipped=True,
            error_description=str(e),
        )
@@ -0,0 +1,174 @@
1
+ from collections import Counter
2
+
3
+ import rich
4
+
5
+ from cutf.model import AppSetting
6
+ from cutf.model.FileScanResult import FileScanResult
7
+ from cutf.util.log import format_log_error
8
+
9
+
10
def __print_encoding_before(results: list[FileScanResult]):
    """Print how many files were found for each detected source encoding.

    Args:
        results: Collection of per-file scan outcomes.
    """
    # Results without a detected encoding (None or empty) are not counted.
    tally = Counter(
        scan.encoding_before for scan in results if scan.encoding_before
    )

    rich.print(f"@ List of encodings found during scanning ({len(tally)}):")
    for name in tally:
        rich.print(f"{name}: {tally[name]}")
28
+
29
+
30
def __print_converted_files(results: list[FileScanResult]):
    """Print one line per converted file, or a notice when there are none.

    Args:
        results: Collection of per-file scan outcomes.
    """
    rich.print("@ List of converted files:")
    any_converted = False
    for scan in results:
        if not scan.converted:
            continue
        any_converted = True
        rich.print(f"Converted file {scan.file_name} from encoding {scan.encoding_before} to encoding {scan.encoding_after}.")
    if not any_converted:
        rich.print("0 Files converted.")
44
+
45
+
46
def __print_skipped_files(results: list[FileScanResult], print_all: bool):
    """Report the files that were skipped because nothing had to be done.

    Args:
        results: Collection of per-file scan outcomes.
        print_all: If ``True``, prints one row per skipped file; otherwise only
            the total is printed.
    """
    rich.print("@ List of skipped files:")
    skipped_scans = [scan for scan in results if scan.skipped]

    if print_all:
        for scan in skipped_scans:
            rich.print(f"File {scan.file_name} skipped because no action is required.")

    if not skipped_scans:
        rich.print("0 skipped file founds.")
    elif not print_all:
        rich.print(f"{len(skipped_scans)} file skipped because no action was required.")
70
+
71
+
72
def __print_skipped_error_files(results: list[FileScanResult]):
    """Report the files whose processing was aborted by an error.

    Args:
        results: Collection of per-file scan outcomes.
    """
    rich.print("@ List of skipped files (from errors):")
    any_failure = False
    for scan in results:
        if not scan.error_skipped:
            continue
        any_failure = True
        rich.print(format_log_error(f"File {scan.file_path} skipped because of an error: {scan.error_description}"))
    if not any_failure:
        rich.print("0 errors founds.")
86
+
87
+
88
def __print_missing_chars_on_comments(results: list[FileScanResult], print_mis_char_string: bool):
    """Print the missing-character findings that sit on commented lines.

    Args:
        results: Collection of per-file scan outcomes.
        print_mis_char_string: If ``True``, also print the text of the line.
    """
    rich.print("@ List of missing chars found on comments:")
    for scan in results:
        # None means no check was run for this file.
        if scan.check_missing_char is None:
            continue
        for finding in scan.check_missing_char:
            if not finding.is_commented:
                continue
            rich.print(f"File = {finding.file_name} | Missing char Visibile = {finding.char_found} | Line = {finding.line} | Line Pos = {finding.char_position} | File pos = {finding.byte_sequence_file_pos}")
            if print_mis_char_string:
                rich.print(f"String = {finding.string}")
            rich.print("-------------------")
104
+
105
+
106
def __print_missing_chars_on_code(results: list[FileScanResult], print_mis_char_string: bool, only_relevant: bool):
    """Print missing character occurrences detected in code lines.

    An entry is hidden as a whole (summary row, optional line text and
    separator) when ``only_relevant`` is set and the replacement character is
    not visible in the decoded line. Hidden entries still count towards the
    total, so the "0 ... founds" notice is printed only when no finding exists.

    Args:
        results: Collection of per-file scan outcomes.
        print_mis_char_string: If ``True``, include the original line string.
        only_relevant: If ``True``, hide missing-char entries where the symbol is not visible.
    """
    rich.print("@ List of missing chars found on code:")
    count = 0
    for result in results:
        # None means no check was run for this file.
        if result.check_missing_char is None:
            continue
        for finding in result.check_missing_char:
            if finding.is_commented:
                continue
            count += 1
            # Skip the whole entry, not just its first row, when it is filtered out
            if only_relevant and not finding.char_found:
                continue
            rich.print(f"File = {finding.file_name} | Missing char Visibile = {finding.char_found} | Line = {finding.line} | Line Pos = {finding.char_position} | File pos = {finding.byte_sequence_file_pos}")
            if print_mis_char_string:
                rich.print(f"String = {finding.string}")
            rich.print("-------------------")
    if count == 0:
        rich.print("0 missing chars on code founds.")
131
+
132
+
133
def print_results(results: list[FileScanResult], setting: AppSetting):
    """Write the full scan report to the console, section by section.

    Args:
        results: Collection of per-file scan outcomes.
        setting: Runtime settings controlling verbosity and filtering.
    """
    rule = "########################################################"

    def emit_banner(title: str) -> None:
        # The surrounding rules are shown only in verbose mode.
        if setting.verbose:
            rich.print(rule)
        rich.print(title)
        if setting.verbose:
            rich.print(rule)

    rich.print("\n\n")
    emit_banner("### START OF RESULTS ###################################")

    # Encodings detected before any operation
    __print_encoding_before(results)
    rich.print("\n")

    # Converted files
    __print_converted_files(results)
    rich.print("\n")

    # Files skipped because no action was needed
    __print_skipped_files(results, setting.print_skipped_file_no_action)
    rich.print("\n")

    # Files skipped because of an error
    __print_skipped_error_files(results)
    rich.print("\n")

    # Findings on commented lines are omitted in "only relevant" mode
    if not setting.print_result_only_relevant:
        __print_missing_chars_on_comments(results, setting.print_missing_char_str)
        rich.print("\n")

    # Findings on code lines
    __print_missing_chars_on_code(results, setting.print_missing_char_str, setting.print_result_only_relevant)

    rich.print("\n\n")
    emit_banner("### END OF RESULTS #####################################")
@@ -0,0 +1,29 @@
1
+ from dataclasses import dataclass
2
+
3
+
4
@dataclass
class AppSetting:
    """Options of one run, built from the command-line flags.

    Attributes:
        input_path: File or directory path chosen by the user.
        is_file: ``True`` if ``input_path`` is a file, ``False`` if it is a directory.
        extensions: File extensions that are included in the scan.
        checks: Turns on the illegal-character checks.
        convert: Turns on conversion to UTF-8 with BOM where needed.
        copy_old_encoded: Keeps a copy of each legacy-encoded file before converting it.
        print_missing_char_str: Prints the text of lines where missing characters are found.
        print_skipped_file_no_action: Lists every skipped file rather than only the total.
        print_result_only_relevant: Hides the less relevant missing-char entries.
        verbose: Turns on detailed progress output.
    """

    # Required values
    input_path: str
    is_file: bool
    extensions: list[str]

    # Optional switches, all disabled by default
    checks: bool = False
    convert: bool = False
    copy_old_encoded: bool = False
    print_missing_char_str: bool = False
    print_skipped_file_no_action: bool = False
    print_result_only_relevant: bool = False
    verbose: bool = False
@@ -0,0 +1,31 @@
1
+ from dataclasses import dataclass
2
+
3
+ from cutf.model.MissingCharResult import MissingCharResult
4
+
5
+
6
@dataclass
class FileScanResult:
    """Outcome of processing one file.

    Attributes:
        file_path: Absolute or relative path of the processed file.
        file_name: Base name taken from ``file_path``.
        encoding_before: Encoding detected before any operation was applied.
        encoding_after: Encoding after conversion, when a conversion took place.
        converted: ``True`` if the file was converted.
        check_missing_char: Missing-character findings, when checks were executed.
        error_skipped: ``True`` if an error caused the file to be skipped.
        error_name: Optional short error name (reserved for future use).
        error_description: Readable description of the error.
        skipped: ``True`` if the file required no operation.
    """

    # Identification of the file
    file_path: str
    file_name: str

    # Encoding and conversion outcome
    encoding_before: str | None = None
    encoding_after: str | None = None
    converted: bool = False
    check_missing_char: list[MissingCharResult] | None = None

    # Error and skip information
    error_skipped: bool = False
    error_name: str | None = None
    error_description: str | None = None
    skipped: bool = False
@@ -0,0 +1,23 @@
1
+ from dataclasses import dataclass
2
+
3
+
4
@dataclass
class MissingCharResult:
    """One occurrence of the replacement character found in a file.

    Attributes:
        is_commented: ``True`` when the line is treated as part of a comment.
        string: Content of the line where the occurrence was found.
        line: Line number in the file, starting at 1.
        file_name: Name of the file containing the occurrence.
        char_position: Index of U+FFFD within the line, ``-1`` if it is not visible.
        char_found: ``True`` if the replacement character shows up in the decoded text.
        byte_sequence_file_pos: Byte index at which the ``EF BF BD`` sequence starts.
    """

    # Where the occurrence is and what the line looks like
    is_commented: bool
    string: str
    line: int
    file_name: str

    # Position of the character in the line and in the byte stream
    char_position: int
    char_found: bool
    byte_sequence_file_pos: int
cutf/model/__init__.py ADDED
@@ -0,0 +1,8 @@
1
+ """Data models used by CUFT."""
2
+
3
+ from cutf.model.AppSetting import AppSetting
4
+ from cutf.model.FileScanResult import FileScanResult
5
+ from cutf.model.MissingCharResult import MissingCharResult
6
+
7
+ __all__ = ["AppSetting", "FileScanResult", "MissingCharResult"]
8
+
cutf/util/__init__.py ADDED
File without changes
cutf/util/code.py ADDED
@@ -0,0 +1,43 @@
1
def _block_comment_open_after(line: str, in_block_comment: bool) -> bool:
    """Return whether a ``/* ... */`` comment is still open at the end of ``line``."""
    position = 0
    while True:
        marker = "*/" if in_block_comment else "/*"
        found_at = line.find(marker, position)
        if found_at == -1:
            return in_block_comment
        # Each delimiter found flips the state; keep scanning after it so that
        # a one-line "/* ... */" comment leaves the state closed.
        in_block_comment = not in_block_comment
        position = found_at + len(marker)


def is_line_commented(file_path: str, line_number: int) -> bool:
    """Determine whether a specific line is inside a comment.

    Supports C-like single-line comments (``//``) and block comments
    delimited by ``/*`` and ``*/``. A line counts as commented when it lies
    inside a block comment opened on an earlier line, or when it starts with
    ``//`` or ``/*``. Comment markers inside string literals or after ``//``
    are not recognised as such (simple textual scan).

    Args:
        file_path: Path to the source file.
        line_number: 1-based target line index.

    Returns:
        bool: ``True`` if the line is recognized as commented, otherwise
        ``False`` (also when the file has fewer than ``line_number`` lines).
    """
    in_block_comment = False
    with open(file_path, "r", encoding="utf-8", errors="replace") as file:
        for current_line_number, line in enumerate(file, start=1):
            # Remove a leading BOM first: it is not whitespace for str.strip()
            text = line.lstrip("\ufeff").strip()
            if current_line_number == line_number:
                return in_block_comment or text.startswith(("//", "/*"))
            in_block_comment = _block_comment_open_after(text, in_block_comment)

    return False
cutf/util/iconv.py ADDED
@@ -0,0 +1,51 @@
1
+ import os
2
+ import subprocess
3
+
4
+ import rich
5
+
6
+ from cutf.util.log import format_log_path
7
+
8
+
9
def convert_to_utf8_with_iconv(path: str, source_encoding: str, target_encoding: str):
    """Convert a file encoding with iconv and write UTF-8 BOM output in place.

    The ``iconv`` executable is run on ``path``; its output, prefixed with the
    UTF-8 BOM bytes, is written to a uniquely named temporary file in the same
    directory, which then replaces the original via ``os.replace``. The
    original file is left untouched when the conversion fails.

    Args:
        path: File path to convert in place.
        source_encoding: Input encoding used by iconv (``-f``).
        target_encoding: Output encoding used by iconv (``-t``), usually ``utf-8``.
            The BOM written is always the UTF-8 one.

    Raises:
        RuntimeError: If iconv exits with a non-zero status.
    """
    # Local imports: only this function needs them.
    import shutil
    import tempfile

    file_name = os.path.basename(path)
    rich.print(f"Converting {file_name} to {target_encoding} with iconv...")

    command = ["iconv", "-f", source_encoding, "-t", target_encoding, path]

    # A unique name avoids overwriting (and later deleting) an unrelated
    # "<name>.tmp" file that may already exist next to the source.
    descriptor, temp_file_path = tempfile.mkstemp(
        prefix=file_name + ".", suffix=".tmp", dir=os.path.dirname(path) or "."
    )
    try:
        with os.fdopen(descriptor, "wb") as temp_file:
            completed = subprocess.run(
                command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True
            )
            temp_file.write(b"\xef\xbb\xbf")  # UTF-8 BOM
            temp_file.write(completed.stdout)

        # mkstemp creates the file with owner-only permissions; keep the original ones
        shutil.copymode(path, temp_file_path)
        os.replace(temp_file_path, path)
        rich.print(f"Conversion completed for {format_log_path(os.path.basename(path))}")

    except subprocess.CalledProcessError as e:
        rich.print(f"Errore nella conversione di {path}: {e}")
        details = (e.stderr or b"").decode(errors="replace").strip()
        # Surface the failure so the caller does not report the file as converted
        raise RuntimeError(
            f"iconv failed on {file_name} (exit status {e.returncode}): {details}"
        ) from e
    finally:
        # After a successful os.replace the temporary file no longer exists
        if os.path.exists(temp_file_path):
            os.remove(temp_file_path)
cutf/util/log.py ADDED
@@ -0,0 +1,35 @@
1
+
2
def format_log_path(path: str) -> str:
    """Wrap a path in rich bold-magenta markup.

    Args:
        path: Path text to decorate.

    Returns:
        str: The path enclosed in ``[bold magenta]`` tags.
    """
    return "[bold magenta]{}[/bold magenta]".format(path)
12
+
13
+
14
def format_log_warning(string: str) -> str:
    """Wrap a warning message in rich bold-yellow markup.

    Args:
        string: Warning text to decorate.

    Returns:
        str: The message enclosed in ``[bold yellow]`` tags.
    """
    return "[bold yellow]{}[/bold yellow]".format(string)
24
+
25
+
26
def format_log_error(string: str) -> str:
    """Wrap an error message in rich bold-red markup.

    Args:
        string: Error text to decorate.

    Returns:
        str: The message enclosed in ``[bold red]`` tags.
    """
    return "[bold red]{}[/bold red]".format(string)
cutf/util/path.py ADDED
@@ -0,0 +1,38 @@
1
+ import os
2
+ import shutil
3
+ import tempfile
4
+
5
+
6
def copy_old_encoded_file(file_path: str) -> str:
    """Copy a source file to the system temp backup folder.

    The destination folder is ``<tempdir>/SrcChE`` and is created on demand.
    When a backup with the same file name already exists (for example files
    with the same name coming from different directories), a numeric suffix
    is inserted before the extension (``name.1.ext``, ``name.2.ext``, ...) so
    that no earlier backup is overwritten.

    Args:
        file_path: Path of the file that should be copied.

    Returns:
        str: Full path of the copied backup file.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist or is not a file.
    """
    if not os.path.isfile(file_path):
        raise FileNotFoundError(f"File {file_path} does not exist.")

    backup_dir = os.path.join(tempfile.gettempdir(), "SrcChE")
    # exist_ok avoids the check-then-create race of a separate exists() test
    os.makedirs(backup_dir, exist_ok=True)

    stem, extension = os.path.splitext(os.path.basename(file_path))
    destination = os.path.join(backup_dir, stem + extension)
    suffix = 1
    while os.path.exists(destination):
        destination = os.path.join(backup_dir, f"{stem}.{suffix}{extension}")
        suffix += 1

    # copy2 also preserves the file metadata and returns the destination path
    return shutil.copy2(file_path, destination)
@@ -0,0 +1,190 @@
1
+ Metadata-Version: 2.4
2
+ Name: cutf
3
+ Version: 0.0.8
4
+ Summary: CLI tool to scan and convert source files to UTF-8 with BOM.
5
+ Author: Gabliz
6
+ License: MIT License
7
+
8
+ Copyright (c) 2025 GaaabLiz
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+ License-File: LICENSE
28
+ Keywords: bom,cli,encoding,iconv,utf-8
29
+ Classifier: Intended Audience :: Developers
30
+ Classifier: Operating System :: OS Independent
31
+ Classifier: Programming Language :: Python :: 3
32
+ Classifier: Programming Language :: Python :: 3 :: Only
33
+ Requires-Python: >=3.10
34
+ Requires-Dist: chardet>=5.2.0
35
+ Requires-Dist: rich>=13.9.4
36
+ Description-Content-Type: text/markdown
37
+
38
+ # CUFT
39
+
40
+ CUFT is a CLI tool that scans source files, detects legacy encodings, and converts them to **UTF-8 with BOM**.
41
+
42
+ It can also report replacement characters (`�`) introduced by decoding issues.
43
+
44
+ ## Features
45
+
46
+ - Scan one file or an entire directory tree.
47
+ - Filter files by extension.
48
+ - Detect source encoding with `chardet`.
49
+ - Convert files to UTF-8 with BOM through `iconv`.
50
+ - Optional backup copy of original files.
51
+ - Detailed report for converted, skipped, and problematic files.
52
+
53
+ ## Requirements
54
+
55
+ - Python 3.10+
56
+ - [uv](https://docs.astral.sh/uv/) *(only for local development)*
57
+ - `iconv` available in your system `PATH`
58
+
59
+ ### Install `iconv`
60
+
61
+ - macOS: usually preinstalled (`iconv --version`)
62
+ - Linux: install from system package manager (for example `libc-bin` / `glibc` tools)
63
+ - Windows: download and install [GNU iconv for Windows (GnuWin32)](https://gnuwin32.sourceforge.net/packages/libiconv.htm) and make sure `iconv.exe` is in `PATH`
64
+
65
+ ## Installation
66
+
67
+ ### Option A – Install from PyPI (recommended)
68
+
69
+ No need to clone the repository. Just run:
70
+
71
+ ```bash
72
+ pip install cutf
73
+ ```
74
+
75
+ or with `uv`:
76
+
77
+ ```bash
78
+ uv tool install cutf
79
+ ```
80
+
81
+ Then use it directly:
82
+
83
+ ```bash
84
+ cutf --path ./src --all --extensions .py .txt
85
+ ```
86
+
87
+ ### Option B – Clone and run locally
88
+
89
+ #### 1) Clone the repository
90
+
91
+ ```bash
92
+ git clone https://github.com/<your-org>/cutf.git
93
+ cd cutf
94
+ ```
95
+
96
+ #### 2) Create environment and install dependencies with uv
97
+
98
+ ```bash
99
+ uv sync --all-groups
100
+ ```
101
+
102
+ #### 3) Run CUFT
103
+
104
+ ```bash
105
+ uv run cutf --path ./src --all --extensions .py .txt
106
+ ```
107
+
108
+ ## Usage
109
+
110
+ ```text
111
+ usage: cutf --path PATH [--checks] [--convert] [--copyOld]
112
+ [--printMissingCharString] [--printAllSkippedFile]
113
+ [--all] [--verbose] [--only-relevant]
114
+ --extensions EXT [EXT ...]
115
+ ```
116
+
117
+ ### Main options
118
+
119
+ - `--path`: file or directory to process.
120
+ - `--checks`: run missing-character checks.
121
+ - `--convert`: convert non-UTF files to UTF-8 with BOM.
122
+ - `--all`: enable both `--checks` and `--convert`.
123
+ - `--extensions`: list of extensions to scan (required), for example `.cpp .h .cs .ini`.
124
+ - `--copyOld`: copy the original file into the system temp folder before conversion.
125
+ - `--printMissingCharString`: print the line content for each missing-character finding.
126
+ - `--printAllSkippedFile`: print every skipped file instead of only the count.
127
+ - `--only-relevant`: hide less relevant missing-character entries.
128
+ - `--verbose`: print extra execution logs.
129
+
130
+ ## Typical Commands
131
+
132
+ Run checks only:
133
+
134
+ ```bash
135
+ uv run cutf --path ./project --checks --extensions .py .js .ts
136
+ ```
137
+
138
+ Run conversion + checks:
139
+
140
+ ```bash
141
+ uv run cutf --path ./project --all --extensions .cpp .h --copyOld
142
+ ```
143
+
144
+ Process one file:
145
+
146
+ ```bash
147
+ uv run cutf --path ./src/main.cpp --all --extensions .cpp
148
+ ```
149
+
150
+ ## Development
151
+
152
+ Run tests:
153
+
154
+ ```bash
155
+ uv run pytest
156
+ ```
157
+
158
+ Run linter:
159
+
160
+ ```bash
161
+ uv run ruff check .
162
+ ```
163
+
164
+ Format code:
165
+
166
+ ```bash
167
+ uv run ruff format .
168
+ ```
169
+
170
+ ## FAQ
171
+
172
+ ### Why does CUFT require `--extensions`?
173
+ It prevents accidental processing of unrelated files and keeps scans predictable.
174
+
175
+ ### Why UTF-8 **with BOM**?
176
+ Some tools and Windows-oriented workflows require BOM for UTF-8 detection.
177
+
178
+ ### What happens if `iconv` is missing?
179
+ CUFT stops before processing and prints an error. Install `iconv` and retry.
180
+
181
+ ### Where are original files copied when `--copyOld` is enabled?
182
+ They are copied to `<system-temp>/SrcChE`.
183
+
184
+ ### Does CUFT modify UTF-8 files?
185
+ No. A file is modified only when conversion is requested and the file is detected as non-UTF; otherwise it is skipped.
186
+
187
+ ## License
188
+
189
+ This project is distributed under the license in `LICENSE`.
190
+
@@ -0,0 +1,20 @@
1
+ cutf/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ cutf/app.py,sha256=Yze6qmTMh7ORMHUPPRjgPTeIUV3hQnXQrdECFMy5yfk,6595
3
+ cutf/controller/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
+ cutf/controller/fileChecker.py,sha256=3OzoyAVA0P4J7y97EgAMgFxUJZtRTLYCKAUJuviXGO8,3322
5
+ cutf/controller/fileController.py,sha256=84gB-wp8azGfu-kOf7fSDtEQGcUDgpZQA9mZVamHg88,4666
6
+ cutf/controller/resultHandler.py,sha256=UfxQ9iz3SU1C9VGuL9NY6jJESzJCBVOwHP0eRv6wz-M,6770
7
+ cutf/model/AppSetting.py,sha256=Iu-7Sy9mYsoy6-G7n6JtagOw2iP11RM5fBRvWTxlFmo,1193
8
+ cutf/model/FileScanResult.py,sha256=So3R7sOGro03YV9Yw1ABYspX9Fj-uEXTWXLvRodt8Gs,1267
9
+ cutf/model/MissingCharResult.py,sha256=BCdzZRDfPHNaMhIRTrJ29Cv6PyP9hJgUGyCmiUUCJXI,831
10
+ cutf/model/__init__.py,sha256=1_JoZQ01gT5-Xs2l2m_bnxjWdtaRYT8MP8P2cpIka4M,256
11
+ cutf/util/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
+ cutf/util/code.py,sha256=HtjY6XEn67Rn1mxBXtPY1_zSxaZTAO3xvJdft4NpwYQ,1767
13
+ cutf/util/iconv.py,sha256=jdnum1Nqu-IfQE8NUtJ0V-nViuClbSm01C8lesCFPQY,2030
14
+ cutf/util/log.py,sha256=TNgP8oH1C5WVR8NvP1g9agUSsUc6-r9ZuXwUrCnKej4,807
15
+ cutf/util/path.py,sha256=mnSvhj_oREcqUXZ-5lKRJh1Mw0KH8D7ybeEyMJRwTIk,1095
16
+ cutf-0.0.8.dist-info/METADATA,sha256=VxYDDBWgsyGjfCa3Q6KkwotX-FJIZCph-wdW7UY1NyQ,5303
17
+ cutf-0.0.8.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
18
+ cutf-0.0.8.dist-info/entry_points.txt,sha256=S-ygTSgHduL0RwSA24G2aCcvODm0EWIuHkF2hAXH_TA,39
19
+ cutf-0.0.8.dist-info/licenses/LICENSE,sha256=vo-PhwUi9tU24GL_1jckQKXonOeMIXigqe69KiDGuxM,1065
20
+ cutf-0.0.8.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.29.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ cutf = cutf.app:main
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 GaaabLiz
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.