pylizlib 0.0.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,30 @@
1
+ Metadata-Version: 2.2
2
+ Name: pylizlib
3
+ Version: 0.0.4
4
+ Summary: Script to convert files to UTF-8.
5
+ Author: Gabliz
6
+ Author-email:
7
+ Keywords: python,encoding,utf-8
8
+ Classifier: Development Status :: 1 - Planning
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Operating System :: Unix
12
+ Classifier: Operating System :: MacOS :: MacOS X
13
+ Classifier: Operating System :: Microsoft :: Windows
14
+ Description-Content-Type: text/markdown
15
+ Dynamic: author
16
+ Dynamic: classifier
17
+ Dynamic: description
18
+ Dynamic: description-content-type
19
+ Dynamic: keywords
20
+ Dynamic: summary
21
+
22
+
23
+ # CUFT
24
+ Little script to convert the encoding of any text into UTF-8 (With BOM).
25
+
26
+ ## Installation
27
+ ```bash
28
+ pip install cuft
29
+ ```
30
+
@@ -0,0 +1,8 @@
1
+ # CUFT
2
+ Little script to convert the encoding of any text into UTF-8 (With BOM).
3
+
4
+ ## Installation
5
+ ```bash
6
+ pip install cuft
7
+ ```
8
+
File without changes
@@ -0,0 +1,64 @@
1
+ import os
2
+
3
+ import rich
4
+
5
+ from model.MissingCharResult import MissingCharResult
6
+ from util.code import is_line_commented
7
+
8
+
9
def check_illegal_chars(file_path, source_encoding) -> list[MissingCharResult]:
    """Scan *file_path* for the raw byte sequence EF BF BD (the UTF-8
    encoding of the replacement character U+FFFD) and report each hit.

    A hit means a previous encoding conversion already destroyed a
    character.  Returns one MissingCharResult per occurrence.
    """
    results = []
    with open(file_path, 'rb') as f:
        raw_data = f.read()

    # Strip a UTF-8 BOM, if present, so byte offsets start at the content.
    bom = b'\xef\xbb\xbf'
    if raw_data.startswith(bom):
        raw_data = raw_data[len(bom):]  # remove the BOM

    # Decode the bytes using the specified encoding (for example UTF-8).
    # NOTE(review): with errors='replace' decode() never raises
    # UnicodeDecodeError, so the except branch below looks unreachable
    # (an invalid encoding NAME would raise LookupError instead) — confirm.
    try:
        text_data = raw_data.decode(source_encoding, errors='replace')
    except UnicodeDecodeError as e:
        # Warn the user about the decode failure, then abort this file.
        rich.print(f"\t[bold red]Error decoding file {os.path.basename(file_path)} with encoding {source_encoding}.[/bold red]")
        raise RuntimeError(f"Error decoding file {os.path.basename(file_path)} during check_illegal_chars().")


    # Walk the raw bytes looking for the EF BF BD sequence.
    # (-2 keeps idx+2 inside the buffer during the 3-byte comparison.)
    # NOTE(review): idx is a BYTE offset into raw_data but is used below to
    # index text_data (a CHARACTER offset); the two drift apart as soon as
    # the file contains multi-byte characters — line numbers/positions are
    # only exact for effectively single-byte-per-char content. TODO confirm.
    for idx in range(len(raw_data) - 2):
        if raw_data[idx] == 0xEF and raw_data[idx + 1] == 0xBF and raw_data[idx + 2] == 0xBD:
            # Locate the boundaries of the line containing the hit.
            line_start = text_data.rfind('\n', 0, idx)
            line_end = text_data.find('\n', idx)

            # Last line of the file has no trailing newline.
            if line_end == -1:
                line_end = len(text_data)

            # Full text of the line where the error sits.
            line = text_data[line_start + 1:line_end]
            # Position of the replacement char within that line
            # (-1 when it is not visible in the decoded text).
            char_pos_in_line = line.find('�')

            # 1-based line number of the hit.
            line_number = text_data.count('\n', 0, idx) + 1

            # Highlighted variant of the line (currently disabled).
            #highlighted_line = (line[:char_pos_in_line] +f"[bold red]{line[char_pos_in_line]}[/bold red]" +line[char_pos_in_line + 1:])

            # Print the formatted finding.
            rich.print(f"\t[bold yellow]Found illegal character at position {idx}, line {line_number} in file {os.path.basename(file_path)}[/bold yellow]")
            #rich.print("\t" + highlighted_line)

            # Collect the hit as a MissingCharResult.
            result = MissingCharResult(
                is_commented=is_line_commented(file_path, line_number),
                string=line.lstrip(' \t'),
                line=line_number,
                file_name=os.path.basename(file_path),
                char_position=char_pos_in_line,
                char_found=char_pos_in_line != -1,
                byte_sequence_file_pos=idx
            )

            results.append(result)

    return results
@@ -0,0 +1,103 @@
1
+ import os
2
+
3
+ import chardet
4
+ import rich
5
+
6
+ from controller.fileChecker import check_illegal_chars
7
+ from model.AppSetting import AppSetting
8
+ from model.FileScanResult import FileScanResult
9
+ from util.iconv import convert_to_utf8_with_iconv
10
+ from util.log import format_log_path
11
+ from util.path import copy_old_encoded_file
12
+
13
+
14
def handle_file(file_path, setting: AppSetting) -> FileScanResult:
    """Check — and, when configured, convert — the text encoding of one file.

    Args:
        file_path: path of the file to process.
        setting: run configuration (extension whitelist, convert/check flags,
            verbosity, backup option).

    Returns:
        A FileScanResult describing what happened: skipped, checked,
        converted, or aborted by an error (RuntimeError is caught and
        reported via ``error_skipped``/``error_description``).
    """
    file_name = os.path.basename(file_path)
    encoding = None

    try:
        # Skip files whose extension is not in the configured whitelist.
        _, extension = os.path.splitext(file_path)
        if extension.lower() not in setting.extensions:
            if setting.verbose:
                rich.print(f"File {file_name} has no supported extension ({extension}). Skipping...")
            return FileScanResult(
                file_path=file_path,
                file_name=file_name,
                skipped=True,
            )
        if setting.verbose:
            rich.print(f"## Checking file \"{format_log_path(file_path)}\"...")

        # Detect the current encoding with chardet.
        if setting.verbose:
            rich.print(f"Opening file \"{file_path}\"...")
        with open(file_path, 'rb') as f:
            raw_data = f.read()
        result = chardet.detect(raw_data)
        encoding = result['encoding']

        if encoding is None:
            raise RuntimeError(f"Cannot detect encoding of {file_name}")
        # BUG FIX: chardet's name casing is not stable ("utf-8" vs
        # "UTF-8-SIG" vs "UTF-16"), and the original only lowercased the
        # utf-8-sig comparison — so uppercase "UTF-16"/"UTF-8" results were
        # wrongly treated as needing conversion. Compare case-insensitively.
        is_already_utf8 = encoding.lower() in ("utf-8", "utf-8-sig", "utf-16")

        # Conversion happens only when enabled and the file is not UTF already.
        needs_convert = not is_already_utf8 and setting.convert

        # Keep a copy of the original file (if enabled) before converting.
        if needs_convert and setting.copy_old_encoded:
            old_copy_path = copy_old_encoded_file(file_path)
            rich.print(f"Copied old encoded file to {format_log_path(old_copy_path)}")

        # Execute the requested operations.
        if needs_convert:
            rich.print(f"File \"{file_name}\" has encoding {encoding}. Proceeding to convert and check...")
            output_encoding = "utf-8"
            convert_to_utf8_with_iconv(file_path, encoding, output_encoding)
            missing_chars = check_illegal_chars(file_path, output_encoding)
            if setting.verbose:
                rich.print(f"Finished checking and converting file \"{file_name}\"!")
            return FileScanResult(
                file_path=file_path,
                file_name=file_name,
                encoding_before=encoding,
                encoding_after=output_encoding + "(BOM)",
                check_missing_char=missing_chars,
                converted=True,
            )
        elif setting.checks:
            rich.print(f"File \"{file_name}\" has encoding {encoding}. Proceeding to check...")
            missing_chars = check_illegal_chars(file_path, encoding)
            if setting.verbose:
                rich.print(f"Finished checking file \"{file_name}\"!")
            return FileScanResult(
                file_path=file_path,
                file_name=file_name,
                encoding_before=encoding,
                check_missing_char=missing_chars,
            )
        else:
            rich.print(f"No operation to do on {file_name}")
            return FileScanResult(
                file_path=file_path,
                file_name=file_name,
                skipped=True
            )

    except RuntimeError as e:
        # Report the failure and mark the result as skipped-by-error.
        rich.print(f"[bold red]Conversion/checking of {file_name} interrupted because of an error: {e}[/bold red]")
        return FileScanResult(
            file_path=file_path,
            file_name=file_name,
            encoding_before=encoding,
            error_skipped=True,
            error_description=str(e),
        )
@@ -0,0 +1,128 @@
1
+ from collections import Counter
2
+
3
+ import rich
4
+
5
+ from model.AppSetting import AppSetting
6
+ from model.FileScanResult import FileScanResult
7
+ from util.log import format_log_error
8
+
9
+
10
def __print_encoding_before(results: list[FileScanResult]):
    """Print every source encoding detected during the scan with its count."""
    encoding_counter = Counter()

    for result in results:
        # Results that never got an encoding (e.g. skipped files) are ignored.
        if result.encoding_before:
            encoding_counter[result.encoding_before] += 1

    # len(counter) is the number of distinct encodings; the original
    # materialized .items() just to measure it.
    rich.print(f"@ List of encodings found during scanning ({len(encoding_counter)}):")
    for encoding, count in encoding_counter.items():
        rich.print(f"{encoding}: {count}")
23
+
24
def __print_converted_files(results: list[FileScanResult]):
    """Print one line per file that was actually converted."""
    rich.print(f"@ List of converted files:")
    converted = [res for res in results if res.converted]
    for res in converted:
        rich.print(f"Converted file {res.file_name} from encoding {res.encoding_before} to encoding {res.encoding_after}.")
    if not converted:
        rich.print("0 Files converted.")
33
+
34
def __print_skipped_files(results: list[FileScanResult], print_all: bool):
    """Report files that were skipped because no action was required.

    With print_all=True every skipped file is listed individually;
    otherwise only the total count is printed.  The original duplicated
    the counting loop in both branches — count once instead.
    """
    count = sum(1 for result in results if result.skipped)
    rich.print(f"@ List of skipped files:")
    if count == 0:
        rich.print("0 skipped file founds.")
    elif print_all:
        for result in results:
            if result.skipped:
                rich.print(f"File {result.file_name} skipped because no action is required.")
    else:
        rich.print(f"{count} file skipped because no action was required.")
52
+
53
def __print_skipped_error_files(results: list[FileScanResult]):
    """Print every file whose processing was aborted by an error."""
    rich.print(f"@ List of skipped files (from errors):")
    errored = [res for res in results if res.error_skipped]
    for res in errored:
        rich.print(format_log_error(f"File {res.file_path} skipped because of an error: {res.error_description}"))
    if not errored:
        rich.print("0 errors founds.")
62
+
63
def __print_missing_chars_on_comments(results: list[FileScanResult], print_mis_char_string: bool):
    """Print every replacement-character hit that sits on a commented line."""
    rich.print(f"@ List of missing chars found on comments:")
    for result in results:
        if result.check_missing_char is None:
            continue
        for hit in result.check_missing_char:
            if not hit.is_commented:
                continue
            rich.print(f"File = {hit.file_name} | Missing char Visibile = {hit.char_found} | Line = {hit.line} | Line Pos = {hit.char_position} | File pos = {hit.byte_sequence_file_pos}")
            if print_mis_char_string:
                rich.print(f"String = {hit.string}")
            rich.print("-------------------")
73
+
74
def __print_missing_chars_on_code(results: list[FileScanResult], print_mis_char_string: bool, only_relevant: bool):
    """Print replacement-character hits found on code (non-comment) lines.

    Hits whose '�' character is not visible in the decoded text are printed
    only when only_relevant is False.  The original duplicated the long
    print statement in both branches; it is hoisted under one condition.
    """
    rich.print(f"@ List of missing chars found on code:")
    count = 0
    for result in results:
        if result.check_missing_char is None:
            continue
        for hit in result.check_missing_char:
            if hit.is_commented:
                continue
            count += 1
            # Visible hits always print; invisible ones only in full mode.
            if hit.char_found or not only_relevant:
                rich.print(f"File = {hit.file_name} | Missing char Visibile = {hit.char_found} | Line = {hit.line} | Line Pos = {hit.char_position} | File pos = {hit.byte_sequence_file_pos}")
            if print_mis_char_string:
                rich.print(f"String = {hit.string}")
            rich.print("-------------------")
    if count == 0:
        rich.print("0 missing chars on code founds.")
92
+
93
def print_results(results: list[FileScanResult], setting: AppSetting):
    """Print the final report for a whole scan run.

    Sections: encodings found, converted files, skipped files, files skipped
    because of errors and, last, the missing-character hits (the comments
    section is omitted when only relevant results were requested).
    """
    rich.print("\n\n")

    if setting.verbose:
        rich.print("########################################################")
    rich.print("### START OF RESULTS ###################################")
    if setting.verbose:
        rich.print("########################################################")

    # Encodings detected before any conversion.
    __print_encoding_before(results)
    rich.print("\n")

    # Files actually converted.
    __print_converted_files(results)
    rich.print("\n")

    # Files skipped because no action was required.
    __print_skipped_files(results, setting.print_skipped_file_no_action)
    rich.print("\n")

    # Files skipped because of errors.
    __print_skipped_error_files(results)
    rich.print("\n")

    # Missing chars inside comments (only in the full report).
    if not setting.print_result_only_relevant:
        __print_missing_chars_on_comments(results, setting.print_missing_char_str)
        rich.print("\n")

    # Missing chars on code lines.
    __print_missing_chars_on_code(results, setting.print_missing_char_str, setting.print_result_only_relevant)

    # BUG FIX: was `rich,print("\n\n")` (comma typo), which evaluated a
    # tuple and called the BUILTIN print instead of rich.print.
    rich.print("\n\n")
    if setting.verbose:
        rich.print("########################################################")
    rich.print("### END OF RESULTS #####################################")
    if setting.verbose:
        rich.print("########################################################")
@@ -0,0 +1,15 @@
1
+ from dataclasses import dataclass
2
+
3
+
4
@dataclass
class AppSetting:
    """User-selected options that drive a whole scan/convert run."""

    input_path: str                             # path of the file or directory to process
    is_file: bool                               # presumably: input_path is a single file — not used in the shown modules, confirm
    extensions: list[str]                       # lowercase extensions (including the dot) to handle
    checks: bool = False                        # scan files for illegal (replacement) characters
    convert: bool = False                       # convert non-UTF files to UTF-8 (with BOM)
    copy_old_encoded: bool = False              # keep a copy of the original before converting
    print_missing_char_str: bool = False        # include the offending line text in the report
    print_skipped_file_no_action: bool = False  # list every skipped file individually
    print_result_only_relevant: bool = False    # report only visible missing-char hits on code
    verbose: bool = False                       # extra progress output
@@ -0,0 +1,17 @@
1
+ from dataclasses import dataclass
2
+
3
+ from model.MissingCharResult import MissingCharResult
4
+
5
+
6
@dataclass
class FileScanResult:
    """Outcome of processing one file (check, conversion, skip or error)."""

    file_path: str
    file_name: str
    encoding_before: str | None = None     # encoding detected by chardet (None if never detected)
    encoding_after: str | None = None      # encoding after conversion, e.g. "utf-8(BOM)"
    converted: bool = False                # True when the file was re-encoded
    check_missing_char: list[MissingCharResult] | None = None  # scan hits (None = not scanned)
    error_skipped: bool = False            # processing aborted because of an error
    error_name: str | None = None          # NOTE(review): never populated in the shown code
    error_description: str | None = None   # message of the error that aborted processing
    skipped: bool = False                  # no action required / unsupported extension
@@ -0,0 +1,12 @@
1
+ from dataclasses import dataclass
2
+
3
+
4
@dataclass
class MissingCharResult:
    """One occurrence of the replacement-character byte sequence (EF BF BD)."""

    is_commented: bool           # True when the hit sits on a commented line
    string: str                  # the affected line, left-stripped of spaces/tabs
    line: int                    # 1-based line number in the file
    file_name: str               # base name of the scanned file
    char_position: int           # index of '�' within the line (-1 when not found)
    char_found: bool             # whether '�' is visible in the decoded line
    byte_sequence_file_pos: int  # byte offset of the sequence in the (BOM-stripped) raw data
File without changes
@@ -0,0 +1,30 @@
1
+ Metadata-Version: 2.2
2
+ Name: pylizlib
3
+ Version: 0.0.4
4
+ Summary: Script to convert files to UTF-8.
5
+ Author: Gabliz
6
+ Author-email:
7
+ Keywords: python,encoding,utf-8
8
+ Classifier: Development Status :: 1 - Planning
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Operating System :: Unix
12
+ Classifier: Operating System :: MacOS :: MacOS X
13
+ Classifier: Operating System :: Microsoft :: Windows
14
+ Description-Content-Type: text/markdown
15
+ Dynamic: author
16
+ Dynamic: classifier
17
+ Dynamic: description
18
+ Dynamic: description-content-type
19
+ Dynamic: keywords
20
+ Dynamic: summary
21
+
22
+
23
+ # CUFT
24
+ Little script to convert the encoding of any text into UTF-8 (With BOM).
25
+
26
+ ## Installation
27
+ ```bash
28
+ pip install cuft
29
+ ```
30
+
@@ -0,0 +1,21 @@
1
+ README.md
2
+ setup.py
3
+ controller/__init__.py
4
+ controller/fileChecker.py
5
+ controller/fileController.py
6
+ controller/resultHandler.py
7
+ model/AppSetting.py
8
+ model/FileScanResult.py
9
+ model/MissingCharResult.py
10
+ model/__init__.py
11
+ pylizlib.egg-info/PKG-INFO
12
+ pylizlib.egg-info/SOURCES.txt
13
+ pylizlib.egg-info/dependency_links.txt
14
+ pylizlib.egg-info/entry_points.txt
15
+ pylizlib.egg-info/top_level.txt
16
+ util/__init__.py
17
+ util/code.py
18
+ util/fileUtils.py
19
+ util/iconv.py
20
+ util/log.py
21
+ util/path.py
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ cuft = cuft:main
@@ -0,0 +1,3 @@
1
+ controller
2
+ model
3
+ util
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,39 @@
1
from setuptools import setup, find_packages
import codecs
import os

here = os.path.abspath(os.path.dirname(__file__))

# Ship the README as the PyPI long description.
with codecs.open(os.path.join(here, "README.md"), encoding="utf-8") as fh:
    long_description = "\n" + fh.read()

VERSION = '0.0.4'
DESCRIPTION = 'Script to convert files to UTF-8.'

# Package metadata and entry points.
# (The unused LONG_DESCRIPTION constant was removed; the real long
# description comes from README.md above.)
setup(
    name="pylizlib",
    version=VERSION,
    author="Gabliz",
    author_email="",
    description=DESCRIPTION,
    long_description_content_type="text/markdown",
    long_description=long_description,
    packages=find_packages(),
    # BUG FIX: the code imports chardet and rich, but no dependency was
    # declared, so a fresh install could not run.
    install_requires=["chardet", "rich"],
    entry_points={
        "console_scripts": [
            # NOTE(review): "cuft:main" expects a top-level module named
            # "cuft", which is not among the packaged sources
            # (controller/model/util only) — confirm this entry point works.
            "cuft=cuft:main",
        ],
    },
    keywords=['python', 'encoding', 'utf-8'],
    classifiers=[
        "Development Status :: 1 - Planning",
        "Intended Audience :: Developers",
        "Programming Language :: Python :: 3",
        "Operating System :: Unix",
        "Operating System :: MacOS :: MacOS X",
        "Operating System :: Microsoft :: Windows",
    ]
)
File without changes
@@ -0,0 +1,31 @@
1
+ import re
2
+
3
+ def is_line_commented(file_path, line_number):
4
+ in_block_comment = False
5
+ with open(file_path, 'r') as file:
6
+ for current_line_number, line in enumerate(file, start=1):
7
+ # Se siamo nella linea di interesse
8
+ if current_line_number == line_number:
9
+ # Verifica se la linea è commentata
10
+ line = line.strip()
11
+ if in_block_comment:
12
+ return True # La riga è dentro un blocco di commento
13
+ # Commenti su singola riga (//)
14
+ if line.startswith("//"):
15
+ return True
16
+ # Controllo se la riga è dentro un commento di blocco
17
+ if "/*" in line:
18
+ in_block_comment = True
19
+ if "*/" in line:
20
+ in_block_comment = False
21
+ return False
22
+ return False # La riga non è commentata
23
+ # Gestisci l'inizio e la fine di un blocco di commento
24
+ if in_block_comment:
25
+ if "*/" in line:
26
+ in_block_comment = False
27
+ continue
28
+ if "/*" in line:
29
+ in_block_comment = True
30
+
31
+ return False # Se non troviamo mai la linea, significa che non è commentata
@@ -0,0 +1,2 @@
1
+ import os
2
+
@@ -0,0 +1,41 @@
1
+ import os
2
+ import subprocess
3
+
4
+ import rich
5
+
6
+ from util.log import format_log_path
7
+
8
+
9
def convert_to_utf8_with_iconv(path: str, source_encoding: str, target_encoding: str):
    """Re-encode *path* in place to *target_encoding*, prefixing a UTF-8 BOM.

    Uses the external ``iconv`` tool.  Raises RuntimeError when iconv fails,
    so callers can report the file as skipped instead of silently assuming
    the conversion succeeded (the original swallowed the error).
    """
    file_name = os.path.basename(path)
    rich.print(f"Converting {file_name} to {target_encoding} with iconv...")

    temp_file_path = path + ".tmp"
    temp_bom_file_path = path + ".bom"

    try:
        command = ["iconv", "-f", source_encoding, "-t", target_encoding, path]

        # iconv writes raw bytes to stdout, so capture them in binary mode
        # (the original opened the temp file in text mode, whose encoding
        # wrapper never applies to a subprocess's direct writes).
        with open(temp_file_path, 'wb') as temp_file:
            subprocess.run(command, stdout=temp_file, stderr=subprocess.PIPE, check=True)

        # Prepend the UTF-8 BOM to the converted content.
        with open(temp_bom_file_path, 'wb') as bom_file:
            bom_file.write(b'\xef\xbb\xbf')
            with open(temp_file_path, 'rb') as temp_file:
                bom_file.write(temp_file.read())

        # Replace the original file with the BOM-prefixed version.
        os.replace(temp_bom_file_path, path)
        rich.print(f"Conversion completed for {format_log_path(os.path.basename(path))}")

    except subprocess.CalledProcessError as e:
        rich.print(f"Errore nella conversione di {path}: {e}")
        # BUG FIX: the error used to be swallowed, so handle_file() went on
        # checking a file that was never converted.  Re-raise as
        # RuntimeError, the type callers already catch.
        raise RuntimeError(f"iconv failed for {file_name}: {e}") from e
    finally:
        # Remove any leftover temporary files.
        if os.path.exists(temp_file_path):
            os.remove(temp_file_path)
        if os.path.exists(temp_bom_file_path):
            os.remove(temp_bom_file_path)
@@ -0,0 +1,9 @@
1
+
2
def format_log_path(path: str):
    """Wrap *path* in rich markup so file paths stand out in magenta."""
    return "[bold magenta]" + path + "[/bold magenta]"
4
+
5
def format_log_warning(string: str):
    """Wrap *string* in rich markup for warning (yellow) output."""
    return "".join(("[bold yellow]", string, "[/bold yellow]"))
7
+
8
def format_log_error(string: str):
    """Wrap *string* in rich markup for error (red) output."""
    return "{}{}{}".format("[bold red]", string, "[/bold red]")
@@ -0,0 +1,26 @@
1
+ import os
2
+ import shutil
3
+ import tempfile
4
+
5
+
6
def copy_old_encoded_file(file_path):
    """Copy *file_path* into a "SrcChE" folder under the system temp directory.

    Used to keep a backup of a file's original bytes before re-encoding it.

    Returns:
        The destination path of the copy.

    Raises:
        FileNotFoundError: when the source file does not exist.
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"Il file {file_path} non esiste.")

    # Backups go to <tempdir>/SrcChE; exist_ok avoids the check-then-create
    # race of the original exists()/makedirs() pair.  (The original also
    # computed an unused file_name local — removed.)
    backup_dir = os.path.join(tempfile.gettempdir(), "SrcChE")
    os.makedirs(backup_dir, exist_ok=True)

    # copy2 preserves file metadata (timestamps) along with the content.
    return shutil.copy2(file_path, backup_dir)