pylizlib 0.0.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,30 @@
1
+ Metadata-Version: 2.2
2
+ Name: pylizlib
3
+ Version: 0.0.4
4
+ Summary: Script to convert files to UTF-8.
5
+ Author: Gabliz
6
+ Author-email:
7
+ Keywords: python,encoding,utf-8
8
+ Classifier: Development Status :: 1 - Planning
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Operating System :: Unix
12
+ Classifier: Operating System :: MacOS :: MacOS X
13
+ Classifier: Operating System :: Microsoft :: Windows
14
+ Description-Content-Type: text/markdown
15
+ Dynamic: author
16
+ Dynamic: classifier
17
+ Dynamic: description
18
+ Dynamic: description-content-type
19
+ Dynamic: keywords
20
+ Dynamic: summary
21
+
22
+
23
+ # CUFT
24
+ Little script to convert the encoding of any text into UTF-8 (With BOM).
25
+
26
+ ## Installation
27
+ ```bash
28
+ pip install cuft
29
+ ```
30
+
@@ -0,0 +1,8 @@
1
+ # CUFT
2
+ Little script to convert the encoding of any text into UTF-8 (With BOM).
3
+
4
+ ## Installation
5
+ ```bash
6
+ pip install cuft
7
+ ```
8
+
File without changes
@@ -0,0 +1,64 @@
1
+ import os
2
+
3
+ import rich
4
+
5
+ from model.MissingCharResult import MissingCharResult
6
+ from util.code import is_line_commented
7
+
8
+
9
def check_illegal_chars(file_path, source_encoding) -> list[MissingCharResult]:
    """Scan *file_path* for the raw byte sequence EF BF BD (the UTF-8
    encoding of the replacement character U+FFFD) and report each hit.

    A hit means a previous encoding conversion already destroyed a
    character.  Returns one MissingCharResult per occurrence.
    """
    results = []
    with open(file_path, 'rb') as f:
        raw_data = f.read()

    # Strip a UTF-8 BOM, if present, so byte offsets start at the content.
    bom = b'\xef\xbb\xbf'
    if raw_data.startswith(bom):
        raw_data = raw_data[len(bom):]  # remove the BOM

    # Decode the bytes using the specified encoding (for example UTF-8).
    # NOTE(review): with errors='replace' decode() never raises
    # UnicodeDecodeError, so the except branch below looks unreachable
    # (an invalid encoding NAME would raise LookupError instead) — confirm.
    try:
        text_data = raw_data.decode(source_encoding, errors='replace')
    except UnicodeDecodeError as e:
        # Warn the user about the decode failure, then abort this file.
        rich.print(f"\t[bold red]Error decoding file {os.path.basename(file_path)} with encoding {source_encoding}.[/bold red]")
        raise RuntimeError(f"Error decoding file {os.path.basename(file_path)} during check_illegal_chars().")


    # Walk the raw bytes looking for the EF BF BD sequence.
    # (-2 keeps idx+2 inside the buffer during the 3-byte comparison.)
    # NOTE(review): idx is a BYTE offset into raw_data but is used below to
    # index text_data (a CHARACTER offset); the two drift apart as soon as
    # the file contains multi-byte characters — line numbers/positions are
    # only exact for effectively single-byte-per-char content. TODO confirm.
    for idx in range(len(raw_data) - 2):
        if raw_data[idx] == 0xEF and raw_data[idx + 1] == 0xBF and raw_data[idx + 2] == 0xBD:
            # Locate the boundaries of the line containing the hit.
            line_start = text_data.rfind('\n', 0, idx)
            line_end = text_data.find('\n', idx)

            # Last line of the file has no trailing newline.
            if line_end == -1:
                line_end = len(text_data)

            # Full text of the line where the error sits.
            line = text_data[line_start + 1:line_end]
            # Position of the replacement char within that line
            # (-1 when it is not visible in the decoded text).
            char_pos_in_line = line.find('�')

            # 1-based line number of the hit.
            line_number = text_data.count('\n', 0, idx) + 1

            # Highlighted variant of the line (currently disabled).
            #highlighted_line = (line[:char_pos_in_line] +f"[bold red]{line[char_pos_in_line]}[/bold red]" +line[char_pos_in_line + 1:])

            # Print the formatted finding.
            rich.print(f"\t[bold yellow]Found illegal character at position {idx}, line {line_number} in file {os.path.basename(file_path)}[/bold yellow]")
            #rich.print("\t" + highlighted_line)

            # Collect the hit as a MissingCharResult.
            result = MissingCharResult(
                is_commented=is_line_commented(file_path, line_number),
                string=line.lstrip(' \t'),
                line=line_number,
                file_name=os.path.basename(file_path),
                char_position=char_pos_in_line,
                char_found=char_pos_in_line != -1,
                byte_sequence_file_pos=idx
            )

            results.append(result)

    return results
@@ -0,0 +1,103 @@
1
+ import os
2
+
3
+ import chardet
4
+ import rich
5
+
6
+ from controller.fileChecker import check_illegal_chars
7
+ from model.AppSetting import AppSetting
8
+ from model.FileScanResult import FileScanResult
9
+ from util.iconv import convert_to_utf8_with_iconv
10
+ from util.log import format_log_path
11
+ from util.path import copy_old_encoded_file
12
+
13
+
14
def handle_file(file_path, setting: AppSetting) -> FileScanResult:
    """Check — and, when configured, convert — the text encoding of one file.

    Args:
        file_path: path of the file to process.
        setting: run configuration (extension whitelist, convert/check flags,
            verbosity, backup option).

    Returns:
        A FileScanResult describing what happened: skipped, checked,
        converted, or aborted by an error (RuntimeError is caught and
        reported via ``error_skipped``/``error_description``).
    """
    file_name = os.path.basename(file_path)
    encoding = None

    try:
        # Skip files whose extension is not in the configured whitelist.
        _, extension = os.path.splitext(file_path)
        if extension.lower() not in setting.extensions:
            if setting.verbose:
                rich.print(f"File {file_name} has no supported extension ({extension}). Skipping...")
            return FileScanResult(
                file_path=file_path,
                file_name=file_name,
                skipped=True,
            )
        if setting.verbose:
            rich.print(f"## Checking file \"{format_log_path(file_path)}\"...")

        # Detect the current encoding with chardet.
        if setting.verbose:
            rich.print(f"Opening file \"{file_path}\"...")
        with open(file_path, 'rb') as f:
            raw_data = f.read()
        result = chardet.detect(raw_data)
        encoding = result['encoding']

        if encoding is None:
            raise RuntimeError(f"Cannot detect encoding of {file_name}")
        # BUG FIX: chardet's name casing is not stable ("utf-8" vs
        # "UTF-8-SIG" vs "UTF-16"), and the original only lowercased the
        # utf-8-sig comparison — so uppercase "UTF-16"/"UTF-8" results were
        # wrongly treated as needing conversion. Compare case-insensitively.
        is_already_utf8 = encoding.lower() in ("utf-8", "utf-8-sig", "utf-16")

        # Conversion happens only when enabled and the file is not UTF already.
        needs_convert = not is_already_utf8 and setting.convert

        # Keep a copy of the original file (if enabled) before converting.
        if needs_convert and setting.copy_old_encoded:
            old_copy_path = copy_old_encoded_file(file_path)
            rich.print(f"Copied old encoded file to {format_log_path(old_copy_path)}")

        # Execute the requested operations.
        if needs_convert:
            rich.print(f"File \"{file_name}\" has encoding {encoding}. Proceeding to convert and check...")
            output_encoding = "utf-8"
            convert_to_utf8_with_iconv(file_path, encoding, output_encoding)
            missing_chars = check_illegal_chars(file_path, output_encoding)
            if setting.verbose:
                rich.print(f"Finished checking and converting file \"{file_name}\"!")
            return FileScanResult(
                file_path=file_path,
                file_name=file_name,
                encoding_before=encoding,
                encoding_after=output_encoding + "(BOM)",
                check_missing_char=missing_chars,
                converted=True,
            )
        elif setting.checks:
            rich.print(f"File \"{file_name}\" has encoding {encoding}. Proceeding to check...")
            missing_chars = check_illegal_chars(file_path, encoding)
            if setting.verbose:
                rich.print(f"Finished checking file \"{file_name}\"!")
            return FileScanResult(
                file_path=file_path,
                file_name=file_name,
                encoding_before=encoding,
                check_missing_char=missing_chars,
            )
        else:
            rich.print(f"No operation to do on {file_name}")
            return FileScanResult(
                file_path=file_path,
                file_name=file_name,
                skipped=True
            )

    except RuntimeError as e:
        # Report the failure and mark the result as skipped-by-error.
        rich.print(f"[bold red]Conversion/checking of {file_name} interrupted because of an error: {e}[/bold red]")
        return FileScanResult(
            file_path=file_path,
            file_name=file_name,
            encoding_before=encoding,
            error_skipped=True,
            error_description=str(e),
        )
@@ -0,0 +1,128 @@
1
+ from collections import Counter
2
+
3
+ import rich
4
+
5
+ from model.AppSetting import AppSetting
6
+ from model.FileScanResult import FileScanResult
7
+ from util.log import format_log_error
8
+
9
+
10
def __print_encoding_before(results: list[FileScanResult]):
    """Print every source encoding detected during the scan with its count."""
    encoding_counter = Counter()

    for result in results:
        # Results that never got an encoding (e.g. skipped files) are ignored.
        if result.encoding_before:
            encoding_counter[result.encoding_before] += 1

    # len(counter) is the number of distinct encodings; the original
    # materialized .items() just to measure it.
    rich.print(f"@ List of encodings found during scanning ({len(encoding_counter)}):")
    for encoding, count in encoding_counter.items():
        rich.print(f"{encoding}: {count}")
23
+
24
def __print_converted_files(results: list[FileScanResult]):
    """Print one line per file that was actually converted."""
    rich.print(f"@ List of converted files:")
    converted = [res for res in results if res.converted]
    for res in converted:
        rich.print(f"Converted file {res.file_name} from encoding {res.encoding_before} to encoding {res.encoding_after}.")
    if not converted:
        rich.print("0 Files converted.")
33
+
34
def __print_skipped_files(results: list[FileScanResult], print_all: bool):
    """Report files that were skipped because no action was required.

    With print_all=True every skipped file is listed individually;
    otherwise only the total count is printed.  The original duplicated
    the counting loop in both branches — count once instead.
    """
    count = sum(1 for result in results if result.skipped)
    rich.print(f"@ List of skipped files:")
    if count == 0:
        rich.print("0 skipped file founds.")
    elif print_all:
        for result in results:
            if result.skipped:
                rich.print(f"File {result.file_name} skipped because no action is required.")
    else:
        rich.print(f"{count} file skipped because no action was required.")
52
+
53
def __print_skipped_error_files(results: list[FileScanResult]):
    """Print every file whose processing was aborted by an error."""
    rich.print(f"@ List of skipped files (from errors):")
    errored = [res for res in results if res.error_skipped]
    for res in errored:
        rich.print(format_log_error(f"File {res.file_path} skipped because of an error: {res.error_description}"))
    if not errored:
        rich.print("0 errors founds.")
62
+
63
def __print_missing_chars_on_comments(results: list[FileScanResult], print_mis_char_string: bool):
    """Print every replacement-character hit that sits on a commented line."""
    rich.print(f"@ List of missing chars found on comments:")
    for result in results:
        if result.check_missing_char is None:
            continue
        for hit in result.check_missing_char:
            if not hit.is_commented:
                continue
            rich.print(f"File = {hit.file_name} | Missing char Visibile = {hit.char_found} | Line = {hit.line} | Line Pos = {hit.char_position} | File pos = {hit.byte_sequence_file_pos}")
            if print_mis_char_string:
                rich.print(f"String = {hit.string}")
            rich.print("-------------------")
73
+
74
def __print_missing_chars_on_code(results: list[FileScanResult], print_mis_char_string: bool, only_relevant: bool):
    """Print replacement-character hits found on code (non-comment) lines.

    Hits whose '�' character is not visible in the decoded text are printed
    only when only_relevant is False.  The original duplicated the long
    print statement in both branches; it is hoisted under one condition.
    """
    rich.print(f"@ List of missing chars found on code:")
    count = 0
    for result in results:
        if result.check_missing_char is None:
            continue
        for hit in result.check_missing_char:
            if hit.is_commented:
                continue
            count += 1
            # Visible hits always print; invisible ones only in full mode.
            if hit.char_found or not only_relevant:
                rich.print(f"File = {hit.file_name} | Missing char Visibile = {hit.char_found} | Line = {hit.line} | Line Pos = {hit.char_position} | File pos = {hit.byte_sequence_file_pos}")
            if print_mis_char_string:
                rich.print(f"String = {hit.string}")
            rich.print("-------------------")
    if count == 0:
        rich.print("0 missing chars on code founds.")
92
+
93
def print_results(results: list[FileScanResult], setting: AppSetting):
    """Print the final report for a whole scan run.

    Sections: encodings found, converted files, skipped files, files skipped
    because of errors and, last, the missing-character hits (the comments
    section is omitted when only relevant results were requested).
    """
    rich.print("\n\n")

    if setting.verbose:
        rich.print("########################################################")
    rich.print("### START OF RESULTS ###################################")
    if setting.verbose:
        rich.print("########################################################")

    # Encodings detected before any conversion.
    __print_encoding_before(results)
    rich.print("\n")

    # Files actually converted.
    __print_converted_files(results)
    rich.print("\n")

    # Files skipped because no action was required.
    __print_skipped_files(results, setting.print_skipped_file_no_action)
    rich.print("\n")

    # Files skipped because of errors.
    __print_skipped_error_files(results)
    rich.print("\n")

    # Missing chars inside comments (only in the full report).
    if not setting.print_result_only_relevant:
        __print_missing_chars_on_comments(results, setting.print_missing_char_str)
        rich.print("\n")

    # Missing chars on code lines.
    __print_missing_chars_on_code(results, setting.print_missing_char_str, setting.print_result_only_relevant)

    # BUG FIX: was `rich,print("\n\n")` (comma typo), which evaluated a
    # tuple and called the BUILTIN print instead of rich.print.
    rich.print("\n\n")
    if setting.verbose:
        rich.print("########################################################")
    rich.print("### END OF RESULTS #####################################")
    if setting.verbose:
        rich.print("########################################################")
@@ -0,0 +1,15 @@
1
+ from dataclasses import dataclass
2
+
3
+
4
@dataclass
class AppSetting:
    """User-selected options that drive a whole scan/convert run."""

    input_path: str                             # path of the file or directory to process
    is_file: bool                               # presumably: input_path is a single file — not used in the shown modules, confirm
    extensions: list[str]                       # lowercase extensions (including the dot) to handle
    checks: bool = False                        # scan files for illegal (replacement) characters
    convert: bool = False                       # convert non-UTF files to UTF-8 (with BOM)
    copy_old_encoded: bool = False              # keep a copy of the original before converting
    print_missing_char_str: bool = False        # include the offending line text in the report
    print_skipped_file_no_action: bool = False  # list every skipped file individually
    print_result_only_relevant: bool = False    # report only visible missing-char hits on code
    verbose: bool = False                       # extra progress output
@@ -0,0 +1,17 @@
1
+ from dataclasses import dataclass
2
+
3
+ from model.MissingCharResult import MissingCharResult
4
+
5
+
6
@dataclass
class FileScanResult:
    """Outcome of processing one file (check, conversion, skip or error)."""

    file_path: str
    file_name: str
    encoding_before: str | None = None     # encoding detected by chardet (None if never detected)
    encoding_after: str | None = None      # encoding after conversion, e.g. "utf-8(BOM)"
    converted: bool = False                # True when the file was re-encoded
    check_missing_char: list[MissingCharResult] | None = None  # scan hits (None = not scanned)
    error_skipped: bool = False            # processing aborted because of an error
    error_name: str | None = None          # NOTE(review): never populated in the shown code
    error_description: str | None = None   # message of the error that aborted processing
    skipped: bool = False                  # no action required / unsupported extension
@@ -0,0 +1,12 @@
1
+ from dataclasses import dataclass
2
+
3
+
4
@dataclass
class MissingCharResult:
    """One occurrence of the replacement-character byte sequence (EF BF BD)."""

    is_commented: bool           # True when the hit sits on a commented line
    string: str                  # the affected line, left-stripped of spaces/tabs
    line: int                    # 1-based line number in the file
    file_name: str               # base name of the scanned file
    char_position: int           # index of '�' within the line (-1 when not found)
    char_found: bool             # whether '�' is visible in the decoded line
    byte_sequence_file_pos: int  # byte offset of the sequence in the (BOM-stripped) raw data
File without changes
@@ -0,0 +1,30 @@
1
+ Metadata-Version: 2.2
2
+ Name: pylizlib
3
+ Version: 0.0.4
4
+ Summary: Script to convert files to UTF-8.
5
+ Author: Gabliz
6
+ Author-email:
7
+ Keywords: python,encoding,utf-8
8
+ Classifier: Development Status :: 1 - Planning
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Operating System :: Unix
12
+ Classifier: Operating System :: MacOS :: MacOS X
13
+ Classifier: Operating System :: Microsoft :: Windows
14
+ Description-Content-Type: text/markdown
15
+ Dynamic: author
16
+ Dynamic: classifier
17
+ Dynamic: description
18
+ Dynamic: description-content-type
19
+ Dynamic: keywords
20
+ Dynamic: summary
21
+
22
+
23
+ # CUFT
24
+ Little script to convert the encoding of any text into UTF-8 (With BOM).
25
+
26
+ ## Installation
27
+ ```bash
28
+ pip install cuft
29
+ ```
30
+
@@ -0,0 +1,21 @@
1
+ README.md
2
+ setup.py
3
+ controller/__init__.py
4
+ controller/fileChecker.py
5
+ controller/fileController.py
6
+ controller/resultHandler.py
7
+ model/AppSetting.py
8
+ model/FileScanResult.py
9
+ model/MissingCharResult.py
10
+ model/__init__.py
11
+ pylizlib.egg-info/PKG-INFO
12
+ pylizlib.egg-info/SOURCES.txt
13
+ pylizlib.egg-info/dependency_links.txt
14
+ pylizlib.egg-info/entry_points.txt
15
+ pylizlib.egg-info/top_level.txt
16
+ util/__init__.py
17
+ util/code.py
18
+ util/fileUtils.py
19
+ util/iconv.py
20
+ util/log.py
21
+ util/path.py
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ cuft = cuft:main
@@ -0,0 +1,3 @@
1
+ controller
2
+ model
3
+ util
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,39 @@
1
from setuptools import setup, find_packages
import codecs
import os

here = os.path.abspath(os.path.dirname(__file__))

# Ship the README as the PyPI long description.
with codecs.open(os.path.join(here, "README.md"), encoding="utf-8") as fh:
    long_description = "\n" + fh.read()

VERSION = '0.0.4'
DESCRIPTION = 'Script to convert files to UTF-8.'

# Package metadata and entry points.
# (The unused LONG_DESCRIPTION constant was removed; the real long
# description comes from README.md above.)
setup(
    name="pylizlib",
    version=VERSION,
    author="Gabliz",
    author_email="",
    description=DESCRIPTION,
    long_description_content_type="text/markdown",
    long_description=long_description,
    packages=find_packages(),
    # BUG FIX: the code imports chardet and rich, but no dependency was
    # declared, so a fresh install could not run.
    install_requires=["chardet", "rich"],
    entry_points={
        "console_scripts": [
            # NOTE(review): "cuft:main" expects a top-level module named
            # "cuft", which is not among the packaged sources
            # (controller/model/util only) — confirm this entry point works.
            "cuft=cuft:main",
        ],
    },
    keywords=['python', 'encoding', 'utf-8'],
    classifiers=[
        "Development Status :: 1 - Planning",
        "Intended Audience :: Developers",
        "Programming Language :: Python :: 3",
        "Operating System :: Unix",
        "Operating System :: MacOS :: MacOS X",
        "Operating System :: Microsoft :: Windows",
    ]
)
File without changes
@@ -0,0 +1,31 @@
1
+ import re
2
+
3
+ def is_line_commented(file_path, line_number):
4
+ in_block_comment = False
5
+ with open(file_path, 'r') as file:
6
+ for current_line_number, line in enumerate(file, start=1):
7
+ # Se siamo nella linea di interesse
8
+ if current_line_number == line_number:
9
+ # Verifica se la linea è commentata
10
+ line = line.strip()
11
+ if in_block_comment:
12
+ return True # La riga è dentro un blocco di commento
13
+ # Commenti su singola riga (//)
14
+ if line.startswith("//"):
15
+ return True
16
+ # Controllo se la riga è dentro un commento di blocco
17
+ if "/*" in line:
18
+ in_block_comment = True
19
+ if "*/" in line:
20
+ in_block_comment = False
21
+ return False
22
+ return False # La riga non è commentata
23
+ # Gestisci l'inizio e la fine di un blocco di commento
24
+ if in_block_comment:
25
+ if "*/" in line:
26
+ in_block_comment = False
27
+ continue
28
+ if "/*" in line:
29
+ in_block_comment = True
30
+
31
+ return False # Se non troviamo mai la linea, significa che non è commentata
@@ -0,0 +1,2 @@
1
+ import os
2
+
@@ -0,0 +1,41 @@
1
+ import os
2
+ import subprocess
3
+
4
+ import rich
5
+
6
+ from util.log import format_log_path
7
+
8
+
9
def convert_to_utf8_with_iconv(path: str, source_encoding: str, target_encoding: str):
    """Re-encode *path* in place to *target_encoding*, prefixing a UTF-8 BOM.

    Uses the external ``iconv`` tool.  Raises RuntimeError when iconv fails,
    so callers can report the file as skipped instead of silently assuming
    the conversion succeeded (the original swallowed the error).
    """
    file_name = os.path.basename(path)
    rich.print(f"Converting {file_name} to {target_encoding} with iconv...")

    temp_file_path = path + ".tmp"
    temp_bom_file_path = path + ".bom"

    try:
        command = ["iconv", "-f", source_encoding, "-t", target_encoding, path]

        # iconv writes raw bytes to stdout, so capture them in binary mode
        # (the original opened the temp file in text mode, whose encoding
        # wrapper never applies to a subprocess's direct writes).
        with open(temp_file_path, 'wb') as temp_file:
            subprocess.run(command, stdout=temp_file, stderr=subprocess.PIPE, check=True)

        # Prepend the UTF-8 BOM to the converted content.
        with open(temp_bom_file_path, 'wb') as bom_file:
            bom_file.write(b'\xef\xbb\xbf')
            with open(temp_file_path, 'rb') as temp_file:
                bom_file.write(temp_file.read())

        # Replace the original file with the BOM-prefixed version.
        os.replace(temp_bom_file_path, path)
        rich.print(f"Conversion completed for {format_log_path(os.path.basename(path))}")

    except subprocess.CalledProcessError as e:
        rich.print(f"Errore nella conversione di {path}: {e}")
        # BUG FIX: the error used to be swallowed, so handle_file() went on
        # checking a file that was never converted.  Re-raise as
        # RuntimeError, the type callers already catch.
        raise RuntimeError(f"iconv failed for {file_name}: {e}") from e
    finally:
        # Remove any leftover temporary files.
        if os.path.exists(temp_file_path):
            os.remove(temp_file_path)
        if os.path.exists(temp_bom_file_path):
            os.remove(temp_bom_file_path)
@@ -0,0 +1,9 @@
1
+
2
def format_log_path(path: str):
    """Wrap *path* in rich markup so file paths stand out in magenta."""
    return "[bold magenta]" + path + "[/bold magenta]"
4
+
5
def format_log_warning(string: str):
    """Wrap *string* in rich markup for warning (yellow) output."""
    return "".join(("[bold yellow]", string, "[/bold yellow]"))
7
+
8
def format_log_error(string: str):
    """Wrap *string* in rich markup for error (red) output."""
    return "{}{}{}".format("[bold red]", string, "[/bold red]")
@@ -0,0 +1,26 @@
1
+ import os
2
+ import shutil
3
+ import tempfile
4
+
5
+
6
def copy_old_encoded_file(file_path):
    """Copy *file_path* into a "SrcChE" folder under the system temp directory.

    Used to keep a backup of a file's original bytes before re-encoding it.

    Returns:
        The destination path of the copy.

    Raises:
        FileNotFoundError: when the source file does not exist.
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"Il file {file_path} non esiste.")

    # Backups go to <tempdir>/SrcChE; exist_ok avoids the check-then-create
    # race of the original exists()/makedirs() pair.  (The original also
    # computed an unused file_name local — removed.)
    backup_dir = os.path.join(tempfile.gettempdir(), "SrcChE")
    os.makedirs(backup_dir, exist_ok=True)

    # copy2 preserves file metadata (timestamps) along with the content.
    return shutil.copy2(file_path, backup_dir)