PyPI - nm-tool-forge - Versions diffs - 0.2.4__tar.gz → 0.3.0__tar.gz - Mend

nm-tool-forge 0.2.4.tar.gz → 0.3.0.tar.gz

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the respective public registry.

Files changed (48) hide show

{nm_tool_forge-0.2.4 → nm_tool_forge-0.3.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: nm-tool-forge
-Version: 0.2.4
+Version: 0.3.0
 Summary: Analyze MigMan log files and generate aggregated CSV, Markdown, HTML, and optional PDF reports.
 Author-email: Stefan Ewald <s.ew@outlook.de>
 License-Expression: MIT

{nm_tool_forge-0.2.4 → nm_tool_forge-0.3.0}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "nm-tool-forge"
-version = "0.2.4"
+version = "0.3.0"
 description = "Analyze MigMan log files and generate aggregated CSV, Markdown, HTML, and optional PDF reports."
 readme = { file = "README.md", content-type = "text/markdown" }
 requires-python = ">=3.10"

{nm_tool_forge-0.2.4 → nm_tool_forge-0.3.0}/src/csvchunking/__init__.py RENAMED Viewed

@@ -1,4 +1,4 @@
 from .chunker import ChunkResult, split_csv
 __all__ = ["ChunkResult", "split_csv"]
-__version__ = "0.2.4"
+__version__ = "0.3.0"

{nm_tool_forge-0.2.4 → nm_tool_forge-0.3.0}/src/csvchunking/chunker.py RENAMED Viewed

@@ -1,33 +1,50 @@
-import csv
-from dataclasses import dataclass
-from pathlib import Path
-@dataclass(frozen=True)
-class ChunkResult:
+import csv
+import re
+from dataclasses import dataclass
+from pathlib import Path
+@dataclass(frozen=True)
+class ChunkResult:
     input_file: Path
     output_dir: Path
-    chunk_size: int
-    data_rows_processed: int
-    files_created: int
-    output_files: tuple[Path, ...]
-def split_csv(
-    input_file: Path,
-    chunk_size: int,
-    encoding: str = "utf-8-sig",
-) -> ChunkResult:
-    if not Path(input_file).is_file():
-        raise FileNotFoundError(f"Input file not found: {input_file}")
-    if chunk_size <= 0:
-        raise ValueError("chunk_size must be greater than 0")
+    chunk_size: int
+    data_rows_processed: int
+    files_created: int
+    output_files: tuple[Path, ...]
+def cleanup_existing_chunks(output_dir: Path, input_file: Path) -> None:
+    output_dir = Path(output_dir)
+    if not output_dir.exists():
+        return
+    input_file = Path(input_file)
+    pattern = re.compile(
+        rf"^{re.escape(input_file.stem)}_\d{{2,}}{re.escape(input_file.suffix)}$"
+    )
+    for existing_file in output_dir.iterdir():
+        if existing_file.is_file() and pattern.fullmatch(existing_file.name):
+            existing_file.unlink()
+def split_csv(
+    input_file: Path,
+    chunk_size: int,
+    encoding: str = "utf-8-sig",
+) -> ChunkResult:
+    if not Path(input_file).is_file():
+        raise FileNotFoundError(f"Input file not found: {input_file}")
+    if chunk_size <= 0:
+        raise ValueError("chunk_size must be greater than 0")
     input_file = Path(input_file)
     output_dir = input_file.parent / input_file.stem
     output_dir.mkdir(exist_ok=True)
+    cleanup_existing_chunks(output_dir, input_file)
-    # Detect the delimiter automatically.
+    # Detect the delimiter automatically.
     with open(input_file, encoding=encoding, newline="") as f:
         sample = f.read(4096)
         f.seek(0)
@@ -38,10 +55,10 @@ def split_csv(
             dialect = csv.excel
             dialect.delimiter = ";"
         reader = csv.reader(f, dialect)
-        try:
-            header = next(reader)
-        except StopIteration as exc:
-            raise ValueError("Input file is empty.") from exc
+        try:
+            header = next(reader)
+        except StopIteration as exc:
+            raise ValueError("Input file is empty.") from exc
         chunk = []
         file_count = 0
         data_rows = 0

{nm_tool_forge-0.2.4 → nm_tool_forge-0.3.0}/src/loganalysis/__init__.py RENAMED Viewed

@@ -13,4 +13,4 @@ __all__ = [
     "run_analysis",
 ]
-__version__ = "0.2.4"
+__version__ = "0.3.0"

{nm_tool_forge-0.2.4 → nm_tool_forge-0.3.0}/src/loganalysis/analysis.py RENAMED Viewed

@@ -3,13 +3,13 @@ from __future__ import annotations
 from collections import Counter, defaultdict
 from pathlib import Path
-from .constants import DEFAULT_OUT_DIR, DEFAULT_TOP_EXAMPLES, EXIT_NO_LOG_FILES
-from .csv_export import write_csv
-from .encoding import count_physical_lines, detect_encoding
-from .filesystem import backup_file, ensure_dir
-from .models import AnalysisConfig, AnalysisRunResult, AnalysisSummary, FileAnalysis, MessageKey
-from .normalization import normalize_message
-from .parsing import iter_logical_entries, parse_entry
+from .constants import DEFAULT_OUT_DIR, DEFAULT_TOP_EXAMPLES, EXIT_NO_LOG_FILES
+from .csv_export import write_csv
+from .encoding import detect_encoding
+from .filesystem import backup_file, ensure_dir
+from .models import AnalysisConfig, AnalysisRunResult, AnalysisSummary, FileAnalysis, MessageKey
+from .normalization import normalize_message
+from .parsing import extract_last_import_block, iter_logical_entries_from_lines, parse_entry
 from .report_markdown import build_markdown_report
@@ -27,16 +27,22 @@ def analyze_file(file_path: Path) -> FileAnalysis:
     norm_examples: dict[MessageKey, Counter[str]] = defaultdict(Counter)
     unknown_lines = 0
-    total_entries = 0
-    encoding = detect_encoding(file_path)
-    total_lines = count_physical_lines(file_path, encoding=encoding)
-    for entry in iter_logical_entries(file_path, encoding=encoding):
-        total_entries += 1
-        parsed = parse_entry(entry)
-        if not parsed:
-            continue
+    total_entries = 0
+    encoding = detect_encoding(file_path)
+    try:
+        all_lines = file_path.read_text(encoding=encoding, errors="strict").splitlines(keepends=True)
+    except UnicodeDecodeError:
+        all_lines = file_path.read_text(encoding=encoding, errors="replace").splitlines(keepends=True)
+    import_block = extract_last_import_block(all_lines)
+    total_lines = len(import_block.lines)
+    for entry in iter_logical_entries_from_lines(import_block.lines):
+        total_entries += 1
+        parsed = parse_entry(entry)
+        if not parsed:
+            continue
         severity = parsed.severity
         message = parsed.message
@@ -58,8 +64,13 @@ def analyze_file(file_path: Path) -> FileAnalysis:
         unknown_lines=unknown_lines,
         raw_counts=raw_counts,
         norm_counts=norm_counts,
-        norm_examples=dict(norm_examples),
-    )
+        norm_examples=dict(norm_examples),
+        file_total_lines=len(all_lines),
+        analyzed_start_line_number=import_block.start_line_number,
+        analyzed_end_line_number=import_block.end_line_number,
+        import_start=import_block.start_metadata,
+        import_end=import_block.end_stats,
+    )
 def sorted_rows(counter: Counter[MessageKey]) -> list[tuple[str, str, int]]:

{nm_tool_forge-0.2.4 → nm_tool_forge-0.3.0}/src/loganalysis/cli.py RENAMED Viewed

@@ -1,8 +1,8 @@
 from __future__ import annotations
-import argparse
-from collections.abc import Sequence
-from pathlib import Path
+import argparse
+from collections.abc import Sequence
+from pathlib import Path
 from .analysis import NoLogFilesError, run_analysis
 from .constants import DEFAULT_LOGS_DIR, DEFAULT_OUT_DIR, DEFAULT_TOP_EXAMPLES, EXIT_SUCCESS
@@ -16,11 +16,11 @@ def build_parser() -> argparse.ArgumentParser:
     parser = argparse.ArgumentParser(
         description="Aggregated analysis of log files (INFO/ERROR/WARNING) in logs/*.txt",
     )
-    parser.add_argument(
-        "--logs-dir",
-        default=DEFAULT_LOGS_DIR,
-        help=f"Subdirectory with log files (Default: {DEFAULT_LOGS_DIR})",
-    )
+    parser.add_argument(
+        "--logs-dir",
+        default=DEFAULT_LOGS_DIR,
+        help=f"Subdirectory with log files (Default: {DEFAULT_LOGS_DIR})",
+    )
     parser.add_argument("--out-dir", default=DEFAULT_OUT_DIR, help=f"Output directory (Default: {DEFAULT_OUT_DIR})")
     parser.add_argument("--backup-dir", default=None, help="Backup directory (Default: <out-dir>/backup)")
     parser.add_argument(
@@ -74,14 +74,12 @@ def main(argv: Sequence[str] | None = None) -> int:
             print(f"- PDF: {result.pdf_path.resolve() if result.pdf_path else 'created'}")
         else:
             pdf_reason = result.convert_status.get("pdf_reason")
-            if pdf_reason == "pandoc_missing":
-                print("- PDF: skipped (pandoc not available)")
-            elif pdf_reason == "no_pdf_engine":
-                print("- PDF: skipped (no PDF engine found)")
+            if pdf_reason == "weasyprint_unavailable":
+                print("- PDF: skipped (WeasyPrint is required for the formatted PDF report)")
             elif pdf_reason == "html_failed":
                 print("- PDF: skipped (HTML conversion failed)")
-            elif pdf_reason == "pdf_not_created":
-                print("- PDF: not created")
+            elif pdf_reason == "weasyprint_failed":
+                print("- PDF: failed (WeasyPrint could not create the formatted PDF report)")
             else:
                 print("- PDF: failed")

{nm_tool_forge-0.2.4 → nm_tool_forge-0.3.0}/src/loganalysis/constants.py RENAMED Viewed

@@ -29,11 +29,32 @@ RE_INT = re.compile(r"\b\d+\b")
 RE_WHITESPACE = re.compile(r"\s+")
 RE_WINDOWS_PATH = re.compile(r"\b[a-zA-Z]:\\[^;\n\r,]*")
 RE_UNIX_PATH = re.compile(r"(?<![A-Za-z0-9])(?:/[^/;\s,:]+){2,}")
-RE_ENTRY_START = re.compile(
-    r"^\s*(?P<severity>Info|Information|I|Error|Err|E|Warning|Warn|W)\b(?:\s*[;,]|\t)",
-    re.IGNORECASE,
-)
-RE_LINE_PREFIX = re.compile(r"^\s*Line\s+\d+\s*:\s*", re.IGNORECASE)
+RE_ENTRY_START = re.compile(
+    r"^\s*(?P<severity>Info|Information|I|Error|Err|E|Warning|Warn|W)\b(?:\s*[;,]|\t)",
+    re.IGNORECASE,
+)
+RE_IMPORT_START = re.compile(
+    r"^Info;(?P<timestamp>\d{2}\.\d{2}\.\d{4}/\d{2}:\d{2}:\d{2})\s+"
+    r"Starting import:\s*(?P<body>.*)$"
+)
+RE_IMPORT_START_KV = re.compile(
+    r"(?P<key>company|environment|language|Proalpha version|user)\s+'(?P<value>[^']*)'"
+)
+RE_IMPORT_END = re.compile(
+    r'^Info;(?P<timestamp>\d{2}\.\d{2}\.\d{4}/\d{2}:\d{2}:\d{2})\s+-\s+'
+    r"End of import:\s+"
+    r'(?P<records_to_be_edited>[\d.]*)\s*records to be edited \(incl\. the "header record"\),\s*'
+    r"(?P<records_edited>[\d.]*)\s*records edited,\s*thereof\s*"
+    r"(?P<successful>[\d.]*)\s*successful,\s*"
+    r"(?P<errors>[\d.]*)\s*errors,\s*"
+    r"(?P<warnings>[\d.]*)\s*warnings,\s*"
+    r"(?P<info_messages>[\d.]*)\s*info messages\s*$"
+)
+RE_SUCCESS_RATE_SPAN = re.compile(
+    r'^<span class="success-rate success-rate--(?P<status>red|yellow|green)">'
+    r"(?P<value>\d{1,3},\d{2}%)</span>$"
+)
+RE_LINE_PREFIX = re.compile(r"^\s*Line\s+\d+\s*:\s*", re.IGNORECASE)
 RE_TRAILING_DATASET = re.compile(r"\s*;(?:[^\n;]*;){2,}[^\n;]*\s*$")
 RE_SEMANTIC_VALUE_CHAR = re.compile(r"[0-9A-Za-zÄÖÜäöüß]")
 RE_LOOKUP_ASSIGNMENT = re.compile(

nm_tool_forge-0.3.0/src/loganalysis/converters.py ADDED Viewed

@@ -0,0 +1,81 @@
+from __future__ import annotations
+import tempfile
+from pathlib import Path
+from .filesystem import ensure_dir
+from .report_html import render_report_html_document, resolve_report_date
+from .report_markdown import parse_report_markdown
+from .report_pdf import select_pdf_engine
+def convert_report_md_to_html_pdf(md_path: Path, html_path: Path, pdf_path: Path) -> dict[str, object]:
+    """Convert the generated markdown report to HTML and optionally to PDF."""
+    status: dict[str, object] = {
+        "html_created": False,
+        "pdf_created": False,
+        "pdf_reason": None,
+        "pdf_engine": None,
+    }
+    try:
+        ensure_dir(html_path.parent)
+        ensure_dir(pdf_path.parent)
+        markdown = md_path.read_text(encoding="utf-8")
+        report = parse_report_markdown(markdown)
+        report_date = resolve_report_date(report, md_path)
+        html_document = render_report_html_document(report, md_path.name, report_date)
+        html_path.write_text(html_document, encoding="utf-8")
+    except Exception as exc:
+        print(f"[ERROR] HTML generation failed: {exc}")
+        status["pdf_reason"] = "html_failed"
+        return status
+    html_created = html_path.exists() and html_path.stat().st_size > 0
+    status["html_created"] = html_created
+    if not html_created:
+        print("[ERROR] HTML generation failed: HTML file was not created.")
+        status["pdf_reason"] = "html_failed"
+        return status
+    pdf_engine = select_pdf_engine()
+    if pdf_engine is None:
+        print("[WARN] WeasyPrint is required for the formatted PDF report. HTML was generated, PDF skipped.")
+        status["pdf_reason"] = "weasyprint_unavailable"
+        return status
+    status["pdf_engine"] = pdf_engine
+    temp_pdf_path: Path | None = None
+    try:
+        with tempfile.NamedTemporaryFile(
+            prefix=f"{pdf_path.stem}.",
+            suffix=pdf_path.suffix,
+            dir=str(pdf_path.parent),
+            delete=False,
+        ) as handle:
+            temp_pdf_path = Path(handle.name)
+        from weasyprint import HTML
+        HTML(filename=str(html_path), base_url=str(html_path.parent)).write_pdf(str(temp_pdf_path))
+        if temp_pdf_path.exists() and temp_pdf_path.stat().st_size > 0:
+            temp_pdf_path.replace(pdf_path)
+            status["pdf_created"] = True
+            status["pdf_reason"] = None
+            print(f"PDF generated via {pdf_engine}: {pdf_path}")
+        else:
+            print("[ERROR] PDF generation finished without creating a PDF file.")
+            status["pdf_reason"] = "weasyprint_failed"
+    except Exception as exc:
+        print(f"[ERROR] PDF generation failed via {pdf_engine}: {exc}")
+        status["pdf_reason"] = "weasyprint_failed"
+    finally:
+        if temp_pdf_path and temp_pdf_path.exists():
+            temp_pdf_path.unlink()
+    return status

nm_tool_forge-0.3.0/src/loganalysis/encoding.py ADDED Viewed

@@ -0,0 +1,37 @@
+from __future__ import annotations
+from pathlib import Path
+import chardet
+def detect_encoding(file_path: Path) -> str:
+    """Detect text encoding, preferring strict UTF-8 over heuristic guesses."""
+    raw = file_path.read_bytes()
+    for encoding in ("utf-8-sig", "utf-8"):
+        try:
+            raw.decode(encoding)
+        except UnicodeDecodeError:
+            continue
+        return encoding
+    result = chardet.detect(raw)
+    detected = result.get("encoding") or "utf-8"
+    normalized = detected.lower().replace("_", "-")
+    if normalized in {"ascii", "us-ascii"} and any(byte >= 0x80 for byte in raw):
+        return "utf-8"
+    return detected
+def count_physical_lines(file_path: Path, encoding: str | None = None) -> int:
+    """Count physical lines in a text file using a detected or provided encoding."""
+    file_encoding = encoding or detect_encoding(file_path)
+    try:
+        with file_path.open("r", encoding=file_encoding, errors="strict") as file_handle:
+            return sum(1 for _ in file_handle)
+    except UnicodeDecodeError:
+        with file_path.open("r", encoding=file_encoding, errors="replace") as file_handle:
+            return sum(1 for _ in file_handle)

{nm_tool_forge-0.2.4 → nm_tool_forge-0.3.0}/src/loganalysis/models.py RENAMED Viewed

@@ -1,32 +1,77 @@
 from __future__ import annotations
-from collections import Counter
-from dataclasses import dataclass, field
-from pathlib import Path
-MessageKey = tuple[str, str]
+from collections import Counter
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Literal
+MessageKey = tuple[str, str]
+SuccessRateStatus = Literal["red", "yellow", "green", "none"]
 @dataclass(frozen=True)
-class ParsedLine:
-    """Structured representation of a parsed logical log entry."""
-    severity: str
-    message: str
+class ParsedLine:
+    """Structured representation of a parsed logical log entry."""
+    severity: str
+    message: str
+@dataclass(frozen=True)
+class ImportStartMetadata:
+    """Selected metadata from a MigMan `Starting import` line."""
+    timestamp: str
+    company: str | None = None
+    environment: str | None = None
+    language: str | None = None
+    proalpha_version: str | None = None
+    user: str | None = None
+@dataclass(frozen=True)
+class ImportEndStats:
+    """Counters and calculated success status from an `End of import` line."""
+    timestamp: str
+    records_to_be_edited: int
+    records_edited: int
+    successful: int
+    errors: int
+    warnings: int
+    info_messages: int
+    success_rate_percent: float | None
+    success_rate_status: SuccessRateStatus
+@dataclass(frozen=True)
+class ImportBlock:
+    """The physical lines and metadata for the selected import block."""
+    start_line_number: int | None
+    end_line_number: int | None
+    lines: tuple[str, ...]
+    start_metadata: ImportStartMetadata | None
+    end_stats: ImportEndStats | None
 @dataclass
-class FileAnalysis:
-    """Aggregated analysis results for a single log file."""
+class FileAnalysis:
+    """Aggregated analysis results for a single log file."""
     file: Path
     total_lines: int
     total_entries: int
     unknown_lines: int
     raw_counts: Counter[MessageKey]
-    norm_counts: Counter[MessageKey]
-    norm_examples: dict[MessageKey, Counter[str]]
-    backup_path: Path | None = None
+    norm_counts: Counter[MessageKey]
+    norm_examples: dict[MessageKey, Counter[str]]
+    backup_path: Path | None = None
+    file_total_lines: int = 0
+    analyzed_start_line_number: int | None = None
+    analyzed_end_line_number: int | None = None
+    import_start: ImportStartMetadata | None = None
+    import_end: ImportEndStats | None = None
 @dataclass

nm-tool-forge 0.2.4__tar.gz → 0.3.0__tar.gz

nm-tool-forge 0.2.4.tar.gz → 0.3.0.tar.gz