nm-tool-forge 0.2.5__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. {nm_tool_forge-0.2.5 → nm_tool_forge-0.3.0}/PKG-INFO +1 -1
  2. {nm_tool_forge-0.2.5 → nm_tool_forge-0.3.0}/pyproject.toml +1 -1
  3. {nm_tool_forge-0.2.5 → nm_tool_forge-0.3.0}/src/csvchunking/__init__.py +1 -1
  4. {nm_tool_forge-0.2.5 → nm_tool_forge-0.3.0}/src/loganalysis/__init__.py +1 -1
  5. {nm_tool_forge-0.2.5 → nm_tool_forge-0.3.0}/src/loganalysis/analysis.py +30 -19
  6. {nm_tool_forge-0.2.5 → nm_tool_forge-0.3.0}/src/loganalysis/cli.py +12 -14
  7. {nm_tool_forge-0.2.5 → nm_tool_forge-0.3.0}/src/loganalysis/constants.py +26 -5
  8. nm_tool_forge-0.3.0/src/loganalysis/converters.py +81 -0
  9. nm_tool_forge-0.3.0/src/loganalysis/encoding.py +37 -0
  10. {nm_tool_forge-0.2.5 → nm_tool_forge-0.3.0}/src/loganalysis/models.py +60 -15
  11. nm_tool_forge-0.3.0/src/loganalysis/parsing.py +211 -0
  12. {nm_tool_forge-0.2.5 → nm_tool_forge-0.3.0}/src/loganalysis/report_html.py +179 -16
  13. {nm_tool_forge-0.2.5 → nm_tool_forge-0.3.0}/src/loganalysis/report_markdown.py +152 -17
  14. {nm_tool_forge-0.2.5 → nm_tool_forge-0.3.0}/src/loganalysis/report_pdf.py +11 -22
  15. {nm_tool_forge-0.2.5 → nm_tool_forge-0.3.0}/src/nm_tool_forge.egg-info/PKG-INFO +1 -1
  16. {nm_tool_forge-0.2.5 → nm_tool_forge-0.3.0}/src/nm_tool_forge.egg-info/SOURCES.txt +3 -1
  17. nm_tool_forge-0.3.0/tests/test_analysis.py +106 -0
  18. nm_tool_forge-0.3.0/tests/test_encoding.py +25 -0
  19. nm_tool_forge-0.3.0/tests/test_parsing.py +127 -0
  20. nm_tool_forge-0.3.0/tests/test_report_html.py +177 -0
  21. nm_tool_forge-0.3.0/tests/test_report_markdown.py +200 -0
  22. nm_tool_forge-0.3.0/tests/test_report_pdf.py +133 -0
  23. nm_tool_forge-0.2.5/src/loganalysis/converters.py +0 -150
  24. nm_tool_forge-0.2.5/src/loganalysis/encoding.py +0 -22
  25. nm_tool_forge-0.2.5/src/loganalysis/parsing.py +0 -69
  26. nm_tool_forge-0.2.5/tests/test_analysis.py +0 -54
  27. nm_tool_forge-0.2.5/tests/test_parsing.py +0 -38
  28. nm_tool_forge-0.2.5/tests/test_report_html.py +0 -42
  29. nm_tool_forge-0.2.5/tests/test_report_markdown.py +0 -46
  30. {nm_tool_forge-0.2.5 → nm_tool_forge-0.3.0}/LICENSE +0 -0
  31. {nm_tool_forge-0.2.5 → nm_tool_forge-0.3.0}/README.md +0 -0
  32. {nm_tool_forge-0.2.5 → nm_tool_forge-0.3.0}/setup.cfg +0 -0
  33. {nm_tool_forge-0.2.5 → nm_tool_forge-0.3.0}/src/csvchunking/__main__.py +0 -0
  34. {nm_tool_forge-0.2.5 → nm_tool_forge-0.3.0}/src/csvchunking/chunker.py +0 -0
  35. {nm_tool_forge-0.2.5 → nm_tool_forge-0.3.0}/src/csvchunking/cli.py +0 -0
  36. {nm_tool_forge-0.2.5 → nm_tool_forge-0.3.0}/src/loganalysis/__main__.py +0 -0
  37. {nm_tool_forge-0.2.5 → nm_tool_forge-0.3.0}/src/loganalysis/csv_export.py +0 -0
  38. {nm_tool_forge-0.2.5 → nm_tool_forge-0.3.0}/src/loganalysis/filesystem.py +0 -0
  39. {nm_tool_forge-0.2.5 → nm_tool_forge-0.3.0}/src/loganalysis/normalization.py +0 -0
  40. {nm_tool_forge-0.2.5 → nm_tool_forge-0.3.0}/src/loganalysis/report_models.py +0 -0
  41. {nm_tool_forge-0.2.5 → nm_tool_forge-0.3.0}/src/loganalysis/selftest.py +0 -0
  42. {nm_tool_forge-0.2.5 → nm_tool_forge-0.3.0}/src/nm_tool_forge.egg-info/dependency_links.txt +0 -0
  43. {nm_tool_forge-0.2.5 → nm_tool_forge-0.3.0}/src/nm_tool_forge.egg-info/entry_points.txt +0 -0
  44. {nm_tool_forge-0.2.5 → nm_tool_forge-0.3.0}/src/nm_tool_forge.egg-info/requires.txt +0 -0
  45. {nm_tool_forge-0.2.5 → nm_tool_forge-0.3.0}/src/nm_tool_forge.egg-info/top_level.txt +0 -0
  46. {nm_tool_forge-0.2.5 → nm_tool_forge-0.3.0}/tests/test_csvchunking.py +0 -0
  47. {nm_tool_forge-0.2.5 → nm_tool_forge-0.3.0}/tests/test_normalization.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nm-tool-forge
3
- Version: 0.2.5
3
+ Version: 0.3.0
4
4
  Summary: Analyze MigMan log files and generate aggregated CSV, Markdown, HTML, and optional PDF reports.
5
5
  Author-email: Stefan Ewald <s.ew@outlook.de>
6
6
  License-Expression: MIT
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "nm-tool-forge"
7
- version = "0.2.5"
7
+ version = "0.3.0"
8
8
  description = "Analyze MigMan log files and generate aggregated CSV, Markdown, HTML, and optional PDF reports."
9
9
  readme = { file = "README.md", content-type = "text/markdown" }
10
10
  requires-python = ">=3.10"
@@ -1,4 +1,4 @@
1
1
  from .chunker import ChunkResult, split_csv
2
2
 
3
3
  __all__ = ["ChunkResult", "split_csv"]
4
- __version__ = "0.2.5"
4
+ __version__ = "0.3.0"
@@ -13,4 +13,4 @@ __all__ = [
13
13
  "run_analysis",
14
14
  ]
15
15
 
16
- __version__ = "0.2.5"
16
+ __version__ = "0.3.0"
@@ -3,13 +3,13 @@ from __future__ import annotations
3
3
  from collections import Counter, defaultdict
4
4
  from pathlib import Path
5
5
 
6
- from .constants import DEFAULT_OUT_DIR, DEFAULT_TOP_EXAMPLES, EXIT_NO_LOG_FILES
7
- from .csv_export import write_csv
8
- from .encoding import count_physical_lines, detect_encoding
9
- from .filesystem import backup_file, ensure_dir
10
- from .models import AnalysisConfig, AnalysisRunResult, AnalysisSummary, FileAnalysis, MessageKey
11
- from .normalization import normalize_message
12
- from .parsing import iter_logical_entries, parse_entry
6
+ from .constants import DEFAULT_OUT_DIR, DEFAULT_TOP_EXAMPLES, EXIT_NO_LOG_FILES
7
+ from .csv_export import write_csv
8
+ from .encoding import detect_encoding
9
+ from .filesystem import backup_file, ensure_dir
10
+ from .models import AnalysisConfig, AnalysisRunResult, AnalysisSummary, FileAnalysis, MessageKey
11
+ from .normalization import normalize_message
12
+ from .parsing import extract_last_import_block, iter_logical_entries_from_lines, parse_entry
13
13
  from .report_markdown import build_markdown_report
14
14
 
15
15
 
@@ -27,16 +27,22 @@ def analyze_file(file_path: Path) -> FileAnalysis:
27
27
  norm_examples: dict[MessageKey, Counter[str]] = defaultdict(Counter)
28
28
 
29
29
  unknown_lines = 0
30
- total_entries = 0
31
-
32
- encoding = detect_encoding(file_path)
33
- total_lines = count_physical_lines(file_path, encoding=encoding)
34
-
35
- for entry in iter_logical_entries(file_path, encoding=encoding):
36
- total_entries += 1
37
- parsed = parse_entry(entry)
38
- if not parsed:
39
- continue
30
+ total_entries = 0
31
+
32
+ encoding = detect_encoding(file_path)
33
+ try:
34
+ all_lines = file_path.read_text(encoding=encoding, errors="strict").splitlines(keepends=True)
35
+ except UnicodeDecodeError:
36
+ all_lines = file_path.read_text(encoding=encoding, errors="replace").splitlines(keepends=True)
37
+
38
+ import_block = extract_last_import_block(all_lines)
39
+ total_lines = len(import_block.lines)
40
+
41
+ for entry in iter_logical_entries_from_lines(import_block.lines):
42
+ total_entries += 1
43
+ parsed = parse_entry(entry)
44
+ if not parsed:
45
+ continue
40
46
 
41
47
  severity = parsed.severity
42
48
  message = parsed.message
@@ -58,8 +64,13 @@ def analyze_file(file_path: Path) -> FileAnalysis:
58
64
  unknown_lines=unknown_lines,
59
65
  raw_counts=raw_counts,
60
66
  norm_counts=norm_counts,
61
- norm_examples=dict(norm_examples),
62
- )
67
+ norm_examples=dict(norm_examples),
68
+ file_total_lines=len(all_lines),
69
+ analyzed_start_line_number=import_block.start_line_number,
70
+ analyzed_end_line_number=import_block.end_line_number,
71
+ import_start=import_block.start_metadata,
72
+ import_end=import_block.end_stats,
73
+ )
63
74
 
64
75
 
65
76
  def sorted_rows(counter: Counter[MessageKey]) -> list[tuple[str, str, int]]:
@@ -1,8 +1,8 @@
1
1
  from __future__ import annotations
2
2
 
3
- import argparse
4
- from collections.abc import Sequence
5
- from pathlib import Path
3
+ import argparse
4
+ from collections.abc import Sequence
5
+ from pathlib import Path
6
6
 
7
7
  from .analysis import NoLogFilesError, run_analysis
8
8
  from .constants import DEFAULT_LOGS_DIR, DEFAULT_OUT_DIR, DEFAULT_TOP_EXAMPLES, EXIT_SUCCESS
@@ -16,11 +16,11 @@ def build_parser() -> argparse.ArgumentParser:
16
16
  parser = argparse.ArgumentParser(
17
17
  description="Aggregated analysis of log files (INFO/ERROR/WARNING) in logs/*.txt",
18
18
  )
19
- parser.add_argument(
20
- "--logs-dir",
21
- default=DEFAULT_LOGS_DIR,
22
- help=f"Subdirectory with log files (Default: {DEFAULT_LOGS_DIR})",
23
- )
19
+ parser.add_argument(
20
+ "--logs-dir",
21
+ default=DEFAULT_LOGS_DIR,
22
+ help=f"Subdirectory with log files (Default: {DEFAULT_LOGS_DIR})",
23
+ )
24
24
  parser.add_argument("--out-dir", default=DEFAULT_OUT_DIR, help=f"Output directory (Default: {DEFAULT_OUT_DIR})")
25
25
  parser.add_argument("--backup-dir", default=None, help="Backup directory (Default: <out-dir>/backup)")
26
26
  parser.add_argument(
@@ -74,14 +74,12 @@ def main(argv: Sequence[str] | None = None) -> int:
74
74
  print(f"- PDF: {result.pdf_path.resolve() if result.pdf_path else 'created'}")
75
75
  else:
76
76
  pdf_reason = result.convert_status.get("pdf_reason")
77
- if pdf_reason == "pandoc_missing":
78
- print("- PDF: skipped (pandoc not available)")
79
- elif pdf_reason == "no_pdf_engine":
80
- print("- PDF: skipped (no PDF engine found)")
77
+ if pdf_reason == "weasyprint_unavailable":
78
+ print("- PDF: skipped (WeasyPrint is required for the formatted PDF report)")
81
79
  elif pdf_reason == "html_failed":
82
80
  print("- PDF: skipped (HTML conversion failed)")
83
- elif pdf_reason == "pdf_not_created":
84
- print("- PDF: not created")
81
+ elif pdf_reason == "weasyprint_failed":
82
+ print("- PDF: failed (WeasyPrint could not create the formatted PDF report)")
85
83
  else:
86
84
  print("- PDF: failed")
87
85
 
@@ -29,11 +29,32 @@ RE_INT = re.compile(r"\b\d+\b")
29
29
  RE_WHITESPACE = re.compile(r"\s+")
30
30
  RE_WINDOWS_PATH = re.compile(r"\b[a-zA-Z]:\\[^;\n\r,]*")
31
31
  RE_UNIX_PATH = re.compile(r"(?<![A-Za-z0-9])(?:/[^/;\s,:]+){2,}")
32
- RE_ENTRY_START = re.compile(
33
- r"^\s*(?P<severity>Info|Information|I|Error|Err|E|Warning|Warn|W)\b(?:\s*[;,]|\t)",
34
- re.IGNORECASE,
35
- )
36
- RE_LINE_PREFIX = re.compile(r"^\s*Line\s+\d+\s*:\s*", re.IGNORECASE)
32
+ RE_ENTRY_START = re.compile(
33
+ r"^\s*(?P<severity>Info|Information|I|Error|Err|E|Warning|Warn|W)\b(?:\s*[;,]|\t)",
34
+ re.IGNORECASE,
35
+ )
36
+ RE_IMPORT_START = re.compile(
37
+ r"^Info;(?P<timestamp>\d{2}\.\d{2}\.\d{4}/\d{2}:\d{2}:\d{2})\s+"
38
+ r"Starting import:\s*(?P<body>.*)$"
39
+ )
40
+ RE_IMPORT_START_KV = re.compile(
41
+ r"(?P<key>company|environment|language|Proalpha version|user)\s+'(?P<value>[^']*)'"
42
+ )
43
+ RE_IMPORT_END = re.compile(
44
+ r'^Info;(?P<timestamp>\d{2}\.\d{2}\.\d{4}/\d{2}:\d{2}:\d{2})\s+-\s+'
45
+ r"End of import:\s+"
46
+ r'(?P<records_to_be_edited>[\d.]*)\s*records to be edited \(incl\. the "header record"\),\s*'
47
+ r"(?P<records_edited>[\d.]*)\s*records edited,\s*thereof\s*"
48
+ r"(?P<successful>[\d.]*)\s*successful,\s*"
49
+ r"(?P<errors>[\d.]*)\s*errors,\s*"
50
+ r"(?P<warnings>[\d.]*)\s*warnings,\s*"
51
+ r"(?P<info_messages>[\d.]*)\s*info messages\s*$"
52
+ )
53
+ RE_SUCCESS_RATE_SPAN = re.compile(
54
+ r'^<span class="success-rate success-rate--(?P<status>red|yellow|green)">'
55
+ r"(?P<value>\d{1,3},\d{2}%)</span>$"
56
+ )
57
+ RE_LINE_PREFIX = re.compile(r"^\s*Line\s+\d+\s*:\s*", re.IGNORECASE)
37
58
  RE_TRAILING_DATASET = re.compile(r"\s*;(?:[^\n;]*;){2,}[^\n;]*\s*$")
38
59
  RE_SEMANTIC_VALUE_CHAR = re.compile(r"[0-9A-Za-zÄÖÜäöüß]")
39
60
  RE_LOOKUP_ASSIGNMENT = re.compile(
@@ -0,0 +1,81 @@
1
+ from __future__ import annotations
2
+
3
+ import tempfile
4
+ from pathlib import Path
5
+
6
+ from .filesystem import ensure_dir
7
+ from .report_html import render_report_html_document, resolve_report_date
8
+ from .report_markdown import parse_report_markdown
9
+ from .report_pdf import select_pdf_engine
10
+
11
+
12
def convert_report_md_to_html_pdf(md_path: Path, html_path: Path, pdf_path: Path) -> dict[str, object]:
    """Render the markdown report as HTML and, if possible, as a PDF.

    The HTML file is always attempted first; the PDF is only attempted when
    the HTML file exists, is non-empty, and ``select_pdf_engine()`` reports an
    engine.

    Returns a status dictionary with the keys ``html_created``,
    ``pdf_created``, ``pdf_reason`` and ``pdf_engine``. ``pdf_reason`` is
    ``None`` on success, otherwise one of ``"html_failed"``,
    ``"weasyprint_unavailable"`` or ``"weasyprint_failed"``. Failures are
    printed and reported through the dictionary rather than raised.
    """
    outcome: dict[str, object] = dict(
        html_created=False,
        pdf_created=False,
        pdf_reason=None,
        pdf_engine=None,
    )

    # --- Stage 1: markdown -> HTML -------------------------------------
    try:
        for target in (html_path, pdf_path):
            ensure_dir(target.parent)

        report = parse_report_markdown(md_path.read_text(encoding="utf-8"))
        report_date = resolve_report_date(report, md_path)
        html_path.write_text(
            render_report_html_document(report, md_path.name, report_date),
            encoding="utf-8",
        )
    except Exception as exc:
        print(f"[ERROR] HTML generation failed: {exc}")
        outcome["pdf_reason"] = "html_failed"
        return outcome

    html_ok = html_path.exists() and html_path.stat().st_size > 0
    outcome["html_created"] = html_ok
    if not html_ok:
        print("[ERROR] HTML generation failed: HTML file was not created.")
        outcome["pdf_reason"] = "html_failed"
        return outcome

    # --- Stage 2: HTML -> PDF ------------------------------------------
    engine = select_pdf_engine()
    if engine is None:
        print("[WARN] WeasyPrint is required for the formatted PDF report. HTML was generated, PDF skipped.")
        outcome["pdf_reason"] = "weasyprint_unavailable"
        return outcome

    outcome["pdf_engine"] = engine

    # The PDF is written to a scratch file next to the target and only moved
    # into place once it is known to be non-empty.
    scratch_pdf: Path | None = None

    try:
        with tempfile.NamedTemporaryFile(
            prefix=f"{pdf_path.stem}.",
            suffix=pdf_path.suffix,
            dir=str(pdf_path.parent),
            delete=False,
        ) as scratch_handle:
            scratch_pdf = Path(scratch_handle.name)

        from weasyprint import HTML

        document = HTML(filename=str(html_path), base_url=str(html_path.parent))
        document.write_pdf(str(scratch_pdf))

        if not scratch_pdf.exists() or scratch_pdf.stat().st_size <= 0:
            print("[ERROR] PDF generation finished without creating a PDF file.")
            outcome["pdf_reason"] = "weasyprint_failed"
        else:
            scratch_pdf.replace(pdf_path)
            outcome["pdf_created"] = True
            outcome["pdf_reason"] = None
            print(f"PDF generated via {engine}: {pdf_path}")
    except Exception as exc:
        print(f"[ERROR] PDF generation failed via {engine}: {exc}")
        outcome["pdf_reason"] = "weasyprint_failed"
    finally:
        # After a successful move the scratch file is gone; otherwise remove
        # whatever was left behind.
        if scratch_pdf is not None and scratch_pdf.exists():
            scratch_pdf.unlink()

    return outcome
@@ -0,0 +1,37 @@
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+
5
+ import chardet
6
+
7
+
8
def detect_encoding(file_path: Path) -> str:
    """Return the name of the encoding to use for reading *file_path*.

    The raw bytes are first decoded strictly as ``utf-8-sig`` and then as
    ``utf-8``; the first candidate that decodes without error is returned.
    Only when both fail is ``chardet`` consulted. If ``chardet`` reports no
    encoding, or reports plain ASCII although the data contains bytes
    ``>= 0x80``, ``"utf-8"`` is returned instead of the guess.
    """
    payload = file_path.read_bytes()

    for candidate in ("utf-8-sig", "utf-8"):
        try:
            payload.decode(candidate)
        except UnicodeDecodeError:
            pass
        else:
            return candidate

    guess = chardet.detect(payload).get("encoding") or "utf-8"
    reported_as_ascii = guess.lower().replace("_", "-") in {"ascii", "us-ascii"}
    # An ASCII verdict cannot be right when high bytes are present.
    if reported_as_ascii and any(byte >= 0x80 for byte in payload):
        return "utf-8"
    return guess
26
+
27
+
28
def count_physical_lines(file_path: Path, encoding: str | None = None) -> int:
    """Return the number of physical lines in *file_path*.

    When *encoding* is not given it is determined with ``detect_encoding``.
    The file is read strictly first; if that raises ``UnicodeDecodeError``
    the count is repeated with undecodable bytes replaced.
    """
    file_encoding = encoding or detect_encoding(file_path)

    def _count(error_mode: str) -> int:
        with file_path.open("r", encoding=file_encoding, errors=error_mode) as stream:
            total = 0
            for _ in stream:
                total += 1
            return total

    try:
        return _count("strict")
    except UnicodeDecodeError:
        return _count("replace")
@@ -1,32 +1,77 @@
1
1
  from __future__ import annotations
2
2
 
3
- from collections import Counter
4
- from dataclasses import dataclass, field
5
- from pathlib import Path
6
-
7
- MessageKey = tuple[str, str]
3
+ from collections import Counter
4
+ from dataclasses import dataclass, field
5
+ from pathlib import Path
6
+ from typing import Literal
7
+
8
+ MessageKey = tuple[str, str]
9
+ SuccessRateStatus = Literal["red", "yellow", "green", "none"]
8
10
 
9
11
 
10
12
  @dataclass(frozen=True)
11
- class ParsedLine:
12
- """Structured representation of a parsed logical log entry."""
13
-
14
- severity: str
15
- message: str
13
+ class ParsedLine:
14
+ """Structured representation of a parsed logical log entry."""
15
+
16
+ severity: str
17
+ message: str
18
+
19
+
20
+ @dataclass(frozen=True)
21
+ class ImportStartMetadata:
22
+ """Selected metadata from a MigMan `Starting import` line."""
23
+
24
+ timestamp: str
25
+ company: str | None = None
26
+ environment: str | None = None
27
+ language: str | None = None
28
+ proalpha_version: str | None = None
29
+ user: str | None = None
30
+
31
+
32
+ @dataclass(frozen=True)
33
+ class ImportEndStats:
34
+ """Counters and calculated success status from an `End of import` line."""
35
+
36
+ timestamp: str
37
+ records_to_be_edited: int
38
+ records_edited: int
39
+ successful: int
40
+ errors: int
41
+ warnings: int
42
+ info_messages: int
43
+ success_rate_percent: float | None
44
+ success_rate_status: SuccessRateStatus
45
+
46
+
47
+ @dataclass(frozen=True)
48
+ class ImportBlock:
49
+ """The physical lines and metadata for the selected import block."""
50
+
51
+ start_line_number: int | None
52
+ end_line_number: int | None
53
+ lines: tuple[str, ...]
54
+ start_metadata: ImportStartMetadata | None
55
+ end_stats: ImportEndStats | None
16
56
 
17
57
 
18
58
  @dataclass
19
- class FileAnalysis:
20
- """Aggregated analysis results for a single log file."""
59
+ class FileAnalysis:
60
+ """Aggregated analysis results for a single log file."""
21
61
 
22
62
  file: Path
23
63
  total_lines: int
24
64
  total_entries: int
25
65
  unknown_lines: int
26
66
  raw_counts: Counter[MessageKey]
27
- norm_counts: Counter[MessageKey]
28
- norm_examples: dict[MessageKey, Counter[str]]
29
- backup_path: Path | None = None
67
+ norm_counts: Counter[MessageKey]
68
+ norm_examples: dict[MessageKey, Counter[str]]
69
+ backup_path: Path | None = None
70
+ file_total_lines: int = 0
71
+ analyzed_start_line_number: int | None = None
72
+ analyzed_end_line_number: int | None = None
73
+ import_start: ImportStartMetadata | None = None
74
+ import_end: ImportEndStats | None = None
30
75
 
31
76
 
32
77
  @dataclass
@@ -0,0 +1,211 @@
1
+ from __future__ import annotations
2
+
3
+ from collections.abc import Iterable, Sequence
4
+ from pathlib import Path
5
+
6
+ from .constants import (
7
+ RE_ENTRY_START,
8
+ RE_IMPORT_END,
9
+ RE_IMPORT_START,
10
+ RE_IMPORT_START_KV,
11
+ RE_LINE_PREFIX,
12
+ RE_TRAILING_DATASET,
13
+ RE_WHITESPACE,
14
+ SEVERITY_ALIASES,
15
+ )
16
+ from .encoding import detect_encoding
17
+ from .models import ImportBlock, ImportEndStats, ImportStartMetadata, ParsedLine, SuccessRateStatus
18
+
19
+
20
def canonical_severity(raw: str) -> str:
    """Translate a raw severity token into its canonical output label.

    The token is trimmed and upper-cased before it is looked up in
    ``SEVERITY_ALIASES``. A token without an alias is returned in its
    upper-cased form; a missing or blank token yields ``"UNKNOWN"``.
    """
    token = (raw or "").strip().upper()
    fallback = token or "UNKNOWN"
    return SEVERITY_ALIASES.get(token, fallback)
25
+
26
+
27
def is_entry_start(line: str) -> bool:
    """Tell whether *line* opens a new logical log entry.

    A line qualifies when ``RE_ENTRY_START`` matches at its beginning.
    """
    return RE_ENTRY_START.match(line) is not None
31
+
32
+
33
def iter_logical_entries_from_lines(lines: Iterable[str]) -> Iterable[str]:
    """Group physical lines into logical log entries and yield them.

    An entry begins at a line accepted by ``is_entry_start`` and absorbs all
    following lines up to the next such line. Each entry is yielded as the
    plain concatenation of its lines. Lines that appear before the first
    entry start are discarded.
    """
    pending: list[str] = []

    for physical_line in lines:
        if not is_entry_start(physical_line):
            # Continuation line: only meaningful once an entry is open.
            if pending:
                pending.append(physical_line)
            continue

        if pending:
            yield "".join(pending)
        pending = [physical_line]

    if pending:
        yield "".join(pending)
50
+
51
+
52
def iter_logical_entries(file_path: Path, encoding: str | None = None) -> Iterable[str]:
    """Read *file_path* and yield its logical log entries.

    The encoding falls back to ``detect_encoding`` when none is supplied.
    The file is opened with ``errors="replace"`` and its lines are grouped
    by ``iter_logical_entries_from_lines``.
    """
    if not encoding:
        encoding = detect_encoding(file_path)

    with file_path.open("r", encoding=encoding, errors="replace") as stream:
        yield from iter_logical_entries_from_lines(stream)
59
+
60
+
61
def parse_de_int(raw: str | None) -> int:
    """Parse an integer written with German thousands separators.

    Surrounding whitespace is ignored and every ``.`` is treated as a
    thousands separator, so ``"17.705"`` becomes ``17705``.

    Returns ``0`` for ``None``, for blank input, and for input that consists
    only of separators (for example ``"."``). Raises ``ValueError`` when the
    remaining text is not a valid integer.
    """
    # Strip the separators before the emptiness check: a separator-only value
    # has no digits left and would otherwise reach int("") and raise.
    digits = (raw or "").strip().replace(".", "")
    if not digits:
        return 0
    return int(digits)
68
+
69
+
70
def format_de_int(value: int) -> str:
    """Render *value* with ``.`` as thousands separator, e.g. 17705 -> "17.705"."""
    grouped_with_commas = format(value, ",")
    return grouped_with_commas.replace(",", ".")
74
+
75
+
76
def calculate_success_rate(successful: int, records_edited: int) -> tuple[float | None, SuccessRateStatus]:
    """Compute the success percentage and its traffic-light status.

    Returns ``(None, "none")`` when *records_edited* is zero. Otherwise the
    percentage ``successful / records_edited * 100`` is returned together
    with ``"red"`` (below 95.0), ``"yellow"`` (below 98.5) or ``"green"``.
    """
    if records_edited == 0:
        return None, "none"

    rate = successful / records_edited * 100
    status = "red" if rate < 95.0 else "yellow" if rate < 98.5 else "green"
    return rate, status
88
+
89
+
90
def parse_import_start_line(line: str) -> ImportStartMetadata | None:
    """Extract the timestamp and selected key/value pairs from a start line.

    Trailing line-break characters are removed before ``RE_IMPORT_START`` is
    applied. Returns ``None`` when the line does not match. Keys found by
    ``RE_IMPORT_START_KV`` in the line body populate the metadata; keys that
    are absent stay ``None``, and a repeated key keeps its last value.
    """
    header = RE_IMPORT_START.match(line.rstrip("\r\n"))
    if header is None:
        return None

    fields: dict[str, str] = {}
    for pair in RE_IMPORT_START_KV.finditer(header.group("body")):
        fields[pair.group("key")] = pair.group("value")

    return ImportStartMetadata(
        timestamp=header.group("timestamp"),
        company=fields.get("company"),
        environment=fields.get("environment"),
        language=fields.get("language"),
        proalpha_version=fields.get("Proalpha version"),
        user=fields.get("user"),
    )
109
+
110
+
111
def parse_import_end_line(line: str) -> ImportEndStats | None:
    """Extract the counters from an end-of-import line.

    Trailing line-break characters are removed before ``RE_IMPORT_END`` is
    applied. Returns ``None`` when the line does not match. Each captured
    counter is converted with ``parse_de_int`` and the success rate is
    derived from the ``successful`` and ``records_edited`` counters.
    """
    found = RE_IMPORT_END.match(line.rstrip("\r\n"))
    if found is None:
        return None

    counter_names = (
        "records_to_be_edited",
        "records_edited",
        "successful",
        "errors",
        "warnings",
        "info_messages",
    )
    counters = {name: parse_de_int(found.group(name)) for name in counter_names}
    rate, rate_status = calculate_success_rate(counters["successful"], counters["records_edited"])

    return ImportEndStats(
        timestamp=found.group("timestamp"),
        success_rate_percent=rate,
        success_rate_status=rate_status,
        **counters,
    )
137
+
138
+
139
def extract_last_import_block(lines: Sequence[str]) -> ImportBlock:
    """Select the last import block of a log, searching from the end.

    The last line accepted by ``parse_import_end_line`` marks the block end;
    the nearest line at or before it accepted by ``parse_import_start_line``
    marks the block start. Reported line numbers are 1-based.

    * No end line found: all lines are returned, without numbers or metadata.
    * End line but no start line: all lines are returned together with the
      end line number and end statistics.
    * Both found: only the lines from start to end (inclusive) are returned.
    """
    # Walk backwards until an end-of-import line is seen.
    end_pos = len(lines) - 1
    end_stats: ImportEndStats | None = None
    while end_pos >= 0:
        end_stats = parse_import_end_line(lines[end_pos])
        if end_stats is not None:
            break
        end_pos -= 1

    if end_stats is None:
        return ImportBlock(
            start_line_number=None,
            end_line_number=None,
            lines=tuple(lines),
            start_metadata=None,
            end_stats=None,
        )

    # Continue backwards from the end line to the matching start line.
    start_pos = end_pos
    start_metadata: ImportStartMetadata | None = None
    while start_pos >= 0:
        start_metadata = parse_import_start_line(lines[start_pos])
        if start_metadata is not None:
            break
        start_pos -= 1

    if start_metadata is None:
        return ImportBlock(
            start_line_number=None,
            end_line_number=end_pos + 1,
            lines=tuple(lines),
            start_metadata=None,
            end_stats=end_stats,
        )

    return ImportBlock(
        start_line_number=start_pos + 1,
        end_line_number=end_pos + 1,
        lines=tuple(lines[start_pos : end_pos + 1]),
        start_metadata=start_metadata,
        end_stats=end_stats,
    )
185
+
186
+
187
def parse_entry(entry: str) -> ParsedLine | None:
    """Split a logical entry into canonical severity and cleaned message.

    Returns ``None`` for a blank entry. An entry that does not begin with a
    severity token (per ``RE_ENTRY_START``) is reported with severity
    ``"UNKNOWN"`` and its whitespace-collapsed text. Otherwise the text after
    the severity token has one leading ``RE_LINE_PREFIX`` match and a
    trailing ``RE_TRAILING_DATASET`` match removed and its whitespace
    collapsed. An empty message is replaced by ``"(no message)"``.
    """
    text = entry.strip()
    if not text:
        return None

    head = RE_ENTRY_START.match(text)
    if head is None:
        severity = "UNKNOWN"
        body = text
    else:
        severity = canonical_severity(head.group("severity") or "")
        body = RE_LINE_PREFIX.sub("", text[head.end():].strip(), count=1)
        # Collapse whitespace first so the trailing-dataset pattern sees a
        # single-line message.
        body = RE_WHITESPACE.sub(" ", body).strip()
        body = RE_TRAILING_DATASET.sub("", body).strip()

    body = RE_WHITESPACE.sub(" ", body).strip()
    return ParsedLine(severity=severity, message=body or "(no message)")
206
+
207
+
208
def parse_line(line: str) -> ParsedLine | None:
    """Parse *line* exactly like ``parse_entry``; kept as a compatibility alias."""
    parsed = parse_entry(line)
    return parsed