nm-tool-forge 0.2.5__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {nm_tool_forge-0.2.5 → nm_tool_forge-0.3.0}/PKG-INFO +1 -1
- {nm_tool_forge-0.2.5 → nm_tool_forge-0.3.0}/pyproject.toml +1 -1
- {nm_tool_forge-0.2.5 → nm_tool_forge-0.3.0}/src/csvchunking/__init__.py +1 -1
- {nm_tool_forge-0.2.5 → nm_tool_forge-0.3.0}/src/loganalysis/__init__.py +1 -1
- {nm_tool_forge-0.2.5 → nm_tool_forge-0.3.0}/src/loganalysis/analysis.py +30 -19
- {nm_tool_forge-0.2.5 → nm_tool_forge-0.3.0}/src/loganalysis/cli.py +12 -14
- {nm_tool_forge-0.2.5 → nm_tool_forge-0.3.0}/src/loganalysis/constants.py +26 -5
- nm_tool_forge-0.3.0/src/loganalysis/converters.py +81 -0
- nm_tool_forge-0.3.0/src/loganalysis/encoding.py +37 -0
- {nm_tool_forge-0.2.5 → nm_tool_forge-0.3.0}/src/loganalysis/models.py +60 -15
- nm_tool_forge-0.3.0/src/loganalysis/parsing.py +211 -0
- {nm_tool_forge-0.2.5 → nm_tool_forge-0.3.0}/src/loganalysis/report_html.py +179 -16
- {nm_tool_forge-0.2.5 → nm_tool_forge-0.3.0}/src/loganalysis/report_markdown.py +152 -17
- {nm_tool_forge-0.2.5 → nm_tool_forge-0.3.0}/src/loganalysis/report_pdf.py +11 -22
- {nm_tool_forge-0.2.5 → nm_tool_forge-0.3.0}/src/nm_tool_forge.egg-info/PKG-INFO +1 -1
- {nm_tool_forge-0.2.5 → nm_tool_forge-0.3.0}/src/nm_tool_forge.egg-info/SOURCES.txt +3 -1
- nm_tool_forge-0.3.0/tests/test_analysis.py +106 -0
- nm_tool_forge-0.3.0/tests/test_encoding.py +25 -0
- nm_tool_forge-0.3.0/tests/test_parsing.py +127 -0
- nm_tool_forge-0.3.0/tests/test_report_html.py +177 -0
- nm_tool_forge-0.3.0/tests/test_report_markdown.py +200 -0
- nm_tool_forge-0.3.0/tests/test_report_pdf.py +133 -0
- nm_tool_forge-0.2.5/src/loganalysis/converters.py +0 -150
- nm_tool_forge-0.2.5/src/loganalysis/encoding.py +0 -22
- nm_tool_forge-0.2.5/src/loganalysis/parsing.py +0 -69
- nm_tool_forge-0.2.5/tests/test_analysis.py +0 -54
- nm_tool_forge-0.2.5/tests/test_parsing.py +0 -38
- nm_tool_forge-0.2.5/tests/test_report_html.py +0 -42
- nm_tool_forge-0.2.5/tests/test_report_markdown.py +0 -46
- {nm_tool_forge-0.2.5 → nm_tool_forge-0.3.0}/LICENSE +0 -0
- {nm_tool_forge-0.2.5 → nm_tool_forge-0.3.0}/README.md +0 -0
- {nm_tool_forge-0.2.5 → nm_tool_forge-0.3.0}/setup.cfg +0 -0
- {nm_tool_forge-0.2.5 → nm_tool_forge-0.3.0}/src/csvchunking/__main__.py +0 -0
- {nm_tool_forge-0.2.5 → nm_tool_forge-0.3.0}/src/csvchunking/chunker.py +0 -0
- {nm_tool_forge-0.2.5 → nm_tool_forge-0.3.0}/src/csvchunking/cli.py +0 -0
- {nm_tool_forge-0.2.5 → nm_tool_forge-0.3.0}/src/loganalysis/__main__.py +0 -0
- {nm_tool_forge-0.2.5 → nm_tool_forge-0.3.0}/src/loganalysis/csv_export.py +0 -0
- {nm_tool_forge-0.2.5 → nm_tool_forge-0.3.0}/src/loganalysis/filesystem.py +0 -0
- {nm_tool_forge-0.2.5 → nm_tool_forge-0.3.0}/src/loganalysis/normalization.py +0 -0
- {nm_tool_forge-0.2.5 → nm_tool_forge-0.3.0}/src/loganalysis/report_models.py +0 -0
- {nm_tool_forge-0.2.5 → nm_tool_forge-0.3.0}/src/loganalysis/selftest.py +0 -0
- {nm_tool_forge-0.2.5 → nm_tool_forge-0.3.0}/src/nm_tool_forge.egg-info/dependency_links.txt +0 -0
- {nm_tool_forge-0.2.5 → nm_tool_forge-0.3.0}/src/nm_tool_forge.egg-info/entry_points.txt +0 -0
- {nm_tool_forge-0.2.5 → nm_tool_forge-0.3.0}/src/nm_tool_forge.egg-info/requires.txt +0 -0
- {nm_tool_forge-0.2.5 → nm_tool_forge-0.3.0}/src/nm_tool_forge.egg-info/top_level.txt +0 -0
- {nm_tool_forge-0.2.5 → nm_tool_forge-0.3.0}/tests/test_csvchunking.py +0 -0
- {nm_tool_forge-0.2.5 → nm_tool_forge-0.3.0}/tests/test_normalization.py +0 -0
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "nm-tool-forge"
|
|
7
|
-
version = "0.2.5"
|
|
7
|
+
version = "0.3.0"
|
|
8
8
|
description = "Analyze MigMan log files and generate aggregated CSV, Markdown, HTML, and optional PDF reports."
|
|
9
9
|
readme = { file = "README.md", content-type = "text/markdown" }
|
|
10
10
|
requires-python = ">=3.10"
|
|
@@ -3,13 +3,13 @@ from __future__ import annotations
|
|
|
3
3
|
from collections import Counter, defaultdict
|
|
4
4
|
from pathlib import Path
|
|
5
5
|
|
|
6
|
-
from .constants import DEFAULT_OUT_DIR, DEFAULT_TOP_EXAMPLES, EXIT_NO_LOG_FILES
|
|
7
|
-
from .csv_export import write_csv
|
|
8
|
-
from .encoding import
|
|
9
|
-
from .filesystem import backup_file, ensure_dir
|
|
10
|
-
from .models import AnalysisConfig, AnalysisRunResult, AnalysisSummary, FileAnalysis, MessageKey
|
|
11
|
-
from .normalization import normalize_message
|
|
12
|
-
from .parsing import
|
|
6
|
+
from .constants import DEFAULT_OUT_DIR, DEFAULT_TOP_EXAMPLES, EXIT_NO_LOG_FILES
|
|
7
|
+
from .csv_export import write_csv
|
|
8
|
+
from .encoding import detect_encoding
|
|
9
|
+
from .filesystem import backup_file, ensure_dir
|
|
10
|
+
from .models import AnalysisConfig, AnalysisRunResult, AnalysisSummary, FileAnalysis, MessageKey
|
|
11
|
+
from .normalization import normalize_message
|
|
12
|
+
from .parsing import extract_last_import_block, iter_logical_entries_from_lines, parse_entry
|
|
13
13
|
from .report_markdown import build_markdown_report
|
|
14
14
|
|
|
15
15
|
|
|
@@ -27,16 +27,22 @@ def analyze_file(file_path: Path) -> FileAnalysis:
|
|
|
27
27
|
norm_examples: dict[MessageKey, Counter[str]] = defaultdict(Counter)
|
|
28
28
|
|
|
29
29
|
unknown_lines = 0
|
|
30
|
-
total_entries = 0
|
|
31
|
-
|
|
32
|
-
encoding = detect_encoding(file_path)
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
30
|
+
total_entries = 0
|
|
31
|
+
|
|
32
|
+
encoding = detect_encoding(file_path)
|
|
33
|
+
try:
|
|
34
|
+
all_lines = file_path.read_text(encoding=encoding, errors="strict").splitlines(keepends=True)
|
|
35
|
+
except UnicodeDecodeError:
|
|
36
|
+
all_lines = file_path.read_text(encoding=encoding, errors="replace").splitlines(keepends=True)
|
|
37
|
+
|
|
38
|
+
import_block = extract_last_import_block(all_lines)
|
|
39
|
+
total_lines = len(import_block.lines)
|
|
40
|
+
|
|
41
|
+
for entry in iter_logical_entries_from_lines(import_block.lines):
|
|
42
|
+
total_entries += 1
|
|
43
|
+
parsed = parse_entry(entry)
|
|
44
|
+
if not parsed:
|
|
45
|
+
continue
|
|
40
46
|
|
|
41
47
|
severity = parsed.severity
|
|
42
48
|
message = parsed.message
|
|
@@ -58,8 +64,13 @@ def analyze_file(file_path: Path) -> FileAnalysis:
|
|
|
58
64
|
unknown_lines=unknown_lines,
|
|
59
65
|
raw_counts=raw_counts,
|
|
60
66
|
norm_counts=norm_counts,
|
|
61
|
-
norm_examples=dict(norm_examples),
|
|
62
|
-
|
|
67
|
+
norm_examples=dict(norm_examples),
|
|
68
|
+
file_total_lines=len(all_lines),
|
|
69
|
+
analyzed_start_line_number=import_block.start_line_number,
|
|
70
|
+
analyzed_end_line_number=import_block.end_line_number,
|
|
71
|
+
import_start=import_block.start_metadata,
|
|
72
|
+
import_end=import_block.end_stats,
|
|
73
|
+
)
|
|
63
74
|
|
|
64
75
|
|
|
65
76
|
def sorted_rows(counter: Counter[MessageKey]) -> list[tuple[str, str, int]]:
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
import argparse
|
|
4
|
-
from collections.abc import Sequence
|
|
5
|
-
from pathlib import Path
|
|
3
|
+
import argparse
|
|
4
|
+
from collections.abc import Sequence
|
|
5
|
+
from pathlib import Path
|
|
6
6
|
|
|
7
7
|
from .analysis import NoLogFilesError, run_analysis
|
|
8
8
|
from .constants import DEFAULT_LOGS_DIR, DEFAULT_OUT_DIR, DEFAULT_TOP_EXAMPLES, EXIT_SUCCESS
|
|
@@ -16,11 +16,11 @@ def build_parser() -> argparse.ArgumentParser:
|
|
|
16
16
|
parser = argparse.ArgumentParser(
|
|
17
17
|
description="Aggregated analysis of log files (INFO/ERROR/WARNING) in logs/*.txt",
|
|
18
18
|
)
|
|
19
|
-
parser.add_argument(
|
|
20
|
-
"--logs-dir",
|
|
21
|
-
default=DEFAULT_LOGS_DIR,
|
|
22
|
-
help=f"Subdirectory with log files (Default: {DEFAULT_LOGS_DIR})",
|
|
23
|
-
)
|
|
19
|
+
parser.add_argument(
|
|
20
|
+
"--logs-dir",
|
|
21
|
+
default=DEFAULT_LOGS_DIR,
|
|
22
|
+
help=f"Subdirectory with log files (Default: {DEFAULT_LOGS_DIR})",
|
|
23
|
+
)
|
|
24
24
|
parser.add_argument("--out-dir", default=DEFAULT_OUT_DIR, help=f"Output directory (Default: {DEFAULT_OUT_DIR})")
|
|
25
25
|
parser.add_argument("--backup-dir", default=None, help="Backup directory (Default: <out-dir>/backup)")
|
|
26
26
|
parser.add_argument(
|
|
@@ -74,14 +74,12 @@ def main(argv: Sequence[str] | None = None) -> int:
|
|
|
74
74
|
print(f"- PDF: {result.pdf_path.resolve() if result.pdf_path else 'created'}")
|
|
75
75
|
else:
|
|
76
76
|
pdf_reason = result.convert_status.get("pdf_reason")
|
|
77
|
-
if pdf_reason == "
|
|
78
|
-
print("- PDF: skipped (
|
|
79
|
-
elif pdf_reason == "no_pdf_engine":
|
|
80
|
-
print("- PDF: skipped (no PDF engine found)")
|
|
77
|
+
if pdf_reason == "weasyprint_unavailable":
|
|
78
|
+
print("- PDF: skipped (WeasyPrint is required for the formatted PDF report)")
|
|
81
79
|
elif pdf_reason == "html_failed":
|
|
82
80
|
print("- PDF: skipped (HTML conversion failed)")
|
|
83
|
-
elif pdf_reason == "
|
|
84
|
-
print("- PDF: not
|
|
81
|
+
elif pdf_reason == "weasyprint_failed":
|
|
82
|
+
print("- PDF: failed (WeasyPrint could not create the formatted PDF report)")
|
|
85
83
|
else:
|
|
86
84
|
print("- PDF: failed")
|
|
87
85
|
|
|
@@ -29,11 +29,32 @@ RE_INT = re.compile(r"\b\d+\b")
|
|
|
29
29
|
RE_WHITESPACE = re.compile(r"\s+")
|
|
30
30
|
RE_WINDOWS_PATH = re.compile(r"\b[a-zA-Z]:\\[^;\n\r,]*")
|
|
31
31
|
RE_UNIX_PATH = re.compile(r"(?<![A-Za-z0-9])(?:/[^/;\s,:]+){2,}")
|
|
32
|
-
RE_ENTRY_START = re.compile(
|
|
33
|
-
r"^\s*(?P<severity>Info|Information|I|Error|Err|E|Warning|Warn|W)\b(?:\s*[;,]|\t)",
|
|
34
|
-
re.IGNORECASE,
|
|
35
|
-
)
|
|
36
|
-
|
|
32
|
+
RE_ENTRY_START = re.compile(
|
|
33
|
+
r"^\s*(?P<severity>Info|Information|I|Error|Err|E|Warning|Warn|W)\b(?:\s*[;,]|\t)",
|
|
34
|
+
re.IGNORECASE,
|
|
35
|
+
)
|
|
36
|
+
# Matches a "Starting import" line: a literal, case-sensitive `Info;` prefix at
# column 0, a `NN.NN.NNNN/NN:NN:NN` digit timestamp, whitespace, the text
# `Starting import:` and the remainder of the line captured as `body`.
RE_IMPORT_START = re.compile(
    r"^Info;(?P<timestamp>\d{2}\.\d{2}\.\d{4}/\d{2}:\d{2}:\d{2})\s+"
    r"Starting import:\s*(?P<body>.*)$"
)
# Finds `<key> '<value>'` pairs for the five listed keys; the value is
# everything between the single quotes and may be empty.
RE_IMPORT_START_KV = re.compile(
    r"(?P<key>company|environment|language|Proalpha version|user)\s+'(?P<value>[^']*)'"
)
# Matches an "End of import" summary line and captures its six counters.
# Every counter group is `[\d.]*`, so a capture may be empty or may contain
# dots (presumably thousands separators -- confirm against real logs).
RE_IMPORT_END = re.compile(
    r'^Info;(?P<timestamp>\d{2}\.\d{2}\.\d{4}/\d{2}:\d{2}:\d{2})\s+-\s+'
    r"End of import:\s+"
    r'(?P<records_to_be_edited>[\d.]*)\s*records to be edited \(incl\. the "header record"\),\s*'
    r"(?P<records_edited>[\d.]*)\s*records edited,\s*thereof\s*"
    r"(?P<successful>[\d.]*)\s*successful,\s*"
    r"(?P<errors>[\d.]*)\s*errors,\s*"
    r"(?P<warnings>[\d.]*)\s*warnings,\s*"
    r"(?P<info_messages>[\d.]*)\s*info messages\s*$"
)
# Matches a complete success-rate <span> element: the CSS modifier is captured
# as `status` (red/yellow/green) and the comma-decimal percentage as `value`.
RE_SUCCESS_RATE_SPAN = re.compile(
    r'^<span class="success-rate success-rate--(?P<status>red|yellow|green)">'
    r"(?P<value>\d{1,3},\d{2}%)</span>$"
)
# Matches a leading `Line <digits>:` prefix (case-insensitive, optional
# surrounding whitespace).
RE_LINE_PREFIX = re.compile(r"^\s*Line\s+\d+\s*:\s*", re.IGNORECASE)
|
|
37
58
|
RE_TRAILING_DATASET = re.compile(r"\s*;(?:[^\n;]*;){2,}[^\n;]*\s*$")
|
|
38
59
|
RE_SEMANTIC_VALUE_CHAR = re.compile(r"[0-9A-Za-zÄÖÜäöüß]")
|
|
39
60
|
RE_LOOKUP_ASSIGNMENT = re.compile(
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import tempfile
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from .filesystem import ensure_dir
|
|
7
|
+
from .report_html import render_report_html_document, resolve_report_date
|
|
8
|
+
from .report_markdown import parse_report_markdown
|
|
9
|
+
from .report_pdf import select_pdf_engine
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def convert_report_md_to_html_pdf(md_path: Path, html_path: Path, pdf_path: Path) -> dict[str, object]:
    """Convert the generated markdown report to HTML and optionally to PDF.

    Args:
        md_path: Markdown report to read (decoded as UTF-8).
        html_path: Destination of the rendered HTML document.
        pdf_path: Destination of the PDF rendered from the HTML document.

    Returns:
        A status dict with the keys ``html_created`` (bool), ``pdf_created``
        (bool), ``pdf_reason`` (``None`` on success, otherwise one of
        ``"html_failed"``, ``"weasyprint_unavailable"``,
        ``"weasyprint_failed"``) and ``pdf_engine`` (the value returned by
        ``select_pdf_engine()`` or ``None``).

    Failures are reported via ``print`` and the status dict; exceptions raised
    inside the two ``try`` blocks are not propagated to the caller.
    """

    status: dict[str, object] = {
        "html_created": False,
        "pdf_created": False,
        "pdf_reason": None,
        "pdf_engine": None,
    }

    # Stage 1: markdown -> HTML. Any error here also rules out the PDF.
    try:
        ensure_dir(html_path.parent)
        ensure_dir(pdf_path.parent)

        markdown = md_path.read_text(encoding="utf-8")
        report = parse_report_markdown(markdown)
        report_date = resolve_report_date(report, md_path)
        html_document = render_report_html_document(report, md_path.name, report_date)
        html_path.write_text(html_document, encoding="utf-8")
    except Exception as exc:
        print(f"[ERROR] HTML generation failed: {exc}")
        status["pdf_reason"] = "html_failed"
        return status

    # An empty or missing HTML file counts as a failed HTML stage.
    html_created = html_path.exists() and html_path.stat().st_size > 0
    status["html_created"] = html_created
    if not html_created:
        print("[ERROR] HTML generation failed: HTML file was not created.")
        status["pdf_reason"] = "html_failed"
        return status

    # Stage 2: HTML -> PDF, only when a PDF engine is reported as available.
    pdf_engine = select_pdf_engine()
    if pdf_engine is None:
        print("[WARN] WeasyPrint is required for the formatted PDF report. HTML was generated, PDF skipped.")
        status["pdf_reason"] = "weasyprint_unavailable"
        return status

    status["pdf_engine"] = pdf_engine

    temp_pdf_path: Path | None = None

    try:
        # Reserve a temporary file next to the target so the final
        # `replace()` happens within one directory; delete=False keeps the
        # file on disk after the handle is closed.
        with tempfile.NamedTemporaryFile(
            prefix=f"{pdf_path.stem}.",
            suffix=pdf_path.suffix,
            dir=str(pdf_path.parent),
            delete=False,
        ) as handle:
            temp_pdf_path = Path(handle.name)

        # Imported here rather than at module level, so importing this module
        # does not require WeasyPrint.
        from weasyprint import HTML

        HTML(filename=str(html_path), base_url=str(html_path.parent)).write_pdf(str(temp_pdf_path))

        # Only a non-empty temporary file is moved over the real PDF path.
        if temp_pdf_path.exists() and temp_pdf_path.stat().st_size > 0:
            temp_pdf_path.replace(pdf_path)
            status["pdf_created"] = True
            status["pdf_reason"] = None
            print(f"PDF generated via {pdf_engine}: {pdf_path}")
        else:
            print("[ERROR] PDF generation finished without creating a PDF file.")
            status["pdf_reason"] = "weasyprint_failed"
    except Exception as exc:
        print(f"[ERROR] PDF generation failed via {pdf_engine}: {exc}")
        status["pdf_reason"] = "weasyprint_failed"
    finally:
        # Remove the temporary file if it is still present (it no longer
        # exists under this name after a successful replace()).
        if temp_pdf_path and temp_pdf_path.exists():
            temp_pdf_path.unlink()

    return status
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
import chardet
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def detect_encoding(file_path: Path) -> str:
    """Detect text encoding, preferring strict UTF-8 over heuristic guesses.

    Args:
        file_path: File whose raw bytes are inspected.

    Returns:
        ``"utf-8-sig"`` when the bytes are valid UTF-8 (with or without a
        BOM); otherwise the encoding name guessed by ``chardet``, or
        ``"utf-8"`` when chardet has no guess, reports ASCII for data that
        contains non-ASCII bytes, or names a codec Python cannot resolve.
    """

    import codecs  # local import: only needed to validate chardet's answer

    raw = file_path.read_bytes()

    # "utf-8-sig" accepts BOM-prefixed and BOM-less UTF-8 alike, so a single
    # strict attempt covers both; a separate "utf-8" attempt could never
    # succeed after this one failed.
    try:
        raw.decode("utf-8-sig")
    except UnicodeDecodeError:
        pass
    else:
        return "utf-8-sig"

    result = chardet.detect(raw)
    detected = result.get("encoding") or "utf-8"
    normalized = detected.lower().replace("_", "-")
    # An ASCII verdict cannot be right when high bytes are present.
    if normalized in {"ascii", "us-ascii"} and any(byte >= 0x80 for byte in raw):
        return "utf-8"

    # chardet may name an encoding without a Python codec; handing that name
    # to open()/read_text() would raise LookupError in the caller.
    try:
        codecs.lookup(detected)
    except LookupError:
        return "utf-8"
    return detected
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def count_physical_lines(file_path: Path, encoding: str | None = None) -> int:
    """Count physical lines in a text file using a detected or provided encoding.

    The file is first read with strict decoding; if that raises
    ``UnicodeDecodeError`` the count is redone with ``errors="replace"``.
    """

    chosen_encoding = encoding if encoding else detect_encoding(file_path)

    def _tally(error_mode: str) -> int:
        # Iterate the text stream once and count the lines it yields.
        with file_path.open("r", encoding=chosen_encoding, errors=error_mode) as stream:
            line_count = 0
            for _ in stream:
                line_count += 1
            return line_count

    try:
        return _tally("strict")
    except UnicodeDecodeError:
        return _tally("replace")
|
|
@@ -1,32 +1,77 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from collections import Counter
|
|
4
|
-
from dataclasses import dataclass, field
|
|
5
|
-
from pathlib import Path
|
|
6
|
-
|
|
7
|
-
|
|
3
|
+
from collections import Counter
|
|
4
|
+
from dataclasses import dataclass, field
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Literal
|
|
7
|
+
|
|
8
|
+
MessageKey = tuple[str, str]
|
|
9
|
+
SuccessRateStatus = Literal["red", "yellow", "green", "none"]
|
|
8
10
|
|
|
9
11
|
|
|
10
12
|
@dataclass(frozen=True)
|
|
11
|
-
class ParsedLine:
|
|
12
|
-
"""Structured representation of a parsed logical log entry."""
|
|
13
|
-
|
|
14
|
-
severity: str
|
|
15
|
-
message: str
|
|
13
|
+
class ParsedLine:
|
|
14
|
+
"""Structured representation of a parsed logical log entry."""
|
|
15
|
+
|
|
16
|
+
severity: str
|
|
17
|
+
message: str
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@dataclass(frozen=True)
class ImportStartMetadata:
    """Immutable subset of the fields found on a MigMan `Starting import` line.

    Only the timestamp is mandatory; every other field defaults to ``None``.
    """

    # Timestamp text exactly as it appeared in the log line.
    timestamp: str
    # Optional key/value details; None when not supplied.
    company: str | None = None
    environment: str | None = None
    language: str | None = None
    proalpha_version: str | None = None
    user: str | None = None
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@dataclass(frozen=True)
class ImportEndStats:
    """Immutable counters and success-rate verdict from an `End of import` line."""

    # Timestamp text exactly as it appeared in the log line.
    timestamp: str
    # Counters reported by the summary line.
    records_to_be_edited: int
    records_edited: int
    successful: int
    errors: int
    warnings: int
    info_messages: int
    # Percentage of successful records; None when no rate is available.
    success_rate_percent: float | None
    # Traffic-light classification of the rate ("red", "yellow", "green", "none").
    success_rate_status: SuccessRateStatus
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@dataclass(frozen=True)
class ImportBlock:
    """Immutable bundle of the selected import block's physical lines and metadata."""

    # 1-based line numbers of the block boundaries; None when a boundary is unknown.
    start_line_number: int | None
    end_line_number: int | None
    # The physical lines that make up the block.
    lines: tuple[str, ...]
    # Parsed boundary lines; None when the corresponding line was not found.
    start_metadata: ImportStartMetadata | None
    end_stats: ImportEndStats | None
|
|
16
56
|
|
|
17
57
|
|
|
18
58
|
@dataclass
|
|
19
|
-
class FileAnalysis:
|
|
20
|
-
"""Aggregated analysis results for a single log file."""
|
|
59
|
+
class FileAnalysis:
|
|
60
|
+
"""Aggregated analysis results for a single log file."""
|
|
21
61
|
|
|
22
62
|
file: Path
|
|
23
63
|
total_lines: int
|
|
24
64
|
total_entries: int
|
|
25
65
|
unknown_lines: int
|
|
26
66
|
raw_counts: Counter[MessageKey]
|
|
27
|
-
norm_counts: Counter[MessageKey]
|
|
28
|
-
norm_examples: dict[MessageKey, Counter[str]]
|
|
29
|
-
backup_path: Path | None = None
|
|
67
|
+
norm_counts: Counter[MessageKey]
|
|
68
|
+
norm_examples: dict[MessageKey, Counter[str]]
|
|
69
|
+
backup_path: Path | None = None
|
|
70
|
+
file_total_lines: int = 0
|
|
71
|
+
analyzed_start_line_number: int | None = None
|
|
72
|
+
analyzed_end_line_number: int | None = None
|
|
73
|
+
import_start: ImportStartMetadata | None = None
|
|
74
|
+
import_end: ImportEndStats | None = None
|
|
30
75
|
|
|
31
76
|
|
|
32
77
|
@dataclass
|
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections.abc import Iterable, Sequence
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from .constants import (
|
|
7
|
+
RE_ENTRY_START,
|
|
8
|
+
RE_IMPORT_END,
|
|
9
|
+
RE_IMPORT_START,
|
|
10
|
+
RE_IMPORT_START_KV,
|
|
11
|
+
RE_LINE_PREFIX,
|
|
12
|
+
RE_TRAILING_DATASET,
|
|
13
|
+
RE_WHITESPACE,
|
|
14
|
+
SEVERITY_ALIASES,
|
|
15
|
+
)
|
|
16
|
+
from .encoding import detect_encoding
|
|
17
|
+
from .models import ImportBlock, ImportEndStats, ImportStartMetadata, ParsedLine, SuccessRateStatus
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def canonical_severity(raw: str) -> str:
    """Translate a raw severity token into its canonical output form.

    The token is trimmed and upper-cased, then looked up in
    ``SEVERITY_ALIASES``. Tokens without an alias are returned in their
    upper-cased form; an empty token yields ``"UNKNOWN"``.
    """

    token = (raw or "").strip().upper()
    fallback = token if token else "UNKNOWN"
    return SEVERITY_ALIASES.get(token, fallback)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def is_entry_start(line: str) -> bool:
    """Tell whether a physical line opens a new logical log entry.

    A line qualifies when ``RE_ENTRY_START`` matches at its beginning.
    """

    return RE_ENTRY_START.match(line) is not None
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def iter_logical_entries_from_lines(lines: Iterable[str]) -> Iterable[str]:
    """Group physical lines into logical log entries and yield them in order.

    Each entry starts at a line accepted by ``is_entry_start`` and absorbs the
    continuation lines that follow it. Lines seen before the first entry start
    are discarded.
    """

    pending: list[str] = []

    for physical_line in lines:
        if not is_entry_start(physical_line):
            # Continuation line: keep it only if an entry is already open.
            if pending:
                pending.append(physical_line)
            continue

        # A new entry begins: flush the one collected so far.
        if pending:
            yield "".join(pending)
        pending = [physical_line]

    if pending:
        yield "".join(pending)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def iter_logical_entries(file_path: Path, encoding: str | None = None) -> Iterable[str]:
    """Stream the logical log entries of a file.

    The file is opened with the given encoding, or with the result of
    ``detect_encoding`` when none is given. Undecodable bytes are replaced
    rather than raising.
    """

    chosen_encoding = encoding if encoding else detect_encoding(file_path)

    with file_path.open("r", encoding=chosen_encoding, errors="replace") as stream:
        for logical_entry in iter_logical_entries_from_lines(stream):
            yield logical_entry
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def parse_de_int(raw: str | None) -> int:
    """Parse integers with German thousands separators. Empty values become 0.

    Args:
        raw: Text such as ``"17.705"``; ``None`` is treated like an empty string.

    Returns:
        The integer value with all ``.`` separators removed, or 0 when nothing
        is left after trimming whitespace and removing the separators.

    Raises:
        ValueError: If the remaining text is not a valid integer literal.
    """

    # Remove separators before the emptiness check: a separator-only value
    # such as "." would otherwise reach int("") and raise ValueError.
    digits = (raw or "").strip().replace(".", "")
    if not digits:
        return 0
    return int(digits)
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def format_de_int(value: int) -> str:
    """Render an integer with `.` as the thousands separator (17705 -> 17.705)."""

    grouped = format(value, ",")
    return grouped.replace(",", ".")
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def calculate_success_rate(successful: int, records_edited: int) -> tuple[float | None, SuccessRateStatus]:
    """Compute the success percentage and classify it against the report thresholds.

    Returns ``(None, "none")`` when no records were edited. Otherwise the
    percentage is paired with ``"red"`` below 95.0, ``"yellow"`` below 98.5
    and ``"green"`` from 98.5 upwards.
    """

    if records_edited == 0:
        return None, "none"

    percent = successful / records_edited * 100

    # Exclusive upper bounds in ascending order; the first bound the value
    # stays under decides the status.
    for upper_bound, label in ((95.0, "red"), (98.5, "yellow")):
        if percent < upper_bound:
            return percent, label
    return percent, "green"
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def parse_import_start_line(line: str) -> ImportStartMetadata | None:
    """Extract the timestamp and known key/value pairs from a Starting-import line.

    Returns ``None`` when the line, with its trailing newline characters
    removed, does not match ``RE_IMPORT_START``.
    """

    header = RE_IMPORT_START.match(line.rstrip("\r\n"))
    if header is None:
        return None

    # Collect `<key> '<value>'` pairs; a repeated key keeps its last value.
    pairs: dict[str, str] = {}
    for pair in RE_IMPORT_START_KV.finditer(header.group("body")):
        pairs[pair.group("key")] = pair.group("value")

    return ImportStartMetadata(
        timestamp=header.group("timestamp"),
        company=pairs.get("company"),
        environment=pairs.get("environment"),
        language=pairs.get("language"),
        proalpha_version=pairs.get("Proalpha version"),
        user=pairs.get("user"),
    )
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def parse_import_end_line(line: str) -> ImportEndStats | None:
    """Extract the End-of-import counters and derive the success rate from them.

    Returns ``None`` when the line, with its trailing newline characters
    removed, does not match ``RE_IMPORT_END``.
    """

    summary = RE_IMPORT_END.match(line.rstrip("\r\n"))
    if summary is None:
        return None

    # The regex group names double as the ImportEndStats field names.
    counter_names = (
        "records_to_be_edited",
        "records_edited",
        "successful",
        "errors",
        "warnings",
        "info_messages",
    )
    counters = {name: parse_de_int(summary.group(name)) for name in counter_names}
    rate_percent, rate_status = calculate_success_rate(
        counters["successful"],
        counters["records_edited"],
    )

    return ImportEndStats(
        timestamp=summary.group("timestamp"),
        success_rate_percent=rate_percent,
        success_rate_status=rate_status,
        **counters,
    )
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def extract_last_import_block(lines: Sequence[str]) -> ImportBlock:
    """Select the last complete import block by scanning backwards from EOF.

    The last line accepted by ``parse_import_end_line`` closes the block, and
    the nearest line at or before it accepted by ``parse_import_start_line``
    opens it. If either boundary is missing, all lines are returned and the
    missing boundary is reported as ``None``.
    """

    def _scan_back(parser, upper_index):
        # Walk from upper_index down to 0; report the first line the parser accepts.
        for position in range(upper_index, -1, -1):
            parsed = parser(lines[position])
            if parsed is not None:
                return position, parsed
        return None, None

    end_index, end_stats = _scan_back(parse_import_end_line, len(lines) - 1)
    if end_index is None:
        # No End-of-import line at all: analyse the whole input.
        return ImportBlock(
            start_line_number=None,
            end_line_number=None,
            lines=tuple(lines),
            start_metadata=None,
            end_stats=None,
        )

    start_index, start_metadata = _scan_back(parse_import_start_line, end_index)
    if start_index is None:
        # End found but no start before it: keep every line.
        selected = tuple(lines)
        start_line_number = None
    else:
        selected = tuple(lines[start_index : end_index + 1])
        start_line_number = start_index + 1  # line numbers are 1-based

    return ImportBlock(
        start_line_number=start_line_number,
        end_line_number=end_index + 1,
        lines=selected,
        start_metadata=start_metadata,
        end_stats=end_stats,
    )
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def parse_entry(entry: str) -> ParsedLine | None:
    """Split one logical entry into a canonical severity and a cleaned message.

    Returns ``None`` for a blank entry. An entry without a recognised severity
    prefix is reported with severity ``"UNKNOWN"``. An empty message is
    replaced by ``"(no message)"``.
    """

    text = entry.strip()
    if not text:
        return None

    header = RE_ENTRY_START.match(text)
    if header is None:
        collapsed = RE_WHITESPACE.sub(" ", text).strip()
        return ParsedLine(severity="UNKNOWN", message=collapsed or "(no message)")

    # Clean-up order matters: drop the `Line N:` prefix, collapse whitespace,
    # cut the trailing dataset, then collapse whitespace once more.
    body = text[header.end():].strip()
    body = RE_LINE_PREFIX.sub("", body, count=1)
    body = RE_WHITESPACE.sub(" ", body).strip()
    body = RE_TRAILING_DATASET.sub("", body).strip()
    body = RE_WHITESPACE.sub(" ", body).strip()

    return ParsedLine(
        severity=canonical_severity(header.group("severity") or ""),
        message=body or "(no message)",
    )
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def parse_line(line: str) -> ParsedLine | None:
    """Parse a logical entry; kept as a backward-compatible name for ``parse_entry``."""

    parsed = parse_entry(line)
    return parsed
|