nm-tool-forge 0.2.4__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {nm_tool_forge-0.2.4 → nm_tool_forge-0.3.0}/PKG-INFO +1 -1
- {nm_tool_forge-0.2.4 → nm_tool_forge-0.3.0}/pyproject.toml +1 -1
- {nm_tool_forge-0.2.4 → nm_tool_forge-0.3.0}/src/csvchunking/__init__.py +1 -1
- {nm_tool_forge-0.2.4 → nm_tool_forge-0.3.0}/src/csvchunking/chunker.py +44 -27
- {nm_tool_forge-0.2.4 → nm_tool_forge-0.3.0}/src/loganalysis/__init__.py +1 -1
- {nm_tool_forge-0.2.4 → nm_tool_forge-0.3.0}/src/loganalysis/analysis.py +30 -19
- {nm_tool_forge-0.2.4 → nm_tool_forge-0.3.0}/src/loganalysis/cli.py +12 -14
- {nm_tool_forge-0.2.4 → nm_tool_forge-0.3.0}/src/loganalysis/constants.py +26 -5
- nm_tool_forge-0.3.0/src/loganalysis/converters.py +81 -0
- nm_tool_forge-0.3.0/src/loganalysis/encoding.py +37 -0
- {nm_tool_forge-0.2.4 → nm_tool_forge-0.3.0}/src/loganalysis/models.py +60 -15
- nm_tool_forge-0.3.0/src/loganalysis/parsing.py +211 -0
- {nm_tool_forge-0.2.4 → nm_tool_forge-0.3.0}/src/loganalysis/report_html.py +179 -16
- {nm_tool_forge-0.2.4 → nm_tool_forge-0.3.0}/src/loganalysis/report_markdown.py +152 -17
- {nm_tool_forge-0.2.4 → nm_tool_forge-0.3.0}/src/loganalysis/report_pdf.py +11 -22
- {nm_tool_forge-0.2.4 → nm_tool_forge-0.3.0}/src/nm_tool_forge.egg-info/PKG-INFO +1 -1
- {nm_tool_forge-0.2.4 → nm_tool_forge-0.3.0}/src/nm_tool_forge.egg-info/SOURCES.txt +3 -1
- nm_tool_forge-0.3.0/tests/test_analysis.py +106 -0
- nm_tool_forge-0.3.0/tests/test_csvchunking.py +153 -0
- nm_tool_forge-0.3.0/tests/test_encoding.py +25 -0
- nm_tool_forge-0.3.0/tests/test_parsing.py +127 -0
- nm_tool_forge-0.3.0/tests/test_report_html.py +177 -0
- nm_tool_forge-0.3.0/tests/test_report_markdown.py +200 -0
- nm_tool_forge-0.3.0/tests/test_report_pdf.py +133 -0
- nm_tool_forge-0.2.4/src/loganalysis/converters.py +0 -150
- nm_tool_forge-0.2.4/src/loganalysis/encoding.py +0 -22
- nm_tool_forge-0.2.4/src/loganalysis/parsing.py +0 -69
- nm_tool_forge-0.2.4/tests/test_analysis.py +0 -54
- nm_tool_forge-0.2.4/tests/test_csvchunking.py +0 -63
- nm_tool_forge-0.2.4/tests/test_parsing.py +0 -38
- nm_tool_forge-0.2.4/tests/test_report_html.py +0 -42
- nm_tool_forge-0.2.4/tests/test_report_markdown.py +0 -46
- {nm_tool_forge-0.2.4 → nm_tool_forge-0.3.0}/LICENSE +0 -0
- {nm_tool_forge-0.2.4 → nm_tool_forge-0.3.0}/README.md +0 -0
- {nm_tool_forge-0.2.4 → nm_tool_forge-0.3.0}/setup.cfg +0 -0
- {nm_tool_forge-0.2.4 → nm_tool_forge-0.3.0}/src/csvchunking/__main__.py +0 -0
- {nm_tool_forge-0.2.4 → nm_tool_forge-0.3.0}/src/csvchunking/cli.py +0 -0
- {nm_tool_forge-0.2.4 → nm_tool_forge-0.3.0}/src/loganalysis/__main__.py +0 -0
- {nm_tool_forge-0.2.4 → nm_tool_forge-0.3.0}/src/loganalysis/csv_export.py +0 -0
- {nm_tool_forge-0.2.4 → nm_tool_forge-0.3.0}/src/loganalysis/filesystem.py +0 -0
- {nm_tool_forge-0.2.4 → nm_tool_forge-0.3.0}/src/loganalysis/normalization.py +0 -0
- {nm_tool_forge-0.2.4 → nm_tool_forge-0.3.0}/src/loganalysis/report_models.py +0 -0
- {nm_tool_forge-0.2.4 → nm_tool_forge-0.3.0}/src/loganalysis/selftest.py +0 -0
- {nm_tool_forge-0.2.4 → nm_tool_forge-0.3.0}/src/nm_tool_forge.egg-info/dependency_links.txt +0 -0
- {nm_tool_forge-0.2.4 → nm_tool_forge-0.3.0}/src/nm_tool_forge.egg-info/entry_points.txt +0 -0
- {nm_tool_forge-0.2.4 → nm_tool_forge-0.3.0}/src/nm_tool_forge.egg-info/requires.txt +0 -0
- {nm_tool_forge-0.2.4 → nm_tool_forge-0.3.0}/src/nm_tool_forge.egg-info/top_level.txt +0 -0
- {nm_tool_forge-0.2.4 → nm_tool_forge-0.3.0}/tests/test_normalization.py +0 -0
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "nm-tool-forge"
|
|
7
|
-
version = "0.2.4"
|
|
7
|
+
version = "0.3.0"
|
|
8
8
|
description = "Analyze MigMan log files and generate aggregated CSV, Markdown, HTML, and optional PDF reports."
|
|
9
9
|
readme = { file = "README.md", content-type = "text/markdown" }
|
|
10
10
|
requires-python = ">=3.10"
|
|
@@ -1,33 +1,50 @@
|
|
|
1
|
-
import csv
|
|
2
|
-
|
|
3
|
-
from
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
1
|
+
import csv
|
|
2
|
+
import re
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@dataclass(frozen=True)
|
|
8
|
+
class ChunkResult:
|
|
8
9
|
input_file: Path
|
|
9
10
|
output_dir: Path
|
|
10
|
-
chunk_size: int
|
|
11
|
-
data_rows_processed: int
|
|
12
|
-
files_created: int
|
|
13
|
-
output_files: tuple[Path, ...]
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
def
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
11
|
+
chunk_size: int
|
|
12
|
+
data_rows_processed: int
|
|
13
|
+
files_created: int
|
|
14
|
+
output_files: tuple[Path, ...]
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def cleanup_existing_chunks(output_dir: Path, input_file: Path) -> None:
|
|
18
|
+
output_dir = Path(output_dir)
|
|
19
|
+
if not output_dir.exists():
|
|
20
|
+
return
|
|
21
|
+
|
|
22
|
+
input_file = Path(input_file)
|
|
23
|
+
pattern = re.compile(
|
|
24
|
+
rf"^{re.escape(input_file.stem)}_\d{{2,}}{re.escape(input_file.suffix)}$"
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
for existing_file in output_dir.iterdir():
|
|
28
|
+
if existing_file.is_file() and pattern.fullmatch(existing_file.name):
|
|
29
|
+
existing_file.unlink()
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def split_csv(
|
|
33
|
+
input_file: Path,
|
|
34
|
+
chunk_size: int,
|
|
35
|
+
encoding: str = "utf-8-sig",
|
|
36
|
+
) -> ChunkResult:
|
|
37
|
+
if not Path(input_file).is_file():
|
|
38
|
+
raise FileNotFoundError(f"Input file not found: {input_file}")
|
|
39
|
+
if chunk_size <= 0:
|
|
40
|
+
raise ValueError("chunk_size must be greater than 0")
|
|
25
41
|
|
|
26
42
|
input_file = Path(input_file)
|
|
27
43
|
output_dir = input_file.parent / input_file.stem
|
|
28
44
|
output_dir.mkdir(exist_ok=True)
|
|
45
|
+
cleanup_existing_chunks(output_dir, input_file)
|
|
29
46
|
|
|
30
|
-
# Detect the delimiter automatically.
|
|
47
|
+
# Detect the delimiter automatically.
|
|
31
48
|
with open(input_file, encoding=encoding, newline="") as f:
|
|
32
49
|
sample = f.read(4096)
|
|
33
50
|
f.seek(0)
|
|
@@ -38,10 +55,10 @@ def split_csv(
|
|
|
38
55
|
dialect = csv.excel
|
|
39
56
|
dialect.delimiter = ";"
|
|
40
57
|
reader = csv.reader(f, dialect)
|
|
41
|
-
try:
|
|
42
|
-
header = next(reader)
|
|
43
|
-
except StopIteration as exc:
|
|
44
|
-
raise ValueError("Input file is empty.") from exc
|
|
58
|
+
try:
|
|
59
|
+
header = next(reader)
|
|
60
|
+
except StopIteration as exc:
|
|
61
|
+
raise ValueError("Input file is empty.") from exc
|
|
45
62
|
chunk = []
|
|
46
63
|
file_count = 0
|
|
47
64
|
data_rows = 0
|
|
@@ -3,13 +3,13 @@ from __future__ import annotations
|
|
|
3
3
|
from collections import Counter, defaultdict
|
|
4
4
|
from pathlib import Path
|
|
5
5
|
|
|
6
|
-
from .constants import DEFAULT_OUT_DIR, DEFAULT_TOP_EXAMPLES, EXIT_NO_LOG_FILES
|
|
7
|
-
from .csv_export import write_csv
|
|
8
|
-
from .encoding import
|
|
9
|
-
from .filesystem import backup_file, ensure_dir
|
|
10
|
-
from .models import AnalysisConfig, AnalysisRunResult, AnalysisSummary, FileAnalysis, MessageKey
|
|
11
|
-
from .normalization import normalize_message
|
|
12
|
-
from .parsing import
|
|
6
|
+
from .constants import DEFAULT_OUT_DIR, DEFAULT_TOP_EXAMPLES, EXIT_NO_LOG_FILES
|
|
7
|
+
from .csv_export import write_csv
|
|
8
|
+
from .encoding import detect_encoding
|
|
9
|
+
from .filesystem import backup_file, ensure_dir
|
|
10
|
+
from .models import AnalysisConfig, AnalysisRunResult, AnalysisSummary, FileAnalysis, MessageKey
|
|
11
|
+
from .normalization import normalize_message
|
|
12
|
+
from .parsing import extract_last_import_block, iter_logical_entries_from_lines, parse_entry
|
|
13
13
|
from .report_markdown import build_markdown_report
|
|
14
14
|
|
|
15
15
|
|
|
@@ -27,16 +27,22 @@ def analyze_file(file_path: Path) -> FileAnalysis:
|
|
|
27
27
|
norm_examples: dict[MessageKey, Counter[str]] = defaultdict(Counter)
|
|
28
28
|
|
|
29
29
|
unknown_lines = 0
|
|
30
|
-
total_entries = 0
|
|
31
|
-
|
|
32
|
-
encoding = detect_encoding(file_path)
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
30
|
+
total_entries = 0
|
|
31
|
+
|
|
32
|
+
encoding = detect_encoding(file_path)
|
|
33
|
+
try:
|
|
34
|
+
all_lines = file_path.read_text(encoding=encoding, errors="strict").splitlines(keepends=True)
|
|
35
|
+
except UnicodeDecodeError:
|
|
36
|
+
all_lines = file_path.read_text(encoding=encoding, errors="replace").splitlines(keepends=True)
|
|
37
|
+
|
|
38
|
+
import_block = extract_last_import_block(all_lines)
|
|
39
|
+
total_lines = len(import_block.lines)
|
|
40
|
+
|
|
41
|
+
for entry in iter_logical_entries_from_lines(import_block.lines):
|
|
42
|
+
total_entries += 1
|
|
43
|
+
parsed = parse_entry(entry)
|
|
44
|
+
if not parsed:
|
|
45
|
+
continue
|
|
40
46
|
|
|
41
47
|
severity = parsed.severity
|
|
42
48
|
message = parsed.message
|
|
@@ -58,8 +64,13 @@ def analyze_file(file_path: Path) -> FileAnalysis:
|
|
|
58
64
|
unknown_lines=unknown_lines,
|
|
59
65
|
raw_counts=raw_counts,
|
|
60
66
|
norm_counts=norm_counts,
|
|
61
|
-
norm_examples=dict(norm_examples),
|
|
62
|
-
|
|
67
|
+
norm_examples=dict(norm_examples),
|
|
68
|
+
file_total_lines=len(all_lines),
|
|
69
|
+
analyzed_start_line_number=import_block.start_line_number,
|
|
70
|
+
analyzed_end_line_number=import_block.end_line_number,
|
|
71
|
+
import_start=import_block.start_metadata,
|
|
72
|
+
import_end=import_block.end_stats,
|
|
73
|
+
)
|
|
63
74
|
|
|
64
75
|
|
|
65
76
|
def sorted_rows(counter: Counter[MessageKey]) -> list[tuple[str, str, int]]:
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
import argparse
|
|
4
|
-
from collections.abc import Sequence
|
|
5
|
-
from pathlib import Path
|
|
3
|
+
import argparse
|
|
4
|
+
from collections.abc import Sequence
|
|
5
|
+
from pathlib import Path
|
|
6
6
|
|
|
7
7
|
from .analysis import NoLogFilesError, run_analysis
|
|
8
8
|
from .constants import DEFAULT_LOGS_DIR, DEFAULT_OUT_DIR, DEFAULT_TOP_EXAMPLES, EXIT_SUCCESS
|
|
@@ -16,11 +16,11 @@ def build_parser() -> argparse.ArgumentParser:
|
|
|
16
16
|
parser = argparse.ArgumentParser(
|
|
17
17
|
description="Aggregated analysis of log files (INFO/ERROR/WARNING) in logs/*.txt",
|
|
18
18
|
)
|
|
19
|
-
parser.add_argument(
|
|
20
|
-
"--logs-dir",
|
|
21
|
-
default=DEFAULT_LOGS_DIR,
|
|
22
|
-
help=f"Subdirectory with log files (Default: {DEFAULT_LOGS_DIR})",
|
|
23
|
-
)
|
|
19
|
+
parser.add_argument(
|
|
20
|
+
"--logs-dir",
|
|
21
|
+
default=DEFAULT_LOGS_DIR,
|
|
22
|
+
help=f"Subdirectory with log files (Default: {DEFAULT_LOGS_DIR})",
|
|
23
|
+
)
|
|
24
24
|
parser.add_argument("--out-dir", default=DEFAULT_OUT_DIR, help=f"Output directory (Default: {DEFAULT_OUT_DIR})")
|
|
25
25
|
parser.add_argument("--backup-dir", default=None, help="Backup directory (Default: <out-dir>/backup)")
|
|
26
26
|
parser.add_argument(
|
|
@@ -74,14 +74,12 @@ def main(argv: Sequence[str] | None = None) -> int:
|
|
|
74
74
|
print(f"- PDF: {result.pdf_path.resolve() if result.pdf_path else 'created'}")
|
|
75
75
|
else:
|
|
76
76
|
pdf_reason = result.convert_status.get("pdf_reason")
|
|
77
|
-
if pdf_reason == "
|
|
78
|
-
print("- PDF: skipped (
|
|
79
|
-
elif pdf_reason == "no_pdf_engine":
|
|
80
|
-
print("- PDF: skipped (no PDF engine found)")
|
|
77
|
+
if pdf_reason == "weasyprint_unavailable":
|
|
78
|
+
print("- PDF: skipped (WeasyPrint is required for the formatted PDF report)")
|
|
81
79
|
elif pdf_reason == "html_failed":
|
|
82
80
|
print("- PDF: skipped (HTML conversion failed)")
|
|
83
|
-
elif pdf_reason == "
|
|
84
|
-
print("- PDF: not
|
|
81
|
+
elif pdf_reason == "weasyprint_failed":
|
|
82
|
+
print("- PDF: failed (WeasyPrint could not create the formatted PDF report)")
|
|
85
83
|
else:
|
|
86
84
|
print("- PDF: failed")
|
|
87
85
|
|
|
@@ -29,11 +29,32 @@ RE_INT = re.compile(r"\b\d+\b")
|
|
|
29
29
|
RE_WHITESPACE = re.compile(r"\s+")
|
|
30
30
|
RE_WINDOWS_PATH = re.compile(r"\b[a-zA-Z]:\\[^;\n\r,]*")
|
|
31
31
|
RE_UNIX_PATH = re.compile(r"(?<![A-Za-z0-9])(?:/[^/;\s,:]+){2,}")
|
|
32
|
-
RE_ENTRY_START = re.compile(
|
|
33
|
-
r"^\s*(?P<severity>Info|Information|I|Error|Err|E|Warning|Warn|W)\b(?:\s*[;,]|\t)",
|
|
34
|
-
re.IGNORECASE,
|
|
35
|
-
)
|
|
36
|
-
|
|
32
|
+
RE_ENTRY_START = re.compile(
|
|
33
|
+
r"^\s*(?P<severity>Info|Information|I|Error|Err|E|Warning|Warn|W)\b(?:\s*[;,]|\t)",
|
|
34
|
+
re.IGNORECASE,
|
|
35
|
+
)
|
|
36
|
+
RE_IMPORT_START = re.compile(
|
|
37
|
+
r"^Info;(?P<timestamp>\d{2}\.\d{2}\.\d{4}/\d{2}:\d{2}:\d{2})\s+"
|
|
38
|
+
r"Starting import:\s*(?P<body>.*)$"
|
|
39
|
+
)
|
|
40
|
+
RE_IMPORT_START_KV = re.compile(
|
|
41
|
+
r"(?P<key>company|environment|language|Proalpha version|user)\s+'(?P<value>[^']*)'"
|
|
42
|
+
)
|
|
43
|
+
RE_IMPORT_END = re.compile(
|
|
44
|
+
r'^Info;(?P<timestamp>\d{2}\.\d{2}\.\d{4}/\d{2}:\d{2}:\d{2})\s+-\s+'
|
|
45
|
+
r"End of import:\s+"
|
|
46
|
+
r'(?P<records_to_be_edited>[\d.]*)\s*records to be edited \(incl\. the "header record"\),\s*'
|
|
47
|
+
r"(?P<records_edited>[\d.]*)\s*records edited,\s*thereof\s*"
|
|
48
|
+
r"(?P<successful>[\d.]*)\s*successful,\s*"
|
|
49
|
+
r"(?P<errors>[\d.]*)\s*errors,\s*"
|
|
50
|
+
r"(?P<warnings>[\d.]*)\s*warnings,\s*"
|
|
51
|
+
r"(?P<info_messages>[\d.]*)\s*info messages\s*$"
|
|
52
|
+
)
|
|
53
|
+
RE_SUCCESS_RATE_SPAN = re.compile(
|
|
54
|
+
r'^<span class="success-rate success-rate--(?P<status>red|yellow|green)">'
|
|
55
|
+
r"(?P<value>\d{1,3},\d{2}%)</span>$"
|
|
56
|
+
)
|
|
57
|
+
RE_LINE_PREFIX = re.compile(r"^\s*Line\s+\d+\s*:\s*", re.IGNORECASE)
|
|
37
58
|
RE_TRAILING_DATASET = re.compile(r"\s*;(?:[^\n;]*;){2,}[^\n;]*\s*$")
|
|
38
59
|
RE_SEMANTIC_VALUE_CHAR = re.compile(r"[0-9A-Za-zÄÖÜäöüß]")
|
|
39
60
|
RE_LOOKUP_ASSIGNMENT = re.compile(
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import tempfile
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from .filesystem import ensure_dir
|
|
7
|
+
from .report_html import render_report_html_document, resolve_report_date
|
|
8
|
+
from .report_markdown import parse_report_markdown
|
|
9
|
+
from .report_pdf import select_pdf_engine
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def convert_report_md_to_html_pdf(md_path: Path, html_path: Path, pdf_path: Path) -> dict[str, object]:
|
|
13
|
+
"""Convert the generated markdown report to HTML and optionally to PDF."""
|
|
14
|
+
|
|
15
|
+
status: dict[str, object] = {
|
|
16
|
+
"html_created": False,
|
|
17
|
+
"pdf_created": False,
|
|
18
|
+
"pdf_reason": None,
|
|
19
|
+
"pdf_engine": None,
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
try:
|
|
23
|
+
ensure_dir(html_path.parent)
|
|
24
|
+
ensure_dir(pdf_path.parent)
|
|
25
|
+
|
|
26
|
+
markdown = md_path.read_text(encoding="utf-8")
|
|
27
|
+
report = parse_report_markdown(markdown)
|
|
28
|
+
report_date = resolve_report_date(report, md_path)
|
|
29
|
+
html_document = render_report_html_document(report, md_path.name, report_date)
|
|
30
|
+
html_path.write_text(html_document, encoding="utf-8")
|
|
31
|
+
except Exception as exc:
|
|
32
|
+
print(f"[ERROR] HTML generation failed: {exc}")
|
|
33
|
+
status["pdf_reason"] = "html_failed"
|
|
34
|
+
return status
|
|
35
|
+
|
|
36
|
+
html_created = html_path.exists() and html_path.stat().st_size > 0
|
|
37
|
+
status["html_created"] = html_created
|
|
38
|
+
if not html_created:
|
|
39
|
+
print("[ERROR] HTML generation failed: HTML file was not created.")
|
|
40
|
+
status["pdf_reason"] = "html_failed"
|
|
41
|
+
return status
|
|
42
|
+
|
|
43
|
+
pdf_engine = select_pdf_engine()
|
|
44
|
+
if pdf_engine is None:
|
|
45
|
+
print("[WARN] WeasyPrint is required for the formatted PDF report. HTML was generated, PDF skipped.")
|
|
46
|
+
status["pdf_reason"] = "weasyprint_unavailable"
|
|
47
|
+
return status
|
|
48
|
+
|
|
49
|
+
status["pdf_engine"] = pdf_engine
|
|
50
|
+
|
|
51
|
+
temp_pdf_path: Path | None = None
|
|
52
|
+
|
|
53
|
+
try:
|
|
54
|
+
with tempfile.NamedTemporaryFile(
|
|
55
|
+
prefix=f"{pdf_path.stem}.",
|
|
56
|
+
suffix=pdf_path.suffix,
|
|
57
|
+
dir=str(pdf_path.parent),
|
|
58
|
+
delete=False,
|
|
59
|
+
) as handle:
|
|
60
|
+
temp_pdf_path = Path(handle.name)
|
|
61
|
+
|
|
62
|
+
from weasyprint import HTML
|
|
63
|
+
|
|
64
|
+
HTML(filename=str(html_path), base_url=str(html_path.parent)).write_pdf(str(temp_pdf_path))
|
|
65
|
+
|
|
66
|
+
if temp_pdf_path.exists() and temp_pdf_path.stat().st_size > 0:
|
|
67
|
+
temp_pdf_path.replace(pdf_path)
|
|
68
|
+
status["pdf_created"] = True
|
|
69
|
+
status["pdf_reason"] = None
|
|
70
|
+
print(f"PDF generated via {pdf_engine}: {pdf_path}")
|
|
71
|
+
else:
|
|
72
|
+
print("[ERROR] PDF generation finished without creating a PDF file.")
|
|
73
|
+
status["pdf_reason"] = "weasyprint_failed"
|
|
74
|
+
except Exception as exc:
|
|
75
|
+
print(f"[ERROR] PDF generation failed via {pdf_engine}: {exc}")
|
|
76
|
+
status["pdf_reason"] = "weasyprint_failed"
|
|
77
|
+
finally:
|
|
78
|
+
if temp_pdf_path and temp_pdf_path.exists():
|
|
79
|
+
temp_pdf_path.unlink()
|
|
80
|
+
|
|
81
|
+
return status
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
import chardet
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def detect_encoding(file_path: Path) -> str:
|
|
9
|
+
"""Detect text encoding, preferring strict UTF-8 over heuristic guesses."""
|
|
10
|
+
|
|
11
|
+
raw = file_path.read_bytes()
|
|
12
|
+
|
|
13
|
+
for encoding in ("utf-8-sig", "utf-8"):
|
|
14
|
+
try:
|
|
15
|
+
raw.decode(encoding)
|
|
16
|
+
except UnicodeDecodeError:
|
|
17
|
+
continue
|
|
18
|
+
return encoding
|
|
19
|
+
|
|
20
|
+
result = chardet.detect(raw)
|
|
21
|
+
detected = result.get("encoding") or "utf-8"
|
|
22
|
+
normalized = detected.lower().replace("_", "-")
|
|
23
|
+
if normalized in {"ascii", "us-ascii"} and any(byte >= 0x80 for byte in raw):
|
|
24
|
+
return "utf-8"
|
|
25
|
+
return detected
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def count_physical_lines(file_path: Path, encoding: str | None = None) -> int:
|
|
29
|
+
"""Count physical lines in a text file using a detected or provided encoding."""
|
|
30
|
+
|
|
31
|
+
file_encoding = encoding or detect_encoding(file_path)
|
|
32
|
+
try:
|
|
33
|
+
with file_path.open("r", encoding=file_encoding, errors="strict") as file_handle:
|
|
34
|
+
return sum(1 for _ in file_handle)
|
|
35
|
+
except UnicodeDecodeError:
|
|
36
|
+
with file_path.open("r", encoding=file_encoding, errors="replace") as file_handle:
|
|
37
|
+
return sum(1 for _ in file_handle)
|
|
@@ -1,32 +1,77 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from collections import Counter
|
|
4
|
-
from dataclasses import dataclass, field
|
|
5
|
-
from pathlib import Path
|
|
6
|
-
|
|
7
|
-
|
|
3
|
+
from collections import Counter
|
|
4
|
+
from dataclasses import dataclass, field
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Literal
|
|
7
|
+
|
|
8
|
+
MessageKey = tuple[str, str]
|
|
9
|
+
SuccessRateStatus = Literal["red", "yellow", "green", "none"]
|
|
8
10
|
|
|
9
11
|
|
|
10
12
|
@dataclass(frozen=True)
|
|
11
|
-
class ParsedLine:
|
|
12
|
-
"""Structured representation of a parsed logical log entry."""
|
|
13
|
-
|
|
14
|
-
severity: str
|
|
15
|
-
message: str
|
|
13
|
+
class ParsedLine:
|
|
14
|
+
"""Structured representation of a parsed logical log entry."""
|
|
15
|
+
|
|
16
|
+
severity: str
|
|
17
|
+
message: str
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@dataclass(frozen=True)
|
|
21
|
+
class ImportStartMetadata:
|
|
22
|
+
"""Selected metadata from a MigMan `Starting import` line."""
|
|
23
|
+
|
|
24
|
+
timestamp: str
|
|
25
|
+
company: str | None = None
|
|
26
|
+
environment: str | None = None
|
|
27
|
+
language: str | None = None
|
|
28
|
+
proalpha_version: str | None = None
|
|
29
|
+
user: str | None = None
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@dataclass(frozen=True)
|
|
33
|
+
class ImportEndStats:
|
|
34
|
+
"""Counters and calculated success status from an `End of import` line."""
|
|
35
|
+
|
|
36
|
+
timestamp: str
|
|
37
|
+
records_to_be_edited: int
|
|
38
|
+
records_edited: int
|
|
39
|
+
successful: int
|
|
40
|
+
errors: int
|
|
41
|
+
warnings: int
|
|
42
|
+
info_messages: int
|
|
43
|
+
success_rate_percent: float | None
|
|
44
|
+
success_rate_status: SuccessRateStatus
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@dataclass(frozen=True)
|
|
48
|
+
class ImportBlock:
|
|
49
|
+
"""The physical lines and metadata for the selected import block."""
|
|
50
|
+
|
|
51
|
+
start_line_number: int | None
|
|
52
|
+
end_line_number: int | None
|
|
53
|
+
lines: tuple[str, ...]
|
|
54
|
+
start_metadata: ImportStartMetadata | None
|
|
55
|
+
end_stats: ImportEndStats | None
|
|
16
56
|
|
|
17
57
|
|
|
18
58
|
@dataclass
|
|
19
|
-
class FileAnalysis:
|
|
20
|
-
"""Aggregated analysis results for a single log file."""
|
|
59
|
+
class FileAnalysis:
|
|
60
|
+
"""Aggregated analysis results for a single log file."""
|
|
21
61
|
|
|
22
62
|
file: Path
|
|
23
63
|
total_lines: int
|
|
24
64
|
total_entries: int
|
|
25
65
|
unknown_lines: int
|
|
26
66
|
raw_counts: Counter[MessageKey]
|
|
27
|
-
norm_counts: Counter[MessageKey]
|
|
28
|
-
norm_examples: dict[MessageKey, Counter[str]]
|
|
29
|
-
backup_path: Path | None = None
|
|
67
|
+
norm_counts: Counter[MessageKey]
|
|
68
|
+
norm_examples: dict[MessageKey, Counter[str]]
|
|
69
|
+
backup_path: Path | None = None
|
|
70
|
+
file_total_lines: int = 0
|
|
71
|
+
analyzed_start_line_number: int | None = None
|
|
72
|
+
analyzed_end_line_number: int | None = None
|
|
73
|
+
import_start: ImportStartMetadata | None = None
|
|
74
|
+
import_end: ImportEndStats | None = None
|
|
30
75
|
|
|
31
76
|
|
|
32
77
|
@dataclass
|