nm-tool-forge 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- loganalysis/__init__.py +16 -0
- loganalysis/__main__.py +5 -0
- loganalysis/analysis.py +175 -0
- loganalysis/cli.py +88 -0
- loganalysis/constants.py +126 -0
- loganalysis/converters.py +150 -0
- loganalysis/csv_export.py +18 -0
- loganalysis/encoding.py +22 -0
- loganalysis/filesystem.py +26 -0
- loganalysis/models.py +63 -0
- loganalysis/normalization.py +97 -0
- loganalysis/parsing.py +69 -0
- loganalysis/report_html.py +378 -0
- loganalysis/report_markdown.py +209 -0
- loganalysis/report_models.py +31 -0
- loganalysis/report_pdf.py +74 -0
- loganalysis/selftest.py +58 -0
- nm_tool_forge-0.1.0.dist-info/METADATA +198 -0
- nm_tool_forge-0.1.0.dist-info/RECORD +23 -0
- nm_tool_forge-0.1.0.dist-info/WHEEL +5 -0
- nm_tool_forge-0.1.0.dist-info/entry_points.txt +3 -0
- nm_tool_forge-0.1.0.dist-info/licenses/LICENSE +21 -0
- nm_tool_forge-0.1.0.dist-info/top_level.txt +1 -0
loganalysis/__init__.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from .analysis import analyze_file, run_analysis
|
|
4
|
+
from .converters import convert_report_md_to_html_pdf
|
|
5
|
+
from .normalization import normalize_message
|
|
6
|
+
from .parsing import iter_logical_entries
|
|
7
|
+
|
|
8
|
+
# Names re-exported by this package; each one is imported above from a submodule.
__all__ = [
    "analyze_file",
    "convert_report_md_to_html_pdf",
    "iter_logical_entries",
    "normalize_message",
    "run_analysis",
]

# Package version string.
__version__ = "0.1.0"
|
loganalysis/__main__.py
ADDED
loganalysis/analysis.py
ADDED
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections import Counter, defaultdict
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from .constants import DEFAULT_OUT_DIR, DEFAULT_TOP_EXAMPLES, EXIT_NO_LOG_FILES
|
|
7
|
+
from .csv_export import write_csv
|
|
8
|
+
from .encoding import count_physical_lines, detect_encoding
|
|
9
|
+
from .filesystem import backup_file, ensure_dir
|
|
10
|
+
from .models import AnalysisConfig, AnalysisRunResult, AnalysisSummary, FileAnalysis, MessageKey
|
|
11
|
+
from .normalization import normalize_message
|
|
12
|
+
from .parsing import iter_logical_entries, parse_entry
|
|
13
|
+
from .report_markdown import build_markdown_report
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class NoLogFilesError(FileNotFoundError):
    """Signal that an analysis run found no log files to process.

    Subclasses ``FileNotFoundError`` and carries the exit code that a
    command-line caller can return when it catches this error.
    """

    # Process exit status associated with this failure mode.
    exit_code = EXIT_NO_LOG_FILES
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def analyze_file(file_path: Path) -> FileAnalysis:
    """Scan a single log file and tally its raw and normalized messages.

    The encoding is detected once and reused for both the physical line
    count and the logical-entry iteration. Every logical entry is counted,
    including those that ``parse_entry`` cannot parse; only parsed entries
    contribute to the message tallies.

    Args:
        file_path: Log file to analyze.

    Returns:
        A ``FileAnalysis`` holding line/entry totals, the number of entries
        whose severity is ``"UNKNOWN"``, per-message counts (raw and
        normalized) and, for each normalized key, a counter of the raw
        messages that mapped onto it.
    """

    encoding = detect_encoding(file_path)
    physical_line_total = count_physical_lines(file_path, encoding=encoding)

    raw_tally: Counter[MessageKey] = Counter()
    normalized_tally: Counter[MessageKey] = Counter()
    examples_by_key: dict[MessageKey, Counter[str]] = defaultdict(Counter)
    entry_total = 0
    unknown_total = 0

    # enumerate(start=1) leaves entry_total equal to the number of entries seen.
    for entry_total, entry in enumerate(iter_logical_entries(file_path, encoding=encoding), start=1):
        parsed = parse_entry(entry)
        if not parsed:
            # Unparseable entries still count as entries but add no messages.
            continue

        severity, message = parsed.severity, parsed.message
        if severity == "UNKNOWN":
            unknown_total += 1

        raw_tally[(severity, message)] += 1

        normalized_key = (severity, normalize_message(message))
        normalized_tally[normalized_key] += 1
        # Remember which raw variants collapsed into this normalized message.
        examples_by_key[normalized_key][message] += 1

    return FileAnalysis(
        file=file_path,
        total_lines=physical_line_total,
        total_entries=entry_total,
        unknown_lines=unknown_total,
        raw_counts=raw_tally,
        norm_counts=normalized_tally,
        norm_examples=dict(examples_by_key),
    )
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def sorted_rows(counter: Counter[MessageKey]) -> list[tuple[str, str, int]]:
    """Flatten a ``(severity, message) -> count`` counter into ordered rows.

    Rows are ordered by severity ascending, then by count descending, then
    by message ascending, so the output is deterministic for equal counts.

    Args:
        counter: Counts keyed by ``(severity, message)`` tuples.

    Returns:
        A list of ``(severity, message, count)`` tuples in that order.
    """

    def ordering(entry):
        (severity, message), count = entry
        # Negating the count sorts larger counts first within a severity.
        return severity, -count, message

    ordered = sorted(counter.items(), key=ordering)
    return [(severity, message, count) for (severity, message), count in ordered]
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def find_log_files(logs_dir: Path) -> list[Path]:
    """Return the ``*.txt`` log files directly inside ``logs_dir``, sorted.

    Only regular files are returned. A directory whose name happens to end
    in ``.txt`` also matches the glob pattern, but it cannot be copied or
    read as a log file, so it is filtered out here instead of failing later
    in the run.

    Args:
        logs_dir: Directory to search (not recursive).

    Returns:
        Sorted list of matching file paths; empty when nothing matches.
    """

    return sorted(path for path in logs_dir.glob("*.txt") if path.is_file())
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def build_default_config(
    *,
    logs_dir: Path,
    out_dir: Path | None = None,
    backup_dir: Path | None = None,
    top_examples: int = DEFAULT_TOP_EXAMPLES,
    convert: bool = False,
) -> AnalysisConfig:
    """Assemble an ``AnalysisConfig``, filling in default output locations.

    Args:
        logs_dir: Directory containing the log files.
        out_dir: Output directory; falls back to ``DEFAULT_OUT_DIR``.
        backup_dir: Backup directory; falls back to ``<out_dir>/backup``.
        top_examples: Stored unchanged on the config.
        convert: Stored unchanged on the config.

    Returns:
        The populated configuration object.
    """

    if not out_dir:
        out_dir = Path(DEFAULT_OUT_DIR)
    if not backup_dir:
        # The backup location is derived from the resolved output directory.
        backup_dir = out_dir / "backup"

    return AnalysisConfig(
        logs_dir=logs_dir,
        out_dir=out_dir,
        backup_dir=backup_dir,
        top_examples=top_examples,
        convert=convert,
    )
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def run_analysis(config: AnalysisConfig) -> AnalysisRunResult:
    """Execute a complete analysis run and write all output artifacts.

    Steps, in order:
      1. Create the output and backup directories.
      2. Collect the log files; raise if there are none.
      3. For each log file: back it up, analyze it, merge its counts into
         the global summary, and write its two per-file CSVs.
      4. Write the two global summary CSVs and ``report.md``.
      5. If ``config.convert`` is set, convert the report to HTML/PDF.

    Args:
        config: Directories and options for this run.

    Returns:
        An ``AnalysisRunResult`` with the output paths and the summary;
        ``html_path``, ``pdf_path`` and ``convert_status`` are assigned only
        when conversion was requested.

    Raises:
        NoLogFilesError: If no ``*.txt`` files are found in ``config.logs_dir``.
    """

    out_dir = config.out_dir
    backup_dir = config.backup_dir or (out_dir / "backup")
    for directory in (out_dir, backup_dir):
        ensure_dir(directory)

    source_dir = config.logs_dir
    log_files = find_log_files(source_dir)
    if not log_files:
        raise NoLogFilesError(f"No *.txt files found in: {source_dir.resolve()}")

    raw_headers = ["SEVERITY", "MESSAGE", "COUNT"]
    normalized_headers = ["SEVERITY", "MESSAGE_NORMALIZED", "COUNT"]

    summary = AnalysisSummary(
        analyses=[],
        global_raw=Counter(),
        global_norm=Counter(),
        global_norm_examples={},
    )

    for log_file in log_files:
        # Back up first so a copy exists before the file is read for analysis.
        backup_path = backup_file(log_file, backup_dir)
        analysis = analyze_file(log_file)
        analysis.backup_path = backup_path
        summary.analyses.append(analysis)

        summary.global_raw.update(analysis.raw_counts)
        summary.global_norm.update(analysis.norm_counts)
        for key, examples in analysis.norm_examples.items():
            merged = summary.global_norm_examples.get(key)
            if merged is None:
                merged = summary.global_norm_examples[key] = Counter()
            merged.update(examples)

        write_csv(
            out_dir / f"{log_file.stem}.aggregated.csv",
            sorted_rows(analysis.raw_counts),
            headers=raw_headers,
        )
        write_csv(
            out_dir / f"{log_file.stem}.aggregated.normalized.csv",
            sorted_rows(analysis.norm_counts),
            headers=normalized_headers,
        )

    write_csv(
        out_dir / "summary.all_files.csv",
        sorted_rows(summary.global_raw),
        headers=raw_headers,
    )
    write_csv(
        out_dir / "summary.all_files.normalized.csv",
        sorted_rows(summary.global_norm),
        headers=normalized_headers,
    )

    report_path = out_dir / "report.md"
    report_text = build_markdown_report(summary, config)
    report_path.write_text(report_text, encoding="utf-8", newline="\n")

    result = AnalysisRunResult(
        out_dir=out_dir,
        backup_dir=backup_dir,
        report_path=report_path,
        summary=summary,
    )

    if not config.convert:
        return result

    # Imported only when conversion is requested.
    from .converters import convert_report_md_to_html_pdf

    result.html_path = out_dir / "report.html"
    result.pdf_path = out_dir / "report.pdf"
    result.convert_status = convert_report_md_to_html_pdf(result.report_path, result.html_path, result.pdf_path)
    return result
|
loganalysis/cli.py
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Sequence
|
|
6
|
+
|
|
7
|
+
from .analysis import NoLogFilesError, run_analysis
|
|
8
|
+
from .constants import DEFAULT_LOGS_DIR, DEFAULT_OUT_DIR, DEFAULT_TOP_EXAMPLES, EXIT_SUCCESS
|
|
9
|
+
from .models import AnalysisConfig
|
|
10
|
+
from .selftest import run_self_tests
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def build_parser() -> argparse.ArgumentParser:
    """Create the argument parser for the log analysis command line.

    Options: ``--logs-dir``, ``--out-dir``, ``--backup-dir``,
    ``--top-examples`` (int), and the flags ``--convert`` and ``--self-test``.

    Returns:
        The configured ``argparse.ArgumentParser``.
    """

    cli = argparse.ArgumentParser(
        description="Aggregated analysis of log files (INFO/ERROR/WARNING) in logs/*.txt",
    )

    # (flag, add_argument keyword arguments), registered in this order.
    option_specs = (
        (
            "--logs-dir",
            {
                "default": DEFAULT_LOGS_DIR,
                "help": f"Subdirectory with log files (Default: {DEFAULT_LOGS_DIR})",
            },
        ),
        (
            "--out-dir",
            {
                "default": DEFAULT_OUT_DIR,
                "help": f"Output directory (Default: {DEFAULT_OUT_DIR})",
            },
        ),
        (
            "--backup-dir",
            {
                "default": None,
                "help": "Backup directory (Default: <out-dir>/backup)",
            },
        ),
        (
            "--top-examples",
            {
                "type": int,
                "default": DEFAULT_TOP_EXAMPLES,
                "help": f"Number of example variants per normalized message in the report (Default: {DEFAULT_TOP_EXAMPLES})",
            },
        ),
        (
            "--convert",
            {
                "action": "store_true",
                "help": "Convert report.md to report.html and report.pdf when supported after analysis.",
            },
        ),
        (
            "--self-test",
            {
                "action": "store_true",
                "help": "Run built-in self-tests and exit.",
            },
        ),
    )
    for flag, options in option_specs:
        cli.add_argument(flag, **options)

    return cli
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def main(argv: Sequence[str] | None = None) -> int:
    """Run the command-line interface and return a process exit code.

    Used for `python -m loganalysis` and the console script.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read ``sys.argv``.

    Returns:
        ``EXIT_SUCCESS`` after a completed run or a passing ``--self-test``;
        the ``exit_code`` of ``NoLogFilesError`` when no log files are found.
    """

    args = build_parser().parse_args(argv)

    if args.self_test:
        run_self_tests()
        print("Self-tests passed.")
        return EXIT_SUCCESS

    out_dir = Path(args.out_dir)
    if args.backup_dir:
        backup_dir = Path(args.backup_dir)
    else:
        backup_dir = out_dir / "backup"

    config = AnalysisConfig(
        logs_dir=Path(args.logs_dir),
        out_dir=out_dir,
        backup_dir=backup_dir,
        top_examples=args.top_examples,
        convert=args.convert,
    )

    try:
        result = run_analysis(config)
    except NoLogFilesError as exc:
        print(str(exc))
        return exc.exit_code

    if config.convert:
        print(f"Converting {result.report_path} to HTML and PDF...")
        status = result.convert_status

        if status.get("html_created"):
            html_target = result.html_path.resolve() if result.html_path else "created"
            print(f"- HTML: {html_target}")
        else:
            print("- HTML: failed")

        if status.get("pdf_created"):
            pdf_target = result.pdf_path.resolve() if result.pdf_path else "created"
            print(f"- PDF: {pdf_target}")
        else:
            # Known reasons get a specific message; anything else is a generic failure.
            pdf_messages = {
                "pandoc_missing": "- PDF: skipped (pandoc not available)",
                "no_pdf_engine": "- PDF: skipped (no PDF engine found)",
                "html_failed": "- PDF: skipped (HTML conversion failed)",
                "pdf_not_created": "- PDF: not created",
            }
            print(pdf_messages.get(status.get("pdf_reason"), "- PDF: failed"))

    print("Analysis completed.")
    print(f"- Backups: {result.backup_dir.resolve()}")
    print(f"- Outputs: {result.out_dir.resolve()}")
    print(f"- Report: {result.report_path.resolve()}")
    return EXIT_SUCCESS
|
loganalysis/constants.py
ADDED
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
|
|
5
|
+
# Default directories and report settings.
DEFAULT_LOGS_DIR = "logs"
DEFAULT_OUT_DIR = "log_analyse_out"
DEFAULT_TOP_EXAMPLES = 3
DEFAULT_REPORT_TITLE = "Log Analysis Report"
REPORT_TOP_PER_FILE = 20
REPORT_TOP_GLOBAL = 50

# Process exit codes.
EXIT_SUCCESS = 0
EXIT_NO_LOG_FILES = 2

# Maps each accepted upper-case severity spelling to its canonical name.
SEVERITY_ALIASES = {
    "INFO": "INFO",
    "INFORMATION": "INFO",
    "I": "INFO",
    "ERROR": "ERROR",
    "ERR": "ERROR",
    "E": "ERROR",
    "WARNING": "WARNING",
    "WARN": "WARNING",
    "W": "WARNING",
}
|
|
26
|
+
|
|
27
|
+
# 8-4-4-4-12 hexadecimal GUID.
RE_GUID = re.compile(r"\b[0-9a-fA-F]{8}\b-[0-9a-fA-F]{4}\b-[0-9a-fA-F]{4}\b-[0-9a-fA-F]{4}\b-[0-9a-fA-F]{12}\b")
# Standalone run of digits delimited by word boundaries.
RE_INT = re.compile(r"\b\d+\b")
# One or more whitespace characters.
RE_WHITESPACE = re.compile(r"\s+")
# Drive-letter path; runs until a semicolon, comma or line break.
RE_WINDOWS_PATH = re.compile(r"\b[a-zA-Z]:\\[^;\n\r,]*")
# At least two "/segment" parts, not directly preceded by an alphanumeric character.
RE_UNIX_PATH = re.compile(r"(?<![A-Za-z0-9])(?:/[^/;\s,:]+){2,}")
# Severity keyword at the start of a line, followed by ";", "," or a tab.
RE_ENTRY_START = re.compile(
    r"^\s*(?P<severity>Info|Information|I|Error|Err|E|Warning|Warn|W)\b(?:\s*[;,]|\t)",
    re.IGNORECASE,
)
# Leading "Line <digits>:" prefix.
RE_LINE_PREFIX = re.compile(r"^\s*Line\s+\d+\s*:\s*", re.IGNORECASE)
# Trailing tail that contains three or more semicolons on one line.
RE_TRAILING_DATASET = re.compile(r"\s*;(?:[^\n;]*;){2,}[^\n;]*\s*$")
# A single digit, ASCII letter, or German umlaut/sharp-s character.
RE_SEMANTIC_VALUE_CHAR = re.compile(r"[0-9A-Za-zÄÖÜäöüß]")
# "<Mandatory Field Invalid|Conversion> ... =<value> ... The record was not found in table "<table>"".
RE_LOOKUP_ASSIGNMENT = re.compile(
    r'(?P<head>\b(?:Mandatory Field Invalid|Conversion)\b.*?=)'
    r'(?P<value>.*?)'
    r'\s*[.,;:]*\s*(?:\d+\s+)?'
    r'The record was not found in table\s+"(?P<table>[^"]+)"\.?',
    re.IGNORECASE,
)
# "Validate ... {name} '<value>':" with the quoted value captured separately.
RE_VALIDATE_QUOTED_KEY = re.compile(
    r"(?P<head>\bValidate\b.*?\{[^}]+\}\s*')"
    r"(?P<value>[^']*)"
    r"(?P<tail>':)",
    re.IGNORECASE,
)
# Single-quoted value including surrounding whitespace.
RE_QUOTED_VALUE = re.compile(r"\s*'[^']*'\s*")
# Markdown table separator row such as "| --- | :-: |".
RE_MARKDOWN_TABLE_SEPARATOR = re.compile(r"^\s*\|?(?:\s*:?-+:?\s*\|)+\s*$")
# HTML line break tag: <br>, <br/> or <br />.
RE_HTML_BREAK = re.compile(r"<br\s*/?>", re.IGNORECASE)
# Backtick-delimited inline code span (captured, so re.split keeps it).
RE_INLINE_CODE_SPAN = re.compile(r"(`[^`]*`)")
# Leading "<N>.<N>.<N>/<N>:<N>:<N>" placeholder sequence with an optional "-" separator.
RE_LEADING_TIMESTAMP_PLACEHOLDER = re.compile(r"^\s*<N>\.<N>\.<N>/<N>:<N>:<N>\s*(?:-\s*)?")
|
|
57
|
+
|
|
58
|
+
# Maps each LaTeX special character to its escaped replacement text.
LATEX_SPECIAL_CHAR_REPLACEMENTS = {
    "\\": r"\textbackslash{}",
    "{": r"\{",
    "}": r"\}",
    "_": r"\_",
    "%": r"\%",
    "&": r"\&",
    "#": r"\#",
    "$": r"\$",
    "~": r"\textasciitilde{}",
    "^": r"\textasciicircum{}",
}
|
|
70
|
+
LATEX_PDF_ENGINES = {"xelatex", "pdflatex"}
|
|
71
|
+
COMMON_MOJIBAKE_TOKENS = ("ÔÇ£", "ÔÇ¥", "–", "—", "„", "“", "â€")
|
|
72
|
+
|
|
73
|
+
# Pairs of two message strings. By the constant's name these are presumably
# (raw message, expected normalized message) for the built-in self-test —
# confirm against the selftest module.
NORMALIZATION_SELF_TEST_CASES: tuple[tuple[str, str], ...] = (
    # Lookup assignments.
    (
        'Mandatory Field Invalid: X =. The record was not found in table "Teile".',
        'Mandatory Field Invalid: X =<EMPTY> The record was not found in table "Teile".',
    ),
    (
        'Mandatory Field Invalid: X =., The record was not found in table "Teile".',
        'Mandatory Field Invalid: X =<EMPTY> The record was not found in table "Teile".',
    ),
    (
        'Mandatory Field Invalid: X = ; The record was not found in table "Teile".',
        'Mandatory Field Invalid: X =<EMPTY> The record was not found in table "Teile".',
    ),
    (
        'Mandatory Field Invalid: X =3000613.40 138 The record was not found in table "Teile".',
        'Mandatory Field Invalid: X =<VALUE> The record was not found in table "Teile".',
    ),
    (
        'Conversion: X =3100110. 138 The record was not found in table "Teile".',
        'Conversion: X =<VALUE> The record was not found in table "Teile".',
    ),
    # Validate messages with a quoted key.
    (
        "Validate gbS_EAN: s_trg00021 {Teil} '7006563.1,6RS':",
        "Validate gbS_EAN: s_trg00021 {Teil} '<VALUE>':",
    ),
    (
        "Validate gbS_EAN: s_trg00021 {Teil} '., ':",
        "Validate gbS_EAN: s_trg00021 {Teil} '<EMPTY>':",
    ),
    # Unix path.
    (
        "See /var/log/app/output.txt for details",
        "See <PATH> for details",
    ),
    # Generic quoted values.
    (
        "Error x40 exclusion criteria: s_art00002 {Teil} '.': {Der Datensatz ist nicht angelegt.}",
        "Error x40 exclusion criteria: s_art00002 {Teil}: {Der Datensatz ist nicht angelegt.}",
    ),
    (
        "Error x40 exclusion criteria: s_art00002 {Teil} '.,': {Der Datensatz ist nicht angelegt.}",
        "Error x40 exclusion criteria: s_art00002 {Teil}: {Der Datensatz ist nicht angelegt.}",
    ),
    (
        "Error x40 exclusion criteria: s_art00002 {Teil} '2053052.35': {Der Datensatz ist nicht angelegt.}",
        "Error x40 exclusion criteria: s_art00002 {Teil}: {Der Datensatz ist nicht angelegt.}",
    ),
    (
        "Error x40 exclusion criteria: s_art00002 {Teil} '179020.6,3': {Der Datensatz ist nicht angelegt.}",
        "Error x40 exclusion criteria: s_art00002 {Teil}: {Der Datensatz ist nicht angelegt.}",
    ),
    (
        "Error x40 exclusion criteria: s_art00002 {Teil} '9008001.': {Der Datensatz ist nicht angelegt.}",
        "Error x40 exclusion criteria: s_art00002 {Teil}: {Der Datensatz ist nicht angelegt.}",
    ),
)
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import subprocess
|
|
4
|
+
import tempfile
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from .filesystem import ensure_dir
|
|
8
|
+
from .report_html import render_report_html_document, resolve_report_date
|
|
9
|
+
from .report_markdown import parse_report_markdown
|
|
10
|
+
from .report_pdf import build_pdf_safe_markdown, contains_common_mojibake, select_pdf_engine
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def convert_report_md_to_html_pdf(md_path: Path, html_path: Path, pdf_path: Path) -> dict[str, object]:
    """Render the markdown report as HTML and, if an engine is available, as PDF.

    HTML is always attempted first; PDF generation is attempted only after
    a non-empty HTML file exists. The PDF is written to a temporary file
    next to ``pdf_path`` and moved into place only when it is non-empty.
    Errors are reported via ``print`` and reflected in the returned status
    rather than raised.

    Args:
        md_path: Existing markdown report to convert.
        html_path: Destination of the HTML document.
        pdf_path: Destination of the PDF document.

    Returns:
        A status dict with the keys ``html_created`` (bool), ``pdf_created``
        (bool), ``pdf_engine`` (engine name or ``None``) and ``pdf_reason``
        (``None`` on success, otherwise one of ``"html_failed"``,
        ``"no_pdf_engine"``, ``"pandoc_missing"``, ``"pdf_not_created"``,
        ``"pdf_failed"``).
    """

    import shutil

    status: dict[str, object] = dict(
        html_created=False,
        pdf_created=False,
        pdf_reason=None,
        pdf_engine=None,
    )

    # --- HTML stage -------------------------------------------------------
    try:
        for target in (html_path, pdf_path):
            ensure_dir(target.parent)

        markdown = md_path.read_text(encoding="utf-8")
        report = parse_report_markdown(markdown)
        html_path.write_text(
            render_report_html_document(report, md_path.name, resolve_report_date(report, md_path)),
            encoding="utf-8",
        )
    except Exception as exc:
        print(f"[ERROR] HTML generation failed: {exc}")
        status["pdf_reason"] = "html_failed"
        return status

    html_ok = html_path.exists() and html_path.stat().st_size > 0
    status["html_created"] = html_ok
    if not html_ok:
        print("[ERROR] HTML generation failed: HTML file was not created.")
        status["pdf_reason"] = "html_failed"
        return status

    # --- PDF stage --------------------------------------------------------
    engine = select_pdf_engine()
    if engine is None:
        print("[WARN] No supported PDF engine found. HTML was generated, PDF skipped.")
        status["pdf_reason"] = "no_pdf_engine"
        return status
    status["pdf_engine"] = engine

    scratch_md: Path | None = None
    scratch_pdf: Path | None = None

    try:
        # Reserve a temp file beside the target so the final replace() stays in one directory.
        with tempfile.NamedTemporaryFile(
            prefix=f"{pdf_path.stem}.",
            suffix=pdf_path.suffix,
            dir=str(pdf_path.parent),
            delete=False,
        ) as handle:
            scratch_pdf = Path(handle.name)

        if engine == "weasyprint":
            from weasyprint import HTML

            HTML(filename=str(html_path), base_url=str(html_path.parent)).write_pdf(str(scratch_pdf))
        elif engine == "wkhtmltopdf":
            executable = shutil.which("wkhtmltopdf")
            if executable is None:
                print("[WARN] wkhtmltopdf is not available anymore. PDF skipped.")
                status["pdf_reason"] = "no_pdf_engine"
                return status

            command = [
                executable,
                "--enable-local-file-access",
                "--page-size",
                "A4",
                "--orientation",
                "Portrait",
                str(html_path),
                str(scratch_pdf),
            ]
            subprocess.run(command, check=True)
        else:
            # Any other engine name is handed to pandoc as its --pdf-engine.
            pandoc = shutil.which("pandoc")
            if pandoc is None:
                print("[WARN] pandoc is not installed or not in PATH. HTML generated, PDF skipped.")
                status["pdf_reason"] = "pandoc_missing"
                return status

            source_md = md_path
            safe_markdown = build_pdf_safe_markdown(markdown)
            if safe_markdown != markdown:
                # Write the escaped variant to a sibling temp file; the report itself is untouched.
                with tempfile.NamedTemporaryFile(
                    prefix=f"{md_path.stem}.pdfsafe.",
                    suffix=md_path.suffix,
                    dir=str(md_path.parent),
                    mode="w",
                    encoding="utf-8",
                    newline="\n",
                    delete=False,
                ) as handle:
                    handle.write(safe_markdown)
                    scratch_md = Path(handle.name)
                    source_md = scratch_md

            if contains_common_mojibake(markdown):
                print(
                    "[WARN] Report contains suspicious mojibake sequences. PDF escaping was applied, "
                    "but the source encoding should still be checked."
                )

            command = [
                pandoc,
                str(source_md),
                "-o",
                str(scratch_pdf),
                "--from",
                "markdown",
                f"--pdf-engine={engine}",
            ]
            subprocess.run(command, check=True)

        if not (scratch_pdf.exists() and scratch_pdf.stat().st_size > 0):
            print("[ERROR] PDF generation finished without creating a PDF file.")
            status["pdf_reason"] = "pdf_not_created"
        else:
            scratch_pdf.replace(pdf_path)
            status["pdf_created"] = True
            status["pdf_reason"] = None
            print(f"PDF generated via {engine}: {pdf_path}")
    except Exception as exc:
        print(f"[ERROR] PDF generation failed via {engine}: {exc}")
        status["pdf_reason"] = "pdf_failed"
    finally:
        # Runs on the early returns above as well; removes whatever temp files remain.
        for leftover in (scratch_pdf, scratch_md):
            if leftover and leftover.exists():
                leftover.unlink()

    return status
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import csv
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Iterable
|
|
6
|
+
|
|
7
|
+
from .filesystem import ensure_dir
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def write_csv(path: Path, rows: Iterable[tuple[str, str, int]], headers: list[str]) -> None:
    """Write a header row followed by ``rows`` as semicolon-delimited CSV.

    The parent directory is created if needed. The file is written as
    UTF-8 with ``newline=""`` so the csv module controls line endings.

    Args:
        path: Destination file; overwritten if it exists.
        rows: Data rows, written in iteration order.
        headers: Column names for the first row.
    """

    ensure_dir(path.parent)
    with path.open("w", encoding="utf-8", newline="") as stream:
        out = csv.writer(stream, delimiter=";")
        out.writerow(headers)
        out.writerows(rows)
|
loganalysis/encoding.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
import chardet
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def detect_encoding(file_path: Path) -> str:
    """Guess a text file's encoding from its first 4096 bytes.

    Detection is best effort: only the file header is sampled, so the
    result says nothing about bytes further into the file. For that reason
    a detection result of plain ASCII is widened to UTF-8. ASCII text
    decodes identically under UTF-8, while a file whose header is pure
    ASCII may still contain multi-byte characters later on.

    Args:
        file_path: File to inspect; opened in binary mode.

    Returns:
        The detected encoding name, or ``"utf-8"`` when detection yields
        nothing (for example an empty file) or yields ASCII.
    """

    with file_path.open("rb") as file_handle:
        raw = file_handle.read(4096)
        result = chardet.detect(raw)

    detected = result["encoding"]
    if not detected or detected.lower() == "ascii":
        # UTF-8 is a strict superset of ASCII, so this never decodes worse.
        return "utf-8"
    return detected
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def count_physical_lines(file_path: Path, encoding: str | None = None) -> int:
    """Return the number of physical lines in a text file.

    Undecodable bytes are replaced rather than raising, so the count is
    produced even when the encoding is wrong.

    Args:
        file_path: File to read.
        encoding: Encoding to decode with; when omitted or empty it is
            detected via ``detect_encoding``.

    Returns:
        The line count; ``0`` for an empty file.
    """

    chosen_encoding = encoding if encoding else detect_encoding(file_path)
    line_total = 0
    with file_path.open("r", encoding=chosen_encoding, errors="replace") as stream:
        for _ in stream:
            line_total += 1
    return line_total
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import datetime as dt
|
|
4
|
+
import shutil
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def now_stamp() -> str:
    """Return the current local time as ``YYYYMMDD_HHMMSS``.

    The result contains only digits and an underscore, so it is safe to
    embed in file names. Resolution is one second.
    """

    return f"{dt.datetime.now():%Y%m%d_%H%M%S}"
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def ensure_dir(path: Path) -> None:
    """Make sure ``path`` exists as a directory.

    Missing parent directories are created as well, and an already
    existing directory is not an error.
    """

    path.mkdir(exist_ok=True, parents=True)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def backup_file(src: Path, backup_root: Path) -> Path:
    """Copy ``src`` into ``backup_root`` under a timestamped name.

    The copy is named ``<original name>.<timestamp>.bak`` and is made with
    ``shutil.copy2``. The backup directory is created if necessary.

    Args:
        src: File to back up.
        backup_root: Directory that receives the copy.

    Returns:
        Path of the backup copy.
    """

    ensure_dir(backup_root)
    backup_name = ".".join((src.name, now_stamp(), "bak"))
    target = backup_root / backup_name
    shutil.copy2(src, target)
    return target
|