nm-tool-forge 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,16 @@
1
+ from __future__ import annotations
2
+
3
+ from .analysis import analyze_file, run_analysis
4
+ from .converters import convert_report_md_to_html_pdf
5
+ from .normalization import normalize_message
6
+ from .parsing import iter_logical_entries
7
+
8
# Public API of the package: exactly the names imported above from the
# analysis, converters, normalization, and parsing submodules.
__all__ = [
    "analyze_file",
    "convert_report_md_to_html_pdf",
    "iter_logical_entries",
    "normalize_message",
    "run_analysis",
]

# Version string of this package release.
__version__ = "0.1.0"
@@ -0,0 +1,5 @@
1
+ from __future__ import annotations
2
+
3
+ from .cli import main
4
+
5
# Run the CLI and terminate the interpreter, using the value returned by
# main() as the process exit status.
raise SystemExit(main())
@@ -0,0 +1,175 @@
1
+ from __future__ import annotations
2
+
3
+ from collections import Counter, defaultdict
4
+ from pathlib import Path
5
+
6
+ from .constants import DEFAULT_OUT_DIR, DEFAULT_TOP_EXAMPLES, EXIT_NO_LOG_FILES
7
+ from .csv_export import write_csv
8
+ from .encoding import count_physical_lines, detect_encoding
9
+ from .filesystem import backup_file, ensure_dir
10
+ from .models import AnalysisConfig, AnalysisRunResult, AnalysisSummary, FileAnalysis, MessageKey
11
+ from .normalization import normalize_message
12
+ from .parsing import iter_logical_entries, parse_entry
13
+ from .report_markdown import build_markdown_report
14
+
15
+
16
class NoLogFilesError(FileNotFoundError):
    """Signal that an analysis run found no log files to process.

    The exception is a ``FileNotFoundError`` subclass. Its ``exit_code``
    attribute holds the process exit status associated with this failure so
    that callers can return it directly.
    """

    # Class-level attribute; read by callers as ``exc.exit_code``.
    exit_code = EXIT_NO_LOG_FILES
20
+
21
+
22
def analyze_file(file_path: Path) -> FileAnalysis:
    """Scan a single log file and tally its messages.

    Every logical entry increases the entry total. Entries for which
    ``parse_entry`` returns a falsy value are otherwise ignored. Each parsed
    entry is tallied twice, keyed by ``(severity, message)``: once with the
    literal message and once with the normalized message. The literal text is
    also recorded as an example variant of its normalized key.

    Args:
        file_path: Log file to analyze.

    Returns:
        A ``FileAnalysis`` holding the physical line count, the logical entry
        count, the number of entries with severity ``"UNKNOWN"``, both
        counters, and the example variants per normalized key.
    """

    # The encoding is detected once and reused for counting and parsing.
    file_encoding = detect_encoding(file_path)
    physical_line_total = count_physical_lines(file_path, encoding=file_encoding)

    literal_tally: Counter[MessageKey] = Counter()
    normalized_tally: Counter[MessageKey] = Counter()
    variants_by_key: dict[MessageKey, Counter[str]] = defaultdict(Counter)

    entry_total = 0
    unknown_total = 0

    entries = iter_logical_entries(file_path, encoding=file_encoding)
    # enumerate() leaves entry_total at the number of entries seen (0 if none).
    for entry_total, entry in enumerate(entries, start=1):
        record = parse_entry(entry)
        if not record:
            continue

        level, text = record.severity, record.message
        if level == "UNKNOWN":
            unknown_total += 1

        literal_tally[(level, text)] += 1

        normalized_key = (level, normalize_message(text))
        normalized_tally[normalized_key] += 1
        variants_by_key[normalized_key][text] += 1

    return FileAnalysis(
        file=file_path,
        total_lines=physical_line_total,
        total_entries=entry_total,
        unknown_lines=unknown_total,
        raw_counts=literal_tally,
        norm_counts=normalized_tally,
        # Hand back a plain dict so later lookups cannot create keys implicitly.
        norm_examples=dict(variants_by_key),
    )
63
+
64
+
65
def sorted_rows(counter: Counter[MessageKey]) -> list[tuple[str, str, int]]:
    """Flatten a message counter into ordered ``(severity, message, count)`` rows.

    Rows are ordered by severity ascending, then by count descending, then by
    message ascending, which makes the output deterministic for equal counts.

    Args:
        counter: Counts keyed by ``(severity, message)``.

    Returns:
        One ``(severity, message, count)`` tuple per counter key.
    """

    def ordering(entry):
        (severity, message), count = entry
        # Negating the count sorts larger counts first within a severity.
        return severity, -count, message

    ranked = sorted(counter.items(), key=ordering)
    return [(severity, message, count) for (severity, message), count in ranked]
75
+
76
+
77
def find_log_files(logs_dir: Path) -> list[Path]:
    """Return the ``*.txt`` log files located directly in ``logs_dir``.

    The search is not recursive. Only regular files are returned: the glob
    pattern also matches directories whose name ends in ``.txt``, and such an
    entry cannot be copied as a backup or read as a log.

    Args:
        logs_dir: Directory to search.

    Returns:
        The matching file paths in sorted order.
    """

    return sorted(candidate for candidate in logs_dir.glob("*.txt") if candidate.is_file())
81
+
82
+
83
def build_default_config(
    *,
    logs_dir: Path,
    out_dir: Path | None = None,
    backup_dir: Path | None = None,
    top_examples: int = DEFAULT_TOP_EXAMPLES,
    convert: bool = False,
) -> AnalysisConfig:
    """Assemble an ``AnalysisConfig``, filling in default output locations.

    Args:
        logs_dir: Directory containing the log files.
        out_dir: Output directory; falls back to ``DEFAULT_OUT_DIR`` when not
            given.
        backup_dir: Backup directory; falls back to ``<out_dir>/backup`` when
            not given.
        top_examples: Passed through to the configuration unchanged.
        convert: Passed through to the configuration unchanged.

    Returns:
        The populated configuration object.
    """

    if out_dir:
        output_root = out_dir
    else:
        output_root = Path(DEFAULT_OUT_DIR)

    # The backup default is derived from the already-resolved output directory.
    if backup_dir:
        backup_root = backup_dir
    else:
        backup_root = output_root / "backup"

    return AnalysisConfig(
        logs_dir=logs_dir,
        out_dir=output_root,
        backup_dir=backup_root,
        top_examples=top_examples,
        convert=convert,
    )
102
+
103
+
104
def run_analysis(config: AnalysisConfig) -> AnalysisRunResult:
    """Execute the complete analysis and write all output files.

    Steps, in order: create the output and backup directories, locate the log
    files, then for each file create a backup copy, analyze it, merge its
    counts into the global summary, and write its two per-file CSVs. After
    that the two global CSVs and ``report.md`` are written. When
    ``config.convert`` is set, the report is additionally converted to HTML
    and PDF.

    Args:
        config: Directories and options for this run.

    Returns:
        An ``AnalysisRunResult`` with the output locations and the summary;
        the HTML/PDF paths and conversion status are set only when conversion
        was requested.

    Raises:
        NoLogFilesError: If no ``*.txt`` file is found in ``config.logs_dir``.
    """

    source_dir = config.logs_dir
    target_dir = config.out_dir
    backups_root = config.backup_dir or (target_dir / "backup")

    for directory in (target_dir, backups_root):
        ensure_dir(directory)

    candidates = find_log_files(source_dir)
    if not candidates:
        raise NoLogFilesError(f"No *.txt files found in: {source_dir.resolve()}")

    raw_headers = ["SEVERITY", "MESSAGE", "COUNT"]
    normalized_headers = ["SEVERITY", "MESSAGE_NORMALIZED", "COUNT"]

    totals = AnalysisSummary(
        analyses=[],
        global_raw=Counter(),
        global_norm=Counter(),
        global_norm_examples={},
    )

    for source in candidates:
        # The backup copy is taken before the file is analyzed.
        saved_copy = backup_file(source, backups_root)
        per_file = analyze_file(source)
        per_file.backup_path = saved_copy
        totals.analyses.append(per_file)

        totals.global_raw.update(per_file.raw_counts)
        totals.global_norm.update(per_file.norm_counts)
        for norm_key, variants in per_file.norm_examples.items():
            merged = totals.global_norm_examples.setdefault(norm_key, Counter())
            merged.update(variants)

        stem = source.stem
        write_csv(
            target_dir / f"{stem}.aggregated.csv",
            sorted_rows(per_file.raw_counts),
            headers=raw_headers,
        )
        write_csv(
            target_dir / f"{stem}.aggregated.normalized.csv",
            sorted_rows(per_file.norm_counts),
            headers=normalized_headers,
        )

    write_csv(
        target_dir / "summary.all_files.csv",
        sorted_rows(totals.global_raw),
        headers=raw_headers,
    )
    write_csv(
        target_dir / "summary.all_files.normalized.csv",
        sorted_rows(totals.global_norm),
        headers=normalized_headers,
    )

    report_file = target_dir / "report.md"
    report_text = build_markdown_report(totals, config)
    report_file.write_text(report_text, encoding="utf-8", newline="\n")

    outcome = AnalysisRunResult(
        out_dir=target_dir,
        backup_dir=backups_root,
        report_path=report_file,
        summary=totals,
    )

    if config.convert:
        # Imported here so the converter module is loaded only on request.
        from .converters import convert_report_md_to_html_pdf

        outcome.html_path = target_dir / "report.html"
        outcome.pdf_path = target_dir / "report.pdf"
        outcome.convert_status = convert_report_md_to_html_pdf(
            outcome.report_path,
            outcome.html_path,
            outcome.pdf_path,
        )

    return outcome
loganalysis/cli.py ADDED
@@ -0,0 +1,88 @@
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ from pathlib import Path
5
+ from typing import Sequence
6
+
7
+ from .analysis import NoLogFilesError, run_analysis
8
+ from .constants import DEFAULT_LOGS_DIR, DEFAULT_OUT_DIR, DEFAULT_TOP_EXAMPLES, EXIT_SUCCESS
9
+ from .models import AnalysisConfig
10
+ from .selftest import run_self_tests
11
+
12
+
13
def build_parser() -> argparse.ArgumentParser:
    """Create the argument parser used by the command-line interface.

    Returns:
        A parser that accepts ``--logs-dir``, ``--out-dir``, ``--backup-dir``,
        ``--top-examples``, ``--convert``, and ``--self-test``.
    """

    cli = argparse.ArgumentParser(
        description="Aggregated analysis of log files (INFO/ERROR/WARNING) in logs/*.txt",
    )
    add_option = cli.add_argument

    # Directory options.
    add_option(
        "--logs-dir",
        default=DEFAULT_LOGS_DIR,
        help=f"Subdirectory with log files (Default: {DEFAULT_LOGS_DIR})",
    )
    add_option(
        "--out-dir",
        default=DEFAULT_OUT_DIR,
        help=f"Output directory (Default: {DEFAULT_OUT_DIR})",
    )
    add_option(
        "--backup-dir",
        default=None,
        help="Backup directory (Default: <out-dir>/backup)",
    )

    # Report options.
    add_option(
        "--top-examples",
        type=int,
        default=DEFAULT_TOP_EXAMPLES,
        help=f"Number of example variants per normalized message in the report (Default: {DEFAULT_TOP_EXAMPLES})",
    )
    add_option(
        "--convert",
        action="store_true",
        help="Convert report.md to report.html and report.pdf when supported after analysis.",
    )

    # Maintenance option.
    add_option(
        "--self-test",
        action="store_true",
        help="Run built-in self-tests and exit.",
    )
    return cli
35
+
36
+
37
def main(argv: Sequence[str] | None = None) -> int:
    """Parse the command line, run the requested action, and report results.

    With ``--self-test`` the built-in self-tests are run and nothing else
    happens. Otherwise an analysis is run with the configured directories; if
    ``--convert`` was given, the outcome of the HTML and PDF conversion is
    printed before the final summary of output locations.

    Args:
        argv: Argument list to parse; ``None`` lets ``argparse`` use the
            process arguments.

    Returns:
        ``EXIT_SUCCESS`` on success, or the ``exit_code`` of the
        ``NoLogFilesError`` raised when no log files were found.
    """

    options = build_parser().parse_args(argv)

    if options.self_test:
        run_self_tests()
        print("Self-tests passed.")
        return EXIT_SUCCESS

    output_root = Path(options.out_dir)
    if options.backup_dir:
        backup_root = Path(options.backup_dir)
    else:
        backup_root = output_root / "backup"

    config = AnalysisConfig(
        logs_dir=Path(options.logs_dir),
        out_dir=output_root,
        backup_dir=backup_root,
        top_examples=options.top_examples,
        convert=options.convert,
    )

    try:
        outcome = run_analysis(config)
    except NoLogFilesError as missing:
        print(str(missing))
        return missing.exit_code

    if config.convert:
        print(f"Converting {outcome.report_path} to HTML and PDF...")

        if outcome.convert_status.get("html_created"):
            html_target = outcome.html_path.resolve() if outcome.html_path else "created"
            print(f"- HTML: {html_target}")
        else:
            print("- HTML: failed")

        if outcome.convert_status.get("pdf_created"):
            pdf_target = outcome.pdf_path.resolve() if outcome.pdf_path else "created"
            print(f"- PDF: {pdf_target}")
        else:
            # Known reasons get a specific line; anything else counts as a failure.
            reason_lines = {
                "pandoc_missing": "- PDF: skipped (pandoc not available)",
                "no_pdf_engine": "- PDF: skipped (no PDF engine found)",
                "html_failed": "- PDF: skipped (HTML conversion failed)",
                "pdf_not_created": "- PDF: not created",
            }
            pdf_reason = outcome.convert_status.get("pdf_reason")
            print(reason_lines.get(pdf_reason, "- PDF: failed"))

    print("Analysis completed.")
    print(f"- Backups: {outcome.backup_dir.resolve()}")
    print(f"- Outputs: {outcome.out_dir.resolve()}")
    print(f"- Report: {outcome.report_path.resolve()}")
    return EXIT_SUCCESS
@@ -0,0 +1,126 @@
1
+ from __future__ import annotations
2
+
3
+ import re
4
+
5
# Default values used when the caller does not supply one.
DEFAULT_LOGS_DIR = "logs"
DEFAULT_OUT_DIR = "log_analyse_out"
DEFAULT_TOP_EXAMPLES = 3
DEFAULT_REPORT_TITLE = "Log Analysis Report"
# NOTE(review): presumably the row limits of the per-file and global report
# tables — confirm against the report builder.
REPORT_TOP_PER_FILE = 20
REPORT_TOP_GLOBAL = 50

# Process exit codes.
EXIT_SUCCESS = 0
EXIT_NO_LOG_FILES = 2

# Upper-case severity spellings (full word, abbreviation, single letter)
# mapped to one of the canonical names INFO, ERROR, or WARNING.
SEVERITY_ALIASES = {
    "INFO": "INFO",
    "INFORMATION": "INFO",
    "I": "INFO",
    "ERROR": "ERROR",
    "ERR": "ERROR",
    "E": "ERROR",
    "WARNING": "WARNING",
    "WARN": "WARNING",
    "W": "WARNING",
}
26
+
27
# Five hyphen-separated hexadecimal groups of 8, 4, 4, 4, and 12 digits.
RE_GUID = re.compile(r"\b[0-9a-fA-F]{8}\b-[0-9a-fA-F]{4}\b-[0-9a-fA-F]{4}\b-[0-9a-fA-F]{4}\b-[0-9a-fA-F]{12}\b")
# A run of digits delimited by word boundaries.
RE_INT = re.compile(r"\b\d+\b")
# One or more consecutive whitespace characters.
RE_WHITESPACE = re.compile(r"\s+")
# Drive letter, colon, and backslash, followed by everything up to the next
# semicolon, comma, or line break.
RE_WINDOWS_PATH = re.compile(r"\b[a-zA-Z]:\\[^;\n\r,]*")
# Two or more "/segment" components, not directly preceded by an ASCII letter
# or digit; a segment ends at "/", ";", ",", ":", or whitespace.
RE_UNIX_PATH = re.compile(r"(?<![A-Za-z0-9])(?:/[^/;\s,:]+){2,}")
# A severity keyword at the start of the text (after optional whitespace),
# followed by ";" or "," (optionally preceded by whitespace) or by a tab.
# Matching is case-insensitive; the keyword is captured as "severity".
RE_ENTRY_START = re.compile(
    r"^\s*(?P<severity>Info|Information|I|Error|Err|E|Warning|Warn|W)\b(?:\s*[;,]|\t)",
    re.IGNORECASE,
)
# A leading "Line <digits>:" prefix, case-insensitive.
RE_LINE_PREFIX = re.compile(r"^\s*Line\s+\d+\s*:\s*", re.IGNORECASE)
# A trailing run of semicolon-separated fields containing at least three
# semicolons and no line break.
RE_TRAILING_DATASET = re.compile(r"\s*;(?:[^\n;]*;){2,}[^\n;]*\s*$")
# A single digit, ASCII letter, German umlaut, or "ß".
RE_SEMANTIC_VALUE_CHAR = re.compile(r"[0-9A-Za-zÄÖÜäöüß]")
# "Mandatory Field Invalid" / "Conversion" text up to "=" (group "head"), the
# text after it (group "value"), optional punctuation and an optional number,
# then the 'The record was not found in table "<table>"' sentence.
RE_LOOKUP_ASSIGNMENT = re.compile(
    r'(?P<head>\b(?:Mandatory Field Invalid|Conversion)\b.*?=)'
    r'(?P<value>.*?)'
    r'\s*[.,;:]*\s*(?:\d+\s+)?'
    r'The record was not found in table\s+"(?P<table>[^"]+)"\.?',
    re.IGNORECASE,
)
# "Validate ... {...} '" (group "head"), the single-quoted content (group
# "value"), and the closing "':" (group "tail").
RE_VALIDATE_QUOTED_KEY = re.compile(
    r"(?P<head>\bValidate\b.*?\{[^}]+\}\s*')"
    r"(?P<value>[^']*)"
    r"(?P<tail>':)",
    re.IGNORECASE,
)
# A single-quoted value together with any whitespace around it.
RE_QUOTED_VALUE = re.compile(r"\s*'[^']*'\s*")
# A line consisting only of dash cells separated by "|", each optionally
# flanked by ":".
RE_MARKDOWN_TABLE_SEPARATOR = re.compile(r"^\s*\|?(?:\s*:?-+:?\s*\|)+\s*$")
# "<br>", "<br/>", or "<br />", case-insensitive.
RE_HTML_BREAK = re.compile(r"<br\s*/?>", re.IGNORECASE)
# A backtick-delimited span, captured including its backticks.
RE_INLINE_CODE_SPAN = re.compile(r"(`[^`]*`)")
# A leading "<N>.<N>.<N>/<N>:<N>:<N>" placeholder, optionally followed by "-".
# NOTE(review): the dots are unescaped and therefore match any character —
# confirm whether a literal "." was intended.
RE_LEADING_TIMESTAMP_PLACEHOLDER = re.compile(r"^\s*<N>\.<N>\.<N>/<N>:<N>:<N>\s*(?:-\s*)?")
57
+
58
# Single characters mapped to the LaTeX text that stands in for them.
LATEX_SPECIAL_CHAR_REPLACEMENTS = {
    "\\": r"\textbackslash{}",
    "{": r"\{",
    "}": r"\}",
    "_": r"\_",
    "%": r"\%",
    "&": r"\&",
    "#": r"\#",
    "$": r"\$",
    "~": r"\textasciitilde{}",
    "^": r"\textasciicircum{}",
}
# Names of the PDF engines that are LaTeX based.
LATEX_PDF_ENGINES = {"xelatex", "pdflatex"}
# Character sequences treated as signs of wrongly decoded text.
# NOTE(review): several entries are ordinary typographic characters here;
# verify that these literals survived packaging with the intended bytes.
COMMON_MOJIBAKE_TOKENS = ("ÔÇ£", "ÔÇ¥", "–", "—", "„", "“", "â€")
72
+
73
# Pairs of strings for the normalization self-test.
# NOTE(review): presumably (raw message, expected normalized message) —
# confirm against the self-test runner.
NORMALIZATION_SELF_TEST_CASES: tuple[tuple[str, str], ...] = (
    # Lookup failures whose assigned value is empty or punctuation only.
    (
        'Mandatory Field Invalid: X =. The record was not found in table "Teile".',
        'Mandatory Field Invalid: X =<EMPTY> The record was not found in table "Teile".',
    ),
    (
        'Mandatory Field Invalid: X =., The record was not found in table "Teile".',
        'Mandatory Field Invalid: X =<EMPTY> The record was not found in table "Teile".',
    ),
    (
        'Mandatory Field Invalid: X = ; The record was not found in table "Teile".',
        'Mandatory Field Invalid: X =<EMPTY> The record was not found in table "Teile".',
    ),
    # Lookup failures that carry an actual value.
    (
        'Mandatory Field Invalid: X =3000613.40 138 The record was not found in table "Teile".',
        'Mandatory Field Invalid: X =<VALUE> The record was not found in table "Teile".',
    ),
    (
        'Conversion: X =3100110. 138 The record was not found in table "Teile".',
        'Conversion: X =<VALUE> The record was not found in table "Teile".',
    ),
    # "Validate" messages with a quoted key.
    (
        "Validate gbS_EAN: s_trg00021 {Teil} '7006563.1,6RS':",
        "Validate gbS_EAN: s_trg00021 {Teil} '<VALUE>':",
    ),
    (
        "Validate gbS_EAN: s_trg00021 {Teil} '., ':",
        "Validate gbS_EAN: s_trg00021 {Teil} '<EMPTY>':",
    ),
    # Unix-style path.
    (
        "See /var/log/app/output.txt for details",
        "See <PATH> for details",
    ),
    # Messages whose quoted value is dropped entirely.
    (
        "Error x40 exclusion criteria: s_art00002 {Teil} '.': {Der Datensatz ist nicht angelegt.}",
        "Error x40 exclusion criteria: s_art00002 {Teil}: {Der Datensatz ist nicht angelegt.}",
    ),
    (
        "Error x40 exclusion criteria: s_art00002 {Teil} '.,': {Der Datensatz ist nicht angelegt.}",
        "Error x40 exclusion criteria: s_art00002 {Teil}: {Der Datensatz ist nicht angelegt.}",
    ),
    (
        "Error x40 exclusion criteria: s_art00002 {Teil} '2053052.35': {Der Datensatz ist nicht angelegt.}",
        "Error x40 exclusion criteria: s_art00002 {Teil}: {Der Datensatz ist nicht angelegt.}",
    ),
    (
        "Error x40 exclusion criteria: s_art00002 {Teil} '179020.6,3': {Der Datensatz ist nicht angelegt.}",
        "Error x40 exclusion criteria: s_art00002 {Teil}: {Der Datensatz ist nicht angelegt.}",
    ),
    (
        "Error x40 exclusion criteria: s_art00002 {Teil} '9008001.': {Der Datensatz ist nicht angelegt.}",
        "Error x40 exclusion criteria: s_art00002 {Teil}: {Der Datensatz ist nicht angelegt.}",
    ),
)
@@ -0,0 +1,150 @@
1
+ from __future__ import annotations
2
+
3
+ import subprocess
4
+ import tempfile
5
+ from pathlib import Path
6
+
7
+ from .filesystem import ensure_dir
8
+ from .report_html import render_report_html_document, resolve_report_date
9
+ from .report_markdown import parse_report_markdown
10
+ from .report_pdf import build_pdf_safe_markdown, contains_common_mojibake, select_pdf_engine
11
+
12
+
13
def convert_report_md_to_html_pdf(md_path: Path, html_path: Path, pdf_path: Path) -> dict[str, object]:
    """Convert the generated markdown report to HTML and optionally to PDF.

    The HTML document is always attempted first. A PDF is produced only if
    the HTML step succeeded and a PDF engine is available. Errors raised
    inside the HTML and PDF stages are caught, printed, and reflected in the
    returned status instead of being propagated.

    Args:
        md_path: Markdown report to read.
        html_path: Destination of the HTML document.
        pdf_path: Destination of the PDF document.

    Returns:
        A status dict with the keys ``html_created`` and ``pdf_created``
        (booleans), ``pdf_engine`` (the selected engine or ``None``), and
        ``pdf_reason`` (``None`` or one of ``"html_failed"``,
        ``"no_pdf_engine"``, ``"pandoc_missing"``, ``"pdf_not_created"``,
        ``"pdf_failed"``).
    """

    status: dict[str, object] = {
        "html_created": False,
        "pdf_created": False,
        "pdf_reason": None,
        "pdf_engine": None,
    }

    # Stage 1: render the HTML document from the parsed markdown report.
    try:
        ensure_dir(html_path.parent)
        ensure_dir(pdf_path.parent)

        markdown = md_path.read_text(encoding="utf-8")
        report = parse_report_markdown(markdown)
        report_date = resolve_report_date(report, md_path)
        html_document = render_report_html_document(report, md_path.name, report_date)
        html_path.write_text(html_document, encoding="utf-8")
    except Exception as exc:
        print(f"[ERROR] HTML generation failed: {exc}")
        status["pdf_reason"] = "html_failed"
        return status

    # An empty or missing HTML file counts as a failed HTML stage.
    html_created = html_path.exists() and html_path.stat().st_size > 0
    status["html_created"] = html_created
    if not html_created:
        print("[ERROR] HTML generation failed: HTML file was not created.")
        status["pdf_reason"] = "html_failed"
        return status

    # Stage 2: pick a PDF engine; without one only the HTML is delivered.
    pdf_engine = select_pdf_engine()
    if pdf_engine is None:
        print("[WARN] No supported PDF engine found. HTML was generated, PDF skipped.")
        status["pdf_reason"] = "no_pdf_engine"
        return status

    status["pdf_engine"] = pdf_engine

    pdf_source_path = md_path
    # Temporary files created below; both are removed in the finally clause.
    temp_md_path: Path | None = None
    temp_pdf_path: Path | None = None

    try:
        # Reserve a temporary output file next to the final PDF. The engine
        # writes there, and the file is moved into place only on success.
        with tempfile.NamedTemporaryFile(
            prefix=f"{pdf_path.stem}.",
            suffix=pdf_path.suffix,
            dir=str(pdf_path.parent),
            delete=False,
        ) as handle:
            temp_pdf_path = Path(handle.name)

        if pdf_engine == "weasyprint":
            # Imported on demand, only when this engine was selected.
            from weasyprint import HTML

            HTML(filename=str(html_path), base_url=str(html_path.parent)).write_pdf(str(temp_pdf_path))
        elif pdf_engine == "wkhtmltopdf":
            import shutil

            # Look the executable up again right before running it.
            wkhtmltopdf_path = shutil.which("wkhtmltopdf")
            if wkhtmltopdf_path is None:
                print("[WARN] wkhtmltopdf is not available anymore. PDF skipped.")
                status["pdf_reason"] = "no_pdf_engine"
                return status

            subprocess.run(
                [
                    wkhtmltopdf_path,
                    "--enable-local-file-access",
                    "--page-size",
                    "A4",
                    "--orientation",
                    "Portrait",
                    str(html_path),
                    str(temp_pdf_path),
                ],
                check=True,
            )
        else:
            # Every other engine name is passed to pandoc as --pdf-engine.
            import shutil

            pandoc_path = shutil.which("pandoc")
            if pandoc_path is None:
                print("[WARN] pandoc is not installed or not in PATH. HTML generated, PDF skipped.")
                status["pdf_reason"] = "pandoc_missing"
                return status

            # If escaping changed the text, pandoc reads a temporary escaped
            # copy instead of the original markdown file.
            pdf_safe_markdown = build_pdf_safe_markdown(markdown)
            if pdf_safe_markdown != markdown:
                with tempfile.NamedTemporaryFile(
                    prefix=f"{md_path.stem}.pdfsafe.",
                    suffix=md_path.suffix,
                    dir=str(md_path.parent),
                    mode="w",
                    encoding="utf-8",
                    newline="\n",
                    delete=False,
                ) as handle:
                    handle.write(pdf_safe_markdown)
                    pdf_source_path = temp_md_path = Path(handle.name)

                # NOTE(review): the nesting of this check under the escaping
                # branch is inferred from the warning text — confirm.
                if contains_common_mojibake(markdown):
                    print(
                        "[WARN] Report contains suspicious mojibake sequences. PDF escaping was applied, "
                        "but the source encoding should still be checked."
                    )

            subprocess.run(
                [
                    pandoc_path,
                    str(pdf_source_path),
                    "-o",
                    str(temp_pdf_path),
                    "--from",
                    "markdown",
                    f"--pdf-engine={pdf_engine}",
                ],
                check=True,
            )

        # Publish the PDF only if the engine actually produced content.
        if temp_pdf_path.exists() and temp_pdf_path.stat().st_size > 0:
            temp_pdf_path.replace(pdf_path)
            status["pdf_created"] = True
            status["pdf_reason"] = None
            print(f"PDF generated via {pdf_engine}: {pdf_path}")
        else:
            print("[ERROR] PDF generation finished without creating a PDF file.")
            status["pdf_reason"] = "pdf_not_created"
    except Exception as exc:
        print(f"[ERROR] PDF generation failed via {pdf_engine}: {exc}")
        status["pdf_reason"] = "pdf_failed"
    finally:
        # Remove leftovers; after a successful replace() the temporary PDF no
        # longer exists under its temporary name.
        if temp_pdf_path and temp_pdf_path.exists():
            temp_pdf_path.unlink()
        if temp_md_path and temp_md_path.exists():
            temp_md_path.unlink()

    return status
@@ -0,0 +1,18 @@
1
+ from __future__ import annotations
2
+
3
+ import csv
4
+ from pathlib import Path
5
+ from typing import Iterable
6
+
7
+ from .filesystem import ensure_dir
8
+
9
+
10
def write_csv(path: Path, rows: Iterable[tuple[str, str, int]], headers: list[str]) -> None:
    """Write a header line and data rows as semicolon-delimited CSV.

    The parent directory of ``path`` is created if necessary. The file is
    written as UTF-8 and replaces any existing file at ``path``.

    Args:
        path: Destination file.
        rows: Data rows, written in iteration order.
        headers: Column names, written as the first row.
    """

    ensure_dir(path.parent)
    # newline="" leaves line-ending handling to the csv module.
    with path.open("w", encoding="utf-8", newline="") as sink:
        csv_writer = csv.writer(sink, delimiter=";")
        csv_writer.writerow(headers)
        csv_writer.writerows(rows)
@@ -0,0 +1,22 @@
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+
5
+ import chardet
6
+
7
+
8
def detect_encoding(file_path: Path) -> str:
    """Guess the text encoding of a file from its first 4096 bytes.

    Detection is best effort. Because only the head of the file is sampled, a
    file whose first bytes are pure ASCII is detected as ``ascii`` even if
    non-ASCII characters follow later. ASCII is a strict subset of UTF-8, so
    such a result is widened to ``utf-8``: ASCII content decodes identically
    and later UTF-8 sequences are no longer mangled.

    Args:
        file_path: File to inspect.

    Returns:
        The detected encoding name, or ``"utf-8"`` when nothing was detected
        or the sample looked like plain ASCII.
    """

    with file_path.open("rb") as file_handle:
        sample = file_handle.read(4096)

    guess = chardet.detect(sample).get("encoding")
    if not guess or guess.lower() == "ascii":
        return "utf-8"
    return guess
15
+
16
+
17
def count_physical_lines(file_path: Path, encoding: str | None = None) -> int:
    """Return the number of physical lines in a text file.

    Args:
        file_path: File to read.
        encoding: Encoding to decode with; when empty or ``None`` the
            encoding is detected from the file first.

    Returns:
        The number of lines yielded when iterating the file in text mode.
        Undecodable bytes are replaced rather than raising an error.
    """

    chosen_encoding = encoding if encoding else detect_encoding(file_path)

    line_total = 0
    with file_path.open("r", encoding=chosen_encoding, errors="replace") as stream:
        for _line in stream:
            line_total += 1
    return line_total
@@ -0,0 +1,26 @@
1
+ from __future__ import annotations
2
+
3
+ import datetime as dt
4
+ import shutil
5
+ from pathlib import Path
6
+
7
+
8
def now_stamp() -> str:
    """Format the current local time as ``YYYYMMDD_HHMMSS``.

    The result contains only digits and one underscore, so it can be embedded
    in file names.
    """

    current = dt.datetime.now()
    return f"{current:%Y%m%d_%H%M%S}"
12
+
13
+
14
def ensure_dir(path: Path) -> None:
    """Make sure ``path`` exists as a directory.

    Missing parent directories are created as well, and an already existing
    directory is accepted without error.
    """

    path.mkdir(
        parents=True,
        exist_ok=True,
    )
18
+
19
+
20
def backup_file(src: Path, backup_root: Path) -> Path:
    """Copy ``src`` into ``backup_root`` under a timestamped name.

    The backup directory is created if needed. The copy is named
    ``<original name>.<timestamp>.bak`` and is made with ``shutil.copy2``.

    Args:
        src: File to back up.
        backup_root: Directory that receives the copy.

    Returns:
        Path of the backup copy.
    """

    ensure_dir(backup_root)
    backup_name = ".".join((src.name, now_stamp(), "bak"))
    target = backup_root / backup_name
    shutil.copy2(src, target)
    return target
+ return dst