nm-tool-forge 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
loganalysis/models.py ADDED
@@ -0,0 +1,63 @@
1
+ from __future__ import annotations
2
+
3
+ from collections import Counter
4
+ from dataclasses import dataclass, field
5
+ from pathlib import Path
6
+
7
# Key type shared by the Counter/dict fields declared in this module: a pair of strings.
# NOTE(review): presumably (severity, message) — confirm against the code that fills the counters.
MessageKey = tuple[str, str]
8
+
9
+
10
@dataclass(frozen=True)
class ParsedLine:
    """Structured representation of a parsed logical log entry.

    Instances are immutable because the dataclass is declared ``frozen=True``.
    """

    # Severity label attached to the entry.
    severity: str
    # Message text of the entry.
    message: str
16
+
17
+
18
@dataclass
class FileAnalysis:
    """Aggregated analysis results for a single log file.

    All fields except ``backup_path`` are required at construction time.
    """

    # The log file these results belong to.
    file: Path
    # NOTE(review): presumably the number of physical lines read — confirm with the analyzer.
    total_lines: int
    # NOTE(review): presumably the number of logical entries assembled from those lines — confirm.
    total_entries: int
    # NOTE(review): presumably lines/entries whose severity could not be determined — confirm.
    unknown_lines: int
    # Occurrence counts keyed by MessageKey (presumably for raw, un-normalized messages).
    raw_counts: Counter[MessageKey]
    # Occurrence counts keyed by MessageKey (presumably for normalized messages).
    norm_counts: Counter[MessageKey]
    # For each MessageKey, a Counter of example strings.
    norm_examples: dict[MessageKey, Counter[str]]
    # Optional path of a backup copy; None when no backup path was recorded.
    backup_path: Path | None = None
30
+
31
+
32
@dataclass
class AnalysisSummary:
    """Combined analysis data across all processed files.

    Every field uses ``default_factory`` so each instance starts with its own
    empty containers and can be constructed without arguments.
    """

    # Per-file results, one FileAnalysis per processed file.
    analyses: list[FileAnalysis] = field(default_factory=list)
    # Cross-file counterpart of FileAnalysis.raw_counts (presumably the sum over all files).
    global_raw: Counter[MessageKey] = field(default_factory=Counter)
    # Cross-file counterpart of FileAnalysis.norm_counts (presumably the sum over all files).
    global_norm: Counter[MessageKey] = field(default_factory=Counter)
    # Cross-file counterpart of FileAnalysis.norm_examples.
    global_norm_examples: dict[MessageKey, Counter[str]] = field(default_factory=dict)
40
+
41
+
42
@dataclass(frozen=True)
class AnalysisConfig:
    """Configuration for a full analysis run.

    Instances are immutable because the dataclass is declared ``frozen=True``.
    """

    # Directory containing the logs to analyse.
    logs_dir: Path
    # Directory that receives the run's output.
    out_dir: Path
    # Optional backup directory; None leaves the choice to the consumer of this config.
    backup_dir: Path | None = None
    # NOTE(review): presumably the number of example values kept per message — confirm.
    top_examples: int = 3
    # NOTE(review): presumably enables report conversion (e.g. to HTML/PDF) — confirm.
    convert: bool = False
51
+
52
+
53
@dataclass
class AnalysisRunResult:
    """Paths and conversion results produced by a full analysis run."""

    # Directory the run wrote its output to.
    out_dir: Path
    # Backup directory used by the run; unlike AnalysisConfig.backup_dir this field is not optional.
    backup_dir: Path
    # Location of the generated report file.
    report_path: Path
    # Aggregated analysis data for the run.
    summary: AnalysisSummary
    # Path of the HTML rendering, or None when none is recorded.
    html_path: Path | None = None
    # Path of the PDF rendering, or None when none is recorded.
    pdf_path: Path | None = None
    # Free-form conversion status details; starts as a fresh empty dict per instance.
    convert_status: dict[str, object] = field(default_factory=dict)
@@ -0,0 +1,97 @@
1
+ from __future__ import annotations
2
+
3
+ import re
4
+
5
+ from .constants import (
6
+ RE_GUID,
7
+ RE_INT,
8
+ RE_LOOKUP_ASSIGNMENT,
9
+ RE_QUOTED_VALUE,
10
+ RE_SEMANTIC_VALUE_CHAR,
11
+ RE_UNIX_PATH,
12
+ RE_VALIDATE_QUOTED_KEY,
13
+ RE_WHITESPACE,
14
+ RE_WINDOWS_PATH,
15
+ )
16
+
17
+
18
def classify_value_fragment(value: str) -> str:
    """Return ``"<VALUE>"`` for a meaningful fragment, ``"<EMPTY>"`` otherwise.

    A fragment is meaningful when, after trimming surrounding whitespace, it is
    non-empty and contains at least one match of ``RE_SEMANTIC_VALUE_CHAR``.
    A falsy ``value`` (including ``None``) is treated as the empty string.
    """

    candidate = (value or "").strip()
    # Short-circuit keeps the regex from running on blank input.
    has_content = bool(candidate) and RE_SEMANTIC_VALUE_CHAR.search(candidate) is not None
    return "<VALUE>" if has_content else "<EMPTY>"
27
+
28
+
29
def normalize_lookup_assignment_message(message: str) -> str:
    """Replace the concrete value in lookup-failure messages with a placeholder.

    Every match of ``RE_LOOKUP_ASSIGNMENT`` is rewritten to its ``head`` group,
    the ``<VALUE>``/``<EMPTY>`` token for its ``value`` group, and a fixed
    "record was not found" sentence naming its ``table`` group.
    """

    def _rewrite(found: re.Match[str]) -> str:
        placeholder = classify_value_fragment(found.group("value"))
        table = found.group("table")
        prefix = found.group("head")
        return f'{prefix}{placeholder} The record was not found in table "{table}".'

    return RE_LOOKUP_ASSIGNMENT.sub(_rewrite, message)
38
+
39
+
40
def normalize_validate_key_message(message: str) -> str:
    """Replace the quoted value in ``Validate... {Key} 'value'`` style messages.

    Every match of ``RE_VALIDATE_QUOTED_KEY`` keeps its ``head`` and ``tail``
    groups; the ``value`` group between them becomes ``<VALUE>`` or ``<EMPTY>``.
    """

    def _rewrite(found: re.Match[str]) -> str:
        placeholder = classify_value_fragment(found.group("value"))
        prefix = found.group("head")
        suffix = found.group("tail")
        return f"{prefix}{placeholder}{suffix}"

    return RE_VALIDATE_QUOTED_KEY.sub(_rewrite, message)
48
+
49
+
50
def semantic_normalize_message(message: str) -> str:
    """Run the semantic rewrite rules that precede generic token normalization.

    The trimmed message goes through the lookup-assignment rule first and the
    validate-key rule second.
    """

    after_lookup = normalize_lookup_assignment_message(message.strip())
    return normalize_validate_key_message(after_lookup)
57
+
58
+
59
def normalize_generic_quoted_values(message: str) -> str:
    """Drop quoted values, keeping only quoted ``<VALUE>``/``<EMPTY>`` placeholders.

    Each match of ``RE_QUOTED_VALUE`` is removed unless the text between its
    first and last character (after trimming) is one of the two placeholders,
    in which case the match is left untouched.
    """

    preserved = ("<VALUE>", "<EMPTY>")

    def _drop_unless_placeholder(found: re.Match[str]) -> str:
        whole = found.group(0)
        trimmed = whole.strip()
        # Strip the delimiter on each side; anything shorter has no content.
        content = "" if len(trimmed) < 2 else trimmed[1:-1]
        return whole if content in preserved else ""

    return RE_QUOTED_VALUE.sub(_drop_unless_placeholder, message)
70
+
71
+
72
def cleanup_normalized_message(message: str) -> str:
    """Remove whitespace and punctuation artifacts left behind by normalization.

    Whitespace runs are collapsed first; then whitespace before ``: , . ; )``
    and after ``(`` is removed, remaining multi-space runs are collapsed, and
    the result is trimmed.
    """

    tidied = RE_WHITESPACE.sub(" ", message).strip()
    # Applied in order; each pair is (pattern, replacement).
    fixups = (
        (r"\s+:", ":"),
        (r"\s+,", ","),
        (r"\s+\.", "."),
        (r"\s+;", ";"),
        (r"\s+\)", ")"),
        (r"\(\s+", "("),
        (r"\s{2,}", " "),
    )
    for pattern, replacement in fixups:
        tidied = re.sub(pattern, replacement, tidied)
    return tidied.strip()
84
+
85
+
86
def normalize_message(message: str) -> str:
    """Reduce a log message to a canonical form suitable for aggregation.

    Steps, in order: semantic rewrites, GUID and path placeholders, removal of
    generic quoted values, integer placeholders, and a final cleanup pass.
    Returns ``"(no message)"`` when nothing is left.
    """

    text = semantic_normalize_message(message.strip())

    placeholder_rules = (
        (RE_GUID, "<GUID>"),
        (RE_WINDOWS_PATH, "<PATH>"),
        (RE_UNIX_PATH, "<PATH>"),
    )
    for pattern, placeholder in placeholder_rules:
        text = pattern.sub(placeholder, text)

    text = normalize_generic_quoted_values(text)
    text = RE_INT.sub("<N>", text)
    text = cleanup_normalized_message(text)
    return text or "(no message)"
loganalysis/parsing.py ADDED
@@ -0,0 +1,69 @@
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+ from typing import Iterable
5
+
6
+ from .constants import RE_ENTRY_START, RE_LINE_PREFIX, RE_TRAILING_DATASET, RE_WHITESPACE, SEVERITY_ALIASES
7
+ from .encoding import detect_encoding
8
+ from .models import ParsedLine
9
+
10
+
11
def canonical_severity(raw: str) -> str:
    """Translate a raw severity token into its canonical output form.

    The token is trimmed and upper-cased, then looked up in
    ``SEVERITY_ALIASES``. Tokens without an alias are returned as-is, and a
    blank or falsy token becomes ``"UNKNOWN"``.
    """

    token = (raw or "").strip().upper()
    fallback = token or "UNKNOWN"
    return SEVERITY_ALIASES.get(token, fallback)
16
+
17
+
18
def is_entry_start(line: str) -> bool:
    """Tell whether ``line`` matches ``RE_ENTRY_START`` at its beginning."""

    return RE_ENTRY_START.match(line) is not None
22
+
23
+
24
def iter_logical_entries(file_path: Path, encoding: str | None = None) -> Iterable[str]:
    """Yield logical log entries, each built from one or more physical lines.

    A line recognised by ``is_entry_start`` opens a new entry; following lines
    are appended to it until the next entry start. Lines that appear before
    the first entry start are discarded. When ``encoding`` is falsy the file's
    encoding is obtained from ``detect_encoding``; undecodable bytes are
    replaced rather than raising.
    """

    resolved_encoding = encoding or detect_encoding(file_path)
    pending: list[str] = []

    with file_path.open("r", encoding=resolved_encoding, errors="replace") as stream:
        for physical_line in stream:
            if not is_entry_start(physical_line):
                # Continuation text only counts once an entry has been opened.
                if pending:
                    pending.append(physical_line)
                continue

            if pending:
                yield "".join(pending)
            pending = [physical_line]

    # Flush the entry that was still open at end of file.
    if pending:
        yield "".join(pending)
43
+
44
+
45
def parse_entry(entry: str) -> ParsedLine | None:
    """Split one logical entry into a canonical severity and a tidied message.

    Returns ``None`` for blank input. Entries that do not match
    ``RE_ENTRY_START`` are reported with severity ``"UNKNOWN"`` and their
    whitespace-collapsed text. An empty message becomes ``"(no message)"``.
    """

    text = entry.strip()
    if not text:
        return None

    header = RE_ENTRY_START.match(text)
    if header is None:
        collapsed = RE_WHITESPACE.sub(" ", text).strip()
        return ParsedLine(severity="UNKNOWN", message=collapsed or "(no message)")

    level = canonical_severity(header.group("severity") or "")

    # Everything after the matched header is the message body.
    body = text[header.end():].strip()
    body = RE_LINE_PREFIX.sub("", body, count=1)
    body = RE_WHITESPACE.sub(" ", body).strip()
    body = RE_TRAILING_DATASET.sub("", body).strip()
    body = RE_WHITESPACE.sub(" ", body).strip()
    return ParsedLine(severity=level, message=body or "(no message)")
64
+
65
+
66
def parse_line(line: str) -> ParsedLine | None:
    """Parse ``line`` via ``parse_entry``; kept as a backward-compatible alias."""

    parsed = parse_entry(line)
    return parsed
@@ -0,0 +1,378 @@
1
+ from __future__ import annotations
2
+
3
+ import datetime as dt
4
+ import html as html_lib
5
+ from pathlib import Path
6
+
7
+ from .constants import RE_HTML_BREAK, RE_INLINE_CODE_SPAN, RE_LEADING_TIMESTAMP_PLACEHOLDER
8
+ from .report_models import ReportDocument, ReportSection, ReportTable
9
+
10
+
11
def sanitize_html_text(text: str) -> str:
    """Strip ``RE_LEADING_TIMESTAMP_PLACEHOLDER`` matches and trim the result."""

    without_placeholder = RE_LEADING_TIMESTAMP_PLACEHOLDER.sub("", text)
    return without_placeholder.strip()
15
+
16
+
17
def render_inline_markdown_html(text: str) -> str:
    """Render the supported inline markdown subset to HTML.

    The sanitized text is split with ``RE_INLINE_CODE_SPAN``. Parts wrapped in
    backticks become ``<code>`` elements; every other part is HTML-escaped
    (quotes are left as-is because the output is element content).

    Args:
        text: Source text, possibly containing backtick code spans.

    Returns:
        The HTML fragment for ``text``.
    """

    rendered_parts: list[str] = []
    for part in RE_INLINE_CODE_SPAN.split(sanitize_html_text(text)):
        if not part:
            continue
        # A code span needs an opening AND a closing backtick. Without the
        # length check a lone "`" satisfies both tests, renders as an empty
        # <code></code> element, and the character itself is lost.
        is_code_span = len(part) >= 2 and part.startswith("`") and part.endswith("`")
        if is_code_span:
            rendered_parts.append(f"<code>{html_lib.escape(part[1:-1], quote=False)}</code>")
        else:
            rendered_parts.append(html_lib.escape(part, quote=False))
    return "".join(rendered_parts)
30
+
31
+
32
def render_metadata_list_html(items: tuple[tuple[str, str], ...], css_class: str) -> str:
    """Build a ``<ul>`` of ``label: value`` metadata entries.

    Returns an empty string when ``items`` is empty. Labels are HTML-escaped,
    values go through the inline-markdown renderer, and the ``": "`` separator
    is omitted for entries whose value is empty.
    """

    if not items:
        return ""

    def _entry(label: str, value: str) -> str:
        label_html = html_lib.escape(label, quote=False)
        value_html = render_inline_markdown_html(value)
        separator = ": " if value else ""
        return f'<li><span class="meta-label">{label_html}</span>{separator}{value_html}</li>'

    entries = "".join(_entry(label, value) for label, value in items)
    return f'<ul class="{css_class}">{entries}</ul>'
45
+
46
+
47
def render_examples_html(cell_text: str) -> str:
    """Turn ``<br>``-separated example values into stacked ``<div>`` blocks.

    The cell is split with ``RE_HTML_BREAK``; blank fragments are skipped.
    Returns an empty string when no example remains.
    """

    trimmed = (fragment.strip() for fragment in RE_HTML_BREAK.split(cell_text))
    examples = [example for example in trimmed if example]
    if not examples:
        return ""

    blocks: list[str] = []
    for example in examples:
        blocks.append(f"<div>{render_inline_markdown_html(example)}</div>")
    return '<div class="examples">' + "".join(blocks) + "</div>"
56
+
57
+
58
def report_table_column_class(index: int) -> str:
    """Map a zero-based column index to its CSS class.

    The first four columns have semantic names; any other index (including a
    negative one) falls back to ``col-<index + 1>``.
    """

    named_columns = ("col-severity", "col-count", "col-message", "col-examples")
    if index < 0 or index >= len(named_columns):
        return f"col-{index + 1}"
    return named_columns[index]
65
+
66
+
67
def render_report_table_html(table: ReportTable) -> str:
    """Produce the HTML markup for a single report table.

    Header cells are HTML-escaped. Body cells under a header named
    ``examples`` (case-insensitive) are rendered as stacked example blocks;
    all other cells go through the inline-markdown renderer. Every cell gets
    the CSS class for its column position.
    """

    header_cells: list[str] = []
    for position, header in enumerate(table.headers):
        header_cells.append(
            f'<th class="{report_table_column_class(position)}">{html_lib.escape(header, quote=False)}</th>'
        )

    row_markup: list[str] = []
    for row in table.rows:
        rendered_cells: list[str] = []
        for position, cell in enumerate(row):
            # Cells beyond the last header have no header name to inspect.
            is_examples = position < len(table.headers) and table.headers[position].lower() == "examples"
            renderer = render_examples_html if is_examples else render_inline_markdown_html
            rendered_cells.append(f'<td class="{report_table_column_class(position)}">{renderer(cell)}</td>')
        row_markup.append("<tr>" + "".join(rendered_cells) + "</tr>")

    pieces = [
        '<div class="report-table-wrap">',
        '<table class="report-table">',
        "<thead><tr>" + "".join(header_cells) + "</tr></thead>",
        "<tbody>" + "".join(row_markup) + "</tbody>",
        "</table>",
        "</div>",
    ]
    return "".join(pieces)
95
+
96
+
97
def css_string_literal(text: str) -> str:
    """Escape plain text for use as a double-quoted CSS string literal.

    Backslashes and double quotes are backslash-escaped. Every line break form
    (``\\n``, ``\\r\\n``, ``\\r``) becomes the CSS escape ``\\A `` and a form
    feed becomes ``\\C ``, because a raw newline character terminates a CSS
    string. ``<`` is written as ``\\3C `` so that text containing ``</style>``
    cannot close the ``<style>`` element the generated CSS is embedded in.

    Args:
        text: Arbitrary text to embed in a stylesheet.

    Returns:
        The text wrapped in double quotes with all unsafe characters escaped.
    """

    # Collapse CRLF first so a Windows line break yields one escape, not two.
    normalized = text.replace("\r\n", "\n")
    escape_table = str.maketrans(
        {
            "\\": "\\\\",
            '"': '\\"',
            "\n": "\\A ",
            "\r": "\\A ",
            "\f": "\\C ",
            "<": "\\3C ",
        }
    )
    return f'"{normalized.translate(escape_table)}"'
102
+
103
+
104
def resolve_report_date(report: ReportDocument, md_path: Path) -> str:
    """Work out the date string displayed in the page header.

    The first usable ``timestamp`` metadata entry (label compared
    case-insensitively, backticks removed) wins: an ISO-parsable value yields
    its date, otherwise the first ten characters are used when the value is
    at least that long. Without a usable entry the modification time of
    ``md_path`` supplies the date.
    """

    timestamp_values = (
        value.replace("`", "").strip()
        for label, value in report.metadata
        if label.lower() == "timestamp"
    )
    for candidate in timestamp_values:
        try:
            parsed = dt.datetime.fromisoformat(candidate)
        except ValueError:
            # Not ISO-parsable: fall back to the leading date-sized prefix.
            if len(candidate) >= 10:
                return candidate[:10]
            continue
        return parsed.date().isoformat()

    modified = dt.datetime.fromtimestamp(md_path.stat().st_mtime)
    return modified.date().isoformat()
118
+
119
+
120
def build_report_styles(report_name: str, report_date: str) -> str:
    """Return the shared CSS used by HTML and HTML-to-PDF rendering.

    The stylesheet is a single f-string template. ``report_name`` and
    ``report_date`` are passed through ``css_string_literal`` and used as the
    ``content`` of the ``@page`` top-left and top-right margin boxes; the
    bottom-center box shows ``page / pages`` counters. Doubled braces in the
    template are f-string escapes for literal CSS braces.

    Args:
        report_name: Text shown in the top-left page margin box.
        report_date: Text shown in the top-right page margin box.

    Returns:
        The complete stylesheet text.
    """

    return f"""
@page {{
size: A4 portrait;
margin: 1.2cm 1cm 1.4cm 1.1cm;

@top-left {{
content: {css_string_literal(report_name)};
font-family: Arial, Helvetica, sans-serif;
font-size: 9pt;
color: #222;
}}

@top-right {{
content: {css_string_literal(report_date)};
font-family: Arial, Helvetica, sans-serif;
font-size: 9pt;
color: #222;
}}

@bottom-center {{
content: counter(page) " / " counter(pages);
font-family: Arial, Helvetica, sans-serif;
font-size: 9pt;
color: #222;
}}
}}

html, body {{
font-family: Arial, Helvetica, sans-serif;
font-size: 10pt;
line-height: 1.35;
color: #222;
margin: 0;
padding: 0;
}}

body {{
background: #fff;
}}

.report-shell {{
width: 100%;
}}

h1 {{
font-size: 24pt;
font-weight: 500;
margin: 0 0 0.35cm 0;
border-bottom: 1px solid #666;
padding-bottom: 0.18cm;
}}

h2 {{
font-size: 14pt;
font-weight: 500;
margin: 0.35cm 0 0.16cm 0;
}}

h3 {{
font-size: 11pt;
font-weight: 600;
margin: 0.24cm 0 0.12cm 0;
}}

p {{
margin: 0.1cm 0 0.2cm 0;
}}

ul {{
margin: 0.1cm 0 0.35cm 0.45cm;
padding-left: 0.35cm;
}}

li {{
margin: 0.05cm 0;
}}

code {{
color: #c8a46a;
background: transparent;
padding: 0;
border-radius: 0;
font-size: 0.98em;
}}

section.file-block {{
margin: 0.22cm 0 0.34cm 0;
}}

section.file-block > :first-child {{
margin-top: 0;
}}

.section-note {{
margin: 0.1cm 0 0.24cm 0;
color: #555;
font-style: italic;
}}

.report-table-wrap {{
width: 100%;
box-sizing: border-box;
padding-right: 1cm;
margin: 0.15cm 0 0.35cm 0;
}}

.report-table {{
width: 100%;
max-width: 100%;
border-collapse: collapse;
table-layout: fixed;
margin: 0;
font-size: 9.5pt;
}}

.report-table,
.report-table th,
.report-table td,
.report-table-wrap {{
box-sizing: border-box;
}}

.report-table thead {{
display: table-header-group;
}}

.report-table thead th {{
text-align: left;
font-weight: 700;
border-bottom: 1px solid #444;
padding: 0.12cm 0.16cm 0.12cm 0.16cm;
}}

.report-table tbody td {{
vertical-align: top;
padding: 0.12cm 0.16cm;
border-bottom: 1px solid #777;
}}

.report-table .col-severity {{
width: 11%;
white-space: nowrap;
overflow-wrap: normal;
word-break: normal;
hyphens: manual;
}}

.report-table .col-count {{
width: 8%;
text-align: right;
font-variant-numeric: tabular-nums;
}}

.report-table .col-message {{
width: 29%;
}}

.report-table .col-examples {{
width: 52%;
padding-right: 0.18cm;
overflow-wrap: anywhere;
word-break: break-word;
hyphens: auto;
white-space: normal;
}}

.report-table td,
.report-table th {{
overflow-wrap: break-word;
word-break: normal;
hyphens: auto;
}}

.report-table tr,
.report-table td,
.report-table th {{
break-inside: avoid;
page-break-inside: avoid;
}}

.examples {{
width: 100%;
max-width: 100%;
line-height: 1.22;
}}

.examples > div {{
display: block;
width: 100%;
max-width: 100%;
overflow-wrap: anywhere;
word-break: break-word;
white-space: normal;
}}

.examples > div + div {{
margin-top: 0.06cm;
}}

@media screen {{
body {{
margin: 1rem auto;
max-width: 1180px;
padding: 0 1rem 2rem;
}}
}}
"""
330
+
331
+
332
def render_report_html_document(report: ReportDocument, report_name: str, report_date: str) -> str:
    """Assemble a complete standalone HTML page for a structured report.

    The page embeds the shared stylesheet in a ``<style>`` element and
    contains, in order, the report title as ``<h1>``, the report-level
    metadata list, and every rendered section.
    """

    sections_html = "".join(render_report_section_html(section) for section in report.sections)
    meta_html = render_metadata_list_html(report.metadata, "report-meta")
    styles = build_report_styles(report_name, report_date)

    # The <title> takes escaped plain text; the heading supports inline markdown.
    page_title = html_lib.escape(report.title, quote=False)
    heading_html = render_inline_markdown_html(report.title)

    return f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>{page_title}</title>
<style>
{styles}
</style>
</head>
<body>
<main class="report-shell">
<h1>{heading_html}</h1>
{meta_html}
{sections_html}
</main>
</body>
</html>
"""
360
+
361
+
362
def render_report_section_html(section: ReportSection) -> str:
    """Produce the ``<section>`` markup for one report section.

    Sections whose title starts with "overall summary" (case-insensitive) get
    the extra ``summary-block`` class. Metadata, subtitle, note and table are
    emitted in that order, and only when present.
    """

    is_summary = section.title.lower().startswith("overall summary")
    css_classes = "file-block summary-block" if is_summary else "file-block"

    fragments = [
        f'<section class="{css_classes}">',
        f"<h2>{render_inline_markdown_html(section.title)}</h2>",
    ]

    meta_block = render_metadata_list_html(section.metadata, "file-meta")
    if meta_block:
        fragments.append(meta_block)
    if section.subtitle:
        fragments.append(f"<h3>{render_inline_markdown_html(section.subtitle)}</h3>")
    if section.note:
        fragments.append(f'<p class="section-note">{render_inline_markdown_html(section.note)}</p>')
    if section.table:
        fragments.append(render_report_table_html(section.table))

    fragments.append("</section>")
    return "".join(fragments)