PyPI - nm-tool-forge - Versions diffs - 0.1.0__py3-none-any.whl - Mend

nm-tool-forge 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23) hide show

loganalysis/__init__.py +16 -0
loganalysis/__main__.py +5 -0
loganalysis/analysis.py +175 -0
loganalysis/cli.py +88 -0
loganalysis/constants.py +126 -0
loganalysis/converters.py +150 -0
loganalysis/csv_export.py +18 -0
loganalysis/encoding.py +22 -0
loganalysis/filesystem.py +26 -0
loganalysis/models.py +63 -0
loganalysis/normalization.py +97 -0
loganalysis/parsing.py +69 -0
loganalysis/report_html.py +378 -0
loganalysis/report_markdown.py +209 -0
loganalysis/report_models.py +31 -0
loganalysis/report_pdf.py +74 -0
loganalysis/selftest.py +58 -0
nm_tool_forge-0.1.0.dist-info/METADATA +198 -0
nm_tool_forge-0.1.0.dist-info/RECORD +23 -0
nm_tool_forge-0.1.0.dist-info/WHEEL +5 -0
nm_tool_forge-0.1.0.dist-info/entry_points.txt +3 -0
nm_tool_forge-0.1.0.dist-info/licenses/LICENSE +21 -0
nm_tool_forge-0.1.0.dist-info/top_level.txt +1 -0

loganalysis/models.py ADDED Viewed

@@ -0,0 +1,63 @@
+from __future__ import annotations
+from collections import Counter
+from dataclasses import dataclass, field
+from pathlib import Path
+MessageKey = tuple[str, str]
+@dataclass(frozen=True)
+class ParsedLine:
+    """Structured representation of a parsed logical log entry.
+
+    Instances are immutable (``frozen=True``).
+    """
+    # Severity label attached to the entry.
+    severity: str
+    # Message text of the entry.
+    message: str
+@dataclass
+class FileAnalysis:
+    """Aggregated analysis results for a single log file."""
+    # Path of the log file these results belong to.
+    file: Path
+    # Presumably the number of physical lines read -- TODO confirm in analysis.py.
+    total_lines: int
+    # Presumably the number of logical entries parsed -- TODO confirm in analysis.py.
+    total_entries: int
+    # Presumably entries that could not be classified -- TODO confirm in analysis.py.
+    unknown_lines: int
+    # Occurrence counts keyed by MessageKey (a pair of strings) before normalization.
+    raw_counts: Counter[MessageKey]
+    # Occurrence counts keyed by MessageKey after normalization.
+    norm_counts: Counter[MessageKey]
+    # Per normalized key, a counter of example strings
+    # (presumably the raw messages that collapsed into it -- TODO confirm).
+    norm_examples: dict[MessageKey, Counter[str]]
+    # Optional path of a backup copy; None when no backup path was recorded.
+    backup_path: Path | None = None
+@dataclass
+class AnalysisSummary:
+    """Combined analysis data across all processed files.
+
+    Every field defaults to a fresh empty container, so an instance can be
+    created without arguments and filled incrementally.
+    """
+    # Per-file results, one FileAnalysis per processed file.
+    analyses: list[FileAnalysis] = field(default_factory=list)
+    # Counts keyed by MessageKey before normalization, across all files.
+    global_raw: Counter[MessageKey] = field(default_factory=Counter)
+    # Counts keyed by MessageKey after normalization, across all files.
+    global_norm: Counter[MessageKey] = field(default_factory=Counter)
+    # Per normalized key, a counter of example strings across all files.
+    global_norm_examples: dict[MessageKey, Counter[str]] = field(default_factory=dict)
+@dataclass(frozen=True)
+class AnalysisConfig:
+    """Configuration for a full analysis run.
+
+    Instances are immutable (``frozen=True``).
+    """
+    # Directory containing the log files to analyse.
+    logs_dir: Path
+    # Directory that receives the generated output.
+    out_dir: Path
+    # Optional backup directory; None by default
+    # (presumably a default location is chosen by the runner -- TODO confirm).
+    backup_dir: Path | None = None
+    # Defaults to 3; presumably the number of examples shown per message -- TODO confirm.
+    top_examples: int = 3
+    # Defaults to False; presumably enables report conversion (HTML/PDF) -- TODO confirm.
+    convert: bool = False
+@dataclass
+class AnalysisRunResult:
+    """Paths and conversion results produced by a full analysis run."""
+    # Output directory used by the run.
+    out_dir: Path
+    # Backup directory used by the run (always set here, unlike AnalysisConfig.backup_dir).
+    backup_dir: Path
+    # Path of the generated report file.
+    report_path: Path
+    # Aggregated analysis data for the run.
+    summary: AnalysisSummary
+    # Path of the HTML rendering, or None when none was recorded.
+    html_path: Path | None = None
+    # Path of the PDF rendering, or None when none was recorded.
+    pdf_path: Path | None = None
+    # Free-form conversion status values keyed by string; empty by default.
+    convert_status: dict[str, object] = field(default_factory=dict)

loganalysis/normalization.py ADDED Viewed

@@ -0,0 +1,97 @@
+from __future__ import annotations
+import re
+from .constants import (
+    RE_GUID,
+    RE_INT,
+    RE_LOOKUP_ASSIGNMENT,
+    RE_QUOTED_VALUE,
+    RE_SEMANTIC_VALUE_CHAR,
+    RE_UNIX_PATH,
+    RE_VALIDATE_QUOTED_KEY,
+    RE_WHITESPACE,
+    RE_WINDOWS_PATH,
+)
+def classify_value_fragment(value: str) -> str:
+    """Classify a matched value fragment as meaningful or empty."""
+    stripped = (value or "").strip()
+    if not stripped:
+        return "<EMPTY>"
+    if not RE_SEMANTIC_VALUE_CHAR.search(stripped):
+        return "<EMPTY>"
+    return "<VALUE>"
+def normalize_lookup_assignment_message(message: str) -> str:
+    """Normalize value-bearing lookup error messages."""
+    def replace(match: re.Match[str]) -> str:
+        value_token = classify_value_fragment(match.group("value"))
+        table_name = match.group("table")
+        return f'{match.group("head")}{value_token} The record was not found in table "{table_name}".'
+    return RE_LOOKUP_ASSIGNMENT.sub(replace, message)
+def normalize_validate_key_message(message: str) -> str:
+    """Normalize Validate... {Key} 'value' style messages."""
+    def replace(match: re.Match[str]) -> str:
+        value_token = classify_value_fragment(match.group("value"))
+        return f'{match.group("head")}{value_token}{match.group("tail")}'
+    return RE_VALIDATE_QUOTED_KEY.sub(replace, message)
+def semantic_normalize_message(message: str) -> str:
+    """Apply semantic normalization rules before generic token normalization."""
+    normalized = message.strip()
+    normalized = normalize_lookup_assignment_message(normalized)
+    normalized = normalize_validate_key_message(normalized)
+    return normalized
+def normalize_generic_quoted_values(message: str) -> str:
+    """Remove quoted values that are no longer semantically relevant."""
+    def replace(match: re.Match[str]) -> str:
+        matched = match.group(0).strip()
+        inner = matched[1:-1] if len(matched) >= 2 else ""
+        if inner in {"<VALUE>", "<EMPTY>"}:
+            return match.group(0)
+        return ""
+    return RE_QUOTED_VALUE.sub(replace, message)
+def cleanup_normalized_message(message: str) -> str:
+    """Tidy whitespace and punctuation artifacts after normalization."""
+    normalized = RE_WHITESPACE.sub(" ", message).strip()
+    normalized = re.sub(r"\s+:", ":", normalized)
+    normalized = re.sub(r"\s+,", ",", normalized)
+    normalized = re.sub(r"\s+\.", ".", normalized)
+    normalized = re.sub(r"\s+;", ";", normalized)
+    normalized = re.sub(r"\s+\)", ")", normalized)
+    normalized = re.sub(r"\(\s+", "(", normalized)
+    normalized = re.sub(r"\s{2,}", " ", normalized)
+    return normalized.strip()
+def normalize_message(message: str) -> str:
+    """Normalize a log message for aggregation."""
+    normalized = message.strip()
+    normalized = semantic_normalize_message(normalized)
+    normalized = RE_GUID.sub("<GUID>", normalized)
+    normalized = RE_WINDOWS_PATH.sub("<PATH>", normalized)
+    normalized = RE_UNIX_PATH.sub("<PATH>", normalized)
+    normalized = normalize_generic_quoted_values(normalized)
+    normalized = RE_INT.sub("<N>", normalized)
+    normalized = cleanup_normalized_message(normalized)
+    return normalized if normalized else "(no message)"

loganalysis/parsing.py ADDED Viewed

@@ -0,0 +1,69 @@
+from __future__ import annotations
+from pathlib import Path
+from typing import Iterable
+from .constants import RE_ENTRY_START, RE_LINE_PREFIX, RE_TRAILING_DATASET, RE_WHITESPACE, SEVERITY_ALIASES
+from .encoding import detect_encoding
+from .models import ParsedLine
+def canonical_severity(raw: str) -> str:
+    """Map a raw severity token to the canonical output severity."""
+    severity = (raw or "").strip().upper()
+    return SEVERITY_ALIASES.get(severity, severity if severity else "UNKNOWN")
+def is_entry_start(line: str) -> bool:
+    """Return whether a physical line starts a new logical log entry."""
+    return bool(RE_ENTRY_START.match(line))
+def iter_logical_entries(file_path: Path, encoding: str | None = None) -> Iterable[str]:
+    """Yield logical log entries assembled from one or more physical lines."""
+    current_lines: list[str] = []
+    file_encoding = encoding or detect_encoding(file_path)
+    with file_path.open("r", encoding=file_encoding, errors="replace") as file_handle:
+        for line in file_handle:
+            if is_entry_start(line):
+                if current_lines:
+                    yield "".join(current_lines)
+                current_lines = [line]
+                continue
+            if current_lines:
+                current_lines.append(line)
+    if current_lines:
+        yield "".join(current_lines)
+def parse_entry(entry: str) -> ParsedLine | None:
+    """Parse one logical entry into severity and normalized message text."""
+    stripped = entry.strip()
+    if not stripped:
+        return None
+    match = RE_ENTRY_START.match(stripped)
+    if not match:
+        message = RE_WHITESPACE.sub(" ", stripped).strip()
+        return ParsedLine(severity="UNKNOWN", message=message or "(no message)")
+    severity = canonical_severity(match.group("severity") or "")
+    message = stripped[match.end():].strip()
+    message = RE_LINE_PREFIX.sub("", message, count=1)
+    message = RE_WHITESPACE.sub(" ", message).strip()
+    message = RE_TRAILING_DATASET.sub("", message).strip()
+    message = RE_WHITESPACE.sub(" ", message).strip()
+    return ParsedLine(severity=severity, message=message or "(no message)")
+def parse_line(line: str) -> ParsedLine | None:
+    """Backward-compatible alias for parsing a logical entry."""
+    return parse_entry(line)

loganalysis/report_html.py ADDED Viewed

@@ -0,0 +1,378 @@
+from __future__ import annotations
+import datetime as dt
+import html as html_lib
+from pathlib import Path
+from .constants import RE_HTML_BREAK, RE_INLINE_CODE_SPAN, RE_LEADING_TIMESTAMP_PLACEHOLDER
+from .report_models import ReportDocument, ReportSection, ReportTable
+def sanitize_html_text(text: str) -> str:
+    """Remove HTML-only placeholder fragments before rendering."""
+    return RE_LEADING_TIMESTAMP_PLACEHOLDER.sub("", text).strip()
+def render_inline_markdown_html(text: str) -> str:
+    """Render the supported inline markdown subset to HTML."""
+    parts = RE_INLINE_CODE_SPAN.split(sanitize_html_text(text))
+    rendered_parts: list[str] = []
+    for part in parts:
+        if not part:
+            continue
+        if part.startswith("`") and part.endswith("`"):
+            rendered_parts.append(f"<code>{html_lib.escape(part[1:-1], quote=False)}</code>")
+        else:
+            rendered_parts.append(html_lib.escape(part, quote=False))
+    return "".join(rendered_parts)
+def render_metadata_list_html(items: tuple[tuple[str, str], ...], css_class: str) -> str:
+    """Render report metadata entries as a compact bullet list."""
+    if not items:
+        return ""
+    rendered_items: list[str] = []
+    for label, value in items:
+        label_html = html_lib.escape(label, quote=False)
+        value_html = render_inline_markdown_html(value)
+        separator = ": " if value else ""
+        rendered_items.append(f"<li><span class=\"meta-label\">{label_html}</span>{separator}{value_html}</li>")
+    return f"<ul class=\"{css_class}\">{''.join(rendered_items)}</ul>"
+def render_examples_html(cell_text: str) -> str:
+    """Render `<br>`-separated example values as stacked HTML blocks."""
+    examples = [item.strip() for item in RE_HTML_BREAK.split(cell_text) if item.strip()]
+    if not examples:
+        return ""
+    rendered = "".join(f"<div>{render_inline_markdown_html(example)}</div>" for example in examples)
+    return f"<div class=\"examples\">{rendered}</div>"
+def report_table_column_class(index: int) -> str:
+    """Return the semantic CSS class for a report table column."""
+    column_classes = ("col-severity", "col-count", "col-message", "col-examples")
+    if 0 <= index < len(column_classes):
+        return column_classes[index]
+    return f"col-{index + 1}"
+def render_report_table_html(table: ReportTable) -> str:
+    """Render one report table to HTML."""
+    thead_cells = "".join(
+        f"<th class=\"{report_table_column_class(index)}\">{html_lib.escape(header, quote=False)}</th>"
+        for index, header in enumerate(table.headers)
+    )
+    body_rows: list[str] = []
+    for row in table.rows:
+        cells: list[str] = []
+        for index, cell in enumerate(row):
+            header = table.headers[index].lower() if index < len(table.headers) else ""
+            if header == "examples":
+                cell_html = render_examples_html(cell)
+            else:
+                cell_html = render_inline_markdown_html(cell)
+            cells.append(f"<td class=\"{report_table_column_class(index)}\">{cell_html}</td>")
+        body_rows.append(f"<tr>{''.join(cells)}</tr>")
+    return (
+        "<div class=\"report-table-wrap\">"
+        "<table class=\"report-table\">"
+        f"<thead><tr>{thead_cells}</tr></thead>"
+        f"<tbody>{''.join(body_rows)}</tbody>"
+        "</table>"
+        "</div>"
+    )
+def css_string_literal(text: str) -> str:
+    """Escape plain text for use in CSS string literals."""
+    escaped = text.replace("\\", r"\\").replace('"', r"\"").replace("\n", r"\A ")
+    return f'"{escaped}"'
+def resolve_report_date(report: ReportDocument, md_path: Path) -> str:
+    """Resolve the report date shown in the page header."""
+    for label, value in report.metadata:
+        if label.lower() != "timestamp":
+            continue
+        raw_value = value.replace("`", "").strip()
+        try:
+            return dt.datetime.fromisoformat(raw_value).date().isoformat()
+        except ValueError:
+            if len(raw_value) >= 10:
+                return raw_value[:10]
+    return dt.datetime.fromtimestamp(md_path.stat().st_mtime).date().isoformat()
+def build_report_styles(report_name: str, report_date: str) -> str:
+    """Return the shared CSS used by HTML and HTML-to-PDF rendering.
+
+    ``report_name`` and ``report_date`` are passed through
+    ``css_string_literal`` and used as the ``content`` of the ``@page``
+    top-left and top-right margin boxes; the bottom-center box shows the
+    page counter. The rest of the stylesheet is static: base typography,
+    headings, the report table with its per-column classes, the stacked
+    examples blocks, and a screen-only body width limit.
+    """
+    return f"""
+    @page {{
+        size: A4 portrait;
+        margin: 1.2cm 1cm 1.4cm 1.1cm;
+        @top-left {{
+            content: {css_string_literal(report_name)};
+            font-family: Arial, Helvetica, sans-serif;
+            font-size: 9pt;
+            color: #222;
+        }}
+        @top-right {{
+            content: {css_string_literal(report_date)};
+            font-family: Arial, Helvetica, sans-serif;
+            font-size: 9pt;
+            color: #222;
+        }}
+        @bottom-center {{
+            content: counter(page) " / " counter(pages);
+            font-family: Arial, Helvetica, sans-serif;
+            font-size: 9pt;
+            color: #222;
+        }}
+    }}
+    html, body {{
+        font-family: Arial, Helvetica, sans-serif;
+        font-size: 10pt;
+        line-height: 1.35;
+        color: #222;
+        margin: 0;
+        padding: 0;
+    }}
+    body {{
+        background: #fff;
+    }}
+    .report-shell {{
+        width: 100%;
+    }}
+    h1 {{
+        font-size: 24pt;
+        font-weight: 500;
+        margin: 0 0 0.35cm 0;
+        border-bottom: 1px solid #666;
+        padding-bottom: 0.18cm;
+    }}
+    h2 {{
+        font-size: 14pt;
+        font-weight: 500;
+        margin: 0.35cm 0 0.16cm 0;
+    }}
+    h3 {{
+        font-size: 11pt;
+        font-weight: 600;
+        margin: 0.24cm 0 0.12cm 0;
+    }}
+    p {{
+        margin: 0.1cm 0 0.2cm 0;
+    }}
+    ul {{
+        margin: 0.1cm 0 0.35cm 0.45cm;
+        padding-left: 0.35cm;
+    }}
+    li {{
+        margin: 0.05cm 0;
+    }}
+    code {{
+        color: #c8a46a;
+        background: transparent;
+        padding: 0;
+        border-radius: 0;
+        font-size: 0.98em;
+    }}
+    section.file-block {{
+        margin: 0.22cm 0 0.34cm 0;
+    }}
+    section.file-block > :first-child {{
+        margin-top: 0;
+    }}
+    .section-note {{
+        margin: 0.1cm 0 0.24cm 0;
+        color: #555;
+        font-style: italic;
+    }}
+    .report-table-wrap {{
+        width: 100%;
+        box-sizing: border-box;
+        padding-right: 1cm;
+        margin: 0.15cm 0 0.35cm 0;
+    }}
+    .report-table {{
+        width: 100%;
+        max-width: 100%;
+        border-collapse: collapse;
+        table-layout: fixed;
+        margin: 0;
+        font-size: 9.5pt;
+    }}
+    .report-table,
+    .report-table th,
+    .report-table td,
+    .report-table-wrap {{
+        box-sizing: border-box;
+    }}
+    .report-table thead {{
+        display: table-header-group;
+    }}
+    .report-table thead th {{
+        text-align: left;
+        font-weight: 700;
+        border-bottom: 1px solid #444;
+        padding: 0.12cm 0.16cm 0.12cm 0.16cm;
+    }}
+    .report-table tbody td {{
+        vertical-align: top;
+        padding: 0.12cm 0.16cm;
+        border-bottom: 1px solid #777;
+    }}
+    .report-table .col-severity {{
+        width: 11%;
+        white-space: nowrap;
+        overflow-wrap: normal;
+        word-break: normal;
+        hyphens: manual;
+    }}
+    .report-table .col-count {{
+        width: 8%;
+        text-align: right;
+        font-variant-numeric: tabular-nums;
+    }}
+    .report-table .col-message {{
+        width: 29%;
+    }}
+    .report-table .col-examples {{
+        width: 52%;
+        padding-right: 0.18cm;
+        overflow-wrap: anywhere;
+        word-break: break-word;
+        hyphens: auto;
+        white-space: normal;
+    }}
+    .report-table td,
+    .report-table th {{
+        overflow-wrap: break-word;
+        word-break: normal;
+        hyphens: auto;
+    }}
+    .report-table tr,
+    .report-table td,
+    .report-table th {{
+        break-inside: avoid;
+        page-break-inside: avoid;
+    }}
+    .examples {{
+        width: 100%;
+        max-width: 100%;
+        line-height: 1.22;
+    }}
+    .examples > div {{
+        display: block;
+        width: 100%;
+        max-width: 100%;
+        overflow-wrap: anywhere;
+        word-break: break-word;
+        white-space: normal;
+    }}
+    .examples > div + div {{
+        margin-top: 0.06cm;
+    }}
+    @media screen {{
+        body {{
+            margin: 1rem auto;
+            max-width: 1180px;
+            padding: 0 1rem 2rem;
+        }}
+    }}
+    """
+def render_report_html_document(report: ReportDocument, report_name: str, report_date: str) -> str:
+    """Render a structured report document to a full standalone HTML document."""
+    section_html_parts: list[str] = []
+    for section in report.sections:
+        section_html_parts.append(render_report_section_html(section))
+    meta_html = render_metadata_list_html(report.metadata, "report-meta")
+    styles = build_report_styles(report_name, report_date)
+    return f"""<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>{html_lib.escape(report.title, quote=False)}</title>
+    <style>
+{styles}
+    </style>
+</head>
+<body>
+    <main class="report-shell">
+        <h1>{render_inline_markdown_html(report.title)}</h1>
+        {meta_html}
+        {''.join(section_html_parts)}
+    </main>
+</body>
+</html>
+"""
+def render_report_section_html(section: ReportSection) -> str:
+    """Render one report section to HTML."""
+    section_class = "file-block summary-block" if section.title.lower().startswith("overall summary") else "file-block"
+    parts = [f"<section class=\"{section_class}\">"]
+    parts.append(f"<h2>{render_inline_markdown_html(section.title)}</h2>")
+    metadata_html = render_metadata_list_html(section.metadata, "file-meta")
+    if metadata_html:
+        parts.append(metadata_html)
+    if section.subtitle:
+        parts.append(f"<h3>{render_inline_markdown_html(section.subtitle)}</h3>")
+    if section.note:
+        parts.append(f"<p class=\"section-note\">{render_inline_markdown_html(section.note)}</p>")
+    if section.table:
+        parts.append(render_report_table_html(section.table))
+    parts.append("</section>")
+    return "".join(parts)