PyPI - log-lens-cli - Versions diffs - 1.0.0__py3-none-any.whl - Mend

log-lens-cli 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

log_lens/__init__.py +3 -0
log_lens/__main__.py +6 -0
log_lens/analyzers/__init__.py +0 -0
log_lens/analyzers/anomaly.py +107 -0
log_lens/analyzers/core.py +73 -0
log_lens/analyzers/errors.py +96 -0
log_lens/analyzers/http.py +66 -0
log_lens/cli.py +249 -0
log_lens/demo.py +180 -0
log_lens/models.py +360 -0
log_lens/output/__init__.py +0 -0
log_lens/output/console.py +258 -0
log_lens/output/html_report.py +287 -0
log_lens/parsers/__init__.py +0 -0
log_lens/parsers/apache.py +118 -0
log_lens/parsers/auto.py +157 -0
log_lens/parsers/common.py +127 -0
log_lens/parsers/json_parser.py +135 -0
log_lens/parsers/syslog_parser.py +100 -0
log_lens/renderers/__init__.py +0 -0
log_lens/renderers/charts.py +93 -0
log_lens_cli-1.0.0.dist-info/METADATA +335 -0
log_lens_cli-1.0.0.dist-info/RECORD +26 -0
log_lens_cli-1.0.0.dist-info/WHEEL +4 -0
log_lens_cli-1.0.0.dist-info/entry_points.txt +2 -0
log_lens_cli-1.0.0.dist-info/licenses/LICENSE +21 -0

log_lens/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+"""log-lens — Application Log Analyzer CLI."""
+# Package version string; cli.py imports it and passes it to click's --version option.
+__version__ = "1.0.0"

log_lens/__main__.py ADDED Viewed

@@ -0,0 +1,6 @@
+"""Allow running with python -m log_lens."""
+from log_lens.cli import cli
+# Run the click group only when executed (python -m log_lens), never on plain import.
+if __name__ == "__main__":
+    cli()

log_lens/analyzers/__init__.py ADDED Viewed

File without changes

log_lens/analyzers/anomaly.py ADDED Viewed

@@ -0,0 +1,107 @@
+"""Anomaly / spike detection in log data."""
+from __future__ import annotations
+import math
+from ..models import Anomaly, LogEntry
+def detect_anomalies(
+    entries: list[LogEntry],
+    events_per_hour: dict[str, int],
+    z_threshold: float = 2.0,
+) -> list[Anomaly]:
+    """Flag hourly buckets whose volume or error rate is a statistical outlier.
+    Two independent checks are run, both based on a z-score against the
+    mean/standard deviation of all hourly buckets:
+    1. Volume spikes — the hour's event count is more than ``z_threshold``
+       standard deviations above the mean count.
+    2. Error-rate spikes — the hour's error percentage is more than
+       ``z_threshold`` standard deviations above the mean and above 1%.
+    Returns the anomalies ordered critical → high → medium → low. An empty
+    list is returned when fewer than three hourly buckets exist.
+    """
+    found: list[Anomaly] = []
+    # Fewer than three buckets gives no usable baseline.
+    if len(events_per_hour) < 3:
+        return found
+    # --- Volume spikes ---
+    counts = list(events_per_hour.values())
+    avg_count = sum(counts) / len(counts)
+    count_sd = _stddev(counts, avg_count)
+    if count_sd > 0:
+        for bucket, n_events in events_per_hour.items():
+            z = (n_events - avg_count) / count_sd
+            if not z > z_threshold:
+                continue
+            found.append(Anomaly(
+                timestamp=bucket,
+                description=f"Volume spike: {n_events} events (avg {avg_count:.0f})",
+                severity=_z_to_severity(z),
+                metric="volume",
+                value=float(n_events),
+                baseline=avg_count,
+            ))
+    # --- Error-rate spikes ---
+    hourly_errors = _error_counts_per_hour(entries)
+    # Needs at least three hours that contain errors.
+    if len(hourly_errors) >= 3:
+        pct_by_hour: dict[str, float] = {
+            bucket: hourly_errors.get(bucket, 0) / n_events * 100
+            for bucket, n_events in events_per_hour.items()
+            if n_events > 0
+        }
+        if pct_by_hour:
+            pcts = list(pct_by_hour.values())
+            avg_pct = sum(pcts) / len(pcts)
+            pct_sd = _stddev(pcts, avg_pct)
+            if pct_sd > 0:
+                for bucket, pct in pct_by_hour.items():
+                    z = (pct - avg_pct) / pct_sd
+                    # Ignore outliers whose absolute error rate is 1% or less.
+                    if z > z_threshold and pct > 1.0:
+                        found.append(Anomaly(
+                            timestamp=bucket,
+                            description=f"Error rate spike: {pct:.1f}% (avg {avg_pct:.1f}%)",
+                            severity=_z_to_severity(z),
+                            metric="error_rate",
+                            value=pct,
+                            baseline=avg_pct,
+                        ))
+    # Most severe first; unknown severities sort last. The sort is stable.
+    rank = {name: position for position, name in enumerate(("critical", "high", "medium", "low"))}
+    found.sort(key=lambda item: rank.get(item.severity, 4))
+    return found
+def _error_counts_per_hour(entries: list[LogEntry]) -> dict[str, int]:
+    """Tally error entries into ``YYYY-MM-DD HH:00`` hourly buckets.
+    Entries without a timestamp, or whose ``is_error`` is falsy, are ignored.
+    """
+    tally: dict[str, int] = {}
+    for entry in entries:
+        if not (entry.timestamp and entry.is_error):
+            continue
+        bucket = entry.timestamp.strftime("%Y-%m-%d %H:00")
+        tally[bucket] = tally.get(bucket, 0) + 1
+    return tally
+def _stddev(values: list[float | int], mean: float) -> float:
+    """Return the population standard deviation of ``values`` around ``mean``.
+    The variance is divided by ``len(values)`` (not ``n - 1``). Fewer than
+    two values yields 0.0.
+    """
+    n = len(values)
+    if n < 2:
+        return 0.0
+    squared_deviations = ((v - mean) ** 2 for v in values)
+    return math.sqrt(sum(squared_deviations) / n)
+def _z_to_severity(z: float) -> str:
+    """Translate a z-score into a severity label.
+    Thresholds are exclusive: above 4.0 is "critical", above 3.0 is "high",
+    above 2.5 is "medium", anything else is "low".
+    """
+    for floor, label in ((4.0, "critical"), (3.0, "high"), (2.5, "medium")):
+        if z > floor:
+            return label
+    return "low"

log_lens/analyzers/core.py ADDED Viewed

@@ -0,0 +1,73 @@
+"""Core analyzer — orchestrates all analysis on parsed log entries."""
+from __future__ import annotations
+from ..models import LogAnalysis, LogEntry, LogFormat
+from .anomaly import detect_anomalies
+from .errors import cluster_errors
+from .http import analyze_http
+def analyze(
+    entries: list[LogEntry],
+    format_detected: LogFormat = LogFormat.UNKNOWN,
+    total_lines: int = 0,
+    failed_lines: int = 0,
+    source_files: list[str] | None = None,
+) -> LogAnalysis:
+    """Build a LogAnalysis from parsed entries by running every analyzer.
+    ``total_lines`` falls back to ``len(entries)`` when it is 0. With no
+    entries, only the bookkeeping fields are filled in and the result is
+    returned immediately.
+    """
+    report = LogAnalysis(
+        total_lines=total_lines or len(entries),
+        parsed_lines=len(entries),
+        failed_lines=failed_lines,
+        format_detected=format_detected,
+        source_files=source_files or [],
+    )
+    if not entries:
+        return report
+    # Time range, taken only from entries that carry a timestamp.
+    stamps = [entry.timestamp for entry in entries if entry.timestamp]
+    if stamps:
+        report.time_start = min(stamps)
+        report.time_end = max(stamps)
+    # Level distribution and hourly timeline.
+    report.level_counts = _count_levels(entries)
+    report.events_per_hour = _events_per_hour(entries)
+    # Error clustering.
+    report.error_clusters = cluster_errors([entry for entry in entries if entry.is_error])
+    # HTTP statistics, only when some entry has a truthy "status" field.
+    with_status = [entry for entry in entries if entry.fields.get("status")]
+    if with_status:
+        report.http_stats = analyze_http(with_status)
+    # Anomaly detection reuses the hourly timeline computed above.
+    report.anomalies = detect_anomalies(entries, report.events_per_hour)
+    return report
+def _count_levels(entries: list[LogEntry]) -> dict[str, int]:
+    """Return how many entries were logged at each level.
+    Keys are the ``.value`` of each entry's level, in first-seen order.
+    """
+    tally: dict[str, int] = {}
+    for entry in entries:
+        name = entry.level.value
+        tally[name] = tally.get(name, 0) + 1
+    return tally
+def _events_per_hour(entries: list[LogEntry]) -> dict[str, int]:
+    """Count entries per ``YYYY-MM-DD HH:00`` bucket, sorted by bucket key.
+    Entries without a timestamp are skipped.
+    """
+    tally: dict[str, int] = {}
+    for entry in entries:
+        if not entry.timestamp:
+            continue
+        bucket = entry.timestamp.strftime("%Y-%m-%d %H:00")
+        tally[bucket] = tally.get(bucket, 0) + 1
+    # The key format sorts chronologically when sorted as plain strings.
+    return {bucket: tally[bucket] for bucket in sorted(tally)}

log_lens/analyzers/errors.py ADDED Viewed

@@ -0,0 +1,96 @@
+"""Error clustering — groups similar error messages into patterns."""
+from __future__ import annotations
+import re
+from ..models import ErrorCluster, LogEntry
+def cluster_errors(error_entries: list[LogEntry], max_clusters: int = 25) -> list[ErrorCluster]:
+    """Bucket error entries by their normalized message pattern.
+    Each message is passed through ``normalize_message``; entries sharing
+    the resulting pattern form one cluster. A cluster records its count,
+    the level of the first entry seen, the earliest and latest timestamps,
+    and up to three raw sample messages. The ``max_clusters`` largest
+    clusters are returned, biggest first.
+    """
+    if not error_entries:
+        return []
+    by_pattern: dict[str, ErrorCluster] = {}
+    for item in error_entries:
+        signature = normalize_message(item.message)
+        group = by_pattern.get(signature)
+        if group is None:
+            group = ErrorCluster(
+                pattern=signature,
+                count=0,
+                level=item.level,
+                first_seen=item.timestamp,
+                last_seen=item.timestamp,
+                samples=[],
+            )
+            by_pattern[signature] = group
+        group.count += 1
+        seen_at = item.timestamp
+        if seen_at:
+            # Widen the cluster's time window to include this entry.
+            if group.first_seen is None or seen_at < group.first_seen:
+                group.first_seen = seen_at
+            if group.last_seen is None or seen_at > group.last_seen:
+                group.last_seen = seen_at
+        # Keep at most three raw messages as examples.
+        if len(group.samples) < 3:
+            group.samples.append(item.message)
+    ranked = list(by_pattern.values())
+    ranked.sort(key=lambda group: group.count, reverse=True)
+    return ranked[:max_clusters]
+# Ordered (compiled pattern, placeholder) rewrite rules for normalize_message.
+# Order matters: UUIDs and timestamps must be rewritten before the generic
+# hex-hash and number rules, otherwise their digit runs get eaten piecemeal
+# (e.g. a nanosecond fraction ".123456789" would become ".<HASH>").
+_NORMALIZE_RULES = (
+    # UUIDs: 8-4-4-4-12 hex
+    (
+        re.compile(
+            r'[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}'
+        ),
+        '<UUID>',
+    ),
+    # ISO timestamps, optional fractional seconds and UTC offset
+    (
+        re.compile(
+            r'\d{4}-\d{2}-\d{2}[T ]\d{2}:\d{2}:\d{2}(?:\.\d+)?(?:Z|[+-]\d{2}:?\d{2})?'
+        ),
+        '<TS>',
+    ),
+    # IP addresses (v4)
+    (re.compile(r'\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b'), '<IP>'),
+    # Hex hashes (>= 8 chars)
+    (re.compile(r'\b[0-9a-fA-F]{8,}\b'), '<HASH>'),
+    # Any standalone number of 3+ digits (this includes port numbers)
+    (re.compile(r'\b\d{3,}\b'), '<N>'),
+    # File paths (unix style)
+    (re.compile(r'/[\w./-]+'), '<PATH>'),
+)
+_WHITESPACE_RUN = re.compile(r'\s+')
+def normalize_message(msg: str) -> str:
+    """Normalize an error message so recurring patterns compare equal.
+    Variable parts are replaced with placeholders, in this order:
+      - UUIDs → <UUID>
+      - ISO timestamps → <TS>
+      - IPv4 addresses → <IP>
+      - Hex runs of 8+ characters → <HASH>
+      - Numbers of 3+ digits → <N>
+      - Unix-style file paths → <PATH>
+    Runs of whitespace are then collapsed to a single space and the result
+    is stripped. Quoted strings are left untouched. An empty message
+    yields an empty string.
+    """
+    if not msg:
+        return ""
+    result = msg
+    for pattern, placeholder in _NORMALIZE_RULES:
+        result = pattern.sub(placeholder, result)
+    return _WHITESPACE_RUN.sub(' ', result).strip()

log_lens/analyzers/http.py ADDED Viewed

@@ -0,0 +1,66 @@
+"""HTTP-specific log analysis — status codes, latency, endpoints."""
+from __future__ import annotations
+from ..models import HttpStats, LogEntry
+def analyze_http(entries: list[LogEntry]) -> HttpStats:
+    """Aggregate HTTP statistics from log entries that carry a status code.
+    For every entry whose ``fields["status"]`` is an integer (or converts
+    to one), this updates the request total, the per-status and per-method
+    counters, the per-endpoint counter (using the normalized path) and the
+    list of latencies in milliseconds.
+    Entries with a missing or non-numeric status are skipped and not
+    counted as requests. An unparseable ``duration_ms`` is ignored while
+    the rest of the entry is still counted.
+    """
+    stats = HttpStats()
+    for entry in entries:
+        status = entry.fields.get("status")
+        if status is None:
+            continue
+        # "status" is not always an HTTP code (e.g. "ok" in a JSON app log).
+        # Skip such entries instead of letting int() abort the whole run,
+        # and do it before counting so the totals stay consistent.
+        try:
+            status_int = int(status)
+        except (ValueError, TypeError):
+            continue
+        stats.total_requests += 1
+        # Status codes
+        stats.status_codes[status_int] = stats.status_codes.get(status_int, 0) + 1
+        # Methods
+        method = entry.fields.get("method", "UNKNOWN")
+        stats.methods[method] = stats.methods.get(method, 0) + 1
+        # Endpoints
+        path = entry.fields.get("path", "")
+        if path:
+            # Normalize path: remove query params and collapse IDs
+            normalized = _normalize_path(path)
+            stats.endpoints[normalized] = stats.endpoints.get(normalized, 0) + 1
+        # Latency (best effort: a bad value drops only the latency sample)
+        duration_ms = entry.fields.get("duration_ms")
+        if duration_ms is not None:
+            try:
+                stats.latencies_ms.append(float(duration_ms))
+            except (ValueError, TypeError):
+                pass
+    return stats
+def _normalize_path(path: str) -> str:
+    """Normalize URL path for grouping.
+    Drops the query string, then replaces UUIDs with ``:uuid`` and purely
+    numeric path segments with ``:id``.
+    /api/users/123/orders/456 → /api/users/:id/orders/:id
+    Segments that merely start with digits (``/2fa``, ``/123abc``) are
+    left unchanged.
+    """
+    import re
+    # Remove query string
+    path = path.split("?")[0]
+    # Replace UUIDs
+    path = re.sub(
+        r'/[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}',
+        '/:uuid',
+        path,
+    )
+    # Replace numeric path segments. The lookahead requires the digits to
+    # fill the whole segment, so "/2fa" is not mangled into "/:idfa".
+    path = re.sub(r'/\d+(?=/|$)', '/:id', path)
+    return path

log_lens/cli.py ADDED Viewed

@@ -0,0 +1,249 @@
+"""CLI entry point for log-lens."""
+from __future__ import annotations
+import sys
+from pathlib import Path
+import click
+from rich.console import Console
+from . import __version__
+from .analyzers.core import analyze
+from .demo import DEMOS
+from .models import LogFormat
+from .output.console import render_analysis
+from .output.html_report import export_html
+from .parsers.auto import collect_log_files, parse_files
+# Shared Rich console used by every command in this module for status output.
+console = Console()
+# Maps the --format option value to the LogFormat handed to parse_files().
+# "auto" maps to None; presumably parse_files then detects the format itself
+# (its implementation is not visible here).
+_FORMAT_MAP = {
+    "auto": None,
+    "json": LogFormat.JSON,
+    "apache": LogFormat.APACHE,
+    "syslog": LogFormat.SYSLOG,
+    "common": LogFormat.COMMON,
+}
+# Root click group. The body is intentionally empty: the docstring is the
+# top-level --help text and subcommands attach via @cli.command().
+@click.group()
+@click.version_option(__version__, prog_name="log-lens")
+def cli():
+    """🔍 log-lens — Application Log Analyzer.
+    Parse, analyze, and visualize application logs. Supports JSON,
+    Apache/Nginx, Syslog, and common text log formats. Detects error
+    patterns, frequency spikes, HTTP latency issues, and generates
+    interactive HTML dashboard reports.
+    """
+# The command name is given explicitly: click releases before 8.2 derive
+# "analyze-cmd" from the function name, which would not match the documented
+# `log-lens analyze` invocation. Newer click strips the "_cmd" suffix itself,
+# so the explicit name is a no-op there.
+@cli.command("analyze")
+@click.argument("path", type=click.Path(exists=True), default=".")
+@click.option("--format", "fmt", type=click.Choice(["auto", "json", "apache", "syslog", "common"], case_sensitive=False), default="auto", help="Log format (auto-detected by default)")
+@click.option("--html", type=click.Path(), help="Export HTML dashboard report")
+@click.option("--top-errors", "-e", "top_errors", type=int, default=10, help="Number of top errors to show")
+def analyze_cmd(path: str, fmt: str, html: str | None, top_errors: int):
+    """Analyze log files — full analysis with all metrics.
+    Examples:
+        log-lens analyze .
+        log-lens analyze /var/log/app.log
+        log-lens analyze logs/ --format json --html report.html
+        log-lens analyze access.log --format apache
+    """
+    # NOTE(review): top_errors is accepted but never used below, so
+    # --top-errors currently has no effect. Wiring it up needs a decision on
+    # how render_analysis should receive it (its signature is not visible here).
+    log_format = _FORMAT_MAP.get(fmt)
+    files = collect_log_files(path)
+    if not files:
+        # Nothing to analyze: report and exit with a failure status.
+        console.print("[red]No log files found at the specified path.[/]")
+        sys.exit(1)
+    console.print(f"[dim]Scanning {len(files)} file(s)...[/]")
+    entries, detected, total, failed, src_files = parse_files(files, log_format)
+    if not entries:
+        # Files existed but nothing parsed; this is a warning, not a failure.
+        console.print("[yellow]No log entries could be parsed.[/]")
+        return
+    analysis = analyze(
+        entries,
+        format_detected=detected,
+        total_lines=total,
+        failed_lines=failed,
+        source_files=src_files,
+    )
+    render_analysis(analysis)
+    # The HTML dashboard is optional and written after the console report.
+    if html:
+        export_html(analysis, html)
+        console.print(f"[green]HTML report → {html}[/]")
+@cli.command()
+@click.argument("path", type=click.Path(exists=True))
+@click.option("--format", "fmt", type=click.Choice(["auto", "json", "apache", "syslog", "common"], case_sensitive=False), default="auto", help="Log format")
+def errors(path: str, fmt: str):
+    """Show only error analysis — top error patterns and clusters.
+    Examples:
+        log-lens errors app.log
+        log-lens errors /var/log/ --format json
+    """
+    requested_format = _FORMAT_MAP.get(fmt)
+    log_files = collect_log_files(path)
+    if not log_files:
+        console.print("[red]No log files found.[/]")
+        sys.exit(1)
+    parsed, fmt_found, n_total, n_failed, sources = parse_files(log_files, requested_format)
+    if not parsed:
+        console.print("[yellow]No log entries could be parsed.[/]")
+        return
+    report = analyze(parsed, fmt_found, n_total, n_failed, sources)
+    if not report.error_clusters:
+        console.print("[green]No errors found! 🎉[/]")
+        return
+    # The renderer is imported only once there is something to show.
+    from .output.console import _render_errors
+    headline = f"\n  [bold]Found {report.error_count:,} errors in {report.parsed_lines:,} log entries[/]"
+    rate_line = f"  [dim]Error rate: {report.error_rate:.2f}%[/]\n"
+    console.print(headline)
+    console.print(rate_line)
+    _render_errors(report)
+@cli.command()
+@click.argument("path", type=click.Path(exists=True))
+@click.option("--format", "fmt", type=click.Choice(["auto", "json", "apache", "syslog", "common"], case_sensitive=False), default="auto", help="Log format")
+def timeline(path: str, fmt: str):
+    """Show event timeline — events per hour with spike detection.
+    Examples:
+        log-lens timeline app.log
+        log-lens timeline /var/log/nginx/ --format apache
+    """
+    requested_format = _FORMAT_MAP.get(fmt)
+    log_files = collect_log_files(path)
+    if not log_files:
+        console.print("[red]No log files found.[/]")
+        sys.exit(1)
+    parsed, fmt_found, n_total, n_failed, sources = parse_files(log_files, requested_format)
+    if not parsed:
+        console.print("[yellow]No log entries could be parsed.[/]")
+        return
+    report = analyze(parsed, fmt_found, n_total, n_failed, sources)
+    from .output.console import _render_anomalies, _render_timeline
+    headline = f"\n  [bold]Timeline: {report.duration_str} duration, {report.events_per_second:.1f} events/sec[/]\n"
+    console.print(headline)
+    _render_timeline(report)
+    # Either list the detected anomalies or state explicitly that there are none.
+    if not report.anomalies:
+        console.print("\n  [green]No anomalies detected.[/]")
+    else:
+        _render_anomalies(report)
+@cli.command()
+@click.argument("path", type=click.Path(exists=True))
+@click.option("--format", "fmt", type=click.Choice(["auto", "apache"], case_sensitive=False), default="auto", help="Log format")
+def http(path: str, fmt: str):
+    """Show HTTP analysis — status codes, latency, endpoints.
+    Best used with Apache/Nginx access logs.
+    Examples:
+        log-lens http access.log
+        log-lens http /var/log/nginx/ --format apache
+    """
+    requested_format = _FORMAT_MAP.get(fmt)
+    log_files = collect_log_files(path)
+    if not log_files:
+        console.print("[red]No log files found.[/]")
+        sys.exit(1)
+    parsed, fmt_found, n_total, n_failed, sources = parse_files(log_files, requested_format)
+    if not parsed:
+        console.print("[yellow]No log entries could be parsed.[/]")
+        return
+    report = analyze(parsed, fmt_found, n_total, n_failed, sources)
+    # http_stats is only set when some entry carried a status field.
+    if not report.http_stats or report.http_stats.total_requests == 0:
+        console.print("[yellow]No HTTP data found. This command works best with access logs.[/]")
+        return
+    from .output.console import _render_http
+    headline = f"\n  [bold]HTTP Analysis: {report.http_stats.total_requests:,} requests[/]\n"
+    console.print(headline)
+    _render_http(report)
+@cli.command()
+@click.option("--type", "demo_type", type=click.Choice(["all", "json", "apache", "syslog", "common"], case_sensitive=False), default="all", help="Demo type")
+@click.option("--html", type=click.Path(), help="Export HTML report")
+def demo(demo_type: str, html: str | None):
+    """Run demo with sample log data.
+    Examples:
+        log-lens demo
+        log-lens demo --type json
+        log-lens demo --type apache --html demo-report.html
+    """
+    import tempfile
+    console.print()
+    console.print("[bold blue]🔍 log-lens — Demo Mode[/]")
+    console.print("[dim]Analyzing sample log files...[/]\n")
+    # DEMOS values are unpacked below as (filename, content) pairs keyed by demo name.
+    if demo_type == "all":
+        demos = list(DEMOS.items())
+    else:
+        demos = [(demo_type, DEMOS[demo_type])]
+    for name, (filename, content) in demos:
+        console.print(f"\n[bold]━━━ {name.upper()} ({filename}) ━━━[/]\n")
+        # Each sample is written to its own temporary directory so it goes
+        # through the same file-based parse_files() path as real input; the
+        # directory is removed when the with-block exits.
+        with tempfile.TemporaryDirectory() as tmpdir:
+            fpath = Path(tmpdir) / filename
+            fpath.write_text(content, encoding="utf-8")
+            # Same name → LogFormat mapping as the module-level _FORMAT_MAP,
+            # minus the "auto" entry; unknown names yield None.
+            fmt_map = {
+                "json": LogFormat.JSON,
+                "apache": LogFormat.APACHE,
+                "syslog": LogFormat.SYSLOG,
+                "common": LogFormat.COMMON,
+            }
+            log_format = fmt_map.get(name)
+            entries, detected, total, failed, src_files = parse_files([str(fpath)], log_format)
+            if entries:
+                analysis = analyze(entries, detected, total, failed, src_files)
+                render_analysis(analysis)
+                # Only the last demo in the list is exported, so with
+                # --type all the HTML report covers just that final sample.
+                if html and name == demos[-1][0]:
+                    export_html(analysis, html)
+                    console.print(f"[green]HTML report → {html}[/]")
+            else:
+                console.print("[yellow]No entries parsed.[/]")