log-lens-cli 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
log_lens/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ """log-lens — Application Log Analyzer CLI."""
2
+
3
+ __version__ = "1.0.0"
log_lens/__main__.py ADDED
@@ -0,0 +1,6 @@
1
+ """Allow running with python -m log_lens."""
2
+
3
+ from log_lens.cli import cli
4
+
5
+ if __name__ == "__main__":
6
+ cli()
File without changes
@@ -0,0 +1,107 @@
1
+ """Anomaly / spike detection in log data."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import math
6
+
7
+ from ..models import Anomaly, LogEntry
8
+
9
+
10
def detect_anomalies(
    entries: list[LogEntry],
    events_per_hour: dict[str, int],
    z_threshold: float = 2.0,
) -> list[Anomaly]:
    """Detect anomalies in log data using z-score analysis.

    Looks for:
    1. Volume spikes — hours whose event count lies more than
       ``z_threshold`` standard deviations above the mean hourly count.
    2. Error rate spikes — hours whose error percentage lies more than
       ``z_threshold`` standard deviations above the mean hourly rate and
       is itself above 1%.

    Args:
        entries: Parsed log entries; used to count errors per hour.
        events_per_hour: Total event count per hour bucket.
        z_threshold: Minimum z-score for an hour to be reported.

    Returns:
        Anomalies ordered by severity (critical first). Empty when fewer
        than three hourly buckets are available.
    """
    anomalies: list[Anomaly] = []

    # Fewer than three buckets cannot form a meaningful baseline.
    if len(events_per_hour) < 3:
        return anomalies

    # --- Volume spike detection ---
    volumes = list(events_per_hour.values())
    mean_vol = sum(volumes) / len(volumes)
    std_vol = _stddev(volumes, mean_vol)

    # A zero deviation means every hour is identical: nothing can spike.
    if std_vol > 0:
        for hour, count in events_per_hour.items():
            z_score = (count - mean_vol) / std_vol
            if z_score > z_threshold:
                anomalies.append(Anomaly(
                    timestamp=hour,
                    description=f"Volume spike: {count} events (avg {mean_vol:.0f})",
                    severity=_z_to_severity(z_score),
                    metric="volume",
                    value=float(count),
                    baseline=mean_vol,
                ))

    # --- Error rate spike detection ---
    error_per_hour = _error_counts_per_hour(entries)
    if error_per_hour:
        # Hours without errors contribute a 0% rate; hours with no events
        # are skipped to avoid dividing by zero.
        error_rates: dict[str, float] = {
            hour: error_per_hour.get(hour, 0) / total * 100
            for hour, total in events_per_hour.items()
            if total > 0
        }

        # Gate on the number of rate samples, not on the number of hours
        # that contain errors: a burst confined to one hour is exactly the
        # spike this pass is meant to report.
        if len(error_rates) >= 3:
            rates = list(error_rates.values())
            mean_rate = sum(rates) / len(rates)
            std_rate = _stddev(rates, mean_rate)

            if std_rate > 0:
                for hour, rate in error_rates.items():
                    z_score = (rate - mean_rate) / std_rate
                    if z_score > z_threshold and rate > 1.0:  # at least 1% error rate
                        anomalies.append(Anomaly(
                            timestamp=hour,
                            description=f"Error rate spike: {rate:.1f}% (avg {mean_rate:.1f}%)",
                            severity=_z_to_severity(z_score),
                            metric="error_rate",
                            value=rate,
                            baseline=mean_rate,
                        ))

    # Sort by severity (critical first); unknown severities go last.
    severity_order = {"critical": 0, "high": 1, "medium": 2, "low": 3}
    anomalies.sort(key=lambda a: severity_order.get(a.severity, 4))

    return anomalies
79
+
80
+
81
+ def _error_counts_per_hour(entries: list[LogEntry]) -> dict[str, int]:
82
+ """Count error entries per hour."""
83
+ counts: dict[str, int] = {}
84
+ for e in entries:
85
+ if e.timestamp and e.is_error:
86
+ key = e.timestamp.strftime("%Y-%m-%d %H:00")
87
+ counts[key] = counts.get(key, 0) + 1
88
+ return counts
89
+
90
+
91
+ def _stddev(values: list[float | int], mean: float) -> float:
92
+ """Calculate standard deviation."""
93
+ if len(values) < 2:
94
+ return 0.0
95
+ variance = sum((v - mean) ** 2 for v in values) / len(values)
96
+ return math.sqrt(variance)
97
+
98
+
99
+ def _z_to_severity(z: float) -> str:
100
+ """Map z-score to severity level."""
101
+ if z > 4.0:
102
+ return "critical"
103
+ if z > 3.0:
104
+ return "high"
105
+ if z > 2.5:
106
+ return "medium"
107
+ return "low"
@@ -0,0 +1,73 @@
1
+ """Core analyzer — orchestrates all analysis on parsed log entries."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from ..models import LogAnalysis, LogEntry, LogFormat
6
+ from .anomaly import detect_anomalies
7
+ from .errors import cluster_errors
8
+ from .http import analyze_http
9
+
10
+
11
def analyze(
    entries: list[LogEntry],
    format_detected: LogFormat = LogFormat.UNKNOWN,
    total_lines: int = 0,
    failed_lines: int = 0,
    source_files: list[str] | None = None,
) -> LogAnalysis:
    """Build a ``LogAnalysis`` from parsed entries by running every analyzer.

    ``total_lines`` falls back to ``len(entries)`` when it is zero. With no
    entries, only the bookkeeping fields are filled in.
    """
    report = LogAnalysis(
        total_lines=total_lines or len(entries),
        parsed_lines=len(entries),
        failed_lines=failed_lines,
        format_detected=format_detected,
        source_files=source_files or [],
    )
    if not entries:
        return report

    # Time range, taken from the entries that carry a timestamp.
    stamps = [e.timestamp for e in entries if e.timestamp]
    if stamps:
        report.time_start = min(stamps)
        report.time_end = max(stamps)

    # Level distribution and hourly timeline.
    report.level_counts = _count_levels(entries)
    report.events_per_hour = _events_per_hour(entries)

    # Error clustering runs on error entries only.
    report.error_clusters = cluster_errors([e for e in entries if e.is_error])

    # HTTP statistics are computed only when some entry has a truthy status.
    with_status = [e for e in entries if e.fields.get("status")]
    if with_status:
        report.http_stats = analyze_http(with_status)

    # Anomaly detection reuses the hourly timeline computed above.
    report.anomalies = detect_anomalies(entries, report.events_per_hour)
    return report
55
+
56
+
57
+ def _count_levels(entries: list[LogEntry]) -> dict[str, int]:
58
+ """Count entries per log level."""
59
+ counts: dict[str, int] = {}
60
+ for e in entries:
61
+ key = e.level.value
62
+ counts[key] = counts.get(key, 0) + 1
63
+ return counts
64
+
65
+
66
+ def _events_per_hour(entries: list[LogEntry]) -> dict[str, int]:
67
+ """Group entries by hour."""
68
+ hours: dict[str, int] = {}
69
+ for e in entries:
70
+ if e.timestamp:
71
+ key = e.timestamp.strftime("%Y-%m-%d %H:00")
72
+ hours[key] = hours.get(key, 0) + 1
73
+ return dict(sorted(hours.items()))
@@ -0,0 +1,96 @@
1
+ """Error clustering — groups similar error messages into patterns."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+
7
+ from ..models import ErrorCluster, LogEntry
8
+
9
+
10
def cluster_errors(error_entries: list[LogEntry], max_clusters: int = 25) -> list[ErrorCluster]:
    """Bucket error entries by their normalized message pattern.

    Each message is passed through ``normalize_message`` and entries that
    share the resulting pattern land in the same ``ErrorCluster``. A
    cluster tracks its count, the earliest and latest timestamp seen, and
    up to three raw sample messages. The ``max_clusters`` largest clusters
    are returned, biggest first.
    """
    if not error_entries:
        return []

    by_pattern: dict[str, ErrorCluster] = {}

    for entry in error_entries:
        signature = normalize_message(entry.message)

        group = by_pattern.get(signature)
        if group is None:
            # The first entry with this pattern seeds level and time range.
            group = by_pattern[signature] = ErrorCluster(
                pattern=signature,
                count=0,
                level=entry.level,
                first_seen=entry.timestamp,
                last_seen=entry.timestamp,
                samples=[],
            )

        group.count += 1

        when = entry.timestamp
        if when:
            if group.first_seen is None or when < group.first_seen:
                group.first_seen = when
            if group.last_seen is None or when > group.last_seen:
                group.last_seen = when

        # Keep up to 3 sample messages
        if len(group.samples) < 3:
            group.samples.append(entry.message)

    # Largest clusters first; the sort is stable, so ties keep first-seen order.
    ranked = list(by_pattern.values())
    ranked.sort(key=lambda c: c.count, reverse=True)
    return ranked[:max_clusters]
51
+
52
+
53
# Ordered (pattern, placeholder) rules applied by ``normalize_message``.
# Order matters: structured tokens (UUIDs, timestamps, IPs, paths) must be
# replaced before the generic hex/number rules, otherwise those rules chew
# pieces out of them (e.g. "/var/log/app-12345.log" -> "<PATH><N>.log").
_NORMALIZE_RULES = (
    # UUIDs: 8-4-4-4-12 hex
    (
        re.compile(
            r'[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}'
        ),
        '<UUID>',
    ),
    # ISO timestamps, with optional fractional seconds and UTC offset
    (
        re.compile(
            r'\d{4}-\d{2}-\d{2}[T ]\d{2}:\d{2}:\d{2}(?:\.\d+)?(?:Z|[+-]\d{2}:?\d{2})?'
        ),
        '<TS>',
    ),
    # IP addresses (v4)
    (re.compile(r'\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b'), '<IP>'),
    # File paths (unix style)
    (re.compile(r'/[\w./-]+'), '<PATH>'),
    # Hex hashes (>= 8 chars); also catches digit-only runs of 8+ characters
    (re.compile(r'\b[0-9a-fA-F]{8,}\b'), '<HASH>'),
    # Remaining numbers with 3+ digits (this includes port numbers)
    (re.compile(r'\b\d{3,}\b'), '<N>'),
)
_WHITESPACE_RUN = re.compile(r'\s+')


def normalize_message(msg: str) -> str:
    """Normalize an error message so recurring patterns compare equal.

    Replaces variable parts with placeholders, in this order:
    - UUIDs → <UUID>
    - ISO timestamps → <TS>
    - IPv4 addresses → <IP>
    - Unix-style file paths → <PATH>
    - Hex hashes (8+ hex characters) → <HASH>
    - Numbers (3+ digits) → <N>

    Runs of whitespace are then collapsed to a single space and the result
    is stripped. Quoted strings are left untouched.
    """
    result = msg
    for pattern, placeholder in _NORMALIZE_RULES:
        result = pattern.sub(placeholder, result)
    return _WHITESPACE_RUN.sub(' ', result).strip()
@@ -0,0 +1,66 @@
1
+ """HTTP-specific log analysis — status codes, latency, endpoints."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from ..models import HttpStats, LogEntry
6
+
7
+
8
def analyze_http(entries: list[LogEntry]) -> HttpStats:
    """Analyze HTTP-related log entries (typically from access logs).

    For every entry whose ``fields["status"]`` is present and convertible
    to ``int``, this counts the request and tallies its status code, its
    method ("UNKNOWN" when absent) and its normalized path, and records
    ``duration_ms`` when it parses as a float.

    Entries with a missing or malformed status are skipped rather than
    aborting the whole analysis, mirroring how unparsable latencies are
    already tolerated.
    """
    stats = HttpStats()

    for entry in entries:
        status = entry.fields.get("status")
        if status is None:
            continue

        # Status codes: one bad value (e.g. "-") must not crash the run.
        try:
            status_int = int(status)
        except (TypeError, ValueError):
            continue

        stats.total_requests += 1
        stats.status_codes[status_int] = stats.status_codes.get(status_int, 0) + 1

        # Methods
        method = entry.fields.get("method", "UNKNOWN")
        stats.methods[method] = stats.methods.get(method, 0) + 1

        # Endpoints
        path = entry.fields.get("path", "")
        if path:
            # Normalize path: remove query params and collapse IDs
            normalized = _normalize_path(path)
            stats.endpoints[normalized] = stats.endpoints.get(normalized, 0) + 1

        # Latency (best effort: unparsable values are ignored)
        duration_ms = entry.fields.get("duration_ms")
        if duration_ms is not None:
            try:
                stats.latencies_ms.append(float(duration_ms))
            except (ValueError, TypeError):
                pass

    return stats
43
+
44
+
45
+ def _normalize_path(path: str) -> str:
46
+ """Normalize URL path for grouping.
47
+
48
+ Replaces numeric IDs and UUIDs with placeholders.
49
+ /api/users/123/orders/456 → /api/users/:id/orders/:id
50
+ """
51
+ import re
52
+
53
+ # Remove query string
54
+ path = path.split("?")[0]
55
+
56
+ # Replace UUIDs
57
+ path = re.sub(
58
+ r'/[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}',
59
+ '/:uuid',
60
+ path,
61
+ )
62
+
63
+ # Replace numeric path segments
64
+ path = re.sub(r'/\d+', '/:id', path)
65
+
66
+ return path
log_lens/cli.py ADDED
@@ -0,0 +1,249 @@
1
+ """CLI entry point for log-lens."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import sys
6
+ from pathlib import Path
7
+
8
+ import click
9
+ from rich.console import Console
10
+
11
+ from . import __version__
12
+ from .analyzers.core import analyze
13
+ from .demo import DEMOS
14
+ from .models import LogFormat
15
+ from .output.console import render_analysis
16
+ from .output.html_report import export_html
17
+ from .parsers.auto import collect_log_files, parse_files
18
+
19
# Shared Rich console used by every command for terminal output.
console = Console()

# Maps each ``--format`` choice to the parser format handed to
# ``parse_files``; "auto" maps to ``None``.
_FORMAT_MAP: dict[str, LogFormat | None] = dict(
    auto=None,
    json=LogFormat.JSON,
    apache=LogFormat.APACHE,
    syslog=LogFormat.SYSLOG,
    common=LogFormat.COMMON,
)
28
+
29
+
30
# Root command group; the docstring below is the text shown by ``--help``.
@click.group()
@click.version_option(__version__, prog_name="log-lens")
def cli():
    """🔍 log-lens — Application Log Analyzer.

    Parse, analyze, and visualize application logs. Supports JSON,
    Apache/Nginx, Syslog, and common text log formats. Detects error
    patterns, frequency spikes, HTTP latency issues, and generates
    interactive HTML dashboard reports.
    """
40
+
41
+
42
# The command name is given explicitly: the function cannot be called
# ``analyze`` (that name is the imported analyzer), and Click releases
# before 8.2 would otherwise expose it as ``analyze-cmd`` instead of the
# documented ``log-lens analyze``.
@cli.command("analyze")
@click.argument("path", type=click.Path(exists=True), default=".")
@click.option(
    "--format",
    "fmt",
    type=click.Choice(["auto", "json", "apache", "syslog", "common"], case_sensitive=False),
    default="auto",
    help="Log format (auto-detected by default)",
)
@click.option("--html", type=click.Path(), help="Export HTML dashboard report")
@click.option("--top-errors", "-e", "top_errors", type=int, default=10, help="Number of top errors to show")
def analyze_cmd(path: str, fmt: str, html: str | None, top_errors: int):
    """Analyze log files — full analysis with all metrics.

    Examples:

    log-lens analyze .

    log-lens analyze /var/log/app.log

    log-lens analyze logs/ --format json --html report.html

    log-lens analyze access.log --format apache
    """
    # NOTE(review): ``top_errors`` is accepted but not forwarded to the
    # renderer or the HTML export, so it currently has no effect — confirm
    # how ``render_analysis`` is meant to receive it.
    log_format = _FORMAT_MAP.get(fmt)

    files = collect_log_files(path)
    if not files:
        console.print("[red]No log files found at the specified path.[/]")
        sys.exit(1)

    console.print(f"[dim]Scanning {len(files)} file(s)...[/]")

    entries, detected, total, failed, src_files = parse_files(files, log_format)

    if not entries:
        console.print("[yellow]No log entries could be parsed.[/]")
        return

    analysis = analyze(
        entries,
        format_detected=detected,
        total_lines=total,
        failed_lines=failed,
        source_files=src_files,
    )

    render_analysis(analysis)

    if html:
        export_html(analysis, html)
        console.print(f"[green]HTML report → {html}[/]")
88
+
89
+
90
@cli.command()
@click.argument("path", type=click.Path(exists=True))
@click.option(
    "--format",
    "fmt",
    type=click.Choice(["auto", "json", "apache", "syslog", "common"], case_sensitive=False),
    default="auto",
    help="Log format",
)
def errors(path: str, fmt: str):
    """Show only error analysis — top error patterns and clusters.

    Examples:

    log-lens errors app.log

    log-lens errors /var/log/ --format json
    """
    log_files = collect_log_files(path)
    if not log_files:
        console.print("[red]No log files found.[/]")
        sys.exit(1)

    parsed = parse_files(log_files, _FORMAT_MAP.get(fmt))
    entries, detected, total, failed, src_files = parsed
    if not entries:
        console.print("[yellow]No log entries could be parsed.[/]")
        return

    analysis = analyze(entries, detected, total, failed, src_files)
    if not analysis.error_clusters:
        console.print("[green]No errors found! 🎉[/]")
        return

    # Imported lazily, only once there is something to render.
    from .output.console import _render_errors

    console.print(f"\n [bold]Found {analysis.error_count:,} errors in {analysis.parsed_lines:,} log entries[/]")
    console.print(f" [dim]Error rate: {analysis.error_rate:.2f}%[/]\n")
    _render_errors(analysis)
124
+
125
+
126
@cli.command()
@click.argument("path", type=click.Path(exists=True))
@click.option(
    "--format",
    "fmt",
    type=click.Choice(["auto", "json", "apache", "syslog", "common"], case_sensitive=False),
    default="auto",
    help="Log format",
)
def timeline(path: str, fmt: str):
    """Show event timeline — events per hour with spike detection.

    Examples:

    log-lens timeline app.log

    log-lens timeline /var/log/nginx/ --format apache
    """
    log_files = collect_log_files(path)
    if not log_files:
        console.print("[red]No log files found.[/]")
        sys.exit(1)

    parsed = parse_files(log_files, _FORMAT_MAP.get(fmt))
    entries, detected, total, failed, src_files = parsed
    if not entries:
        console.print("[yellow]No log entries could be parsed.[/]")
        return

    analysis = analyze(entries, detected, total, failed, src_files)

    # Imported lazily, only once there is something to render.
    from .output.console import _render_anomalies, _render_timeline

    console.print(f"\n [bold]Timeline: {analysis.duration_str} duration, {analysis.events_per_second:.1f} events/sec[/]\n")
    _render_timeline(analysis)

    if not analysis.anomalies:
        console.print("\n [green]No anomalies detected.[/]")
    else:
        _render_anomalies(analysis)
158
+ else:
159
+ console.print("\n [green]No anomalies detected.[/]")
160
+
161
+
162
@cli.command()
@click.argument("path", type=click.Path(exists=True))
@click.option(
    "--format",
    "fmt",
    type=click.Choice(["auto", "apache"], case_sensitive=False),
    default="auto",
    help="Log format",
)
def http(path: str, fmt: str):
    """Show HTTP analysis — status codes, latency, endpoints.

    Best used with Apache/Nginx access logs.

    Examples:

    log-lens http access.log

    log-lens http /var/log/nginx/ --format apache
    """
    log_files = collect_log_files(path)
    if not log_files:
        console.print("[red]No log files found.[/]")
        sys.exit(1)

    parsed = parse_files(log_files, _FORMAT_MAP.get(fmt))
    entries, detected, total, failed, src_files = parsed
    if not entries:
        console.print("[yellow]No log entries could be parsed.[/]")
        return

    analysis = analyze(entries, detected, total, failed, src_files)

    http_stats = analysis.http_stats
    if not http_stats or http_stats.total_requests == 0:
        console.print("[yellow]No HTTP data found. This command works best with access logs.[/]")
        return

    # Imported lazily, only once there is something to render.
    from .output.console import _render_http

    console.print(f"\n [bold]HTTP Analysis: {analysis.http_stats.total_requests:,} requests[/]\n")
    _render_http(analysis)
197
+
198
+
199
@cli.command()
@click.option(
    "--type",
    "demo_type",
    type=click.Choice(["all", "json", "apache", "syslog", "common"], case_sensitive=False),
    default="all",
    help="Demo type",
)
@click.option("--html", type=click.Path(), help="Export HTML report")
def demo(demo_type: str, html: str | None):
    """Run demo with sample log data.

    Examples:

    log-lens demo

    log-lens demo --type json

    log-lens demo --type apache --html demo-report.html
    """
    import tempfile

    console.print()
    console.print("[bold blue]🔍 log-lens — Demo Mode[/]")
    console.print("[dim]Analyzing sample log files...[/]\n")

    if demo_type == "all":
        demos = list(DEMOS.items())
    else:
        demos = [(demo_type, DEMOS[demo_type])]

    # Only the final demo is exported to HTML; earlier ones are rendered to
    # the console only. Resolved once here instead of on every iteration.
    last_name = demos[-1][0] if demos else None

    for name, (filename, content) in demos:
        console.print(f"\n[bold]━━━ {name.upper()} ({filename}) ━━━[/]\n")

        # The sample is written to a throwaway directory so it goes through
        # the same file-based parsing path as real logs.
        with tempfile.TemporaryDirectory() as tmpdir:
            fpath = Path(tmpdir) / filename
            fpath.write_text(content, encoding="utf-8")

            # Reuse the module-level map rather than rebuilding an identical
            # one per iteration; names without a mapping yield None.
            log_format = _FORMAT_MAP.get(name)

            entries, detected, total, failed, src_files = parse_files([str(fpath)], log_format)

            if entries:
                analysis = analyze(entries, detected, total, failed, src_files)
                render_analysis(analysis)

                if html and name == last_name:
                    export_html(analysis, html)
                    console.print(f"[green]HTML report → {html}[/]")
            else:
                console.print("[yellow]No entries parsed.[/]")