logtap 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- logtap/__init__.py +8 -0
- logtap/__main__.py +6 -0
- logtap/api/__init__.py +5 -0
- logtap/api/app.py +45 -0
- logtap/api/dependencies.py +61 -0
- logtap/api/routes/__init__.py +1 -0
- logtap/api/routes/files.py +38 -0
- logtap/api/routes/health.py +19 -0
- logtap/api/routes/logs.py +249 -0
- logtap/api/routes/parsed.py +102 -0
- logtap/cli/__init__.py +1 -0
- logtap/cli/commands/__init__.py +1 -0
- logtap/cli/commands/files.py +86 -0
- logtap/cli/commands/query.py +127 -0
- logtap/cli/commands/serve.py +78 -0
- logtap/cli/commands/tail.py +121 -0
- logtap/cli/main.py +50 -0
- logtap/core/__init__.py +16 -0
- logtap/core/parsers/__init__.py +20 -0
- logtap/core/parsers/apache.py +165 -0
- logtap/core/parsers/auto.py +118 -0
- logtap/core/parsers/base.py +164 -0
- logtap/core/parsers/json_parser.py +119 -0
- logtap/core/parsers/nginx.py +108 -0
- logtap/core/parsers/syslog.py +80 -0
- logtap/core/reader.py +160 -0
- logtap/core/search.py +142 -0
- logtap/core/validation.py +52 -0
- logtap/models/__init__.py +11 -0
- logtap/models/config.py +39 -0
- logtap/models/responses.py +65 -0
- logtap-0.2.0.dist-info/METADATA +317 -0
- logtap-0.2.0.dist-info/RECORD +36 -0
- logtap-0.2.0.dist-info/WHEEL +4 -0
- logtap-0.2.0.dist-info/entry_points.txt +3 -0
- logtap-0.2.0.dist-info/licenses/LICENSE +674 -0
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
"""Query command for logtap CLI."""
|
|
2
|
+
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
import typer
|
|
6
|
+
from rich.console import Console
|
|
7
|
+
from rich.panel import Panel
|
|
8
|
+
|
|
9
|
+
console = Console()
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def query(
    filename: str = typer.Argument(
        "syslog",
        help="Name of the log file to query.",
    ),
    server: str = typer.Option(
        "http://localhost:8000",
        "--server",
        "-s",
        help="URL of the logtap server.",
        envvar="LOGTAP_SERVER",
    ),
    term: Optional[str] = typer.Option(
        None,
        "--term",
        "-t",
        help="Substring to search for.",
    ),
    regex: Optional[str] = typer.Option(
        None,
        "--regex",
        "-r",
        help="Regex pattern to match.",
    ),
    limit: int = typer.Option(
        50,
        "--limit",
        "-n",
        help="Number of lines to return.",
    ),
    api_key: Optional[str] = typer.Option(
        None,
        "--api-key",
        "-k",
        help="API key for authentication.",
        envvar="LOGTAP_API_KEY",
    ),
    output: str = typer.Option(
        "pretty",
        "--output",
        "-o",
        help="Output format: pretty, json, plain.",
    ),
    case_sensitive: bool = typer.Option(
        True,
        "--case-sensitive/--ignore-case",
        "-c/-i",
        help="Whether search is case-sensitive.",
    ),
) -> None:
    """
    Query logs from a logtap server.

    Example:
        logtap query syslog
        logtap query auth.log --term "Failed password"
        logtap query syslog --regex "error.*connection" --limit 100
    """
    # The docstring above is kept verbatim: it is registered as a Typer
    # command, so that text is what `logtap query --help` shows.
    #
    # Flow: GET {server}/logs with the filters as query parameters, then
    # render the returned "lines" as JSON, plain text or a Rich panel.
    # Exits with status 1 on any HTTP, connection or decoding failure.

    import httpx
    from rich.markup import escape

    # A trailing slash on --server would otherwise yield ".../​/logs".
    base_url = server.rstrip("/")

    # Build request
    headers = {}
    if api_key:
        headers["X-API-Key"] = api_key

    params = {
        "filename": filename,
        "limit": limit,
        "case_sensitive": case_sensitive,
    }
    # Only send the filters the user actually supplied.
    if term:
        params["term"] = term
    if regex:
        params["regex"] = regex

    try:
        with httpx.Client(timeout=30.0) as client:
            response = client.get(f"{base_url}/logs", params=params, headers=headers)

        if response.status_code != 200:
            # The error body is not guaranteed to be a JSON object (e.g. an
            # HTML page from a proxy); fall back to the raw text in that case.
            try:
                payload = response.json()
            except ValueError:
                payload = None
            if isinstance(payload, dict):
                error_detail = payload.get("detail", response.text)
            else:
                error_detail = response.text
            console.print(f"[bold red]Error:[/bold red] {escape(str(error_detail))}")
            raise typer.Exit(1)

        data = response.json()
        lines = data.get("lines", [])
        count = data.get("count", len(lines))

        # Format output
        if output == "json":
            console.print_json(data=data)
        elif output == "plain":
            for line in lines:
                # Log text is data, not Rich markup: "[error]" must be shown
                # literally instead of being parsed as a style tag.
                console.print(line, markup=False)
        else:
            # Pretty output with panel
            if lines:
                # Escape so bracketed log text survives Panel's markup pass.
                content = escape("\n".join(lines))
                panel = Panel(
                    content,
                    title=f"[bold blue]{filename}[/bold blue]",
                    subtitle=f"[dim]{count} lines[/dim]",
                    border_style="blue",
                )
                console.print(panel)
            else:
                console.print(f"[dim]No matching lines found in {filename}[/dim]")

    except typer.Exit:
        # typer.Exit derives from RuntimeError; without this clause the
        # generic handler below would catch our own exit request and print
        # a second, meaningless "Error: 1" line.
        raise
    except httpx.ConnectError:
        console.print(f"[bold red]Error:[/bold red] Could not connect to {server}")
        console.print("[dim]Is the logtap server running? Start it with 'logtap serve'[/dim]")
        raise typer.Exit(1)
    except Exception as e:
        console.print(f"[bold red]Error:[/bold red] {escape(str(e))}")
        raise typer.Exit(1)
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
"""Server command for logtap CLI."""
|
|
2
|
+
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
import typer
|
|
6
|
+
from rich.console import Console
|
|
7
|
+
|
|
8
|
+
console = Console()
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def serve(
    host: str = typer.Option(
        "0.0.0.0",
        "--host",
        "-h",
        help="Host to bind to.",
    ),
    port: int = typer.Option(
        8000,
        "--port",
        "-p",
        help="Port to bind to.",
    ),
    reload: bool = typer.Option(
        False,
        "--reload",
        "-r",
        help="Enable auto-reload for development.",
    ),
    api_key: Optional[str] = typer.Option(
        None,
        "--api-key",
        "-k",
        help="API key for authentication. Can also be set via LOGTAP_API_KEY env var.",
        envvar="LOGTAP_API_KEY",
    ),
    log_dir: str = typer.Option(
        "/var/log",
        "--log-dir",
        "-d",
        help="Directory containing log files.",
        envvar="LOGTAP_LOG_DIRECTORY",
    ),
) -> None:
    """
    Start the logtap API server.

    Example:
        logtap serve
        logtap serve --port 9000
        logtap serve --api-key mysecretkey
    """
    # The docstring above is the command's --help text and is kept verbatim.

    import os

    import uvicorn

    # Export the CLI settings as LOGTAP_* environment variables before the
    # app module is loaded by uvicorn. The API key is only exported when one
    # was given, so an unset key leaves the environment untouched.
    exported = {
        "LOGTAP_HOST": host,
        "LOGTAP_PORT": str(port),
        "LOGTAP_LOG_DIRECTORY": log_dir,
    }
    if api_key:
        exported["LOGTAP_API_KEY"] = api_key
    os.environ.update(exported)

    # Startup banner
    console.print("[bold green]Starting logtap server[/bold green]")
    console.print(f" [dim]Host:[/dim] {host}")
    console.print(f" [dim]Port:[/dim] {port}")
    console.print(f" [dim]Log directory:[/dim] {log_dir}")
    console.print(f" [dim]Auth:[/dim] {'enabled' if api_key else 'disabled'}")
    console.print()
    console.print(f"[dim]API docs available at[/dim] http://{host}:{port}/docs")
    console.print()

    # The app is passed as an import string rather than an object.
    server_options = {"host": host, "port": port, "reload": reload}
    uvicorn.run("logtap.api.app:app", **server_options)
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
"""Tail command for logtap CLI - real-time log streaming."""
|
|
2
|
+
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
import typer
|
|
6
|
+
from rich.console import Console
|
|
7
|
+
|
|
8
|
+
console = Console()
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def tail(
    filename: str = typer.Argument(
        "syslog",
        help="Name of the log file to tail.",
    ),
    server: str = typer.Option(
        "http://localhost:8000",
        "--server",
        "-s",
        help="URL of the logtap server.",
        envvar="LOGTAP_SERVER",
    ),
    follow: bool = typer.Option(
        False,
        "--follow",
        "-f",
        help="Follow log output (like tail -f). Requires WebSocket support.",
    ),
    lines: int = typer.Option(
        10,
        "--lines",
        "-n",
        help="Number of lines to show initially.",
    ),
    api_key: Optional[str] = typer.Option(
        None,
        "--api-key",
        "-k",
        help="API key for authentication.",
        envvar="LOGTAP_API_KEY",
    ),
) -> None:
    """
    Tail a log file, optionally following new entries.

    Example:
        logtap tail syslog
        logtap tail auth.log -f
        logtap tail syslog --lines 100
    """
    # The docstring above is the command's --help text and is kept verbatim.
    #
    # Flow: fetch the last `lines` entries over HTTP and print them; with
    # --follow, then open a WebSocket to /logs/stream and print each message
    # until interrupted. Exits with status 1 if the initial fetch fails.

    import httpx
    from rich.markup import escape

    # A trailing slash on --server would otherwise yield ".../​/logs".
    base_url = server.rstrip("/")

    # First, get initial lines
    headers = {}
    if api_key:
        headers["X-API-Key"] = api_key

    params = {
        "filename": filename,
        "limit": lines,
    }

    try:
        with httpx.Client(timeout=30.0) as client:
            response = client.get(f"{base_url}/logs", params=params, headers=headers)

        if response.status_code != 200:
            # The error body is not guaranteed to be a JSON object (e.g. an
            # HTML page from a proxy); fall back to the raw text in that case.
            try:
                payload = response.json()
            except ValueError:
                payload = None
            if isinstance(payload, dict):
                error_detail = payload.get("detail", response.text)
            else:
                error_detail = response.text
            console.print(f"[bold red]Error:[/bold red] {escape(str(error_detail))}")
            raise typer.Exit(1)

        data = response.json()
        log_lines = data.get("lines", [])

        # Print initial lines
        for line in log_lines:
            # Log text is data, not Rich markup: "[error]" must be shown
            # literally instead of being parsed as a style tag.
            console.print(line, markup=False)

        if follow:
            console.print()
            console.print("[dim]Streaming new entries... (Ctrl+C to stop)[/dim]")
            console.print()

            # Stream new entries via WebSocket
            import asyncio
            from urllib.parse import urlencode

            async def stream_logs():
                import websockets

                ws_url = base_url.replace("http://", "ws://").replace("https://", "wss://")
                # URL-encode the filename so spaces, '&', '#', etc. cannot
                # corrupt the query string.
                ws_url = f"{ws_url}/logs/stream?{urlencode({'filename': filename})}"

                extra_headers = {}
                if api_key:
                    extra_headers["X-API-Key"] = api_key

                # NOTE(review): `extra_headers` and `InvalidStatusCode` are the
                # legacy websockets client API names; newer websockets releases
                # appear to have renamed them — confirm against the pinned version.
                try:
                    async with websockets.connect(ws_url, extra_headers=extra_headers) as ws:
                        async for message in ws:
                            console.print(message, markup=False)
                except websockets.exceptions.InvalidStatusCode as e:
                    if e.status_code == 404:
                        console.print("[yellow]Streaming not available.[/yellow]")
                        console.print("[dim]Server may need updating.[/dim]")
                    else:
                        console.print(f"[red]WebSocket error: {escape(str(e))}[/red]")
                except Exception as e:
                    # Streaming failures are reported but do not change the
                    # exit status: the initial lines were already delivered.
                    console.print(f"[red]Streaming error: {escape(str(e))}[/red]")

            try:
                asyncio.run(stream_logs())
            except KeyboardInterrupt:
                console.print("\n[dim]Stopped.[/dim]")

    except typer.Exit:
        # typer.Exit derives from RuntimeError; without this clause the
        # generic handler below would catch our own exit request and print
        # a second, meaningless "Error: 1" line.
        raise
    except httpx.ConnectError:
        console.print(f"[bold red]Error:[/bold red] Could not connect to {server}")
        console.print("[dim]Is the logtap server running? Start it with 'logtap serve'[/dim]")
        raise typer.Exit(1)
    except Exception as e:
        console.print(f"[bold red]Error:[/bold red] {escape(str(e))}")
        raise typer.Exit(1)
|
logtap/cli/main.py
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
"""Main CLI application for logtap."""
|
|
2
|
+
|
|
3
|
+
import typer
|
|
4
|
+
|
|
5
|
+
from logtap import __version__
|
|
6
|
+
from logtap.cli.commands import files, query, serve, tail
|
|
7
|
+
|
|
8
|
+
app = typer.Typer(
|
|
9
|
+
name="logtap",
|
|
10
|
+
help="A CLI-first log access tool for Unix systems. Remote log file access without SSH.",
|
|
11
|
+
rich_markup_mode="rich",
|
|
12
|
+
no_args_is_help=True,
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def version_callback(value: bool) -> None:
    """Echo ``logtap <version>`` and stop the CLI when the flag was given."""
    if not value:
        # Flag absent: let normal command processing continue.
        return
    typer.echo(f"logtap {__version__}")
    raise typer.Exit()
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@app.callback()
def main(
    version: bool = typer.Option(
        None,
        "--version",
        "-v",
        help="Show version and exit.",
        callback=version_callback,
        is_eager=True,
    ),
) -> None:
    """
    logtap - Remote log file access without SSH.

    Start a server with 'logtap serve' or query a remote server with 'logtap query'.
    """
    # Intentionally empty: this callback only exists to attach the eager
    # --version option; the option's own callback does the work.
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
# Register each command function on the app, in the same order as before
# (serve, query, tail, files).
for _command in (serve.serve, query.query, tail.tail, files.files):
    app.command()(_command)
del _command  # do not leave the loop variable in the module namespace


if __name__ == "__main__":
    app()
|
logtap/core/__init__.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
"""Core business logic for logtap."""
|
|
2
|
+
|
|
3
|
+
from logtap.core.reader import get_file_lines, get_file_lines_async, tail, tail_async
|
|
4
|
+
from logtap.core.search import filter_lines
|
|
5
|
+
from logtap.core.validation import is_filename_valid, is_limit_valid, is_search_term_valid
|
|
6
|
+
|
|
7
|
+
__all__ = [
|
|
8
|
+
"tail",
|
|
9
|
+
"tail_async",
|
|
10
|
+
"get_file_lines",
|
|
11
|
+
"get_file_lines_async",
|
|
12
|
+
"is_filename_valid",
|
|
13
|
+
"is_search_term_valid",
|
|
14
|
+
"is_limit_valid",
|
|
15
|
+
"filter_lines",
|
|
16
|
+
]
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
"""Log format parsers for logtap."""
|
|
2
|
+
|
|
3
|
+
from logtap.core.parsers.apache import ApacheParser
|
|
4
|
+
from logtap.core.parsers.auto import AutoParser, detect_format
|
|
5
|
+
from logtap.core.parsers.base import LogLevel, LogParser, ParsedLogEntry
|
|
6
|
+
from logtap.core.parsers.json_parser import JsonLogParser
|
|
7
|
+
from logtap.core.parsers.nginx import NginxParser
|
|
8
|
+
from logtap.core.parsers.syslog import SyslogParser
|
|
9
|
+
|
|
10
|
+
__all__ = [
|
|
11
|
+
"LogParser",
|
|
12
|
+
"ParsedLogEntry",
|
|
13
|
+
"LogLevel",
|
|
14
|
+
"SyslogParser",
|
|
15
|
+
"JsonLogParser",
|
|
16
|
+
"NginxParser",
|
|
17
|
+
"ApacheParser",
|
|
18
|
+
"AutoParser",
|
|
19
|
+
"detect_format",
|
|
20
|
+
]
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
"""Apache access log parser."""
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
from typing import Optional
|
|
6
|
+
|
|
7
|
+
from logtap.core.parsers.base import LogLevel, LogParser, ParsedLogEntry
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class ApacheParser(LogParser):
    """
    Parser for Apache access and error logs.

    Access lines use the common/combined format:
        %h %l %u %t "%r" %>s %b "%{Referer}i" "%{User-Agent}i"

    Example:
        192.168.1.1 - frank [08/Jan/2024:10:23:45 -0500] "GET / HTTP/1.0" 200 1234

    Error lines are accepted in both the classic form
        [Wed Oct 11 14:32:52 2000] [error] [client 127.0.0.1] message
    and the Apache 2.4 form, where the level is prefixed by the module name
    and the pid may carry a thread id:
        [Fri Sep 09 10:42:29.902022 2011] [core:error] [pid 35708:tid 4328636416] [client 72.15.99.187] message
    """

    # Combined log format pattern (same as nginx essentially).
    # Referer and user agent are optional so the plain "common" format
    # matches as well; unmatched optional groups come back as None.
    PATTERN = re.compile(
        r"^(\S+)\s+"  # Remote host
        r"(\S+)\s+"  # Identity
        r"(\S+)\s+"  # Remote user
        r"\[([^\]]+)\]\s+"  # Time
        r'"([^"]*)"\s+'  # Request
        r"(\d{3})\s+"  # Status
        r"(\d+|-)\s*"  # Bytes
        r'(?:"([^"]*)"\s*)?'  # Referer
        r'(?:"([^"]*)")?'  # User agent
    )

    # Error log pattern. The "module:" prefix and ":tid N" suffix are matched
    # with non-capturing groups so the five capture groups keep their indices.
    ERROR_PATTERN = re.compile(
        r"^\[([^\]]+)\]\s+"  # Timestamp
        r"\[(?:[\w.-]+:)?(\w+)\]\s+"  # Level, optionally "module:level"
        r"(?:\[pid\s+(\d+)(?::tid\s+\d+)?\]\s+)?"  # PID (optional), optional tid
        r"(?:\[client\s+([^\]]+)\]\s+)?"  # Client (optional)
        r"(.*)$"  # Message
    )

    @property
    def name(self) -> str:
        """Identifier of this parser."""
        return "apache"

    def can_parse(self, line: str) -> bool:
        """Return True if the line matches the access or the error format."""
        return bool(self.PATTERN.match(line) or self.ERROR_PATTERN.match(line))

    def parse(self, line: str) -> ParsedLogEntry:
        """
        Parse an apache log line.

        The access format is tried first, then the error format. A line that
        matches neither is returned unparsed, with only a content-based level.
        """
        access_match = self.PATTERN.match(line)
        if access_match:
            return self._parse_access_log(line, access_match)

        error_match = self.ERROR_PATTERN.match(line)
        if error_match:
            return self._parse_error_log(line, error_match)

        return ParsedLogEntry(
            raw=line,
            message=line,
            level=self._detect_level_from_content(line),
        )

    def _parse_access_log(self, line: str, match: re.Match) -> ParsedLogEntry:
        """Build an entry from a PATTERN match; the level comes from the HTTP status."""
        # PATTERN always yields exactly nine groups; the identity field
        # (index 1) is not used.
        groups = match.groups()
        remote_host = groups[0]
        remote_user = groups[2] if groups[2] != "-" else None
        time_str = groups[3]
        request = groups[4]
        status = int(groups[5])
        bytes_sent = int(groups[6]) if groups[6] != "-" else 0
        # A missing referer group is None, which also falls through to None here.
        referer = groups[7] if groups[7] != "-" else None
        # NOTE(review): unlike referer and remote_user, a "-" user agent is
        # kept as the literal "-"; confirm whether callers rely on that.
        user_agent = groups[8]

        timestamp = self._parse_apache_time(time_str)
        level = self._status_to_level(status)

        # The request line is "METHOD PATH PROTOCOL"; it may be empty or "-".
        request_parts = request.split() if request else []
        method = request_parts[0] if len(request_parts) > 0 else None
        path = request_parts[1] if len(request_parts) > 1 else None

        return ParsedLogEntry(
            raw=line,
            message=f"{method} {path} -> {status}" if method and path else request,
            timestamp=timestamp,
            level=level,
            source=remote_host,
            metadata={
                "remote_host": remote_host,
                "remote_user": remote_user,
                "request": request,
                "method": method,
                "path": path,
                "status": status,
                "bytes_sent": bytes_sent,
                "referer": referer,
                "user_agent": user_agent,
                "log_type": "access",
            },
        )

    def _parse_error_log(self, line: str, match: re.Match) -> ParsedLogEntry:
        """Build an entry from an ERROR_PATTERN match."""
        time_str, level_str, pid, client, message = match.groups()

        timestamp = self._parse_error_time(time_str)
        # Fall back to scanning the message when the bracketed level is not
        # one LogLevel.from_string recognises.
        level = LogLevel.from_string(level_str) or self._detect_level_from_content(message)

        return ParsedLogEntry(
            raw=line,
            message=message,
            timestamp=timestamp,
            level=level,
            source=client,
            metadata={
                "pid": pid,
                "client": client,
                "log_type": "error",
            },
        )

    def _parse_apache_time(self, time_str: str) -> Optional[datetime]:
        """
        Parse apache time format: 08/Jan/2024:10:23:45 -0500

        Returns a naive datetime, or None if the text does not match.
        """
        try:
            # Only the part before the first space is parsed, so the UTC
            # offset is discarded and the result carries no timezone.
            time_str = time_str.split()[0] if " " in time_str else time_str
            return datetime.strptime(time_str, "%d/%b/%Y:%H:%M:%S")
        except ValueError:
            return None

    def _parse_error_time(self, time_str: str) -> Optional[datetime]:
        """Parse an error-log timestamp, trying each known layout in turn."""
        formats = [
            "%a %b %d %H:%M:%S.%f %Y",
            "%a %b %d %H:%M:%S %Y",
            "%Y-%m-%d %H:%M:%S.%f",
            "%Y-%m-%d %H:%M:%S",
        ]
        for fmt in formats:
            try:
                return datetime.strptime(time_str, fmt)
            except ValueError:
                continue
        return None

    def _status_to_level(self, status: int) -> LogLevel:
        """Convert HTTP status code to log level (5xx error, 4xx warning, 3xx notice)."""
        if status >= 500:
            return LogLevel.ERROR
        elif status >= 400:
            return LogLevel.WARNING
        elif status >= 300:
            return LogLevel.NOTICE
        else:
            return LogLevel.INFO
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
"""Auto-detection and parsing of log formats."""
|
|
2
|
+
|
|
3
|
+
from typing import List, Optional, Type
|
|
4
|
+
|
|
5
|
+
from logtap.core.parsers.apache import ApacheParser
|
|
6
|
+
from logtap.core.parsers.base import LogParser, ParsedLogEntry
|
|
7
|
+
from logtap.core.parsers.json_parser import JsonLogParser
|
|
8
|
+
from logtap.core.parsers.nginx import NginxParser
|
|
9
|
+
from logtap.core.parsers.syslog import SyslogParser
|
|
10
|
+
|
|
11
|
+
# Parser priority order (more specific formats first)
|
|
12
|
+
PARSERS: List[Type[LogParser]] = [
|
|
13
|
+
JsonLogParser, # JSON is very specific
|
|
14
|
+
NginxParser, # Nginx before Apache (almost identical)
|
|
15
|
+
ApacheParser,
|
|
16
|
+
SyslogParser, # Syslog is more generic, try last
|
|
17
|
+
]
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def detect_format(lines: List[str], sample_size: int = 10) -> Optional[LogParser]:
    """
    Detect the log format by sampling lines.

    The sample is the first ``sample_size`` non-blank lines, so leading or
    interspersed blank lines do not use up sample slots. Each parser in
    PARSERS is scored by the fraction of sampled lines it accepts.

    Args:
        lines: List of log lines to analyze.
        sample_size: Number of non-blank lines to sample for detection.

    Returns:
        A new instance of the best-scoring parser, or None when there are no
        non-blank lines or no parser accepts any sampled line. On a tie the
        parser listed earlier in PARSERS wins.
    """
    if not lines:
        return None

    # Collect the sample once; stop scanning as soon as it is full.
    sample: List[str] = []
    for line in lines:
        if len(sample) >= sample_size:
            break
        if line.strip():
            sample.append(line)

    if not sample:
        return None

    # Try each parser and record its match rate.
    sample_count = len(sample)
    parser_scores = {}
    for parser_cls in PARSERS:
        parser = parser_cls()
        matches = sum(1 for line in sample if parser.can_parse(line))
        if matches > 0:
            parser_scores[parser_cls] = matches / sample_count

    if not parser_scores:
        return None

    # max() returns the first maximal key in insertion order, which is the
    # PARSERS priority order.
    best_parser_cls = max(parser_scores, key=parser_scores.get)
    return best_parser_cls()
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class AutoParser(LogParser):
    """
    Log parser that picks the concrete format parser by itself.

    It remembers the last parser that accepted a line and offers every new
    line to that parser first, falling back to the full PARSERS list when the
    remembered parser does not accept the line.
    """

    def __init__(self):
        # One instance per known format, in PARSERS priority order.
        self._parsers = [parser_cls() for parser_cls in PARSERS]
        # Parser currently in use; None until a line has been recognised.
        self._detected_parser: Optional[LogParser] = None

    @property
    def name(self) -> str:
        """``auto:<format>`` once a format is known, plain ``auto`` before that."""
        active = self._detected_parser
        return f"auto:{active.name}" if active else "auto"

    def can_parse(self, line: str) -> bool:
        """Always True: unrecognised lines are still returned as raw entries."""
        return True

    def parse(self, line: str) -> ParsedLogEntry:
        """Parse one line, detecting (or re-detecting) the format when required."""
        text = line.strip()

        # Blank input yields an empty entry without touching detection state.
        if not text:
            return ParsedLogEntry(raw=text, message=text)

        # Fast path: the remembered parser still accepts this line.
        active = self._detected_parser
        if active and active.can_parse(text):
            return active.parse(text)

        # Otherwise take the first parser, in priority order, that accepts it
        # and remember it for the following lines.
        candidate = next((p for p in self._parsers if p.can_parse(text)), None)
        if candidate is not None:
            self._detected_parser = candidate
            return candidate.parse(text)

        # Nothing matched: hand back the raw text with a content-based level.
        return ParsedLogEntry(
            raw=text,
            message=text,
            level=self._detect_level_from_content(text),
        )

    def parse_many(self, lines: List[str]) -> List[ParsedLogEntry]:
        """
        Parse a batch of lines.

        The format is first detected from the batch via detect_format(), then
        every line is passed through parse().
        """
        if not lines:
            return []

        # Seed the remembered parser from the batch before parsing.
        self._detected_parser = detect_format(lines)

        parsed: List[ParsedLogEntry] = []
        for entry in lines:
            parsed.append(self.parse(entry))
        return parsed

    def reset(self):
        """Forget the remembered parser so the next line triggers detection again."""
        self._detected_parser = None
|