logtap-0.2.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- logtap/__init__.py +8 -0
- logtap/__main__.py +6 -0
- logtap/api/__init__.py +5 -0
- logtap/api/app.py +45 -0
- logtap/api/dependencies.py +61 -0
- logtap/api/routes/__init__.py +1 -0
- logtap/api/routes/files.py +38 -0
- logtap/api/routes/health.py +19 -0
- logtap/api/routes/logs.py +249 -0
- logtap/api/routes/parsed.py +102 -0
- logtap/cli/__init__.py +1 -0
- logtap/cli/commands/__init__.py +1 -0
- logtap/cli/commands/files.py +86 -0
- logtap/cli/commands/query.py +127 -0
- logtap/cli/commands/serve.py +78 -0
- logtap/cli/commands/tail.py +121 -0
- logtap/cli/main.py +50 -0
- logtap/core/__init__.py +16 -0
- logtap/core/parsers/__init__.py +20 -0
- logtap/core/parsers/apache.py +165 -0
- logtap/core/parsers/auto.py +118 -0
- logtap/core/parsers/base.py +164 -0
- logtap/core/parsers/json_parser.py +119 -0
- logtap/core/parsers/nginx.py +108 -0
- logtap/core/parsers/syslog.py +80 -0
- logtap/core/reader.py +160 -0
- logtap/core/search.py +142 -0
- logtap/core/validation.py +52 -0
- logtap/models/__init__.py +11 -0
- logtap/models/config.py +39 -0
- logtap/models/responses.py +65 -0
- logtap-0.2.0.dist-info/METADATA +317 -0
- logtap-0.2.0.dist-info/RECORD +36 -0
- logtap-0.2.0.dist-info/WHEEL +4 -0
- logtap-0.2.0.dist-info/entry_points.txt +3 -0
- logtap-0.2.0.dist-info/licenses/LICENSE +674 -0

logtap/core/parsers/base.py
ADDED
@@ -0,0 +1,164 @@

```python
"""Base classes for log parsers."""

from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum
from typing import Any, Dict, List, Optional


class LogLevel(str, Enum):
    """Standard log severity levels."""

    EMERGENCY = "EMERGENCY"  # System is unusable
    ALERT = "ALERT"  # Action must be taken immediately
    CRITICAL = "CRITICAL"  # Critical conditions
    ERROR = "ERROR"  # Error conditions
    WARNING = "WARNING"  # Warning conditions
    NOTICE = "NOTICE"  # Normal but significant
    INFO = "INFO"  # Informational
    DEBUG = "DEBUG"  # Debug-level messages

    @classmethod
    def from_string(cls, level: str) -> Optional["LogLevel"]:
        """Parse a log level from string."""
        level_map = {
            # Standard names
            "emergency": cls.EMERGENCY,
            "emerg": cls.EMERGENCY,
            "alert": cls.ALERT,
            "critical": cls.CRITICAL,
            "crit": cls.CRITICAL,
            "error": cls.ERROR,
            "err": cls.ERROR,
            "warning": cls.WARNING,
            "warn": cls.WARNING,
            "notice": cls.NOTICE,
            "info": cls.INFO,
            "information": cls.INFO,
            "informational": cls.INFO,
            "debug": cls.DEBUG,
            # Numeric syslog levels
            "0": cls.EMERGENCY,
            "1": cls.ALERT,
            "2": cls.CRITICAL,
            "3": cls.ERROR,
            "4": cls.WARNING,
            "5": cls.NOTICE,
            "6": cls.INFO,
            "7": cls.DEBUG,
        }
        return level_map.get(level.lower())

    @property
    def severity(self) -> int:
        """Get numeric severity (0=most severe, 7=least severe)."""
        severity_map = {
            LogLevel.EMERGENCY: 0,
            LogLevel.ALERT: 1,
            LogLevel.CRITICAL: 2,
            LogLevel.ERROR: 3,
            LogLevel.WARNING: 4,
            LogLevel.NOTICE: 5,
            LogLevel.INFO: 6,
            LogLevel.DEBUG: 7,
        }
        return severity_map[self]


@dataclass
class ParsedLogEntry:
    """A parsed log entry with structured fields."""

    raw: str  # Original log line
    message: str  # Main message content
    timestamp: Optional[datetime] = None
    level: Optional[LogLevel] = None
    source: Optional[str] = None  # hostname, process, etc.
    metadata: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary for JSON serialization."""
        return {
            "raw": self.raw,
            "message": self.message,
            "timestamp": self.timestamp.isoformat() if self.timestamp else None,
            "level": self.level.value if self.level else None,
            "source": self.source,
            "metadata": self.metadata,
        }


class LogParser(ABC):
    """Abstract base class for log parsers."""

    @property
    @abstractmethod
    def name(self) -> str:
        """Name of this parser format."""
        pass

    @abstractmethod
    def can_parse(self, line: str) -> bool:
        """
        Check if this parser can handle the given line.

        Args:
            line: A log line to check.

        Returns:
            True if this parser can parse the line.
        """
        pass

    @abstractmethod
    def parse(self, line: str) -> ParsedLogEntry:
        """
        Parse a log line into structured format.

        Args:
            line: The log line to parse.

        Returns:
            A ParsedLogEntry with extracted fields.
        """
        pass

    def parse_many(self, lines: List[str]) -> List[ParsedLogEntry]:
        """
        Parse multiple log lines.

        Args:
            lines: List of log lines to parse.

        Returns:
            List of ParsedLogEntry objects.
        """
        return [self.parse(line) for line in lines]

    def _detect_level_from_content(self, content: str) -> Optional[LogLevel]:
        """
        Detect log level from message content.

        Common patterns like "ERROR:", "[error]", etc.
        """
        content_lower = content.lower()

        # Check for explicit level indicators
        level_patterns = [
            (["emergency", "emerg"], LogLevel.EMERGENCY),
            (["alert"], LogLevel.ALERT),
            (["critical", "crit", "fatal"], LogLevel.CRITICAL),
            (["error", "err", "fail", "failed"], LogLevel.ERROR),
            (["warning", "warn"], LogLevel.WARNING),
            (["notice"], LogLevel.NOTICE),
            (["info"], LogLevel.INFO),
            (["debug", "trace"], LogLevel.DEBUG),
        ]

        for patterns, level in level_patterns:
            for pattern in patterns:
                if pattern in content_lower:
                    return level

        return None
```
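
This abstract base is the extension point for format support. The following sketch (not part of the package) shows what a minimal subclass looks like; the `KeyValueParser` name and its `key=value` line format are invented for illustration:

```python
from logtap.core.parsers.base import LogLevel, LogParser, ParsedLogEntry


class KeyValueParser(LogParser):
    """Hypothetical parser for 'key=value key=value' lines (illustration only)."""

    @property
    def name(self) -> str:
        return "keyvalue"

    def can_parse(self, line: str) -> bool:
        return "=" in line and not line.lstrip().startswith("{")

    def parse(self, line: str) -> ParsedLogEntry:
        # Each whitespace-separated token with an "=" becomes a metadata pair.
        pairs = dict(part.split("=", 1) for part in line.split() if "=" in part)
        return ParsedLogEntry(
            raw=line,
            message=pairs.get("msg", line),
            # _detect_level_from_content() is inherited from LogParser.
            level=self._detect_level_from_content(line),
            metadata=pairs,
        )


entry = KeyValueParser().parse("level=error msg=disk_full dev=sda1")
assert entry.level == LogLevel.ERROR  # "error" is picked up by the inherited detector
print(entry.to_dict()["metadata"]["dev"])  # sda1
```

Because `parse_many()` and `_detect_level_from_content()` are concrete on the base class, a subclass only has to implement `name`, `can_parse()`, and `parse()`.
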

logtap/core/parsers/json_parser.py
ADDED
@@ -0,0 +1,119 @@

```python
"""JSON log format parser."""

import json
from datetime import datetime
from typing import Any, Dict, Optional

from logtap.core.parsers.base import LogLevel, LogParser, ParsedLogEntry


class JsonLogParser(LogParser):
    """
    Parser for JSON-formatted log lines.

    Handles common JSON log formats with fields like:
    - message, msg, log
    - level, severity, loglevel
    - timestamp, time, @timestamp, ts
    - source, logger, name
    """

    # Common field names for each attribute
    MESSAGE_FIELDS = ["message", "msg", "log", "text", "body"]
    LEVEL_FIELDS = ["level", "severity", "loglevel", "log_level", "lvl"]
    TIMESTAMP_FIELDS = ["timestamp", "time", "@timestamp", "ts", "datetime", "date"]
    SOURCE_FIELDS = ["source", "logger", "name", "service", "app", "application"]

    @property
    def name(self) -> str:
        return "json"

    def can_parse(self, line: str) -> bool:
        """Check if line is valid JSON."""
        line = line.strip()
        if not line.startswith("{"):
            return False
        try:
            json.loads(line)
            return True
        except (json.JSONDecodeError, ValueError):
            return False

    def parse(self, line: str) -> ParsedLogEntry:
        """Parse a JSON log line."""
        try:
            data = json.loads(line.strip())
        except (json.JSONDecodeError, ValueError):
            return ParsedLogEntry(
                raw=line,
                message=line,
                level=self._detect_level_from_content(line),
            )

        # Extract message
        message = self._get_field(data, self.MESSAGE_FIELDS, line)

        # Extract level
        level_str = self._get_field(data, self.LEVEL_FIELDS)
        level = None
        if level_str:
            level = LogLevel.from_string(str(level_str))
        if not level:
            level = self._detect_level_from_content(message)

        # Extract timestamp
        timestamp_str = self._get_field(data, self.TIMESTAMP_FIELDS)
        timestamp = self._parse_timestamp(timestamp_str) if timestamp_str else None

        # Extract source
        source = self._get_field(data, self.SOURCE_FIELDS)

        return ParsedLogEntry(
            raw=line,
            message=message,
            timestamp=timestamp,
            level=level,
            source=source,
            metadata=data,
        )

    def _get_field(self, data: Dict[str, Any], field_names: list, default: Any = None) -> Any:
        """Get first matching field from data."""
        for field in field_names:
            if field in data:
                return data[field]
            # Check case-insensitive
            for key in data:
                if key.lower() == field.lower():
                    return data[key]
        return default

    def _parse_timestamp(self, value: Any) -> Optional[datetime]:
        """Parse timestamp from various formats."""
        if isinstance(value, datetime):
            return value

        if isinstance(value, (int, float)):
            # Unix timestamp
            try:
                return datetime.fromtimestamp(value)
            except (ValueError, OSError):
                pass

        if isinstance(value, str):
            # Try common formats
            formats = [
                "%Y-%m-%dT%H:%M:%S.%fZ",
                "%Y-%m-%dT%H:%M:%SZ",
                "%Y-%m-%dT%H:%M:%S.%f",
                "%Y-%m-%dT%H:%M:%S",
                "%Y-%m-%d %H:%M:%S.%f",
                "%Y-%m-%d %H:%M:%S",
            ]
            for fmt in formats:
                try:
                    return datetime.strptime(value, fmt)
                except ValueError:
                    continue

        return None
```
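
A quick usage sketch, inferred from the code above rather than taken from the package's documentation, showing how the field aliases map onto the structured entry:

```python
from logtap.core.parsers.base import LogLevel
from logtap.core.parsers.json_parser import JsonLogParser

parser = JsonLogParser()
line = '{"ts": "2024-01-08T10:23:45Z", "severity": "warn", "msg": "cache miss", "service": "api"}'

assert parser.can_parse(line)
entry = parser.parse(line)
assert entry.level == LogLevel.WARNING  # via the "severity" alias
assert entry.source == "api"            # via the "service" alias
print(entry.message)    # cache miss    (via the "msg" alias)
print(entry.timestamp)  # 2024-01-08 10:23:45 (parsed from the "ts" alias)
```

Note that the whole decoded object is kept in `metadata`, so fields without a recognized alias are still available to callers.
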

logtap/core/parsers/nginx.py
ADDED
@@ -0,0 +1,108 @@

```python
"""Nginx access log parser."""

import re
from datetime import datetime
from typing import Optional

from logtap.core.parsers.base import LogLevel, LogParser, ParsedLogEntry


class NginxParser(LogParser):
    """
    Parser for Nginx access log format (combined log format).

    Example:
        192.168.1.1 - - [08/Jan/2024:10:23:45 +0000] "GET /api HTTP/1.1" 200 45
    """

    # Combined log format pattern
    PATTERN = re.compile(
        r"^(\S+)\s+"  # Remote address
        r"(\S+)\s+"  # Identity (usually -)
        r"(\S+)\s+"  # Remote user (usually -)
        r"\[([^\]]+)\]\s+"  # Time
        r'"([^"]*)"\s+'  # Request
        r"(\d{3})\s+"  # Status
        r"(\d+|-)\s*"  # Bytes
        r'(?:"([^"]*)"\s*)?'  # Referer (optional)
        r'(?:"([^"]*)")?'  # User agent (optional)
    )

    @property
    def name(self) -> str:
        return "nginx"

    def can_parse(self, line: str) -> bool:
        """Check if line matches nginx format."""
        return bool(self.PATTERN.match(line))

    def parse(self, line: str) -> ParsedLogEntry:
        """Parse an nginx access log line."""
        match = self.PATTERN.match(line)

        if not match:
            return ParsedLogEntry(
                raw=line,
                message=line,
                level=self._detect_level_from_content(line),
            )

        groups = match.groups()
        remote_addr = groups[0]
        remote_user = groups[2] if groups[2] != "-" else None
        time_str = groups[3]
        request = groups[4]
        status = int(groups[5])
        bytes_sent = int(groups[6]) if groups[6] != "-" else 0
        referer = groups[7] if len(groups) > 7 and groups[7] != "-" else None
        user_agent = groups[8] if len(groups) > 8 else None

        # Parse timestamp
        timestamp = self._parse_nginx_time(time_str)

        # Determine level based on status code
        level = self._status_to_level(status)

        # Parse request method and path
        request_parts = request.split() if request else []
        method = request_parts[0] if len(request_parts) > 0 else None
        path = request_parts[1] if len(request_parts) > 1 else None

        return ParsedLogEntry(
            raw=line,
            message=f"{method} {path} -> {status}" if method and path else request,
            timestamp=timestamp,
            level=level,
            source=remote_addr,
            metadata={
                "remote_addr": remote_addr,
                "remote_user": remote_user,
                "request": request,
                "method": method,
                "path": path,
                "status": status,
                "bytes_sent": bytes_sent,
                "referer": referer,
                "user_agent": user_agent,
            },
        )

    def _parse_nginx_time(self, time_str: str) -> Optional[datetime]:
        """Parse nginx time format: 08/Jan/2024:10:23:45 +0000"""
        try:
            # Remove timezone for simpler parsing
            time_str = time_str.split()[0] if " " in time_str else time_str
            return datetime.strptime(time_str, "%d/%b/%Y:%H:%M:%S")
        except ValueError:
            return None

    def _status_to_level(self, status: int) -> LogLevel:
        """Convert HTTP status code to log level."""
        if status >= 500:
            return LogLevel.ERROR
        elif status >= 400:
            return LogLevel.WARNING
        elif status >= 300:
            return LogLevel.NOTICE
        else:
            return LogLevel.INFO
    ```
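
The status-to-level mapping means HTTP semantics drive severity: 5xx becomes ERROR, 4xx WARNING, 3xx NOTICE, everything else INFO. A sketch with a fabricated sample line:

```python
from logtap.core.parsers.base import LogLevel
from logtap.core.parsers.nginx import NginxParser

parser = NginxParser()
line = '203.0.113.7 - alice [08/Jan/2024:10:23:45 +0000] "POST /login HTTP/1.1" 503 1234 "-" "curl/8.4.0"'

assert parser.can_parse(line)
entry = parser.parse(line)
assert entry.level == LogLevel.ERROR  # 5xx maps to ERROR
print(entry.message)                  # POST /login -> 503
print(entry.metadata["remote_user"])  # alice
print(entry.metadata["user_agent"])   # curl/8.4.0
```
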

logtap/core/parsers/syslog.py
ADDED
@@ -0,0 +1,80 @@

```python
"""Syslog format parser."""

import re
from datetime import datetime

from logtap.core.parsers.base import LogParser, ParsedLogEntry


class SyslogParser(LogParser):
    """
    Parser for standard syslog format.

    Handles formats like:
    - Jan 8 10:23:45 hostname process[pid]: message
    - Jan 8 10:23:45 hostname process: message
    """

    # Syslog pattern: Month Day HH:MM:SS hostname process[pid]: message
    PATTERN = re.compile(
        r"^(\w{3}\s+\d{1,2}\s+\d{2}:\d{2}:\d{2})\s+"  # Timestamp
        r"(\S+)\s+"  # Hostname
        r"(\S+?)(?:\[(\d+)\])?:\s+"  # Process[PID]
        r"(.*)$"  # Message
    )

    # Alternative pattern without PID brackets
    PATTERN_ALT = re.compile(
        r"^(\w{3}\s+\d{1,2}\s+\d{2}:\d{2}:\d{2})\s+" r"(\S+)\s+" r"(\S+):\s+" r"(.*)$"
    )

    @property
    def name(self) -> str:
        return "syslog"

    def can_parse(self, line: str) -> bool:
        """Check if line matches syslog format."""
        return bool(self.PATTERN.match(line) or self.PATTERN_ALT.match(line))

    def parse(self, line: str) -> ParsedLogEntry:
        """Parse a syslog line."""
        match = self.PATTERN.match(line)

        if match:
            timestamp_str, hostname, process, pid, message = match.groups()
        else:
            match = self.PATTERN_ALT.match(line)
            if match:
                timestamp_str, hostname, process, message = match.groups()
                pid = None
            else:
                # Can't parse, return basic entry
                return ParsedLogEntry(
                    raw=line,
                    message=line,
                    level=self._detect_level_from_content(line),
                )

        # Parse timestamp (assume current year)
        try:
            timestamp = datetime.strptime(timestamp_str, "%b %d %H:%M:%S")
            # Set to current year
            timestamp = timestamp.replace(year=datetime.now().year)
        except ValueError:
            timestamp = None

        # Detect level from message
        level = self._detect_level_from_content(message)

        return ParsedLogEntry(
            raw=line,
            message=message,
            timestamp=timestamp,
            level=level,
            source=f"{hostname}/{process}",
            metadata={
                "hostname": hostname,
                "process": process,
                "pid": pid,
            },
        )
```
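
A sketch of the resulting entry for a typical auth-log line (sample data, not from the package). Since classic syslog timestamps carry no year, the parser stamps the current year onto the parsed date:

```python
from logtap.core.parsers.base import LogLevel
from logtap.core.parsers.syslog import SyslogParser

parser = SyslogParser()
line = "Jan  8 10:23:45 web-01 sshd[1234]: Failed password for root from 198.51.100.9"

entry = parser.parse(line)
print(entry.source)           # web-01/sshd
print(entry.metadata["pid"])  # 1234 (kept as a string)
assert entry.level == LogLevel.ERROR  # "fail" matches the content detector
print(entry.timestamp)        # Jan 8 of the current year, 10:23:45
```
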
logtap/core/reader.py
ADDED
@@ -0,0 +1,160 @@

```python
"""
Core file reading functionality for logtap.

The tail() function is the heart of logtap: it reads files from the end
backwards in fixed-size chunks, so it reads only as much of the file as is
needed to return the requested lines instead of loading entire files into memory.
"""

from os import SEEK_END
from typing import IO, List, Optional, Tuple

import aiofiles


def tail(filename: str, lines_limit: int = 50, block_size: int = 1024) -> List[str]:
    """
    Reads a file in reverse and returns its last 'lines_limit' lines.

    This is an efficient algorithm that reads from the end of the file backwards
    in chunks, making it suitable for very large log files.

    Args:
        filename: The path to the file to be read.
        lines_limit: The maximum number of lines to be returned. Defaults to 50.
        block_size: The number of bytes to read at a time. Defaults to 1024.

    Returns:
        A list of the last 'lines_limit' lines in the file.
    """
    lines: List[str] = []
    with open(filename, "r", encoding="utf-8") as f:
        # Seek to the end of the file.
        f.seek(0, SEEK_END)
        # Get the current position in the file.
        block_end_byte = f.tell()

        # Continue reading blocks and adding lines until we have enough lines
        # or reach the start of the file.
        while len(lines) < lines_limit and block_end_byte > 0:
            # Read a block from the file, update the block_end_byte position,
            # and get the new lines.
            new_lines, block_end_byte = read_block(f, block_end_byte, block_size)
            if lines:
                # Blocks are read back to front, so this (earlier) block's lines
                # belong before the ones collected so far. A line can also
                # straddle the block boundary: its first half is the last entry
                # of this block and its second half is the first entry of the
                # previous one, so join the two halves before prepending.
                new_lines[-1] += lines[0]
                lines = new_lines + lines[1:]
            else:
                lines = new_lines

    # Return the last 'lines_limit' lines
    return lines[-lines_limit:]


def read_block(file: IO, block_end_byte: int, block_size: int) -> Tuple[List[str], int]:
    """
    Reads a block from the end of a file and returns the lines in the block.

    Args:
        file: The file object to read from.
        block_end_byte: The current position in the file.
        block_size: The number of bytes to read at a time.

    Returns:
        A tuple containing the list of lines in the block,
        and the updated position in the file.
    """
    # Use min() to ensure we only step back as far as we can (start of file)
    stepback = min(block_size, block_end_byte)

    # Step back and read a block from the file
    file.seek(block_end_byte - stepback)
    block = file.read(stepback)
    block_end_byte -= stepback
    lines = block.split("\n")

    return lines, block_end_byte


async def tail_async(
    filename: str, lines_limit: int = 50, block_size: int = 1024
) -> List[str]:
    """
    Async version of tail() for use with FastAPI.

    Reads a file in reverse and returns its last 'lines_limit' lines.

    Args:
        filename: The path to the file to be read.
        lines_limit: The maximum number of lines to be returned. Defaults to 50.
        block_size: The number of bytes to read at a time. Defaults to 1024.

    Returns:
        A list of the last 'lines_limit' lines in the file.
    """
    lines: List[str] = []
    async with aiofiles.open(filename, "r", encoding="utf-8") as f:
        # Seek to the end of the file.
        await f.seek(0, SEEK_END)
        # Get the current position in the file.
        block_end_byte = await f.tell()

        # Continue reading blocks and adding lines until we have enough lines
        # or reach the start of the file.
        while len(lines) < lines_limit and block_end_byte > 0:
            # Read a block from the file
            stepback = min(block_size, block_end_byte)
            await f.seek(block_end_byte - stepback)
            block = await f.read(stepback)
            block_end_byte -= stepback
            new_lines = block.split("\n")
            if lines:
                # Prepend the earlier block and join the halves of a line
                # that straddles the block boundary (see tail() above).
                new_lines[-1] += lines[0]
                lines = new_lines + lines[1:]
            else:
                lines = new_lines

    # Return the last 'lines_limit' lines
    return lines[-lines_limit:]


def get_file_lines(
    filepath: str,
    search_term: Optional[str] = None,
    num_lines_to_return: int = 50,
) -> List[str]:
    """
    Retrieves a specified number of lines from a file,
    optionally filtering for a search term.

    Args:
        filepath: Path to the file.
        search_term: Term to filter lines. If None or empty, no filtering is applied.
        num_lines_to_return: Number of lines to retrieve from the end of the file.

    Returns:
        List of lines from the file.
    """
    lines = tail(filepath, num_lines_to_return)

    if search_term:
        lines = [line for line in lines if search_term in line]

    return lines


async def get_file_lines_async(
    filepath: str,
    search_term: Optional[str] = None,
    num_lines_to_return: int = 50,
) -> List[str]:
    """
    Async version of get_file_lines() for use with FastAPI.

    Retrieves a specified number of lines from a file,
    optionally filtering for a search term.

    Args:
        filepath: Path to the file.
        search_term: Term to filter lines. If None or empty, no filtering is applied.
        num_lines_to_return: Number of lines to retrieve from the end of the file.

    Returns:
        List of lines from the file.
    """
    lines = await tail_async(filepath, num_lines_to_return)

    if search_term:
        lines = [line for line in lines if search_term in line]

    return lines
```
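
A short usage sketch (the log path is a placeholder). One design consequence worth noting: `get_file_lines()` applies the search filter to the tail window it already fetched, so a search returns at most `num_lines_to_return` matching lines from the end of the file, not the last N matches anywhere in the file:

```python
from logtap.core.reader import get_file_lines, tail

# Last 10 raw lines of a (hypothetical) log file.
for line in tail("/var/log/app.log", lines_limit=10):
    print(line)

# At most 5 lines: the substring filter runs over the 5-line tail window.
errors = get_file_lines("/var/log/app.log", search_term="ERROR", num_lines_to_return=5)
```
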