logtap-0.2.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
logtap/core/parsers/base.py ADDED
@@ -0,0 +1,164 @@
+ """Base classes for log parsers."""
+
+ from abc import ABC, abstractmethod
+ from dataclasses import dataclass, field
+ from datetime import datetime
+ from enum import Enum
+ from typing import Any, Dict, List, Optional
+
+
+ class LogLevel(str, Enum):
+     """Standard log severity levels."""
+
+     EMERGENCY = "EMERGENCY"  # System is unusable
+     ALERT = "ALERT"  # Action must be taken immediately
+     CRITICAL = "CRITICAL"  # Critical conditions
+     ERROR = "ERROR"  # Error conditions
+     WARNING = "WARNING"  # Warning conditions
+     NOTICE = "NOTICE"  # Normal but significant
+     INFO = "INFO"  # Informational
+     DEBUG = "DEBUG"  # Debug-level messages
+
+     @classmethod
+     def from_string(cls, level: str) -> Optional["LogLevel"]:
+         """Parse a log level from string."""
+         level_map = {
+             # Standard names
+             "emergency": cls.EMERGENCY,
+             "emerg": cls.EMERGENCY,
+             "alert": cls.ALERT,
+             "critical": cls.CRITICAL,
+             "crit": cls.CRITICAL,
+             "error": cls.ERROR,
+             "err": cls.ERROR,
+             "warning": cls.WARNING,
+             "warn": cls.WARNING,
+             "notice": cls.NOTICE,
+             "info": cls.INFO,
+             "information": cls.INFO,
+             "informational": cls.INFO,
+             "debug": cls.DEBUG,
+             # Numeric syslog levels
+             "0": cls.EMERGENCY,
+             "1": cls.ALERT,
+             "2": cls.CRITICAL,
+             "3": cls.ERROR,
+             "4": cls.WARNING,
+             "5": cls.NOTICE,
+             "6": cls.INFO,
+             "7": cls.DEBUG,
+         }
+         return level_map.get(level.lower())
+
+     @property
+     def severity(self) -> int:
+         """Get numeric severity (0=most severe, 7=least severe)."""
+         severity_map = {
+             LogLevel.EMERGENCY: 0,
+             LogLevel.ALERT: 1,
+             LogLevel.CRITICAL: 2,
+             LogLevel.ERROR: 3,
+             LogLevel.WARNING: 4,
+             LogLevel.NOTICE: 5,
+             LogLevel.INFO: 6,
+             LogLevel.DEBUG: 7,
+         }
+         return severity_map[self]
+
+
+ @dataclass
+ class ParsedLogEntry:
+     """A parsed log entry with structured fields."""
+
+     raw: str  # Original log line
+     message: str  # Main message content
+     timestamp: Optional[datetime] = None
+     level: Optional[LogLevel] = None
+     source: Optional[str] = None  # hostname, process, etc.
+     metadata: Dict[str, Any] = field(default_factory=dict)
+
+     def to_dict(self) -> Dict[str, Any]:
+         """Convert to dictionary for JSON serialization."""
+         return {
+             "raw": self.raw,
+             "message": self.message,
+             "timestamp": self.timestamp.isoformat() if self.timestamp else None,
+             "level": self.level.value if self.level else None,
+             "source": self.source,
+             "metadata": self.metadata,
+         }
+
+
+ class LogParser(ABC):
+     """Abstract base class for log parsers."""
+
+     @property
+     @abstractmethod
+     def name(self) -> str:
+         """Name of this parser format."""
+         pass
+
+     @abstractmethod
+     def can_parse(self, line: str) -> bool:
+         """
+         Check if this parser can handle the given line.
+
+         Args:
+             line: A log line to check.
+
+         Returns:
+             True if this parser can parse the line.
+         """
+         pass
+
+     @abstractmethod
+     def parse(self, line: str) -> ParsedLogEntry:
+         """
+         Parse a log line into structured format.
+
+         Args:
+             line: The log line to parse.
+
+         Returns:
+             A ParsedLogEntry with extracted fields.
+         """
+         pass
+
+     def parse_many(self, lines: List[str]) -> List[ParsedLogEntry]:
+         """
+         Parse multiple log lines.
+
+         Args:
+             lines: List of log lines to parse.
+
+         Returns:
+             List of ParsedLogEntry objects.
+         """
+         return [self.parse(line) for line in lines]
+
+     def _detect_level_from_content(self, content: str) -> Optional[LogLevel]:
+         """
+         Detect log level from message content.
+
+         Common patterns like "ERROR:", "[error]", etc.
+         """
+         content_lower = content.lower()
+
+         # Check for explicit level indicators
+         level_patterns = [
+             (["emergency", "emerg"], LogLevel.EMERGENCY),
+             (["alert"], LogLevel.ALERT),
+             (["critical", "crit", "fatal"], LogLevel.CRITICAL),
+             (["error", "err", "fail", "failed"], LogLevel.ERROR),
+             (["warning", "warn"], LogLevel.WARNING),
+             (["notice"], LogLevel.NOTICE),
+             (["info"], LogLevel.INFO),
+             (["debug", "trace"], LogLevel.DEBUG),
+         ]
+
+         for patterns, level in level_patterns:
+             for pattern in patterns:
+                 if pattern in content_lower:
+                     return level
+
+         return None
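
To make the contract above concrete, here is a minimal sketch of a parser built on this base class. The KeyValueParser name and its key=value format are hypothetical (not part of the package); the sketch only shows what LogParser subclasses must provide - name, can_parse(), and parse():

    from logtap.core.parsers.base import LogParser, ParsedLogEntry

    class KeyValueParser(LogParser):
        """Hypothetical parser for 'key=value key2=value2' lines."""

        @property
        def name(self) -> str:
            return "keyvalue"

        def can_parse(self, line: str) -> bool:
            # Every whitespace-separated token must look like key=value
            tokens = line.split()
            return bool(tokens) and all("=" in token for token in tokens)

        def parse(self, line: str) -> ParsedLogEntry:
            fields = dict(token.split("=", 1) for token in line.split())
            return ParsedLogEntry(
                raw=line,
                message=fields.get("msg", line),
                # Reuse the inherited content-based level heuristic
                level=self._detect_level_from_content(line),
                metadata=fields,
            )

    entry = KeyValueParser().parse("level=error msg=disk_full dev=sda1")
    print(entry.level)                          # LogLevel.ERROR, from "error" in the content
    print(entry.to_dict()["metadata"]["dev"])   # "sda1"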
@@ -0,0 +1,119 @@
+ """JSON log format parser."""
+
+ import json
+ from datetime import datetime
+ from typing import Any, Dict, List, Optional
+
+ from logtap.core.parsers.base import LogLevel, LogParser, ParsedLogEntry
+
+
+ class JsonLogParser(LogParser):
+     """
+     Parser for JSON-formatted log lines.
+
+     Handles common JSON log formats with fields like:
+     - message, msg, log
+     - level, severity, loglevel
+     - timestamp, time, @timestamp, ts
+     - source, logger, name
+     """
+
+     # Common field names for each attribute, in order of preference
+     MESSAGE_FIELDS = ["message", "msg", "log", "text", "body"]
+     LEVEL_FIELDS = ["level", "severity", "loglevel", "log_level", "lvl"]
+     TIMESTAMP_FIELDS = ["timestamp", "time", "@timestamp", "ts", "datetime", "date"]
+     SOURCE_FIELDS = ["source", "logger", "name", "service", "app", "application"]
+
+     @property
+     def name(self) -> str:
+         return "json"
+
+     def can_parse(self, line: str) -> bool:
+         """Check if the line is a valid JSON object."""
+         line = line.strip()
+         if not line.startswith("{"):
+             return False
+         try:
+             json.loads(line)
+             return True
+         except (json.JSONDecodeError, ValueError):
+             return False
+
+     def parse(self, line: str) -> ParsedLogEntry:
+         """Parse a JSON log line."""
+         try:
+             data = json.loads(line.strip())
+         except (json.JSONDecodeError, ValueError):
+             return ParsedLogEntry(
+                 raw=line,
+                 message=line,
+                 level=self._detect_level_from_content(line),
+             )
+
+         # Extract the message; coerce to str since the field may hold a
+         # non-string value.
+         message = str(self._get_field(data, self.MESSAGE_FIELDS, line))
+
+         # Extract level
+         level_str = self._get_field(data, self.LEVEL_FIELDS)
+         level = None
+         if level_str:
+             level = LogLevel.from_string(str(level_str))
+         if not level:
+             level = self._detect_level_from_content(message)
+
+         # Extract timestamp
+         timestamp_str = self._get_field(data, self.TIMESTAMP_FIELDS)
+         timestamp = self._parse_timestamp(timestamp_str) if timestamp_str else None
+
+         # Extract source
+         source = self._get_field(data, self.SOURCE_FIELDS)
+
+         return ParsedLogEntry(
+             raw=line,
+             message=message,
+             timestamp=timestamp,
+             level=level,
+             source=source,
+             metadata=data,
+         )
+
+     def _get_field(
+         self, data: Dict[str, Any], field_names: List[str], default: Any = None
+     ) -> Any:
+         """Get first matching field from data."""
+         for field in field_names:
+             if field in data:
+                 return data[field]
+             # Fall back to a case-insensitive match
+             for key in data:
+                 if key.lower() == field.lower():
+                     return data[key]
+         return default
+
+     def _parse_timestamp(self, value: Any) -> Optional[datetime]:
+         """Parse timestamp from various formats."""
+         if isinstance(value, datetime):
+             return value
+
+         if isinstance(value, (int, float)):
+             # Unix timestamp (seconds since the epoch)
+             try:
+                 return datetime.fromtimestamp(value)
+             except (ValueError, OSError):
+                 pass
+
+         if isinstance(value, str):
+             # Try common ISO-8601-style formats
+             formats = [
+                 "%Y-%m-%dT%H:%M:%S.%fZ",
+                 "%Y-%m-%dT%H:%M:%SZ",
+                 "%Y-%m-%dT%H:%M:%S.%f",
+                 "%Y-%m-%dT%H:%M:%S",
+                 "%Y-%m-%d %H:%M:%S.%f",
+                 "%Y-%m-%d %H:%M:%S",
+             ]
+             for fmt in formats:
+                 try:
+                     return datetime.strptime(value, fmt)
+                 except ValueError:
+                     continue
+
+         return None
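
A quick usage sketch for the JSON parser (the import is omitted since the module's path is not shown in this diff; the field names are chosen to match the alias lists above):

    parser = JsonLogParser()
    line = '{"ts": 1704708225, "severity": "warn", "msg": "cache miss", "service": "api"}'

    assert parser.can_parse(line)
    entry = parser.parse(line)
    print(entry.level)      # LogLevel.WARNING - "severity" is a LEVEL_FIELDS alias
    print(entry.message)    # "cache miss"     - "msg" is a MESSAGE_FIELDS alias
    print(entry.source)     # "api"            - "service" is a SOURCE_FIELDS alias
    print(entry.timestamp)  # datetime from the "ts" unix timestamp (local time)
    print(entry.metadata)   # the full decoded JSON object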
@@ -0,0 +1,108 @@
+ """Nginx access log parser."""
+
+ import re
+ from datetime import datetime
+ from typing import Optional
+
+ from logtap.core.parsers.base import LogLevel, LogParser, ParsedLogEntry
+
+
+ class NginxParser(LogParser):
+     """
+     Parser for Nginx access logs (common and combined formats; the
+     trailing referer and user-agent fields are optional).
+
+     Example:
+         192.168.1.1 - - [08/Jan/2024:10:23:45 +0000] "GET /api HTTP/1.1" 200 45
+     """
+
+     # Combined log format pattern
+     PATTERN = re.compile(
+         r"^(\S+)\s+"  # Remote address
+         r"(\S+)\s+"  # Identity (usually -)
+         r"(\S+)\s+"  # Remote user (usually -)
+         r"\[([^\]]+)\]\s+"  # Time
+         r'"([^"]*)"\s+'  # Request
+         r"(\d{3})\s+"  # Status
+         r"(\d+|-)\s*"  # Bytes
+         r'(?:"([^"]*)"\s*)?'  # Referer (optional)
+         r'(?:"([^"]*)")?'  # User agent (optional)
+     )
+
+     @property
+     def name(self) -> str:
+         return "nginx"
+
+     def can_parse(self, line: str) -> bool:
+         """Check if line matches the nginx format."""
+         return bool(self.PATTERN.match(line))
+
+     def parse(self, line: str) -> ParsedLogEntry:
+         """Parse an nginx access log line."""
+         match = self.PATTERN.match(line)
+
+         if not match:
+             return ParsedLogEntry(
+                 raw=line,
+                 message=line,
+                 level=self._detect_level_from_content(line),
+             )
+
+         groups = match.groups()
+         remote_addr = groups[0]
+         remote_user = groups[2] if groups[2] != "-" else None
+         time_str = groups[3]
+         request = groups[4]
+         status = int(groups[5])
+         bytes_sent = int(groups[6]) if groups[6] != "-" else 0
+         # The optional trailing groups are None when absent; nginx logs "-"
+         # when the field is empty, so treat both as missing.
+         referer = groups[7] if groups[7] and groups[7] != "-" else None
+         user_agent = groups[8] if groups[8] and groups[8] != "-" else None
+
+         # Parse timestamp
+         timestamp = self._parse_nginx_time(time_str)
+
+         # Determine level based on status code
+         level = self._status_to_level(status)
+
+         # Parse request method and path
+         request_parts = request.split() if request else []
+         method = request_parts[0] if len(request_parts) > 0 else None
+         path = request_parts[1] if len(request_parts) > 1 else None
+
+         return ParsedLogEntry(
+             raw=line,
+             message=f"{method} {path} -> {status}" if method and path else request,
+             timestamp=timestamp,
+             level=level,
+             source=remote_addr,
+             metadata={
+                 "remote_addr": remote_addr,
+                 "remote_user": remote_user,
+                 "request": request,
+                 "method": method,
+                 "path": path,
+                 "status": status,
+                 "bytes_sent": bytes_sent,
+                 "referer": referer,
+                 "user_agent": user_agent,
+             },
+         )
+
+     def _parse_nginx_time(self, time_str: str) -> Optional[datetime]:
+         """Parse nginx time format: 08/Jan/2024:10:23:45 +0000"""
+         try:
+             # Drop the timezone offset for simpler (naive) parsing
+             time_str = time_str.split()[0] if " " in time_str else time_str
+             return datetime.strptime(time_str, "%d/%b/%Y:%H:%M:%S")
+         except ValueError:
+             return None
+
+     def _status_to_level(self, status: int) -> LogLevel:
+         """Map an HTTP status code to a log level."""
+         if status >= 500:
+             return LogLevel.ERROR
+         elif status >= 400:
+             return LogLevel.WARNING
+         elif status >= 300:
+             return LogLevel.NOTICE
+         else:
+             return LogLevel.INFO
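
A usage sketch for the nginx parser (the log line is illustrative):

    parser = NginxParser()
    line = (
        '203.0.113.7 - alice [08/Jan/2024:10:23:45 +0000] '
        '"GET /api/v1/items HTTP/1.1" 404 152 "-" "curl/8.4.0"'
    )

    entry = parser.parse(line)
    print(entry.message)                  # "GET /api/v1/items -> 404"
    print(entry.level)                    # LogLevel.WARNING (4xx status)
    print(entry.source)                   # "203.0.113.7"
    print(entry.metadata["remote_user"])  # "alice"
    print(entry.metadata["referer"])      # None - "-" is treated as absent
    print(entry.metadata["user_agent"])   # "curl/8.4.0"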
@@ -0,0 +1,80 @@
+ """Syslog format parser."""
+
+ import re
+ from datetime import datetime
+
+ from logtap.core.parsers.base import LogParser, ParsedLogEntry
+
+
+ class SyslogParser(LogParser):
+     """
+     Parser for standard syslog format.
+
+     Handles formats like:
+     - Jan 8 10:23:45 hostname process[pid]: message
+     - Jan 8 10:23:45 hostname process: message
+     """
+
+     # Syslog pattern: Month Day HH:MM:SS hostname process[pid]: message
+     PATTERN = re.compile(
+         r"^(\w{3}\s+\d{1,2}\s+\d{2}:\d{2}:\d{2})\s+"  # Timestamp
+         r"(\S+)\s+"  # Hostname
+         r"(\S+?)(?:\[(\d+)\])?:\s+"  # Process[PID]
+         r"(.*)$"  # Message
+     )
+
+     # Alternative pattern without PID brackets
+     PATTERN_ALT = re.compile(
+         r"^(\w{3}\s+\d{1,2}\s+\d{2}:\d{2}:\d{2})\s+" r"(\S+)\s+" r"(\S+):\s+" r"(.*)$"
+     )
+
+     @property
+     def name(self) -> str:
+         return "syslog"
+
+     def can_parse(self, line: str) -> bool:
+         """Check if line matches syslog format."""
+         return bool(self.PATTERN.match(line) or self.PATTERN_ALT.match(line))
+
+     def parse(self, line: str) -> ParsedLogEntry:
+         """Parse a syslog line."""
+         match = self.PATTERN.match(line)
+
+         if match:
+             timestamp_str, hostname, process, pid, message = match.groups()
+         else:
+             match = self.PATTERN_ALT.match(line)
+             if match:
+                 timestamp_str, hostname, process, message = match.groups()
+                 pid = None
+             else:
+                 # Can't parse, return basic entry
+                 return ParsedLogEntry(
+                     raw=line,
+                     message=line,
+                     level=self._detect_level_from_content(line),
+                 )
+
+         # Parse timestamp (assume current year)
+         try:
+             timestamp = datetime.strptime(timestamp_str, "%b %d %H:%M:%S")
+             # Set to current year
+             timestamp = timestamp.replace(year=datetime.now().year)
+         except ValueError:
+             timestamp = None
+
+         # Detect level from message
+         level = self._detect_level_from_content(message)
+
+         return ParsedLogEntry(
+             raw=line,
+             message=message,
+             timestamp=timestamp,
+             level=level,
+             source=f"{hostname}/{process}",
+             metadata={
+                 "hostname": hostname,
+                 "process": process,
+                 "pid": pid,
+             },
+         )
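
A usage sketch for the syslog parser, plus the can_parse()-based dispatch these parsers are designed around (detect_parser is a hypothetical helper, not part of the package):

    parser = SyslogParser()
    entry = parser.parse("Jan  8 10:23:45 web01 sshd[4721]: Failed password for root")
    print(entry.source)           # "web01/sshd"
    print(entry.metadata["pid"])  # "4721" (captured as a string)
    print(entry.level)            # LogLevel.ERROR - "failed" appears in the message

    # Hypothetical dispatch: try parsers in order of specificity
    def detect_parser(line, parsers):
        for candidate in parsers:
            if candidate.can_parse(line):
                return candidate
        return None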
logtap/core/reader.py ADDED
@@ -0,0 +1,160 @@
+ """
+ Core file reading functionality for logtap.
+
+ The tail() function is the heart of logtap - it reads files from the end
+ backwards in fixed-size chunks, so only the bytes needed for the requested
+ lines are read instead of the entire file.
+ """
+
+ from os import SEEK_END
+ from typing import IO, List, Optional, Tuple
+
+ import aiofiles
+
+
+ def tail(filename: str, lines_limit: int = 50, block_size: int = 1024) -> List[str]:
+     """
+     Reads a file in reverse and returns its last 'lines_limit' lines.
+
+     The file is read from the end backwards in chunks, making this
+     suitable for very large log files.
+
+     Args:
+         filename: The path to the file to be read.
+         lines_limit: The maximum number of lines to be returned. Defaults to 50.
+         block_size: The number of bytes to read at a time. Defaults to 1024.
+
+     Returns:
+         A list of the last 'lines_limit' lines in the file.
+     """
+     lines: List[str] = []
+     # Binary mode: seeking to arbitrary offsets is only well-defined for
+     # binary files, and block_size counts bytes, not characters.
+     with open(filename, "rb") as f:
+         # Seek to the end of the file and record the position.
+         f.seek(0, SEEK_END)
+         block_end_byte = f.tell()
+
+         # Continue reading blocks and adding lines until we have enough lines
+         # or reach the start of the file.
+         while len(lines) < lines_limit and block_end_byte > 0:
+             new_lines, block_end_byte = read_block(f, block_end_byte, block_size)
+             if lines:
+                 # Blocks arrive back-to-front: stitch together the line that
+                 # straddles the block boundary, then prepend the new lines.
+                 new_lines[-1] += lines[0]
+                 lines = new_lines + lines[1:]
+             else:
+                 lines = new_lines
+
+     # Return the last 'lines_limit' lines
+     return lines[-lines_limit:]
+
+
+ def read_block(
+     file: IO[bytes], block_end_byte: int, block_size: int
+ ) -> Tuple[List[str], int]:
+     """
+     Reads a block from the end of a file and returns the lines in the block.
+
+     Args:
+         file: The binary file object to read from.
+         block_end_byte: The current position in the file.
+         block_size: The number of bytes to read at a time.
+
+     Returns:
+         A tuple containing the list of lines in the block,
+         and the updated position in the file.
+     """
+     # Use min() to ensure we only step back as far as we can (start of file)
+     stepback = min(block_size, block_end_byte)
+
+     # Step back and read a block from the file
+     file.seek(block_end_byte - stepback)
+     block = file.read(stepback)
+     block_end_byte -= stepback
+     # errors="replace" keeps a multi-byte character that happens to be split
+     # across a block boundary from raising UnicodeDecodeError.
+     lines = block.decode("utf-8", errors="replace").split("\n")
+
+     return lines, block_end_byte
+
+
+ async def tail_async(
+     filename: str, lines_limit: int = 50, block_size: int = 1024
+ ) -> List[str]:
+     """
+     Async version of tail() for use with FastAPI.
+
+     Reads a file in reverse and returns its last 'lines_limit' lines.
+
+     Args:
+         filename: The path to the file to be read.
+         lines_limit: The maximum number of lines to be returned. Defaults to 50.
+         block_size: The number of bytes to read at a time. Defaults to 1024.
+
+     Returns:
+         A list of the last 'lines_limit' lines in the file.
+     """
+     lines: List[str] = []
+     async with aiofiles.open(filename, "rb") as f:
+         # Seek to the end of the file and record the position.
+         await f.seek(0, SEEK_END)
+         block_end_byte = await f.tell()
+
+         # Continue reading blocks and adding lines until we have enough lines
+         # or reach the start of the file.
+         while len(lines) < lines_limit and block_end_byte > 0:
+             stepback = min(block_size, block_end_byte)
+             await f.seek(block_end_byte - stepback)
+             block = await f.read(stepback)
+             block_end_byte -= stepback
+             new_lines = block.decode("utf-8", errors="replace").split("\n")
+             if lines:
+                 # Stitch the boundary line and prepend, as in tail().
+                 new_lines[-1] += lines[0]
+                 lines = new_lines + lines[1:]
+             else:
+                 lines = new_lines
+
+     # Return the last 'lines_limit' lines
+     return lines[-lines_limit:]
+
+
+ def get_file_lines(
+     filepath: str,
+     search_term: Optional[str] = None,
+     num_lines_to_return: int = 50,
+ ) -> List[str]:
+     """
+     Retrieves a specified number of lines from a file,
+     optionally filtering for a search term.
+
+     Note that the filter is applied after the tail, so fewer than
+     'num_lines_to_return' matching lines may be returned.
+
+     Args:
+         filepath: Path to the file.
+         search_term: Term to filter lines. If None or empty, no filtering is applied.
+         num_lines_to_return: Number of lines to retrieve from the end of the file.
+
+     Returns:
+         List of lines from the file.
+     """
+     lines = tail(filepath, num_lines_to_return)
+
+     if search_term:
+         lines = [line for line in lines if search_term in line]
+
+     return lines
+
+
+ async def get_file_lines_async(
+     filepath: str,
+     search_term: Optional[str] = None,
+     num_lines_to_return: int = 50,
+ ) -> List[str]:
+     """
+     Async version of get_file_lines() for use with FastAPI.
+
+     Retrieves a specified number of lines from a file,
+     optionally filtering for a search term.
+
+     Note that the filter is applied after the tail, so fewer than
+     'num_lines_to_return' matching lines may be returned.
+
+     Args:
+         filepath: Path to the file.
+         search_term: Term to filter lines. If None or empty, no filtering is applied.
+         num_lines_to_return: Number of lines to retrieve from the end of the file.
+
+     Returns:
+         List of lines from the file.
+     """
+     lines = await tail_async(filepath, num_lines_to_return)
+
+     if search_term:
+         lines = [line for line in lines if search_term in line]
+
+     return lines
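
A usage sketch for the reader functions (the /var/log/app.log path is illustrative):

    # Synchronous: the last 100 lines, filtered to those containing "ERROR"
    errors = get_file_lines("/var/log/app.log", search_term="ERROR", num_lines_to_return=100)
    print("\n".join(errors))

    # Asynchronous, e.g. from a FastAPI endpoint
    import asyncio

    async def main() -> None:
        lines = await get_file_lines_async("/var/log/app.log", num_lines_to_return=20)
        print("\n".join(lines))

    asyncio.run(main())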