logler-1.0.7-cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
logler/log_reader.py ADDED
@@ -0,0 +1,267 @@
+ """Log file reading and streaming module."""
+
+ import os
+ import time
+ from pathlib import Path
+ from typing import Iterator, Optional
+
+
+ class LogReader:
+     """Efficient log file reader with support for tailing and large files."""
+
+     def __init__(self, file_path: str, buffer_size: int = 8192):
+         """
+         Initialize the log reader.
+
+         Args:
+             file_path: Path to the log file
+             buffer_size: Buffer size for reading (default 8KB)
+         """
+         self.file_path = Path(file_path)
+         self.buffer_size = buffer_size
+
+         if not self.file_path.exists():
+             raise FileNotFoundError(f"Log file not found: {file_path}")
+
+         if not self.file_path.is_file():
+             raise ValueError(f"Not a file: {file_path}")
+
+     def read_lines(
+         self, start_line: int = 0, max_lines: Optional[int] = None, reverse: bool = False
+     ) -> Iterator[str]:
+         """
+         Read lines from the log file.
+
+         Args:
+             start_line: Line number to start from (0-indexed)
+             max_lines: Maximum number of lines to read (None for all)
+             reverse: Read lines in reverse order
+
+         Yields:
+             Log lines as strings
+         """
+         if reverse:
+             yield from self._read_lines_reverse(start_line, max_lines)
+         else:
+             yield from self._read_lines_forward(start_line, max_lines)
+
+     def _read_lines_forward(
+         self, start_line: int = 0, max_lines: Optional[int] = None
+     ) -> Iterator[str]:
+         """Read lines forward from the file."""
+         with open(self.file_path, "r", encoding="utf-8", errors="replace") as f:
+             line_num = 0
+             lines_read = 0
+
+             for line in f:
+                 if line_num >= start_line:
+                     yield line.rstrip("\n\r")
+                     lines_read += 1
+
+                     if max_lines and lines_read >= max_lines:
+                         break
+
+                 line_num += 1
+
66
+     def _read_lines_reverse(
+         self, start_line: int = 0, max_lines: Optional[int] = None
+     ) -> Iterator[str]:
+         """
+         Read lines in reverse order.
+
+         The file is read from the end in fixed-size chunks rather than in a
+         single read. Note that the decoded lines are collected in a list
+         before slicing, so memory use still grows with the line count.
+         """
+         with open(self.file_path, "rb") as f:
+             # Seek to end of file
+             f.seek(0, os.SEEK_END)
+             file_size = f.tell()
+
+             # Read in chunks from the end
+             buffer = b""
+             position = file_size
+             lines = []
+
+             while position > 0:
+                 # Determine chunk size
+                 chunk_size = min(self.buffer_size, position)
+                 position -= chunk_size
+
+                 # Read chunk
+                 f.seek(position)
+                 chunk = f.read(chunk_size)
+
+                 # Combine with previous buffer
+                 buffer = chunk + buffer
+
+                 # Split into lines
+                 chunk_lines = buffer.split(b"\n")
+
+                 # Keep the first (possibly incomplete) line in the buffer
+                 buffer = chunk_lines[0]
+                 chunk_lines = chunk_lines[1:]
+
+                 # Add lines in reverse; errors="replace" means decoding never raises
+                 for line in reversed(chunk_lines):
+                     if line or lines:  # Skip empty lines at the end of the file
+                         decoded = line.decode("utf-8", errors="replace")
+                         lines.append(decoded.rstrip("\r"))
+
+             # Add the first line if the buffer has content
+             if buffer:
+                 decoded = buffer.decode("utf-8", errors="replace")
+                 lines.append(decoded.rstrip("\r"))
+
+             # Apply start_line and max_lines
+             lines_to_yield = lines[start_line:]
+             if max_lines:
+                 lines_to_yield = lines_to_yield[:max_lines]
+
+             for line in lines_to_yield:
+                 yield line
+
129
+     def tail(
+         self, num_lines: int = 10, follow: bool = False, sleep_interval: float = 0.1
+     ) -> Iterator[str]:
+         """
+         Tail the log file (like tail -f).
+
+         Args:
+             num_lines: Number of initial lines to show
+             follow: If True, continue watching for new lines
+             sleep_interval: How long to sleep between checks (seconds)
+
+         Yields:
+             Log lines as strings
+         """
+         # Yield the last num_lines lines, oldest first
+         lines = list(self._read_lines_reverse(max_lines=num_lines))
+         for line in reversed(lines):
+             yield line
+
+         if not follow:
+             return
+
+         # Follow mode: watch for new lines
+         with open(self.file_path, "r", encoding="utf-8", errors="replace") as f:
+             # Seek to end
+             f.seek(0, os.SEEK_END)
+
+             while True:
+                 line = f.readline()
+                 if line:
+                     yield line.rstrip("\n\r")
+                 else:
+                     # Check if file was truncated (log rotation)
+                     current_pos = f.tell()
+                     f.seek(0, os.SEEK_END)
+                     end_pos = f.tell()
+
+                     if current_pos > end_pos:
+                         # File was truncated, start from beginning
+                         f.seek(0)
+                     else:
+                         # No new data, sleep
+                         time.sleep(sleep_interval)
+
176
+     def search(
+         self,
+         pattern: str,
+         case_sensitive: bool = False,
+         regex: bool = False,
+         max_lines: Optional[int] = None,
+     ) -> Iterator[tuple[int, str]]:
+         """
+         Search for lines matching a pattern.
+
+         Args:
+             pattern: Search pattern (string or regex)
+             case_sensitive: Whether search is case-sensitive
+             regex: Whether pattern is a regex
+             max_lines: Maximum number of matching lines to return
+
+         Yields:
+             Tuples of (line_number, line_content)
+         """
+         import re as regex_module
+
+         def make_regex_matcher(compiled):
+             return lambda line: compiled.search(line)
+
+         def make_case_insensitive_matcher(pat):
+             return lambda line: pat in line.lower()
+
+         def make_case_sensitive_matcher(pat):
+             return lambda line: pat in line
+
+         if regex:
+             flags = 0 if case_sensitive else regex_module.IGNORECASE
+             compiled_pattern = regex_module.compile(pattern, flags)
+             match_func = make_regex_matcher(compiled_pattern)
+         else:
+             if not case_sensitive:
+                 pattern = pattern.lower()
+                 match_func = make_case_insensitive_matcher(pattern)
+             else:
+                 match_func = make_case_sensitive_matcher(pattern)
+
+         with open(self.file_path, "r", encoding="utf-8", errors="replace") as f:
+             matches_found = 0
+
+             for line_num, line in enumerate(f, 1):
+                 line = line.rstrip("\n\r")
+
+                 if match_func(line):
+                     yield (line_num, line)
+                     matches_found += 1
+
+                     if max_lines and matches_found >= max_lines:
+                         break
+
+     def get_file_info(self) -> dict:
+         """
+         Get information about the log file.
+
+         Returns:
+             Dictionary with file metadata
+         """
+         stat = self.file_path.stat()
+
+         return {
+             "path": str(self.file_path.absolute()),
+             "size": stat.st_size,
+             "size_human": self._format_bytes(stat.st_size),
+             "modified": stat.st_mtime,
+             "created": stat.st_ctime,
+         }
+
+     @staticmethod
+     def _format_bytes(size: int) -> str:
+         """Format bytes to human-readable string."""
+         for unit in ["B", "KB", "MB", "GB", "TB"]:
+             if size < 1024.0:
+                 return f"{size:.2f} {unit}"
+             size /= 1024.0
+         return f"{size:.2f} PB"
+
+     def count_lines(self) -> int:
+         """
+         Count total number of lines in the file.
+
+         Returns:
+             Number of lines
+         """
+         count = 0
+         with open(self.file_path, "rb") as f:
+             for _ in f:
+                 count += 1
+         return count
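
A minimal usage sketch for the LogReader class above (illustrative only; the "app.log" path and the "timeout" search string are placeholders, not part of the package):

    # Hypothetical usage; "app.log" must exist or LogReader raises FileNotFoundError.
    from logler.log_reader import LogReader

    reader = LogReader("app.log")

    # Last 20 lines, oldest first (follow=False returns after the initial lines)
    for line in reader.tail(num_lines=20):
        print(line)

    # Case-insensitive substring search, capped at 5 matching lines
    for line_num, line in reader.search("timeout", max_lines=5):
        print(f"{line_num}: {line}")

    print(reader.get_file_info()["size_human"], reader.count_lines())
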
logler/parser.py ADDED
@@ -0,0 +1,207 @@
1
+ """
2
+ Log parsing module with support for multiple formats.
3
+ """
4
+
5
+ import re
6
+ import json
7
+ from datetime import datetime
8
+ from typing import Optional, Dict, Any
9
+ from dataclasses import dataclass, field
10
+ from enum import Enum
11
+
12
+
13
+ class LogLevel(str, Enum):
14
+ """Log levels."""
15
+
16
+ TRACE = "TRACE"
17
+ DEBUG = "DEBUG"
18
+ INFO = "INFO"
19
+ WARN = "WARN"
20
+ WARNING = "WARNING"
21
+ ERROR = "ERROR"
22
+ CRITICAL = "CRITICAL"
23
+ FATAL = "FATAL"
24
+ UNKNOWN = "UNKNOWN"
25
+
26
+
27
+ @dataclass
28
+ class LogEntry:
29
+ """Parsed log entry."""
30
+
31
+ line_number: int
32
+ raw: str
33
+ timestamp: Optional[datetime] = None
34
+ level: str = "UNKNOWN"
35
+ message: str = ""
36
+ thread_id: Optional[str] = None
37
+ correlation_id: Optional[str] = None
38
+ trace_id: Optional[str] = None
39
+ span_id: Optional[str] = None
40
+ service_name: Optional[str] = None
41
+ fields: Dict[str, Any] = field(default_factory=dict)
42
+
43
+ def __post_init__(self):
44
+ """Ensure message is set."""
45
+ if not self.message and self.raw:
46
+ self.message = self.raw
47
+
48
+
49
+ class LogParser:
50
+ """Parse log entries from various formats."""
51
+
52
+ # Regex patterns
53
+ PATTERNS = {
54
+ "timestamp": re.compile(
55
+ r"\d{4}-\d{2}-\d{2}[T ]\d{2}:\d{2}:\d{2}(?:\.\d+)?(?:Z|[+-]\d{2}:?\d{2})?"
56
+ ),
57
+ "log_level": re.compile(
58
+ r"\b(TRACE|DEBUG|INFO|INFORMATION|WARN|WARNING|ERROR|ERR|FATAL|CRITICAL|CRIT)\b",
59
+ re.IGNORECASE,
60
+ ),
61
+ "thread_id": re.compile(r"(?:thread[=:\s]+|tid[=:\s]+|\[)([a-zA-Z0-9_-]+)(?:\])?"),
62
+ "correlation_id": re.compile(
63
+ r"(?:correlation[_-]?id|request[_-]?id|req[_-]?id)[=:\s]+([a-zA-Z0-9_-]+)"
64
+ ),
65
+ "trace_id": re.compile(r"(?:trace[_-]?id|traceId)[=:\s]+([a-fA-F0-9]{16,32})"),
66
+ "span_id": re.compile(r"(?:span[_-]?id|spanId)[=:\s]+([a-fA-F0-9]{8,16})"),
67
+ }
68
+
69
+     def parse_line(self, line_number: int, raw: str) -> LogEntry:
+         """Parse a single log line."""
+         # Try JSON first
+         if raw.strip().startswith("{"):
+             try:
+                 data = json.loads(raw.strip())
+                 return self._parse_json(line_number, raw, data)
+             except (json.JSONDecodeError, ValueError):
+                 pass
+
+         # Parse as plain text
+         return self._parse_plain(line_number, raw)
+
+     def _parse_json(self, line_number: int, raw: str, data: dict) -> LogEntry:
+         """Parse JSON log entry."""
+         entry = LogEntry(line_number=line_number, raw=raw)
+
+         # Extract timestamp
+         for ts_field in ["timestamp", "time", "ts", "@timestamp", "datetime"]:
+             if ts_field in data:
+                 try:
+                     ts_str = str(data[ts_field])
+                     entry.timestamp = datetime.fromisoformat(ts_str.replace("Z", "+00:00"))
+                 except (ValueError, AttributeError):
+                     pass
+                 break
+
+         # Extract level
+         for level_field in ["level", "severity", "loglevel", "lvl"]:
+             if level_field in data:
+                 entry.level = str(data[level_field]).upper()
+                 break
+
+         # Extract message
+         for msg_field in ["message", "msg", "text", "content"]:
+             if msg_field in data:
+                 entry.message = str(data[msg_field])
+                 break
+
+         # Extract thread ID
+         for thread_field in ["thread", "thread_id", "threadId", "tid"]:
+             if thread_field in data:
+                 entry.thread_id = str(data[thread_field])
+                 break
+
+         # Extract correlation ID
+         for corr_field in ["correlation_id", "correlationId", "request_id", "requestId"]:
+             if corr_field in data:
+                 entry.correlation_id = str(data[corr_field])
+                 break
+
+         # Extract trace/span IDs
+         if "trace_id" in data or "traceId" in data:
+             entry.trace_id = str(data.get("trace_id") or data.get("traceId"))
+         if "span_id" in data or "spanId" in data:
+             entry.span_id = str(data.get("span_id") or data.get("spanId"))
+
+         # Extract service name
+         for service_field in ["service", "service_name", "serviceName"]:
+             if service_field in data:
+                 entry.service_name = str(data[service_field])
+                 break
+
+         # Store other fields
+         skip_fields = {
+             "timestamp",
+             "time",
+             "ts",
+             "@timestamp",
+             "datetime",
+             "level",
+             "severity",
+             "loglevel",
+             "lvl",
+             "message",
+             "msg",
+             "text",
+             "content",
+             "thread",
+             "thread_id",
+             "threadId",
+             "tid",
+             "correlation_id",
+             "correlationId",
+             "request_id",
+             "requestId",
+             "trace_id",
+             "traceId",
+             "span_id",
+             "spanId",
+             "service",
+             "service_name",
+             "serviceName",
+         }
+         entry.fields = {k: v for k, v in data.items() if k not in skip_fields}
+
+         return entry
+
167
+     def _parse_plain(self, line_number: int, raw: str) -> LogEntry:
+         """Parse plain text log entry."""
+         entry = LogEntry(line_number=line_number, raw=raw, message=raw)
+
+         # Extract timestamp
+         ts_match = self.PATTERNS["timestamp"].search(raw)
+         if ts_match:
+             try:
+                 ts_str = ts_match.group(0)
+                 entry.timestamp = datetime.fromisoformat(
+                     ts_str.replace(" ", "T").replace("Z", "+00:00")
+                 )
+             except ValueError:
+                 pass
+
+         # Extract log level
+         level_match = self.PATTERNS["log_level"].search(raw)
+         if level_match:
+             entry.level = level_match.group(1).upper()
+
+         # Extract thread ID
+         thread_match = self.PATTERNS["thread_id"].search(raw)
+         if thread_match:
+             entry.thread_id = thread_match.group(1)
+
+         # Extract correlation ID
+         corr_match = self.PATTERNS["correlation_id"].search(raw)
+         if corr_match:
+             entry.correlation_id = corr_match.group(1)
+
+         # Extract trace ID
+         trace_match = self.PATTERNS["trace_id"].search(raw)
+         if trace_match:
+             entry.trace_id = trace_match.group(1)
+
+         # Extract span ID
+         span_match = self.PATTERNS["span_id"].search(raw)
+         if span_match:
+             entry.span_id = span_match.group(1)
+
+         return entry
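
A short sketch of how LogParser handles a JSON line versus a plain-text line (both sample log lines below are invented for illustration):

    from logler.parser import LogParser

    parser = LogParser()

    json_entry = parser.parse_line(
        1,
        '{"timestamp": "2024-01-01T12:00:00Z", "level": "error", '
        '"msg": "db down", "traceId": "abcdef0123456789", "user": "alice"}',
    )
    # json_entry.level == "ERROR", json_entry.message == "db down",
    # json_entry.trace_id == "abcdef0123456789", json_entry.fields == {"user": "alice"}

    plain_entry = parser.parse_line(2, "2024-01-01 12:00:01 WARN request_id=req-42 slow response")
    # plain_entry.level == "WARN", plain_entry.correlation_id == "req-42",
    # plain_entry.timestamp parsed from the leading timestamp
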
logler/safe_regex.py ADDED
@@ -0,0 +1,124 @@
1
+ """
2
+ Safe regex compilation with timeout protection against ReDoS attacks.
3
+
4
+ This module provides a safe_compile function that wraps re.compile with:
5
+ - Pattern length validation
6
+ - Compilation timeout (Unix only, graceful fallback on Windows)
7
+ - Clear error messages
8
+ """
9
+
10
+ import re
11
+ import threading
12
+ from typing import Optional
13
+
14
+
15
+ class RegexTimeoutError(Exception):
16
+ """Raised when regex compilation times out."""
17
+
18
+ pass
19
+
20
+
21
+ class RegexPatternTooLongError(Exception):
22
+ """Raised when regex pattern exceeds maximum allowed length."""
23
+
24
+ pass
25
+
26
+
27
+ # Maximum pattern length to prevent ReDoS via complexity
28
+ MAX_PATTERN_LENGTH = 1000
29
+
30
+ # Timeout for regex compilation in seconds
31
+ COMPILE_TIMEOUT = 2.0
32
+
33
+
34
+ def _compile_with_timeout(
+     pattern: str,
+     flags: int,
+     result_container: dict,
+ ) -> None:
+     """Worker function for threaded compilation; stores the result or error."""
+     try:
+         result_container["result"] = re.compile(pattern, flags)
+     except Exception as e:  # re.error for invalid patterns, or anything else
+         result_container["error"] = e
+
+
+ def safe_compile(
+     pattern: str,
+     flags: int = 0,
+     timeout: float = COMPILE_TIMEOUT,
+     max_length: int = MAX_PATTERN_LENGTH,
+ ) -> re.Pattern:
+     """
+     Safely compile a regex pattern with timeout protection.
+
+     Args:
+         pattern: The regex pattern to compile
+         flags: Optional regex flags (e.g., re.IGNORECASE)
+         timeout: Maximum time in seconds to allow for compilation
+         max_length: Maximum allowed pattern length
+
+     Returns:
+         Compiled regex pattern
+
+     Raises:
+         RegexPatternTooLongError: If pattern exceeds max_length
+         RegexTimeoutError: If compilation takes longer than timeout
+         re.error: If the pattern is invalid
+     """
+     # Validate pattern length
+     if len(pattern) > max_length:
+         raise RegexPatternTooLongError(
+             f"Regex pattern length {len(pattern)} exceeds maximum {max_length}"
+         )
+
+     # Use a daemon worker thread for cross-platform timeout support
+     result_container: dict = {}
+     thread = threading.Thread(
+         target=_compile_with_timeout,
+         args=(pattern, flags, result_container),
+         daemon=True,
+     )
+     thread.start()
+     thread.join(timeout=timeout)
+
+     if thread.is_alive():
+         # The thread is still running, so compilation timed out. It cannot be
+         # killed, but as a daemon thread it will not block interpreter exit.
+         raise RegexTimeoutError(
+             f"Regex compilation timed out after {timeout}s (pattern may cause catastrophic backtracking)"
+         )
+
+     if "error" in result_container:
+         raise result_container["error"]
+
+     return result_container["result"]
+
+
100
+ def try_compile(
+     pattern: str,
+     flags: int = 0,
+     timeout: float = COMPILE_TIMEOUT,
+     max_length: int = MAX_PATTERN_LENGTH,
+ ) -> Optional[re.Pattern]:
+     """
+     Try to compile a regex pattern safely, returning None on failure.
+
+     This is a convenience wrapper around safe_compile that catches the
+     expected compilation errors and returns None instead.
+
+     Args:
+         pattern: The regex pattern to compile
+         flags: Optional regex flags
+         timeout: Maximum compilation time
+         max_length: Maximum pattern length
+
+     Returns:
+         Compiled pattern or None if compilation fails
+     """
+     try:
+         return safe_compile(pattern, flags, timeout, max_length)
+     except (RegexTimeoutError, RegexPatternTooLongError, re.error):
+         return None
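
A brief sketch of the intended use of these helpers (the patterns shown are arbitrary examples):

    import re

    from logler.safe_regex import RegexPatternTooLongError, safe_compile, try_compile

    # Over-long patterns are rejected before compilation is attempted
    try:
        safe_compile("a" * 2000)
    except RegexPatternTooLongError as exc:
        print(exc)

    # Flags are passed straight through to re.compile
    pattern = safe_compile(r"error|fatal", flags=re.IGNORECASE)
    print(bool(pattern.search("FATAL: disk full")))  # True

    # try_compile returns None instead of raising on an invalid pattern
    assert try_compile(r"(unbalanced") is None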