logler-1.0.7-cp311-cp311-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- logler/__init__.py +22 -0
- logler/bootstrap.py +57 -0
- logler/cache.py +75 -0
- logler/cli.py +589 -0
- logler/helpers.py +282 -0
- logler/investigate.py +3962 -0
- logler/llm_cli.py +1426 -0
- logler/log_reader.py +267 -0
- logler/parser.py +207 -0
- logler/safe_regex.py +124 -0
- logler/terminal.py +252 -0
- logler/tracker.py +138 -0
- logler/tree_formatter.py +807 -0
- logler/watcher.py +55 -0
- logler/web/__init__.py +3 -0
- logler/web/app.py +810 -0
- logler/web/static/css/tailwind.css +1 -0
- logler/web/static/css/tailwind.input.css +3 -0
- logler/web/static/logler-logo.png +0 -0
- logler/web/tailwind.config.cjs +9 -0
- logler/web/templates/index.html +1454 -0
- logler-1.0.7.dist-info/METADATA +584 -0
- logler-1.0.7.dist-info/RECORD +28 -0
- logler-1.0.7.dist-info/WHEEL +4 -0
- logler-1.0.7.dist-info/entry_points.txt +2 -0
- logler-1.0.7.dist-info/licenses/LICENSE +21 -0
- logler_rs/__init__.py +5 -0
- logler_rs/logler_rs.cp311-win_amd64.pyd +0 -0
logler/log_reader.py
ADDED
@@ -0,0 +1,267 @@
+"""Log file reading and streaming module."""
+
+import os
+import time
+from pathlib import Path
+from typing import Iterator, Optional
+
+
+class LogReader:
+    """Efficient log file reader with support for tailing and large files."""
+
+    def __init__(self, file_path: str, buffer_size: int = 8192):
+        """
+        Initialize the log reader.
+
+        Args:
+            file_path: Path to the log file
+            buffer_size: Buffer size for reading (default 8KB)
+        """
+        self.file_path = Path(file_path)
+        self.buffer_size = buffer_size
+
+        if not self.file_path.exists():
+            raise FileNotFoundError(f"Log file not found: {file_path}")
+
+        if not self.file_path.is_file():
+            raise ValueError(f"Not a file: {file_path}")
+
+    def read_lines(
+        self, start_line: int = 0, max_lines: Optional[int] = None, reverse: bool = False
+    ) -> Iterator[str]:
+        """
+        Read lines from the log file.
+
+        Args:
+            start_line: Line number to start from (0-indexed)
+            max_lines: Maximum number of lines to read (None for all)
+            reverse: Read lines in reverse order
+
+        Yields:
+            Log lines as strings
+        """
+        if reverse:
+            yield from self._read_lines_reverse(start_line, max_lines)
+        else:
+            yield from self._read_lines_forward(start_line, max_lines)
+
+    def _read_lines_forward(
+        self, start_line: int = 0, max_lines: Optional[int] = None
+    ) -> Iterator[str]:
+        """Read lines forward from the file."""
+        with open(self.file_path, "r", encoding="utf-8", errors="replace") as f:
+            line_num = 0
+            lines_read = 0
+
+            for line in f:
+                if line_num >= start_line:
+                    yield line.rstrip("\n\r")
+                    lines_read += 1
+
+                    if max_lines and lines_read >= max_lines:
+                        break
+
+                line_num += 1
+
+    def _read_lines_reverse(
+        self, start_line: int = 0, max_lines: Optional[int] = None
+    ) -> Iterator[str]:
+        """
+        Read lines in reverse order using an efficient algorithm.
+
+        This reads the file from the end in chunks to avoid loading
+        the entire file into memory.
+        """
+        with open(self.file_path, "rb") as f:
+            # Seek to end of file
+            f.seek(0, os.SEEK_END)
+            file_size = f.tell()
+
+            # Read in chunks from the end
+            buffer = b""
+            position = file_size
+            lines = []
+
+            while position > 0:
+                # Determine chunk size
+                chunk_size = min(self.buffer_size, position)
+                position -= chunk_size
+
+                # Read chunk
+                f.seek(position)
+                chunk = f.read(chunk_size)
+
+                # Combine with previous buffer
+                buffer = chunk + buffer
+
+                # Split into lines
+                chunk_lines = buffer.split(b"\n")
+
+                # Keep the first incomplete line in buffer
+                buffer = chunk_lines[0]
+                chunk_lines = chunk_lines[1:]
+
+                # Add lines in reverse
+                for line in reversed(chunk_lines):
+                    if line or lines:  # Skip empty lines at the end
+                        try:
+                            decoded = line.decode("utf-8", errors="replace")
+                            lines.append(decoded.rstrip("\r"))
+                        except UnicodeDecodeError:
+                            continue
+
+            # Add the first line if buffer has content
+            if buffer:
+                try:
+                    decoded = buffer.decode("utf-8", errors="replace")
+                    lines.append(decoded.rstrip("\r"))
+                except UnicodeDecodeError:
+                    pass
+
+            # Apply start_line and max_lines
+            lines_to_yield = lines[start_line:]
+            if max_lines:
+                lines_to_yield = lines_to_yield[:max_lines]
+
+            for line in lines_to_yield:
+                yield line
+
+    def tail(
+        self, num_lines: int = 10, follow: bool = False, sleep_interval: float = 0.1
+    ) -> Iterator[str]:
+        """
+        Tail the log file (like tail -f).
+
+        Args:
+            num_lines: Number of initial lines to show
+            follow: If True, continue watching for new lines
+            sleep_interval: How long to sleep between checks (seconds)
+
+        Yields:
+            Log lines as strings
+        """
+        # First, yield the last n lines
+        for line in self._read_lines_reverse(max_lines=num_lines):
+            pass  # Skip initial lines in non-follow mode if we just want to set position
+
+        lines = list(self._read_lines_reverse(max_lines=num_lines))
+        for line in reversed(lines):
+            yield line
+
+        if not follow:
+            return
+
+        # Follow mode: watch for new lines
+        with open(self.file_path, "r", encoding="utf-8", errors="replace") as f:
+            # Seek to end
+            f.seek(0, os.SEEK_END)
+
+            while True:
+                line = f.readline()
+                if line:
+                    yield line.rstrip("\n\r")
+                else:
+                    # Check if file was truncated (log rotation)
+                    current_pos = f.tell()
+                    f.seek(0, os.SEEK_END)
+                    end_pos = f.tell()
+
+                    if current_pos > end_pos:
+                        # File was truncated, start from beginning
+                        f.seek(0)
+                    else:
+                        # No new data, sleep
+                        time.sleep(sleep_interval)
+
+    def search(
+        self,
+        pattern: str,
+        case_sensitive: bool = False,
+        regex: bool = False,
+        max_lines: Optional[int] = None,
+    ) -> Iterator[tuple[int, str]]:
+        """
+        Search for lines matching a pattern.
+
+        Args:
+            pattern: Search pattern (string or regex)
+            case_sensitive: Whether search is case-sensitive
+            regex: Whether pattern is a regex
+            max_lines: Maximum number of matching lines to return
+
+        Yields:
+            Tuples of (line_number, line_content)
+        """
+        import re as regex_module
+
+        def make_regex_matcher(compiled):
+            return lambda line: compiled.search(line)
+
+        def make_case_insensitive_matcher(pat):
+            return lambda line: pat in line.lower()
+
+        def make_case_sensitive_matcher(pat):
+            return lambda line: pat in line
+
+        if regex:
+            flags = 0 if case_sensitive else regex_module.IGNORECASE
+            compiled_pattern = regex_module.compile(pattern, flags)
+            match_func = make_regex_matcher(compiled_pattern)
+        else:
+            if not case_sensitive:
+                pattern = pattern.lower()
+                match_func = make_case_insensitive_matcher(pattern)
+            else:
+                match_func = make_case_sensitive_matcher(pattern)
+
+        with open(self.file_path, "r", encoding="utf-8", errors="replace") as f:
+            matches_found = 0
+
+            for line_num, line in enumerate(f, 1):
+                line = line.rstrip("\n\r")
+
+                if match_func(line):
+                    yield (line_num, line)
+                    matches_found += 1
+
+                    if max_lines and matches_found >= max_lines:
+                        break
+
+    def get_file_info(self) -> dict:
+        """
+        Get information about the log file.
+
+        Returns:
+            Dictionary with file metadata
+        """
+        stat = self.file_path.stat()
+
+        return {
+            "path": str(self.file_path.absolute()),
+            "size": stat.st_size,
+            "size_human": self._format_bytes(stat.st_size),
+            "modified": stat.st_mtime,
+            "created": stat.st_ctime,
+        }
+
+    @staticmethod
+    def _format_bytes(size: int) -> str:
+        """Format bytes to human-readable string."""
+        for unit in ["B", "KB", "MB", "GB", "TB"]:
+            if size < 1024.0:
+                return f"{size:.2f} {unit}"
+            size /= 1024.0
+        return f"{size:.2f} PB"
+
+    def count_lines(self) -> int:
+        """
+        Count total number of lines in the file.
+
+        Returns:
+            Number of lines
+        """
+        count = 0
+        with open(self.file_path, "rb") as f:
+            for _ in f:
+                count += 1
+        return count
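A minimal usage sketch of the LogReader API added above, assuming the class is imported from its module path as shown in the wheel; the log file name and printed output are illustrative only:

from logler.log_reader import LogReader

# "app.log" is a hypothetical file used only for illustration.
reader = LogReader("app.log")
print(reader.get_file_info()["size_human"])

# Last 20 lines in chronological order, without following.
for line in reader.tail(num_lines=20, follow=False):
    print(line)

# Case-insensitive substring search, capped at 50 matching lines.
for line_num, line in reader.search("timeout", max_lines=50):
    print(f"{line_num}: {line}")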
logler/parser.py
ADDED
@@ -0,0 +1,207 @@
+"""
+Log parsing module with support for multiple formats.
+"""
+
+import re
+import json
+from datetime import datetime
+from typing import Optional, Dict, Any
+from dataclasses import dataclass, field
+from enum import Enum
+
+
+class LogLevel(str, Enum):
+    """Log levels."""
+
+    TRACE = "TRACE"
+    DEBUG = "DEBUG"
+    INFO = "INFO"
+    WARN = "WARN"
+    WARNING = "WARNING"
+    ERROR = "ERROR"
+    CRITICAL = "CRITICAL"
+    FATAL = "FATAL"
+    UNKNOWN = "UNKNOWN"
+
+
+@dataclass
+class LogEntry:
+    """Parsed log entry."""
+
+    line_number: int
+    raw: str
+    timestamp: Optional[datetime] = None
+    level: str = "UNKNOWN"
+    message: str = ""
+    thread_id: Optional[str] = None
+    correlation_id: Optional[str] = None
+    trace_id: Optional[str] = None
+    span_id: Optional[str] = None
+    service_name: Optional[str] = None
+    fields: Dict[str, Any] = field(default_factory=dict)
+
+    def __post_init__(self):
+        """Ensure message is set."""
+        if not self.message and self.raw:
+            self.message = self.raw
+
+
+class LogParser:
+    """Parse log entries from various formats."""
+
+    # Regex patterns
+    PATTERNS = {
+        "timestamp": re.compile(
+            r"\d{4}-\d{2}-\d{2}[T ]\d{2}:\d{2}:\d{2}(?:\.\d+)?(?:Z|[+-]\d{2}:?\d{2})?"
+        ),
+        "log_level": re.compile(
+            r"\b(TRACE|DEBUG|INFO|INFORMATION|WARN|WARNING|ERROR|ERR|FATAL|CRITICAL|CRIT)\b",
+            re.IGNORECASE,
+        ),
+        "thread_id": re.compile(r"(?:thread[=:\s]+|tid[=:\s]+|\[)([a-zA-Z0-9_-]+)(?:\])?"),
+        "correlation_id": re.compile(
+            r"(?:correlation[_-]?id|request[_-]?id|req[_-]?id)[=:\s]+([a-zA-Z0-9_-]+)"
+        ),
+        "trace_id": re.compile(r"(?:trace[_-]?id|traceId)[=:\s]+([a-fA-F0-9]{16,32})"),
+        "span_id": re.compile(r"(?:span[_-]?id|spanId)[=:\s]+([a-fA-F0-9]{8,16})"),
+    }
+
+    def parse_line(self, line_number: int, raw: str) -> LogEntry:
+        """Parse a single log line."""
+        # Try JSON first
+        if raw.strip().startswith("{"):
+            try:
+                data = json.loads(raw.strip())
+                return self._parse_json(line_number, raw, data)
+            except (json.JSONDecodeError, ValueError):
+                pass
+
+        # Parse as plain text
+        return self._parse_plain(line_number, raw)
+
+    def _parse_json(self, line_number: int, raw: str, data: dict) -> LogEntry:
+        """Parse JSON log entry."""
+        entry = LogEntry(line_number=line_number, raw=raw)
+
+        # Extract timestamp
+        for ts_field in ["timestamp", "time", "ts", "@timestamp", "datetime"]:
+            if ts_field in data:
+                try:
+                    ts_str = str(data[ts_field])
+                    entry.timestamp = datetime.fromisoformat(ts_str.replace("Z", "+00:00"))
+                except (ValueError, AttributeError):
+                    pass
+                break
+
+        # Extract level
+        for level_field in ["level", "severity", "loglevel", "lvl"]:
+            if level_field in data:
+                entry.level = str(data[level_field]).upper()
+                break
+
+        # Extract message
+        for msg_field in ["message", "msg", "text", "content"]:
+            if msg_field in data:
+                entry.message = str(data[msg_field])
+                break
+
+        # Extract thread ID
+        for thread_field in ["thread", "thread_id", "threadId", "tid"]:
+            if thread_field in data:
+                entry.thread_id = str(data[thread_field])
+                break
+
+        # Extract correlation ID
+        for corr_field in ["correlation_id", "correlationId", "request_id", "requestId"]:
+            if corr_field in data:
+                entry.correlation_id = str(data[corr_field])
+                break
+
+        # Extract trace/span IDs
+        if "trace_id" in data or "traceId" in data:
+            entry.trace_id = str(data.get("trace_id") or data.get("traceId"))
+        if "span_id" in data or "spanId" in data:
+            entry.span_id = str(data.get("span_id") or data.get("spanId"))
+
+        # Extract service name
+        for service_field in ["service", "service_name", "serviceName"]:
+            if service_field in data:
+                entry.service_name = str(data[service_field])
+                break
+
+        # Store other fields
+        skip_fields = {
+            "timestamp",
+            "time",
+            "ts",
+            "@timestamp",
+            "datetime",
+            "level",
+            "severity",
+            "loglevel",
+            "lvl",
+            "message",
+            "msg",
+            "text",
+            "content",
+            "thread",
+            "thread_id",
+            "threadId",
+            "tid",
+            "correlation_id",
+            "correlationId",
+            "request_id",
+            "requestId",
+            "trace_id",
+            "traceId",
+            "span_id",
+            "spanId",
+            "service",
+            "service_name",
+            "serviceName",
+        }
+        entry.fields = {k: v for k, v in data.items() if k not in skip_fields}
+
+        return entry
+
+    def _parse_plain(self, line_number: int, raw: str) -> LogEntry:
+        """Parse plain text log entry."""
+        entry = LogEntry(line_number=line_number, raw=raw, message=raw)
+
+        # Extract timestamp
+        ts_match = self.PATTERNS["timestamp"].search(raw)
+        if ts_match:
+            try:
+                ts_str = ts_match.group(0)
+                entry.timestamp = datetime.fromisoformat(
+                    ts_str.replace(" ", "T").replace("Z", "+00:00")
+                )
+            except ValueError:
+                pass
+
+        # Extract log level
+        level_match = self.PATTERNS["log_level"].search(raw)
+        if level_match:
+            entry.level = level_match.group(1).upper()
+
+        # Extract thread ID
+        thread_match = self.PATTERNS["thread_id"].search(raw)
+        if thread_match:
+            entry.thread_id = thread_match.group(1)
+
+        # Extract correlation ID
+        corr_match = self.PATTERNS["correlation_id"].search(raw)
+        if corr_match:
+            entry.correlation_id = corr_match.group(1)
+
+        # Extract trace ID
+        trace_match = self.PATTERNS["trace_id"].search(raw)
+        if trace_match:
+            entry.trace_id = trace_match.group(1)
+
+        # Extract span ID
+        span_match = self.PATTERNS["span_id"].search(raw)
+        if span_match:
+            entry.span_id = span_match.group(1)
+
+        return entry
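A minimal sketch of LogParser on one JSON line and one plain-text line, based on the field names and regex patterns above; the sample log lines are invented for illustration:

from logler.parser import LogParser

parser = LogParser()

# JSON input: level, trace ID, and timestamp are lifted onto the entry,
# while unrecognized keys such as "user" end up in entry.fields.
json_line = '{"time": "2024-01-01T12:00:00Z", "level": "error", "msg": "boom", "traceId": "abcdef0123456789", "user": "alice"}'
entry = parser.parse_line(1, json_line)
print(entry.level, entry.trace_id, entry.fields)

# Plain-text input: the PATTERNS regexes extract the same metadata.
text_line = "2024-01-01 12:00:01 WARN request_id=req-42 slow response"
entry = parser.parse_line(2, text_line)
print(entry.level, entry.correlation_id, entry.timestamp)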
logler/safe_regex.py
ADDED
@@ -0,0 +1,124 @@
+"""
+Safe regex compilation with timeout protection against ReDoS attacks.
+
+This module provides a safe_compile function that wraps re.compile with:
+- Pattern length validation
+- Compilation timeout (Unix only, graceful fallback on Windows)
+- Clear error messages
+"""
+
+import re
+import threading
+from typing import Optional
+
+
+class RegexTimeoutError(Exception):
+    """Raised when regex compilation times out."""
+
+    pass
+
+
+class RegexPatternTooLongError(Exception):
+    """Raised when regex pattern exceeds maximum allowed length."""
+
+    pass
+
+
+# Maximum pattern length to prevent ReDoS via complexity
+MAX_PATTERN_LENGTH = 1000
+
+# Timeout for regex compilation in seconds
+COMPILE_TIMEOUT = 2.0
+
+
+def _compile_with_timeout(
+    pattern: str,
+    flags: int,
+    timeout: float,
+    result_container: dict,
+) -> None:
+    """Worker function for threaded compilation."""
+    try:
+        result_container["result"] = re.compile(pattern, flags)
+    except re.error as e:
+        result_container["error"] = e
+    except Exception as e:
+        result_container["error"] = e
+
+
+def safe_compile(
+    pattern: str,
+    flags: int = 0,
+    timeout: float = COMPILE_TIMEOUT,
+    max_length: int = MAX_PATTERN_LENGTH,
+) -> re.Pattern:
+    """
+    Safely compile a regex pattern with timeout protection.
+
+    Args:
+        pattern: The regex pattern to compile
+        flags: Optional regex flags (e.g., re.IGNORECASE)
+        timeout: Maximum time in seconds to allow for compilation
+        max_length: Maximum allowed pattern length
+
+    Returns:
+        Compiled regex pattern
+
+    Raises:
+        RegexPatternTooLongError: If pattern exceeds max_length
+        RegexTimeoutError: If compilation takes longer than timeout
+        re.error: If the pattern is invalid
+    """
+    # Validate pattern length
+    if len(pattern) > max_length:
+        raise RegexPatternTooLongError(
+            f"Regex pattern length {len(pattern)} exceeds maximum {max_length}"
+        )
+
+    # Use threading for cross-platform timeout support
+    result_container: dict = {}
+    thread = threading.Thread(
+        target=_compile_with_timeout,
+        args=(pattern, flags, timeout, result_container),
+    )
+    thread.start()
+    thread.join(timeout=timeout)
+
+    if thread.is_alive():
+        # Thread is still running - compilation timed out
+        # Note: We can't actually kill the thread, but we return an error
+        raise RegexTimeoutError(
+            f"Regex compilation timed out after {timeout}s (pattern may cause catastrophic backtracking)"
+        )
+
+    if "error" in result_container:
+        raise result_container["error"]
+
+    return result_container["result"]
+
+
+def try_compile(
+    pattern: str,
+    flags: int = 0,
+    timeout: float = COMPILE_TIMEOUT,
+    max_length: int = MAX_PATTERN_LENGTH,
+) -> Optional[re.Pattern]:
+    """
+    Try to compile a regex pattern safely, returning None on failure.
+
+    This is a convenience wrapper around safe_compile that catches all
+    exceptions and returns None instead.
+
+    Args:
+        pattern: The regex pattern to compile
+        flags: Optional regex flags
+        timeout: Maximum compilation time
+        max_length: Maximum pattern length
+
+    Returns:
+        Compiled pattern or None if compilation fails
+    """
+    try:
+        return safe_compile(pattern, flags, timeout, max_length)
+    except (RegexTimeoutError, RegexPatternTooLongError, re.error):
+        return None
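A minimal sketch of safe_compile and try_compile as described by their docstrings above; the patterns are illustrative:

import re

from logler.safe_regex import RegexPatternTooLongError, safe_compile, try_compile

# Behaves like re.compile, but with a length check and a compilation
# time budget (2 seconds by default).
pattern = safe_compile(r"error\s+(\d+)", flags=re.IGNORECASE)
print(pattern.search("ERROR 500").group(1))  # -> 500

# Patterns longer than MAX_PATTERN_LENGTH are rejected up front.
try:
    safe_compile("a" * 2000)
except RegexPatternTooLongError as exc:
    print(exc)

# try_compile swallows failures and returns None instead of raising.
print(try_compile("(unbalanced"))  # -> None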