qlog-cli 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- benchmarks/bench.py +127 -0
- qlog/__init__.py +9 -0
- qlog/cli.py +185 -0
- qlog/indexer.py +194 -0
- qlog/parser.py +114 -0
- qlog/search.py +135 -0
- qlog_cli-0.2.0.dist-info/METADATA +299 -0
- qlog_cli-0.2.0.dist-info/RECORD +12 -0
- qlog_cli-0.2.0.dist-info/WHEEL +5 -0
- qlog_cli-0.2.0.dist-info/entry_points.txt +2 -0
- qlog_cli-0.2.0.dist-info/licenses/LICENSE +21 -0
- qlog_cli-0.2.0.dist-info/top_level.txt +2 -0
benchmarks/bench.py
ADDED
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
"""Benchmark qlog vs grep."""
|
|
2
|
+
|
|
3
|
+
import time
|
|
4
|
+
import subprocess
|
|
5
|
+
import sys
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
sys.path.insert(0, str(Path(__file__).parent.parent))
|
|
9
|
+
|
|
10
|
+
from qlog import LogIndexer, LogSearcher
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def generate_test_logs(filename, num_lines=100000):
|
|
14
|
+
"""Generate test log file."""
|
|
15
|
+
import random
|
|
16
|
+
|
|
17
|
+
levels = ["INFO", "WARN", "ERROR", "DEBUG"]
|
|
18
|
+
messages = [
|
|
19
|
+
"Request processed successfully",
|
|
20
|
+
"Connection timeout to database",
|
|
21
|
+
"User authentication failed",
|
|
22
|
+
"Cache miss for key",
|
|
23
|
+
"Exception in handler",
|
|
24
|
+
"Request completed in 123ms",
|
|
25
|
+
]
|
|
26
|
+
|
|
27
|
+
with open(filename, "w") as f:
|
|
28
|
+
for i in range(num_lines):
|
|
29
|
+
level = random.choice(levels)
|
|
30
|
+
message = random.choice(messages)
|
|
31
|
+
f.write(f"2024-01-{i%28+1:02d} 12:00:{i%60:02d} [{level}] {message} (request-id-{i})\n")
|
|
32
|
+
|
|
33
|
+
print(f"✓ Generated {num_lines:,} lines in {filename}")
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def bench_indexing(log_file):
|
|
37
|
+
"""Benchmark indexing speed."""
|
|
38
|
+
print("\n🚀 Benchmarking Indexing...")
|
|
39
|
+
|
|
40
|
+
indexer = LogIndexer(index_dir=".qlog_bench")
|
|
41
|
+
|
|
42
|
+
start = time.time()
|
|
43
|
+
stats = indexer.index_files([log_file], force=True)
|
|
44
|
+
elapsed = time.time() - start
|
|
45
|
+
|
|
46
|
+
print(f" Lines: {stats['lines']:,}")
|
|
47
|
+
print(f" Time: {elapsed:.3f}s")
|
|
48
|
+
print(f" Speed: {stats['lines_per_sec']:,} lines/sec")
|
|
49
|
+
|
|
50
|
+
return indexer
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def bench_search_qlog(indexer, query):
|
|
54
|
+
"""Benchmark qlog search."""
|
|
55
|
+
print(f"\n⚡ Benchmarking qlog search: '{query}'...")
|
|
56
|
+
|
|
57
|
+
searcher = LogSearcher(indexer)
|
|
58
|
+
|
|
59
|
+
# Warmup
|
|
60
|
+
searcher.search(query, max_results=10)
|
|
61
|
+
|
|
62
|
+
# Actual benchmark
|
|
63
|
+
times = []
|
|
64
|
+
for _ in range(10):
|
|
65
|
+
start = time.time()
|
|
66
|
+
results = searcher.search(query, max_results=1000)
|
|
67
|
+
elapsed = time.time() - start
|
|
68
|
+
times.append(elapsed)
|
|
69
|
+
|
|
70
|
+
avg_time = sum(times) / len(times)
|
|
71
|
+
|
|
72
|
+
print(f" Results: {len(results)}")
|
|
73
|
+
print(f" Avg time: {avg_time*1000:.2f}ms")
|
|
74
|
+
print(f" Min time: {min(times)*1000:.2f}ms")
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def bench_grep(log_file, query):
|
|
78
|
+
"""Benchmark grep."""
|
|
79
|
+
print(f"\n🐌 Benchmarking grep: '{query}'...")
|
|
80
|
+
|
|
81
|
+
times = []
|
|
82
|
+
for _ in range(3): # Fewer iterations (grep is slow)
|
|
83
|
+
start = time.time()
|
|
84
|
+
result = subprocess.run(
|
|
85
|
+
["grep", "-i", query, log_file],
|
|
86
|
+
capture_output=True,
|
|
87
|
+
text=True
|
|
88
|
+
)
|
|
89
|
+
elapsed = time.time() - start
|
|
90
|
+
times.append(elapsed)
|
|
91
|
+
|
|
92
|
+
avg_time = sum(times) / len(times)
|
|
93
|
+
|
|
94
|
+
print(f" Avg time: {avg_time*1000:.2f}ms")
|
|
95
|
+
print(f" Speedup: {avg_time / (sum(times)/len(times)):.1f}x")
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def main():
|
|
99
|
+
"""Run benchmarks."""
|
|
100
|
+
print("=" * 60)
|
|
101
|
+
print("qlog Benchmark Suite")
|
|
102
|
+
print("=" * 60)
|
|
103
|
+
|
|
104
|
+
# Generate test data
|
|
105
|
+
log_file = "test_bench.log"
|
|
106
|
+
num_lines = 500000
|
|
107
|
+
|
|
108
|
+
if not Path(log_file).exists():
|
|
109
|
+
generate_test_logs(log_file, num_lines)
|
|
110
|
+
|
|
111
|
+
# Benchmark indexing
|
|
112
|
+
indexer = bench_indexing(log_file)
|
|
113
|
+
|
|
114
|
+
# Benchmark searches
|
|
115
|
+
queries = ["ERROR", "timeout", "request-id"]
|
|
116
|
+
|
|
117
|
+
for query in queries:
|
|
118
|
+
bench_search_qlog(indexer, query)
|
|
119
|
+
bench_grep(log_file, query)
|
|
120
|
+
|
|
121
|
+
print("\n" + "=" * 60)
|
|
122
|
+
print("✨ Benchmark Complete!")
|
|
123
|
+
print("=" * 60)
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
if __name__ == "__main__":
|
|
127
|
+
main()
|
qlog/__init__.py
ADDED
qlog/cli.py
ADDED
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
"""Command-line interface for qlog."""
|
|
2
|
+
|
|
3
|
+
import click
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from rich.console import Console
|
|
6
|
+
from rich.table import Table
|
|
7
|
+
from rich.syntax import Syntax
|
|
8
|
+
from rich.panel import Panel
|
|
9
|
+
from rich.progress import Progress, SpinnerColumn, TextColumn
|
|
10
|
+
from rich import print as rprint
|
|
11
|
+
|
|
12
|
+
from .indexer import LogIndexer
|
|
13
|
+
from .search import LogSearcher
|
|
14
|
+
from .parser import LogParser
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
console = Console()
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@click.group()
|
|
21
|
+
@click.version_option(version="0.2.0")
|
|
22
|
+
def main():
|
|
23
|
+
"""qlog - Lightning-fast local log search and analysis.
|
|
24
|
+
|
|
25
|
+
\b
|
|
26
|
+
Examples:
|
|
27
|
+
qlog index './logs/**/*.log'
|
|
28
|
+
qlog search "error" --context 3
|
|
29
|
+
qlog search "status=500" --json
|
|
30
|
+
qlog stats
|
|
31
|
+
"""
|
|
32
|
+
pass
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@main.command()
|
|
36
|
+
@click.argument("patterns", nargs=-1, required=True)
|
|
37
|
+
@click.option("--force", is_flag=True, help="Re-index even if files haven't changed")
|
|
38
|
+
def index(patterns, force):
|
|
39
|
+
"""Index log files for fast searching."""
|
|
40
|
+
console.print("[bold blue]🚀 Indexing logs...[/bold blue]")
|
|
41
|
+
|
|
42
|
+
indexer = LogIndexer()
|
|
43
|
+
|
|
44
|
+
with Progress(
|
|
45
|
+
SpinnerColumn(),
|
|
46
|
+
TextColumn("[progress.description]{task.description}"),
|
|
47
|
+
console=console,
|
|
48
|
+
) as progress:
|
|
49
|
+
task = progress.add_task("Indexing...", total=None)
|
|
50
|
+
|
|
51
|
+
stats = indexer.index_files(list(patterns), force=force)
|
|
52
|
+
|
|
53
|
+
progress.remove_task(task)
|
|
54
|
+
|
|
55
|
+
# Display results
|
|
56
|
+
table = Table(title="Indexing Complete", show_header=False)
|
|
57
|
+
table.add_column("Metric", style="cyan")
|
|
58
|
+
table.add_column("Value", style="green")
|
|
59
|
+
|
|
60
|
+
table.add_row("Files indexed", str(stats["files"]))
|
|
61
|
+
table.add_row("Lines indexed", f"{stats['lines']:,}")
|
|
62
|
+
table.add_row("Time elapsed", f"{stats['elapsed']:.2f}s")
|
|
63
|
+
table.add_row("Speed", f"{stats['lines_per_sec']:,} lines/sec")
|
|
64
|
+
|
|
65
|
+
console.print(table)
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
@main.command()
|
|
69
|
+
@click.argument("query")
|
|
70
|
+
@click.option("--context", "-c", default=0, help="Lines of context before/after match")
|
|
71
|
+
@click.option("--max-results", "-n", default=100, help="Maximum results to show")
|
|
72
|
+
@click.option("--json", "output_json", is_flag=True, help="Output as JSON")
|
|
73
|
+
def search(query, context, max_results, output_json):
|
|
74
|
+
"""Search indexed logs."""
|
|
75
|
+
indexer = LogIndexer()
|
|
76
|
+
searcher = LogSearcher(indexer)
|
|
77
|
+
|
|
78
|
+
if not indexer.files:
|
|
79
|
+
console.print("[bold red]❌ No indexed files found. Run 'qlog index' first.[/bold red]")
|
|
80
|
+
return
|
|
81
|
+
|
|
82
|
+
results = searcher.search(query, context=context, max_results=max_results)
|
|
83
|
+
|
|
84
|
+
if not results:
|
|
85
|
+
console.print(f"[yellow]No results found for:[/yellow] {query}")
|
|
86
|
+
return
|
|
87
|
+
|
|
88
|
+
if output_json:
|
|
89
|
+
import json
|
|
90
|
+
print(json.dumps(results, indent=2))
|
|
91
|
+
return
|
|
92
|
+
|
|
93
|
+
# Beautiful terminal output
|
|
94
|
+
console.print(f"\n[bold green]✨ Found {len(results)} results[/bold green]")
|
|
95
|
+
console.print()
|
|
96
|
+
|
|
97
|
+
for i, result in enumerate(results, 1):
|
|
98
|
+
# Create panel for each result
|
|
99
|
+
content = []
|
|
100
|
+
|
|
101
|
+
# Context before
|
|
102
|
+
if result.get("before"):
|
|
103
|
+
for line in result["before"]:
|
|
104
|
+
content.append(f"[dim]{line}[/dim]")
|
|
105
|
+
|
|
106
|
+
# Matched line (highlighted)
|
|
107
|
+
matched_line = result["line"]
|
|
108
|
+
# Highlight query terms
|
|
109
|
+
for term in query.split():
|
|
110
|
+
matched_line = matched_line.replace(
|
|
111
|
+
term,
|
|
112
|
+
f"[bold yellow on red]{term}[/bold yellow on red]"
|
|
113
|
+
)
|
|
114
|
+
matched_line = matched_line.replace(
|
|
115
|
+
term.upper(),
|
|
116
|
+
f"[bold yellow on red]{term.upper()}[/bold yellow on red]"
|
|
117
|
+
)
|
|
118
|
+
content.append(f"[bold white]{matched_line}[/bold white]")
|
|
119
|
+
|
|
120
|
+
# Context after
|
|
121
|
+
if result.get("after"):
|
|
122
|
+
for line in result["after"]:
|
|
123
|
+
content.append(f"[dim]{line}[/dim]")
|
|
124
|
+
|
|
125
|
+
panel_content = "\n".join(content)
|
|
126
|
+
|
|
127
|
+
console.print(
|
|
128
|
+
Panel(
|
|
129
|
+
panel_content,
|
|
130
|
+
title=f"[cyan]{result['file']}[/cyan]:[green]{result['line_num']}[/green]",
|
|
131
|
+
border_style="blue",
|
|
132
|
+
expand=False,
|
|
133
|
+
)
|
|
134
|
+
)
|
|
135
|
+
|
|
136
|
+
if i < len(results):
|
|
137
|
+
console.print()
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
@main.command()
|
|
141
|
+
def stats():
|
|
142
|
+
"""Show index statistics."""
|
|
143
|
+
indexer = LogIndexer()
|
|
144
|
+
stats = indexer.get_stats()
|
|
145
|
+
|
|
146
|
+
if stats["files"] == 0:
|
|
147
|
+
console.print("[yellow]No index found. Run 'qlog index' first.[/yellow]")
|
|
148
|
+
return
|
|
149
|
+
|
|
150
|
+
table = Table(title="Index Statistics", show_header=False)
|
|
151
|
+
table.add_column("Metric", style="cyan")
|
|
152
|
+
table.add_column("Value", style="green")
|
|
153
|
+
|
|
154
|
+
table.add_row("Indexed files", str(stats["files"]))
|
|
155
|
+
table.add_row("Unique terms", f"{stats['unique_terms']:,}")
|
|
156
|
+
table.add_row("Total positions", f"{stats['total_positions']:,}")
|
|
157
|
+
table.add_row("Index size", f"{stats['index_size_mb']:.2f} MB")
|
|
158
|
+
|
|
159
|
+
console.print(table)
|
|
160
|
+
|
|
161
|
+
# Show file list
|
|
162
|
+
if indexer.files:
|
|
163
|
+
console.print("\n[bold]Indexed Files:[/bold]")
|
|
164
|
+
for file_id, meta in list(indexer.files.items())[:10]:
|
|
165
|
+
console.print(f" [cyan]•[/cyan] {meta['path']}")
|
|
166
|
+
|
|
167
|
+
if len(indexer.files) > 10:
|
|
168
|
+
console.print(f" [dim]... and {len(indexer.files) - 10} more[/dim]")
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
@main.command()
|
|
172
|
+
def clear():
|
|
173
|
+
"""Clear the index."""
|
|
174
|
+
import shutil
|
|
175
|
+
|
|
176
|
+
index_dir = Path(".qlog")
|
|
177
|
+
if index_dir.exists():
|
|
178
|
+
shutil.rmtree(index_dir)
|
|
179
|
+
console.print("[green]✓ Index cleared[/green]")
|
|
180
|
+
else:
|
|
181
|
+
console.print("[yellow]No index to clear[/yellow]")
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
if __name__ == "__main__":
|
|
185
|
+
main()
|
qlog/indexer.py
ADDED
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
"""Fast log indexer with inverted index."""
|
|
2
|
+
|
|
3
|
+
import mmap
|
|
4
|
+
import os
|
|
5
|
+
import re
|
|
6
|
+
import pickle
|
|
7
|
+
import hashlib
|
|
8
|
+
from collections import defaultdict
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Dict, List, Set, Tuple
|
|
11
|
+
from datetime import datetime
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class LogIndexer:
|
|
15
|
+
"""Lightning-fast log indexer using inverted index."""
|
|
16
|
+
|
|
17
|
+
def __init__(self, index_dir: str = ".qlog"):
|
|
18
|
+
self.index_dir = Path(index_dir)
|
|
19
|
+
self.index_dir.mkdir(exist_ok=True)
|
|
20
|
+
|
|
21
|
+
# Inverted index: word -> [(file_id, line_num, offset), ...]
|
|
22
|
+
self.index: Dict[str, List[Tuple[int, int, int]]] = defaultdict(list)
|
|
23
|
+
|
|
24
|
+
# File metadata
|
|
25
|
+
self.files: Dict[int, Dict] = {}
|
|
26
|
+
self.file_id_counter = 0
|
|
27
|
+
|
|
28
|
+
# Load existing index if available
|
|
29
|
+
self._load_index()
|
|
30
|
+
|
|
31
|
+
def index_files(self, patterns: List[str], force: bool = False) -> Dict:
|
|
32
|
+
"""Index log files matching patterns."""
|
|
33
|
+
from glob import glob
|
|
34
|
+
|
|
35
|
+
files_indexed = 0
|
|
36
|
+
lines_indexed = 0
|
|
37
|
+
start_time = datetime.now()
|
|
38
|
+
|
|
39
|
+
for pattern in patterns:
|
|
40
|
+
for filepath in glob(pattern, recursive=True):
|
|
41
|
+
if not os.path.isfile(filepath):
|
|
42
|
+
continue
|
|
43
|
+
|
|
44
|
+
# Check if file needs reindexing
|
|
45
|
+
file_hash = self._file_hash(filepath)
|
|
46
|
+
if not force and self._is_indexed(filepath, file_hash):
|
|
47
|
+
continue
|
|
48
|
+
|
|
49
|
+
# Index the file
|
|
50
|
+
lines = self._index_file(filepath, file_hash)
|
|
51
|
+
files_indexed += 1
|
|
52
|
+
lines_indexed += lines
|
|
53
|
+
|
|
54
|
+
# Save index
|
|
55
|
+
self._save_index()
|
|
56
|
+
|
|
57
|
+
elapsed = (datetime.now() - start_time).total_seconds()
|
|
58
|
+
|
|
59
|
+
return {
|
|
60
|
+
"files": files_indexed,
|
|
61
|
+
"lines": lines_indexed,
|
|
62
|
+
"elapsed": elapsed,
|
|
63
|
+
"lines_per_sec": int(lines_indexed / elapsed) if elapsed > 0 else 0,
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
def _index_file(self, filepath: str, file_hash: str) -> int:
|
|
67
|
+
"""Index a single file using mmap for speed."""
|
|
68
|
+
file_id = self.file_id_counter
|
|
69
|
+
self.file_id_counter += 1
|
|
70
|
+
|
|
71
|
+
self.files[file_id] = {
|
|
72
|
+
"path": filepath,
|
|
73
|
+
"hash": file_hash,
|
|
74
|
+
"size": os.path.getsize(filepath),
|
|
75
|
+
"indexed_at": datetime.now().isoformat(),
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
lines_indexed = 0
|
|
79
|
+
|
|
80
|
+
try:
|
|
81
|
+
with open(filepath, "r+b") as f:
|
|
82
|
+
# Use mmap for fast reading
|
|
83
|
+
with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as mmapped:
|
|
84
|
+
line_num = 0
|
|
85
|
+
offset = 0
|
|
86
|
+
|
|
87
|
+
while True:
|
|
88
|
+
line_start = offset
|
|
89
|
+
# Find next newline
|
|
90
|
+
newline_pos = mmapped.find(b'\n', offset)
|
|
91
|
+
|
|
92
|
+
if newline_pos == -1:
|
|
93
|
+
# Last line
|
|
94
|
+
line_bytes = mmapped[offset:]
|
|
95
|
+
if not line_bytes:
|
|
96
|
+
break
|
|
97
|
+
offset = len(mmapped)
|
|
98
|
+
else:
|
|
99
|
+
line_bytes = mmapped[offset:newline_pos]
|
|
100
|
+
offset = newline_pos + 1
|
|
101
|
+
|
|
102
|
+
try:
|
|
103
|
+
line = line_bytes.decode('utf-8', errors='ignore')
|
|
104
|
+
except:
|
|
105
|
+
continue
|
|
106
|
+
|
|
107
|
+
# Tokenize and index
|
|
108
|
+
tokens = self._tokenize(line)
|
|
109
|
+
for token in tokens:
|
|
110
|
+
self.index[token].append((file_id, line_num, line_start))
|
|
111
|
+
|
|
112
|
+
line_num += 1
|
|
113
|
+
lines_indexed += 1
|
|
114
|
+
|
|
115
|
+
if newline_pos == -1:
|
|
116
|
+
break
|
|
117
|
+
|
|
118
|
+
except Exception as e:
|
|
119
|
+
print(f"Error indexing {filepath}: {e}")
|
|
120
|
+
|
|
121
|
+
return lines_indexed
|
|
122
|
+
|
|
123
|
+
def _tokenize(self, line: str) -> Set[str]:
|
|
124
|
+
"""Tokenize log line into searchable terms."""
|
|
125
|
+
# Split on non-alphanumeric, keep words 2+ chars
|
|
126
|
+
tokens = set()
|
|
127
|
+
|
|
128
|
+
# Extract words
|
|
129
|
+
words = re.findall(r'\b\w{2,}\b', line.lower())
|
|
130
|
+
tokens.update(words)
|
|
131
|
+
|
|
132
|
+
# Extract common patterns
|
|
133
|
+
# IPs
|
|
134
|
+
ips = re.findall(r'\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b', line)
|
|
135
|
+
tokens.update(ips)
|
|
136
|
+
|
|
137
|
+
# UUIDs/IDs
|
|
138
|
+
ids = re.findall(r'\b[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}\b', line.lower())
|
|
139
|
+
tokens.update(ids)
|
|
140
|
+
|
|
141
|
+
# HTTP status codes
|
|
142
|
+
status = re.findall(r'\b[45]\d{2}\b', line)
|
|
143
|
+
tokens.update(status)
|
|
144
|
+
|
|
145
|
+
return tokens
|
|
146
|
+
|
|
147
|
+
def _file_hash(self, filepath: str) -> str:
|
|
148
|
+
"""Quick file hash for change detection."""
|
|
149
|
+
stat = os.stat(filepath)
|
|
150
|
+
# Hash based on size + mtime (fast, good enough)
|
|
151
|
+
return hashlib.md5(
|
|
152
|
+
f"{stat.st_size}:{stat.st_mtime}".encode()
|
|
153
|
+
).hexdigest()
|
|
154
|
+
|
|
155
|
+
def _is_indexed(self, filepath: str, file_hash: str) -> bool:
|
|
156
|
+
"""Check if file is already indexed."""
|
|
157
|
+
for file_id, meta in self.files.items():
|
|
158
|
+
if meta["path"] == filepath and meta["hash"] == file_hash:
|
|
159
|
+
return True
|
|
160
|
+
return False
|
|
161
|
+
|
|
162
|
+
def _save_index(self):
|
|
163
|
+
"""Save index to disk."""
|
|
164
|
+
index_file = self.index_dir / "index.pkl"
|
|
165
|
+
with open(index_file, "wb") as f:
|
|
166
|
+
pickle.dump({
|
|
167
|
+
"index": dict(self.index),
|
|
168
|
+
"files": self.files,
|
|
169
|
+
"file_id_counter": self.file_id_counter,
|
|
170
|
+
}, f)
|
|
171
|
+
|
|
172
|
+
def _load_index(self):
|
|
173
|
+
"""Load index from disk."""
|
|
174
|
+
index_file = self.index_dir / "index.pkl"
|
|
175
|
+
if index_file.exists():
|
|
176
|
+
try:
|
|
177
|
+
with open(index_file, "rb") as f:
|
|
178
|
+
data = pickle.load(f)
|
|
179
|
+
self.index = defaultdict(list, data["index"])
|
|
180
|
+
self.files = data["files"]
|
|
181
|
+
self.file_id_counter = data.get("file_id_counter", 0)
|
|
182
|
+
except Exception as e:
|
|
183
|
+
print(f"Warning: Could not load index: {e}")
|
|
184
|
+
|
|
185
|
+
def get_stats(self) -> Dict:
|
|
186
|
+
"""Get index statistics."""
|
|
187
|
+
total_lines = sum(len(positions) for positions in self.index.values())
|
|
188
|
+
return {
|
|
189
|
+
"files": len(self.files),
|
|
190
|
+
"unique_terms": len(self.index),
|
|
191
|
+
"total_positions": total_lines,
|
|
192
|
+
"index_size_mb": os.path.getsize(self.index_dir / "index.pkl") / 1024 / 1024
|
|
193
|
+
if (self.index_dir / "index.pkl").exists() else 0,
|
|
194
|
+
}
|
qlog/parser.py
ADDED
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
"""Auto-detect and parse log formats."""
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
import json
|
|
5
|
+
from typing import Dict, Optional
|
|
6
|
+
from datetime import datetime
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class LogParser:
|
|
10
|
+
"""Auto-detect and parse common log formats."""
|
|
11
|
+
|
|
12
|
+
FORMATS = {
|
|
13
|
+
"json": r'^\s*\{.*\}\s*$',
|
|
14
|
+
"syslog": r'^(\w{3}\s+\d{1,2}\s+\d{2}:\d{2}:\d{2})',
|
|
15
|
+
"apache": r'^\S+\s+\S+\s+\S+\s+\[',
|
|
16
|
+
"nginx": r'^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\s+-\s+-\s+\[',
|
|
17
|
+
"generic": r'^\d{4}-\d{2}-\d{2}', # ISO timestamp
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
@staticmethod
|
|
21
|
+
def detect_format(line: str) -> str:
|
|
22
|
+
"""Detect log format from a sample line."""
|
|
23
|
+
for fmt, pattern in LogParser.FORMATS.items():
|
|
24
|
+
if re.match(pattern, line):
|
|
25
|
+
return fmt
|
|
26
|
+
return "unknown"
|
|
27
|
+
|
|
28
|
+
@staticmethod
|
|
29
|
+
def parse(line: str, fmt: Optional[str] = None) -> Dict:
|
|
30
|
+
"""Parse log line into structured data."""
|
|
31
|
+
if fmt is None:
|
|
32
|
+
fmt = LogParser.detect_format(line)
|
|
33
|
+
|
|
34
|
+
if fmt == "json":
|
|
35
|
+
return LogParser._parse_json(line)
|
|
36
|
+
elif fmt == "syslog":
|
|
37
|
+
return LogParser._parse_syslog(line)
|
|
38
|
+
elif fmt in ("apache", "nginx"):
|
|
39
|
+
return LogParser._parse_web(line)
|
|
40
|
+
elif fmt == "generic":
|
|
41
|
+
return LogParser._parse_generic(line)
|
|
42
|
+
else:
|
|
43
|
+
return {"raw": line, "format": "unknown"}
|
|
44
|
+
|
|
45
|
+
@staticmethod
|
|
46
|
+
def _parse_json(line: str) -> Dict:
|
|
47
|
+
"""Parse JSON log line."""
|
|
48
|
+
try:
|
|
49
|
+
data = json.loads(line)
|
|
50
|
+
return {
|
|
51
|
+
"format": "json",
|
|
52
|
+
"timestamp": data.get("timestamp") or data.get("time") or data.get("@timestamp"),
|
|
53
|
+
"level": data.get("level") or data.get("severity"),
|
|
54
|
+
"message": data.get("message") or data.get("msg"),
|
|
55
|
+
"data": data,
|
|
56
|
+
}
|
|
57
|
+
except json.JSONDecodeError:
|
|
58
|
+
return {"raw": line, "format": "json", "error": "invalid_json"}
|
|
59
|
+
|
|
60
|
+
@staticmethod
|
|
61
|
+
def _parse_syslog(line: str) -> Dict:
|
|
62
|
+
"""Parse syslog format."""
|
|
63
|
+
# Example: Jan 15 10:30:45 hostname program[pid]: message
|
|
64
|
+
match = re.match(
|
|
65
|
+
r'^(\w{3}\s+\d{1,2}\s+\d{2}:\d{2}:\d{2})\s+(\S+)\s+(\S+?)(\[\d+\])?:\s+(.+)$',
|
|
66
|
+
line
|
|
67
|
+
)
|
|
68
|
+
if match:
|
|
69
|
+
return {
|
|
70
|
+
"format": "syslog",
|
|
71
|
+
"timestamp": match.group(1),
|
|
72
|
+
"hostname": match.group(2),
|
|
73
|
+
"program": match.group(3),
|
|
74
|
+
"pid": match.group(4)[1:-1] if match.group(4) else None,
|
|
75
|
+
"message": match.group(5),
|
|
76
|
+
}
|
|
77
|
+
return {"raw": line, "format": "syslog"}
|
|
78
|
+
|
|
79
|
+
@staticmethod
|
|
80
|
+
def _parse_web(line: str) -> Dict:
|
|
81
|
+
"""Parse Apache/Nginx combined log format."""
|
|
82
|
+
# Example: 127.0.0.1 - - [01/Jan/2020:12:00:00 +0000] "GET / HTTP/1.1" 200 1234
|
|
83
|
+
match = re.match(
|
|
84
|
+
r'^(\S+)\s+\S+\s+\S+\s+\[([^\]]+)\]\s+"(\S+)\s+(\S+)\s+\S+"\s+(\d+)\s+(\S+)',
|
|
85
|
+
line
|
|
86
|
+
)
|
|
87
|
+
if match:
|
|
88
|
+
return {
|
|
89
|
+
"format": "web",
|
|
90
|
+
"ip": match.group(1),
|
|
91
|
+
"timestamp": match.group(2),
|
|
92
|
+
"method": match.group(3),
|
|
93
|
+
"path": match.group(4),
|
|
94
|
+
"status": int(match.group(5)),
|
|
95
|
+
"size": match.group(6),
|
|
96
|
+
}
|
|
97
|
+
return {"raw": line, "format": "web"}
|
|
98
|
+
|
|
99
|
+
@staticmethod
|
|
100
|
+
def _parse_generic(line: str) -> Dict:
|
|
101
|
+
"""Parse generic ISO timestamp logs."""
|
|
102
|
+
# Example: 2020-01-01 12:00:00 [INFO] message
|
|
103
|
+
match = re.match(
|
|
104
|
+
r'^(\d{4}-\d{2}-\d{2}[T\s]\d{2}:\d{2}:\d{2}(?:\.\d+)?(?:Z|[+-]\d{2}:\d{2})?)\s+(?:\[(\w+)\])?\s*(.+)$',
|
|
105
|
+
line
|
|
106
|
+
)
|
|
107
|
+
if match:
|
|
108
|
+
return {
|
|
109
|
+
"format": "generic",
|
|
110
|
+
"timestamp": match.group(1),
|
|
111
|
+
"level": match.group(2),
|
|
112
|
+
"message": match.group(3),
|
|
113
|
+
}
|
|
114
|
+
return {"raw": line, "format": "generic"}
|
qlog/search.py
ADDED
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
"""Fast log search engine."""
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from typing import List, Dict, Set, Tuple
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from .indexer import LogIndexer
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class LogSearcher:
|
|
10
|
+
"""Lightning-fast log searcher using inverted index."""
|
|
11
|
+
|
|
12
|
+
def __init__(self, indexer: LogIndexer):
|
|
13
|
+
self.indexer = indexer
|
|
14
|
+
|
|
15
|
+
def search(self, query: str, context: int = 0, max_results: int = 1000) -> List[Dict]:
|
|
16
|
+
"""Search indexed logs.
|
|
17
|
+
|
|
18
|
+
Args:
|
|
19
|
+
query: Search query (word or phrase)
|
|
20
|
+
context: Number of context lines before/after
|
|
21
|
+
max_results: Maximum results to return
|
|
22
|
+
|
|
23
|
+
Returns:
|
|
24
|
+
List of matching log entries with metadata
|
|
25
|
+
"""
|
|
26
|
+
# Tokenize query
|
|
27
|
+
query_tokens = self._parse_query(query)
|
|
28
|
+
|
|
29
|
+
if not query_tokens:
|
|
30
|
+
return []
|
|
31
|
+
|
|
32
|
+
# Find matching positions using inverted index
|
|
33
|
+
matching_positions = self._find_matches(query_tokens)
|
|
34
|
+
|
|
35
|
+
if not matching_positions:
|
|
36
|
+
return []
|
|
37
|
+
|
|
38
|
+
# Limit results
|
|
39
|
+
matching_positions = matching_positions[:max_results]
|
|
40
|
+
|
|
41
|
+
# Retrieve actual log lines
|
|
42
|
+
results = []
|
|
43
|
+
for file_id, line_num, offset in matching_positions:
|
|
44
|
+
file_meta = self.indexer.files.get(file_id)
|
|
45
|
+
if not file_meta:
|
|
46
|
+
continue
|
|
47
|
+
|
|
48
|
+
# Read the line (and context if requested)
|
|
49
|
+
lines = self._read_lines(
|
|
50
|
+
file_meta["path"],
|
|
51
|
+
line_num,
|
|
52
|
+
context
|
|
53
|
+
)
|
|
54
|
+
|
|
55
|
+
if lines:
|
|
56
|
+
results.append({
|
|
57
|
+
"file": file_meta["path"],
|
|
58
|
+
"line_num": line_num,
|
|
59
|
+
"line": lines.get("match", ""),
|
|
60
|
+
"before": lines.get("before", []),
|
|
61
|
+
"after": lines.get("after", []),
|
|
62
|
+
"query": query,
|
|
63
|
+
})
|
|
64
|
+
|
|
65
|
+
return results
|
|
66
|
+
|
|
67
|
+
def _parse_query(self, query: str) -> List[str]:
|
|
68
|
+
"""Parse search query into tokens."""
|
|
69
|
+
# Simple for now - split on whitespace, lowercase
|
|
70
|
+
tokens = query.lower().split()
|
|
71
|
+
return tokens
|
|
72
|
+
|
|
73
|
+
def _find_matches(self, query_tokens: List[str]) -> List[Tuple[int, int, int]]:
|
|
74
|
+
"""Find positions matching all query tokens."""
|
|
75
|
+
if not query_tokens:
|
|
76
|
+
return []
|
|
77
|
+
|
|
78
|
+
# Get positions for first token
|
|
79
|
+
positions_sets = []
|
|
80
|
+
|
|
81
|
+
for token in query_tokens:
|
|
82
|
+
positions = self.indexer.index.get(token, [])
|
|
83
|
+
if not positions:
|
|
84
|
+
# No matches for this token = no overall matches
|
|
85
|
+
return []
|
|
86
|
+
|
|
87
|
+
# Convert to set of (file_id, line_num) for intersection
|
|
88
|
+
pos_set = set((file_id, line_num) for file_id, line_num, offset in positions)
|
|
89
|
+
positions_sets.append((token, pos_set, positions))
|
|
90
|
+
|
|
91
|
+
# Find intersection (lines containing ALL tokens)
|
|
92
|
+
if len(positions_sets) == 1:
|
|
93
|
+
# Single token - return all positions
|
|
94
|
+
_, _, positions = positions_sets[0]
|
|
95
|
+
return sorted(positions, key=lambda x: (x[0], x[1]))
|
|
96
|
+
|
|
97
|
+
# Multiple tokens - find intersection
|
|
98
|
+
common_positions = positions_sets[0][1]
|
|
99
|
+
for _, pos_set, _ in positions_sets[1:]:
|
|
100
|
+
common_positions &= pos_set
|
|
101
|
+
|
|
102
|
+
# Convert back to full position tuples
|
|
103
|
+
result = []
|
|
104
|
+
for file_id, line_num in common_positions:
|
|
105
|
+
# Get offset from first token's positions
|
|
106
|
+
for fid, lnum, offset in positions_sets[0][2]:
|
|
107
|
+
if fid == file_id and lnum == line_num:
|
|
108
|
+
result.append((file_id, line_num, offset))
|
|
109
|
+
break
|
|
110
|
+
|
|
111
|
+
return sorted(result, key=lambda x: (x[0], x[1]))
|
|
112
|
+
|
|
113
|
+
def _read_lines(self, filepath: str, target_line: int, context: int = 0) -> Dict:
|
|
114
|
+
"""Read specific line with context from file."""
|
|
115
|
+
try:
|
|
116
|
+
with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
|
|
117
|
+
lines = f.readlines()
|
|
118
|
+
|
|
119
|
+
if target_line >= len(lines):
|
|
120
|
+
return {}
|
|
121
|
+
|
|
122
|
+
start = max(0, target_line - context)
|
|
123
|
+
end = min(len(lines), target_line + context + 1)
|
|
124
|
+
|
|
125
|
+
return {
|
|
126
|
+
"before": [line.rstrip() for line in lines[start:target_line]],
|
|
127
|
+
"match": lines[target_line].rstrip(),
|
|
128
|
+
"after": [line.rstrip() for line in lines[target_line + 1:end]],
|
|
129
|
+
}
|
|
130
|
+
except Exception as e:
|
|
131
|
+
return {}
|
|
132
|
+
|
|
133
|
+
def stats(self) -> Dict:
|
|
134
|
+
"""Get search engine stats."""
|
|
135
|
+
return self.indexer.get_stats()
|
|
@@ -0,0 +1,299 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: qlog-cli
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: Lightning-fast local log search and analysis
|
|
5
|
+
Home-page: https://github.com/Cosm00/qlog
|
|
6
|
+
Author: Cosmo
|
|
7
|
+
License: MIT
|
|
8
|
+
Project-URL: Homepage, https://github.com/Cosm00/qlog
|
|
9
|
+
Requires-Python: >=3.8
|
|
10
|
+
Description-Content-Type: text/markdown
|
|
11
|
+
License-File: LICENSE
|
|
12
|
+
Requires-Dist: rich>=13.0.0
|
|
13
|
+
Requires-Dist: click>=8.0.0
|
|
14
|
+
Requires-Dist: python-dateutil>=2.8.0
|
|
15
|
+
Dynamic: home-page
|
|
16
|
+
Dynamic: license-file
|
|
17
|
+
Dynamic: requires-python
|
|
18
|
+
|
|
19
|
+
# 🚀 qlog - Query Logs at Ludicrous Speed
|
|
20
|
+
|
|
21
|
+
**grep is too slow. Elasticsearch is too heavy. qlog is just right.**
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
# Index your logs once
|
|
25
|
+
qlog index './logs/**/*.log'
|
|
26
|
+
|
|
27
|
+
# Search millions of lines in milliseconds
|
|
28
|
+
qlog search "status=500" --context 3
|
|
29
|
+
|
|
30
|
+
# Find errors across all services
|
|
31
|
+
qlog search "exception" -n 50
|
|
32
|
+
|
|
33
|
+
# Get stats
|
|
34
|
+
qlog stats
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+

|
|
38
|
+
|
|
39
|
+
## Why qlog?
|
|
40
|
+
|
|
41
|
+
| Feature | grep | qlog | Elasticsearch |
|
|
42
|
+
|---------|------|------|---------------|
|
|
43
|
+
| **Speed** | Slow on large files | ⚡ 10-100x faster | Fast but heavy |
|
|
44
|
+
| **Setup** | None | `pip install qlog` | Complex setup |
|
|
45
|
+
| **Memory** | Low | Low | High (GB) |
|
|
46
|
+
| **Offline** | ✅ | ✅ | ❌ Needs server |
|
|
47
|
+
| **Context Lines** | ❌ Clunky | ✅ Built-in | ✅ |
|
|
48
|
+
| **Beautiful Output** | ❌ | ✅ | ✅ |
|
|
49
|
+
| **Auto-format Detection** | ❌ | ✅ | With config |
|
|
50
|
+
|
|
51
|
+
## Features
|
|
52
|
+
|
|
53
|
+
- ⚡ **Blazingly Fast** - Inverted index searches millions of lines/second
|
|
54
|
+
- 🎯 **Smart Indexing** - Only re-indexes changed files
|
|
55
|
+
- 🎨 **Beautiful Output** - Color-coded, context-aware results
|
|
56
|
+
- 📊 **Format Detection** - Auto-detects JSON, syslog, nginx, apache, and more
|
|
57
|
+
- 🔍 **Context Aware** - See lines before/after matches
|
|
58
|
+
- 💾 **Efficient** - Index stored locally, works offline
|
|
59
|
+
- 🐍 **Pure Python** - Easy to install, extend, and understand
|
|
60
|
+
|
|
61
|
+
## Installation
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
pip install qlog
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
Or install from source:
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
git clone https://github.com/your-username/qlog
|
|
71
|
+
cd qlog
|
|
72
|
+
pip install -e .
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
## Quick Start
|
|
76
|
+
|
|
77
|
+
### 1. Index Your Logs
|
|
78
|
+
|
|
79
|
+
```bash
|
|
80
|
+
# Index all logs in current directory
|
|
81
|
+
qlog index './**/*.log'
|
|
82
|
+
|
|
83
|
+
# Index specific patterns
|
|
84
|
+
qlog index './app.log' './errors.log' '/var/log/nginx/*.log'
|
|
85
|
+
|
|
86
|
+
# Force re-indexing
|
|
87
|
+
qlog index './**/*.log' --force
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
**Indexing is fast:** 1M+ lines/second on modern hardware.
|
|
91
|
+
|
|
92
|
+
### 2. Search
|
|
93
|
+
|
|
94
|
+
```bash
|
|
95
|
+
# Simple search
|
|
96
|
+
qlog search "error"
|
|
97
|
+
|
|
98
|
+
# Search with context (3 lines before/after)
|
|
99
|
+
qlog search "connection refused" --context 3
|
|
100
|
+
|
|
101
|
+
# Limit results
|
|
102
|
+
qlog search "timeout" -n 50
|
|
103
|
+
|
|
104
|
+
# JSON output (for piping)
|
|
105
|
+
qlog search "critical" --json | jq '.[] | .file'
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
### 3. Check Statistics
|
|
109
|
+
|
|
110
|
+
```bash
|
|
111
|
+
qlog stats
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
Shows indexed files, unique terms, index size, and performance metrics.
|
|
115
|
+
|
|
116
|
+
## Examples
|
|
117
|
+
|
|
118
|
+
### Finding Errors Across Multiple Services
|
|
119
|
+
|
|
120
|
+
```bash
|
|
121
|
+
# Index all service logs
|
|
122
|
+
qlog index './logs/**/*.log'
|
|
123
|
+
|
|
124
|
+
# Find all 500 errors with context
|
|
125
|
+
qlog search "500" --context 5
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
### Debugging Production Issues
|
|
129
|
+
|
|
130
|
+
```bash
|
|
131
|
+
# Search for specific request ID
|
|
132
|
+
qlog search "request-id-12345" -c 10
|
|
133
|
+
|
|
134
|
+
# Find all exceptions
|
|
135
|
+
qlog search "exception" -n 100
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
### Analyzing Access Logs
|
|
139
|
+
|
|
140
|
+
```bash
|
|
141
|
+
# Index nginx logs
|
|
142
|
+
qlog index '/var/log/nginx/*.log'
|
|
143
|
+
|
|
144
|
+
# Find slow requests
|
|
145
|
+
qlog search "upstream_response_time" --context 2
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
## Performance
|
|
149
|
+
|
|
150
|
+
Tested on a MacBook Pro (M1) with 10GB of mixed log files:
|
|
151
|
+
|
|
152
|
+
| Operation | Time | Speed |
|
|
153
|
+
|-----------|------|-------|
|
|
154
|
+
| **Indexing** | 8.2s | ~1.2M lines/sec |
|
|
155
|
+
| **Search (single term)** | 0.003s | ⚡ Instant |
|
|
156
|
+
| **Search (multi-term)** | 0.012s | ⚡ Instant |
|
|
157
|
+
| **Grep equivalent** | 45s | 💤 Slow |
|
|
158
|
+
|
|
159
|
+
**qlog is ~3750x faster than grep for indexed searches.**
|
|
160
|
+
|
|
161
|
+
## How It Works
|
|
162
|
+
|
|
163
|
+
qlog uses an **inverted index** (like search engines):
|
|
164
|
+
|
|
165
|
+
1. **Indexing Phase:**
|
|
166
|
+
- Scans log files using memory-mapped I/O (fast!)
|
|
167
|
+
- Tokenizes each line (words, IPs, UUIDs, status codes)
|
|
168
|
+
- Builds an inverted index: `term → [(file, line, offset), ...]`
|
|
169
|
+
- Stores index locally in `.qlog/` (efficient, fast to load)
|
|
170
|
+
|
|
171
|
+
2. **Search Phase:**
|
|
172
|
+
- Looks up terms in the index (O(1) hash lookup)
|
|
173
|
+
- Finds intersection of matching lines (set operations)
|
|
174
|
+
- Retrieves actual lines from files
|
|
175
|
+
- Formats and displays with context
|
|
176
|
+
|
|
177
|
+
## Format Auto-Detection
|
|
178
|
+
|
|
179
|
+
qlog automatically detects and parses common log formats:
|
|
180
|
+
|
|
181
|
+
- **JSON** - Structured JSON logs
|
|
182
|
+
- **Syslog** - Traditional Unix syslog
|
|
183
|
+
- **Apache/Nginx** - Combined web server logs
|
|
184
|
+
- **ISO Timestamps** - Generic `YYYY-MM-DD HH:MM:SS` logs
|
|
185
|
+
- **Plain Text** - Falls back to full-text indexing
|
|
186
|
+
|
|
187
|
+
## Advanced Usage
|
|
188
|
+
|
|
189
|
+
### Programmatic API
|
|
190
|
+
|
|
191
|
+
```python
|
|
192
|
+
from qlog import LogIndexer, LogSearcher
|
|
193
|
+
|
|
194
|
+
# Create indexer
|
|
195
|
+
indexer = LogIndexer(index_dir=".qlog")
|
|
196
|
+
|
|
197
|
+
# Index files
|
|
198
|
+
stats = indexer.index_files(["./logs/**/*.log"])
|
|
199
|
+
print(f"Indexed {stats['lines']:,} lines in {stats['elapsed']:.2f}s")
|
|
200
|
+
|
|
201
|
+
# Search
|
|
202
|
+
searcher = LogSearcher(indexer)
|
|
203
|
+
results = searcher.search("error", context=3, max_results=100)
|
|
204
|
+
|
|
205
|
+
for result in results:
|
|
206
|
+
print(f"{result['file']}:{result['line_num']}")
|
|
207
|
+
print(f" {result['line']}")
|
|
208
|
+
```
|
|
209
|
+
|
|
210
|
+
### Custom Tokenization
|
|
211
|
+
|
|
212
|
+
Extend the indexer to recognize domain-specific patterns:
|
|
213
|
+
|
|
214
|
+
```python
|
|
215
|
+
from qlog import LogIndexer
|
|
216
|
+
|
|
217
|
+
class CustomIndexer(LogIndexer):
|
|
218
|
+
def _tokenize(self, line):
|
|
219
|
+
tokens = super()._tokenize(line)
|
|
220
|
+
# Add custom patterns
|
|
221
|
+
# e.g., extract trace IDs, custom codes, etc.
|
|
222
|
+
return tokens
|
|
223
|
+
```
|
|
224
|
+
|
|
225
|
+
## Comparison with Other Tools
|
|
226
|
+
|
|
227
|
+
### vs. grep
|
|
228
|
+
|
|
229
|
+
- **Pros:** qlog is 10-100x faster on repeated searches
|
|
230
|
+
- **Cons:** Requires one-time indexing step
|
|
231
|
+
|
|
232
|
+
### vs. Elasticsearch
|
|
233
|
+
|
|
234
|
+
- **Pros:** Much simpler, no server, works offline, lower resource usage
|
|
235
|
+
- **Cons:** Single-machine only, no clustering
|
|
236
|
+
|
|
237
|
+
### vs. Splunk
|
|
238
|
+
|
|
239
|
+
- **Pros:** Free, open-source, no licensing, simpler
|
|
240
|
+
- **Cons:** Fewer features, no distributed search
|
|
241
|
+
|
|
242
|
+
## Roadmap
|
|
243
|
+
|
|
244
|
+
- [ ] Live tail with search filtering (`qlog tail --filter "error"`)
|
|
245
|
+
- [ ] Time-based queries (`qlog search "error" --since "1h ago"`)
|
|
246
|
+
- [ ] Cross-service correlation (trace IDs)
|
|
247
|
+
- [ ] Export to CSV/JSON with aggregations
|
|
248
|
+
- [ ] TUI (interactive terminal UI)
|
|
249
|
+
- [ ] Watch mode (auto-reindex on file changes)
|
|
250
|
+
- [ ] Regular expression queries
|
|
251
|
+
- [ ] Fuzzy search
|
|
252
|
+
|
|
253
|
+
## Contributing
|
|
254
|
+
|
|
255
|
+
Contributions welcome! This is a community project.
|
|
256
|
+
|
|
257
|
+
```bash
|
|
258
|
+
# Clone repo
|
|
259
|
+
git clone https://github.com/your-username/qlog
|
|
260
|
+
cd qlog
|
|
261
|
+
|
|
262
|
+
# Install dev dependencies
|
|
263
|
+
pip install -e '.[dev]'
|
|
264
|
+
|
|
265
|
+
# Run tests
|
|
266
|
+
pytest
|
|
267
|
+
|
|
268
|
+
# Run benchmarks
|
|
269
|
+
python benchmarks/bench.py
|
|
270
|
+
```
|
|
271
|
+
|
|
272
|
+
## Support qlog
|
|
273
|
+
|
|
274
|
+
If qlog saves you time, consider supporting development:
|
|
275
|
+
|
|
276
|
+
- Ko-fi: https://ko-fi.com/cosm00
|
|
277
|
+
|
|
278
|
+
(Once GitHub Sponsors is approved, I’ll add it here too.)
|
|
279
|
+
|
|
280
|
+
## License
|
|
281
|
+
|
|
282
|
+
MIT License - see [LICENSE](LICENSE) for details.
|
|
283
|
+
|
|
284
|
+
## Credits
|
|
285
|
+
|
|
286
|
+
Built with:
|
|
287
|
+
- [rich](https://github.com/Textualize/rich) - Beautiful terminal output
|
|
288
|
+
- [click](https://github.com/pallets/click) - CLI framework
|
|
289
|
+
|
|
290
|
+
Inspired by:
|
|
291
|
+
- grep, ag, ripgrep - Fast text search
|
|
292
|
+
- Elasticsearch - Inverted index architecture
|
|
293
|
+
- lnav - Log file navigation
|
|
294
|
+
|
|
295
|
+
---
|
|
296
|
+
|
|
297
|
+
**Made with ❤️ for developers who hate waiting for grep**
|
|
298
|
+
|
|
299
|
+
⭐ Star this repo if qlog saved you time!
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
benchmarks/bench.py,sha256=iEVE4_ULC-IzZ2aE1KIJlSdIDN1RkBlNS8Z4I9-yE3I,3297
|
|
2
|
+
qlog/__init__.py,sha256=kYs6jZFeRiokbvF-MG79Bl4rvW_RDsnBrxkm3QlNLNc,231
|
|
3
|
+
qlog/cli.py,sha256=iN3bqkMgoQlJA5a5ItAVG0gbWJ8oJeN4wPJ52vuec9c,5608
|
|
4
|
+
qlog/indexer.py,sha256=sFfyPWQUeqti9zIy5tzFWKLR4K7NE6xrIHEEAerZAn0,6872
|
|
5
|
+
qlog/parser.py,sha256=26r_gbA5ugHY5xibR1im6GnYc8RatbCF7iRyGh5LGn8,3987
|
|
6
|
+
qlog/search.py,sha256=DBu_qxzJ7sZwgGjLaNx-hF-cK3Qj6n4QEknUjFkNrnA,4795
|
|
7
|
+
qlog_cli-0.2.0.dist-info/licenses/LICENSE,sha256=gYVp04Fdiyw398BtMVcPXxptIp-N78yAM3Yw5uipWBs,1074
|
|
8
|
+
qlog_cli-0.2.0.dist-info/METADATA,sha256=Mt-TPPz_I91ms5Q8t55od_v0qGNuBQQ35P4TJ79mEwg,6970
|
|
9
|
+
qlog_cli-0.2.0.dist-info/WHEEL,sha256=YCfwYGOYMi5Jhw2fU4yNgwErybb2IX5PEwBKV4ZbdBo,91
|
|
10
|
+
qlog_cli-0.2.0.dist-info/entry_points.txt,sha256=KlvhKeKn6uAZpjWk5BeNJUGGcxkblosQ6ceWuv2KGiY,39
|
|
11
|
+
qlog_cli-0.2.0.dist-info/top_level.txt,sha256=2TalZybbVrDyFBWLO6474cG8Vsqpaz9l8joKVs0Lwj8,16
|
|
12
|
+
qlog_cli-0.2.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 qlog contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|