loggrepper 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
loggrepper/__init__.py ADDED
File without changes
loggrepper/cli.py ADDED
@@ -0,0 +1,77 @@
1
+ from datetime import datetime, timedelta
2
+ from re import compile
3
+
4
+ import click
5
+
6
+ from loggrepper.models import LogLine
7
+ from loggrepper.timestamp import BUILTIN_FORMATS, detect_format, extract_timestamp
8
+ from loggrepper.grouper import group_incidents
9
+ from loggrepper.formatter import get_formatter
10
+
11
+
12
@click.command()
@click.argument("pattern")
@click.argument("file", type=click.Path(exists=True, dir_okay=False, allow_dash=True))
@click.option("--window", "-w", default=3, help="Ventana en segundos alrededor del match")
@click.option("--ts-format", default="auto", help="Formato de timestamp (auto, iso8601, syslog, nginx, epoch-ms)")
@click.option("--output", "-o", default="pretty", type=click.Choice(["pretty", "json"]), help="Formato de salida")
def main(pattern: str, file: str, window: int, ts_format: str, output: str) -> None:
    """Extrae ventanas de contexto alrededor de matches en archivos de log.

    Ejemplo: loggrepper ERROR app.log -w 5 --output json
    """
    # NOTE: the docstring above is what click prints for --help, so it is
    # user-facing runtime text and is deliberately left untouched.
    #
    # Flow: validate options -> read input once -> pick timestamp format ->
    # tag every line with (timestamp, matched) -> group into incidents -> print.
    #
    # Parameters (all supplied by click):
    #   pattern   regular expression searched in every line
    #   file      path of the log file, or "-" to read standard input
    #   window    seconds kept before and after each match
    #   ts_format "auto" or a key of BUILTIN_FORMATS
    #   output    "pretty" or "json"
    # Raises click.BadParameter / click.UsageError on invalid input.
    import re  # local import: the module only imports `compile` from re

    # ── explicit timestamp format (validated before any I/O) ─────────
    fmt = None
    if ts_format != "auto":
        fmt = BUILTIN_FORMATS.get(ts_format)
        if fmt is None:
            valid = ", ".join(BUILTIN_FORMATS.keys())
            raise click.BadParameter(f"Formato '{ts_format}' desconocido. Opciones: {valid}")

    # ── compile the search pattern ───────────────────────────────────
    try:
        pat = re.compile(pattern)
    except re.error as exc:
        # Only regex syntax errors are reported as a bad parameter; anything
        # else is a genuine bug and should surface with its traceback.
        raise click.BadParameter(f"Patron regex invalido: {exc}") from exc

    # ── read the input exactly once ──────────────────────────────────
    # click.open_file maps "-" to stdin, which cannot be rewound, so the
    # lines are buffered and reused for both detection and processing.
    # errors="replace" keeps a stray undecodable byte from aborting the run.
    with click.open_file(file, errors="replace") as stream:
        all_lines = [line.rstrip("\n") for line in stream]

    # ── auto-detect the timestamp format from the first 50 lines ─────
    if fmt is None:
        head = [text for text in all_lines[:50] if text]
        fmt = detect_format(head)
        if fmt is None:
            raise click.UsageError(
                "No se pudo detectar el formato de timestamp. "
                "Usa --ts-format para especificar uno (iso8601, syslog, nginx, epoch-ms)."
            )
        click.echo(f"Formato detectado: {fmt.name}", err=True)

    window_td = timedelta(seconds=window)
    formatter = get_formatter(output)

    # ── tag every line that carries a timestamp ──────────────────────
    timestamped: list[tuple[LogLine, datetime, bool]] = []
    skipped = 0
    for number, text in enumerate(all_lines, 1):
        ts = extract_timestamp(text, [fmt])
        if ts is None:
            # Lines without a parseable timestamp cannot be placed in time.
            skipped += 1
            continue
        logline = LogLine(number=number, raw=text)
        timestamped.append((logline, ts, pat.search(text) is not None))

    if skipped:
        click.echo(f"Lineas sin timestamp detectado: {skipped}", err=True)

    # ── group and print (the formatter handles the empty case) ───────
    incidents = list(group_incidents(iter(timestamped), window_td))
    click.echo(formatter.format(incidents))
@@ -0,0 +1,66 @@
1
+ """Formateadores de output para incidentes."""
2
+ import json
3
+ from typing import Protocol
4
+
5
+ from loggrepper.models import Incident
6
+
7
+
8
class Formatter(Protocol):
    """Structural interface implemented by every output formatter."""

    def format(self, incidents: list[Incident]) -> str:
        """Render the given incidents as a single string."""
12
+
13
+
14
class PrettyFormatter:
    """Human-readable output: a header per incident followed by its lines."""

    def format(self, incidents: list[Incident]) -> str:
        """Render incidents as plain text.

        Each incident produces a header with its id, time range and line
        count, then one row per log line (rows at a matching index are
        prefixed with ``>>>``), then an empty separator row. An empty
        incident list yields a fixed "nothing found" message.
        """
        if not incidents:
            return "Sin incidentes encontrados."

        rows: list[str] = []
        for inc in incidents:
            # Build the lookup once per incident: membership in the raw
            # list would be a linear scan repeated for every line.
            match_idx = set(inc.matches)
            rows.append(
                f"--- Incidente #{inc.id} | "
                f"{inc.start} — {inc.end} | "
                f"{len(inc.lines)} lineas ---"
            )
            for i, logline in enumerate(inc.lines):
                marker = ">>>" if i in match_idx else " "
                rows.append(f"{marker} {logline.raw}")
            rows.append("")
        return "\n".join(rows)
33
+
34
+
35
class JsonFormatter:
    """JSON output, suited for piping into jq or other tools."""

    def format(self, incidents: list[Incident]) -> str:
        """Serialize incidents as an indented JSON array.

        Every incident becomes an object with ``id``, ISO-formatted
        ``start``/``end``, ``line_count``, ``match_count`` and a ``lines``
        array whose entries carry the line number, its text and a
        ``match`` flag. Non-ASCII characters are emitted unescaped.
        """
        data = []
        for inc in incidents:
            # Set lookup avoids rescanning the match list for every line.
            match_idx = set(inc.matches)
            data.append(
                {
                    "id": inc.id,
                    "start": inc.start.isoformat(),
                    "end": inc.end.isoformat(),
                    "line_count": len(inc.lines),
                    # Counted from the original list, exactly as stored.
                    "match_count": len(inc.matches),
                    "lines": [
                        {
                            "number": logline.number,
                            "text": logline.raw,
                            "match": i in match_idx,
                        }
                        for i, logline in enumerate(inc.lines)
                    ],
                }
            )
        return json.dumps(data, indent=2, ensure_ascii=False)
58
+
59
+
60
def get_formatter(output: str) -> Formatter:
    """Return a formatter instance for the requested output name.

    Accepts ``"pretty"`` or ``"json"``; any other name raises ``KeyError``.
    """
    registry = {
        "pretty": PrettyFormatter,
        "json": JsonFormatter,
    }
    chosen = registry[output]
    return chosen()
loggrepper/grouper.py ADDED
@@ -0,0 +1,80 @@
1
+ from datetime import datetime, timedelta
2
+ from collections.abc import Iterator
3
+
4
+ from loggrepper.models import Incident, LogLine
5
+
6
+
7
def group_incidents(
    items: Iterator[tuple[LogLine, datetime, bool]],
    window: timedelta,
) -> Iterator[Incident]:
    """Group timestamped lines into incidents around matching lines.

    ``items`` yields ``(line, timestamp, matched)`` tuples. A matching line
    opens an incident (or extends the open one); lines whose timestamp is
    not later than the open incident's ``end`` are appended to it. An
    incident is yielded as soon as a later line falls past its ``end``,
    and the last open incident is yielded when the input is exhausted.
    """
    backlog: list[tuple[LogLine, datetime]] = []
    current: Incident | None = None
    counter = 1

    for entry, stamp, is_match in items:
        if current is not None:
            if stamp <= current.end:
                # Still inside the open window: absorb the line.
                current.lines.append(entry)
                if is_match:
                    current.matches.append(len(current.lines) - 1)
                    # A new match can only push the window end forward.
                    current.end = max(current.end, stamp + window)
                continue

            # The line lies past the window: close the incident and fall
            # through so this same line is handled as "no open incident".
            yield current
            backlog = _discard_before(backlog, current.end)
            current = None

        if not is_match:
            # Keep it around; a later match may pull it into its window.
            backlog.append((entry, stamp))
            continue

        current = _new_incident(counter, entry, stamp, window, backlog)
        counter += 1

    if current is not None:
        yield current
38
+
39
+
40
def _new_incident(
    iid: int,
    match_line: LogLine,
    match_ts: datetime,
    window: timedelta,
    pending: list[tuple[LogLine, datetime]],
) -> Incident:
    """Open an incident for a match, pulling in recent pending lines.

    Pending lines whose timestamp is at or after ``match_ts - window`` are
    moved into the incident ahead of the matching line. ``pending`` is
    modified in place and afterwards holds only the older entries.
    The incident spans ``match_ts - window`` to ``match_ts + window``.
    """
    window_start = match_ts - window

    rescued = [entry for entry, stamp in pending if stamp >= window_start]
    too_old = [(entry, stamp) for entry, stamp in pending if not stamp >= window_start]
    pending[:] = too_old  # same list object, as the caller keeps using it

    # The match is placed right after the rescued context lines.
    match_index = len(rescued)
    return Incident(
        id=iid,
        start=window_start,
        end=match_ts + window,
        lines=rescued + [match_line],
        matches=[match_index],
    )
73
+
74
+
75
def _discard_before(
    pending: list[tuple[LogLine, datetime]],
    cutoff: datetime,
) -> list[tuple[LogLine, datetime]]:
    """Return a new list with only the entries strictly later than ``cutoff``."""
    kept: list[tuple[LogLine, datetime]] = []
    for entry, stamp in pending:
        if stamp > cutoff:
            kept.append((entry, stamp))
    return kept
loggrepper/matcher.py ADDED
@@ -0,0 +1,9 @@
1
+ from re import Pattern
2
+ from collections.abc import Iterator
3
+
4
+ from loggrepper.models import LogLine
5
+
6
def match_lines(lines: Iterator[LogLine], patterns: list[Pattern]) -> Iterator[tuple[LogLine, bool]]:
    """Yield each line paired with whether any of the patterns is found in it."""

    def hits_any(text: str) -> bool:
        # Stops at the first pattern that is found in the text.
        for rx in patterns:
            if rx.search(text):
                return True
        return False

    for entry in lines:
        yield entry, hits_any(entry.raw)
loggrepper/models.py ADDED
@@ -0,0 +1,16 @@
1
+ from dataclasses import dataclass
2
+ from datetime import datetime
3
+
4
@dataclass
class LogLine:
    """A single line read from a log file."""
    # Line number within the input; the CLI counts from 1.
    number: int
    # Line text; the CLI stores it with the trailing newline removed.
    raw: str
8
+
9
@dataclass
class Incident:
    """A group of log lines collected around one or more matching lines."""
    # Sequential identifier assigned by the grouper, starting at 1.
    id: int
    # Start of the time window (first match timestamp minus the window).
    start: datetime
    # End of the time window; the grouper moves it forward on later matches.
    end: datetime
    # Lines belonging to the incident, in the order they were collected.
    lines: list[LogLine]
    # Indexes into `lines` of the lines that matched the search pattern.
    matches: list[int]
16
+
@@ -0,0 +1,90 @@
1
+ from datetime import datetime
2
+ from re import Pattern, compile
3
+ from typing import Literal
4
+ from dataclasses import dataclass
5
+
6
+
7
+ @dataclass
8
+ class TimestampFormat:
9
+ """Define como extraer y parsear un timestamp de una linea."""
10
+ name: str
11
+ regex: Pattern
12
+ format_str: str
13
+ position: Literal["start", "anywhere"]
14
+
15
+
16
+ def extract_timestamp(line: str, formats: list[TimestampFormat]) -> datetime | None:
17
+ """Prueba cada formato contra la linea, devuelve el primer datetime parseado."""
18
+ for fmt in formats:
19
+ match = fmt.regex.match(line) if fmt.position == "start" else fmt.regex.search(line)
20
+ if match:
21
+ ts_str = match.group(0)
22
+ if fmt.name == "epoch-ms":
23
+ try:
24
+ return datetime.fromtimestamp(int(ts_str) / 1000)
25
+ except (ValueError, OSError):
26
+ continue
27
+ try:
28
+ return datetime.strptime(ts_str, fmt.format_str)
29
+ except ValueError:
30
+ continue
31
+ return None
32
+
33
+
34
def detect_format(lines: list[str]) -> TimestampFormat | None:
    """Pick the known format that parses the most of the given lines.

    A format qualifies only if it parses strictly more than half of
    ``lines``. Among qualifying formats the one with the most hits wins,
    and on a tie the one listed first in ``ALL_FORMATS`` is kept. Returns
    ``None`` when no format qualifies (including for an empty list).
    """
    minimum = len(lines) * 0.5
    champion: TimestampFormat | None = None
    champion_hits = 0

    for candidate in ALL_FORMATS:
        parsed = [text for text in lines if extract_timestamp(text, [candidate]) is not None]
        hits = len(parsed)
        if hits <= minimum:
            continue
        if champion is None or hits > champion_hits:
            champion, champion_hits = candidate, hits

    return champion
43
+
44
+
45
+ # ── Formatos predefinidos ────────────────────────────────────────────
46
+
47
# Date and time separated by "T" or a space. The regex also accepts an
# optional fraction and zone suffix, while format_str expects a space
# separator and a fractional part.
ISO8601 = TimestampFormat(
    name="iso8601",
    regex=compile(r"\d{4}-\d{2}-\d{2}[T ]\d{2}:\d{2}:\d{2}(?:\.\d{3,6})?(?:[+-]\d{2}:?\d{2}|Z)?"),
    format_str="%Y-%m-%d %H:%M:%S.%f",
    position="anywhere",
)

# Same as above but the separator must be "T".
ISO8601_T = TimestampFormat(
    name="iso8601-t",
    regex=compile(r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d{3,6})?(?:[+-]\d{2}:?\d{2}|Z)?"),
    format_str="%Y-%m-%dT%H:%M:%S.%f",
    position="anywhere",
)

# Month abbreviation, day and time; the format carries no year.
SYSLOG = TimestampFormat(
    name="syslog",
    regex=compile(r"[A-Z][a-z]{2} \d{1,2} \d{2}:\d{2}:\d{2}"),
    format_str="%b %d %H:%M:%S",
    position="anywhere",
)

# Access-log style timestamp with a numeric UTC offset (parsed via %z).
NGINX = TimestampFormat(
    name="nginx",
    regex=compile(r"\d{2}/[A-Z][a-z]{2}/\d{4}:\d{2}:\d{2}:\d{2} [+-]\d{4}"),
    format_str="%d/%b/%Y:%H:%M:%S %z",
    position="anywhere",
)

# A standalone 13-digit number; format_str is unused because
# extract_timestamp converts this format numerically.
EPOCH_MS = TimestampFormat(
    name="epoch-ms",
    regex=compile(r"\b\d{13}\b"),
    format_str="",
    position="anywhere",
)

# Every built-in format, in the order auto-detection tries them.
ALL_FORMATS: list[TimestampFormat] = [ISO8601, ISO8601_T, SYSLOG, NGINX, EPOCH_MS]

# Lookup by format name, as accepted by the --ts-format option.
BUILTIN_FORMATS: dict[str, TimestampFormat] = {entry.name: entry for entry in ALL_FORMATS}
@@ -0,0 +1,143 @@
1
+ Metadata-Version: 2.4
2
+ Name: loggrepper
3
+ Version: 0.1.0
4
+ Summary: grep contextual para logs con ventanas de tiempo
5
+ Author-email: Matias Atuan <matiasatuan.2018@gmail.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/FixZzT/loggrepper
8
+ Project-URL: Repository, https://github.com/FixZzT/loggrepper
9
+ Project-URL: Issues, https://github.com/FixZzT/loggrepper/issues
10
+ Requires-Python: >=3.10
11
+ Description-Content-Type: text/markdown
12
+ License-File: LICENSE
13
+ Requires-Dist: click>=8
14
+ Requires-Dist: rich>=13
15
+ Dynamic: license-file
16
+
17
+ # loggrepper
18
+
19
+ [![CI](https://github.com/FixZzT/loggrepper/actions/workflows/ci.yml/badge.svg)](https://github.com/FixZzT/loggrepper/actions/workflows/ci.yml)
20
+ [![Python](https://img.shields.io/pypi/pyversions/loggrepper.svg)](https://pypi.org/project/loggrepper/)
21
+ [![PyPI](https://img.shields.io/pypi/v/loggrepper.svg)](https://pypi.org/project/loggrepper/)
22
+ [![License](https://img.shields.io/github/license/FixZzT/loggrepper.svg)](https://github.com/FixZzT/loggrepper/blob/master/LICENSE)
23
+
24
+ grep contextual para logs. Extrae ventanas de tiempo alrededor de matches en archivos de log — no por número de líneas, sino por timestamps.
25
+
26
+ ## El problema
27
+
28
+ `grep ERROR app.log` te da esto:
29
+
30
+ ```
31
+ 2026-05-16 14:32:01.123 ERROR PaymentProcessor: timeout
32
+ ```
33
+
34
+ Pero no te dice **qué pasó antes** del error (¿llegó el request? ¿qué parámetros tenía?) ni **después** (¿se reintentó? ¿el usuario recibió 500?).
35
+
36
+ `grep -B 20 -A 20` asume que 20 líneas cubren tu ventana de tiempo. Si el request empezó 2 segundos antes y tu log es verboso, 20 líneas puede ser muy poco. Si es poco verboso, 20 líneas es ruido innecesario.
37
+
38
+ **loggrepper** busca por timestamps reales. Le pasas `--window 3` (segundos) y te devuelve todas las líneas cuyo timestamp está dentro de ±3 segundos del match. Líneas sueltas se agrupan en "incidentes". Ventanas solapadas se fusionan.
39
+
40
+ ## Instalación
41
+
42
+ ```bash
43
+ pip install git+https://github.com/FixZzT/loggrepper.git
44
+ # o modo desarrollo (editable)
45
+ pip install -e .
46
+ # o global con pipx
47
+ pipx install git+https://github.com/FixZzT/loggrepper.git
48
+ ```
49
+
50
+ ## Uso
51
+
52
+ ```bash
53
+ # Básico
54
+ loggrepper ERROR app.log
55
+
56
+ # Ventana de 5 segundos
57
+ loggrepper ERROR app.log -w 5
58
+
59
+ # Salida JSON para scripts
60
+ loggrepper ERROR app.log -o json | jq '.[] | {start, end, line_count}'
61
+
62
+ # Formato de timestamp específico
63
+ loggrepper "404" nginx-access.log --ts-format nginx
64
+
65
+ # Pipe desde docker/k8s
66
+ docker logs mi-app 2>&1 | loggrepper FATAL -
67
+ kubectl logs pod-xyz | loggrepper panic -
68
+ ```
69
+
70
+ ## Ejemplos reales
71
+
72
+ **Depurar un error en producción:**
73
+
74
+ ```bash
75
+ $ loggrepper "IntegrityError" app.log -w 5
76
+
77
+ --- Incidente #1 | 14:32:00 — 14:32:04 | 5 líneas ---
78
+ 14:32:00.100 INFO POST /api/orders payload={"user":42}
79
+ 14:32:00.500 DEBUG INSERT INTO orders VALUES (...)
80
+ >>> 14:32:01.123 ERROR IntegrityError: duplicate key
81
+ 14:32:01.200 WARN rolling back transaction
82
+ 14:32:02.000 INFO POST /api/orders -> 500
83
+ ```
84
+
85
+ Ves el request entero, SQL, error, rollback, y respuesta — en contexto temporal real.
86
+
87
+ **Investigar timeouts entre microservicios:**
88
+
89
+ ```bash
90
+ $ loggrepper "pi_abc123" payment-service.log -w 10
91
+
92
+ --- Incidente #1 | 14:32:00 — 14:32:10 | 5 líneas ---
93
+ 14:32:00.100 INFO received payment intent pi_abc123
94
+ 14:32:00.200 DEBUG calling Stripe /v1/payment_intents
95
+ >>> 14:32:08.500 ERROR timeout calling Stripe (8.3s)
96
+ 14:32:08.501 WARN retrying (1/3)
97
+ 14:32:10.000 DEBUG Stripe responded 200 OK
98
+ ```
99
+
100
+ Stripe tardó 8s, no es tu código. La ventana captura causa y efecto.
101
+
102
+ **Auditar requests sospechosos en nginx:**
103
+
104
+ ```bash
105
+ $ loggrepper "POST /admin" access.log --ts-format nginx -w 30 -o json | jq .
106
+ ```
107
+
108
+ ## Formatos de timestamp soportados
109
+
110
+ | Formato | Ejemplo | Uso típico |
111
+ |-----------|--------------------------------------------|--------------------------|
112
+ | iso8601 | `2026-05-16 14:32:01.123 ERROR` | Python, Java, Node, Go |
113
+ | iso8601-t | `2026-05-16T14:32:01.123Z ERROR` | JSON logs, Docker, k8s |
114
+ | syslog | `May 16 14:32:01 hostname error:` | syslog, journald, /var/log |
115
+ | nginx | `16/May/2026:14:32:01 +0000 GET /` | Nginx, Apache access |
116
+ | epoch-ms | `1715872321123 ERROR` | Splunk, sistemas embedded |
117
+
118
+ Con `--ts-format auto` (default) detecta automáticamente el formato analizando las primeras 50 líneas.
119
+
120
+ ## Output
121
+
122
+ **Pretty** (default):
123
+
124
+ ```
125
+ --- Incidente #1 | 2026-05-16 14:31:58 — 2026-05-16 14:32:04 | 3 líneas ---
126
+ 2026-05-16 14:32:00.100 INFO inicio del proceso
127
+ >>> 2026-05-16 14:32:01.123 ERROR timeout en conexión
128
+ 2026-05-16 14:32:02.000 DEBUG conexión exitosa
129
+ ```
130
+
131
+ `>>>` marca las líneas que coinciden con el patrón. Cada incidente muestra rango temporal y cantidad de líneas.
132
+
133
+ **JSON**: cada incidente con id, start, end, líneas con número, texto y flag `match`.
134
+
135
+ ## Desarrollo
136
+
137
+ ```bash
138
+ python -m venv .venv
139
+ source .venv/bin/activate
140
+ pip install -e .
141
+ pytest # 14 tests
142
+ ruff check src/ tests/
143
+ ```
@@ -0,0 +1,13 @@
1
+ loggrepper/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ loggrepper/cli.py,sha256=DcWXbe0TUm3CEYVf_dJhkU2mBClImOGlsfgVTfKB74k,3276
3
+ loggrepper/formatter.py,sha256=83S62Qv6IwRKq-evbdBwFBaPu59Yzoy6BtHnNwF_Loc,2036
4
+ loggrepper/grouper.py,sha256=BBLS5dB0CSqkQp63r9lc8scKBoZYJTyTTla_Ijo3RU8,2319
5
+ loggrepper/matcher.py,sha256=h6UyJC2foY5G3EpLjk-sFIy8RmkqvA-Gb8jBf3JF8bM,313
6
+ loggrepper/models.py,sha256=2JhoXp213oOy9WBWJbteRRwzymrOPsjUH8XAlMlitm4,247
7
+ loggrepper/timestamp.py,sha256=Kglf7jWiSz9kmFOpNGCVR5mR4AW8iSCyPTxvKaquT2o,2808
8
+ loggrepper-0.1.0.dist-info/licenses/LICENSE,sha256=SdnvK8jBdHqrqVuIZVe5lutHvymf56Lr7nGP3FZ5uXg,1069
9
+ loggrepper-0.1.0.dist-info/METADATA,sha256=syOVQvtIYafKVt3vR_rmu--dk1osVDJoY0E3Qol4yyI,5036
10
+ loggrepper-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
11
+ loggrepper-0.1.0.dist-info/entry_points.txt,sha256=iqQVpYjIOs1inwrZ2sLaZICcb-2a-kZcPY303DLft7k,51
12
+ loggrepper-0.1.0.dist-info/top_level.txt,sha256=PyYppPASup7reNEcmtMy7lzmrDc49zjw4HnzmYMpI7Y,11
13
+ loggrepper-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ loggrepper = loggrepper.cli:main
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Matias Atuan
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ loggrepper