loggrepper 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- loggrepper-0.1.0/LICENSE +21 -0
- loggrepper-0.1.0/PKG-INFO +143 -0
- loggrepper-0.1.0/README.md +127 -0
- loggrepper-0.1.0/pyproject.toml +26 -0
- loggrepper-0.1.0/setup.cfg +4 -0
- loggrepper-0.1.0/src/loggrepper/__init__.py +0 -0
- loggrepper-0.1.0/src/loggrepper/cli.py +77 -0
- loggrepper-0.1.0/src/loggrepper/formatter.py +66 -0
- loggrepper-0.1.0/src/loggrepper/grouper.py +80 -0
- loggrepper-0.1.0/src/loggrepper/matcher.py +9 -0
- loggrepper-0.1.0/src/loggrepper/models.py +16 -0
- loggrepper-0.1.0/src/loggrepper/timestamp.py +90 -0
- loggrepper-0.1.0/src/loggrepper.egg-info/PKG-INFO +143 -0
- loggrepper-0.1.0/src/loggrepper.egg-info/SOURCES.txt +21 -0
- loggrepper-0.1.0/src/loggrepper.egg-info/dependency_links.txt +1 -0
- loggrepper-0.1.0/src/loggrepper.egg-info/entry_points.txt +2 -0
- loggrepper-0.1.0/src/loggrepper.egg-info/requires.txt +2 -0
- loggrepper-0.1.0/src/loggrepper.egg-info/top_level.txt +1 -0
- loggrepper-0.1.0/tests/test_formatter.py +53 -0
- loggrepper-0.1.0/tests/test_grouper.py +31 -0
- loggrepper-0.1.0/tests/test_matcher.py +16 -0
- loggrepper-0.1.0/tests/test_models.py +18 -0
- loggrepper-0.1.0/tests/test_timestamp.py +37 -0
loggrepper-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Matias Atuan
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: loggrepper
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: grep contextual para logs con ventanas de tiempo
|
|
5
|
+
Author-email: Matias Atuan <matiasatuan.2018@gmail.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/FixZzT/loggrepper
|
|
8
|
+
Project-URL: Repository, https://github.com/FixZzT/loggrepper
|
|
9
|
+
Project-URL: Issues, https://github.com/FixZzT/loggrepper/issues
|
|
10
|
+
Requires-Python: >=3.10
|
|
11
|
+
Description-Content-Type: text/markdown
|
|
12
|
+
License-File: LICENSE
|
|
13
|
+
Requires-Dist: click>=8
|
|
14
|
+
Requires-Dist: rich>=13
|
|
15
|
+
Dynamic: license-file
|
|
16
|
+
|
|
17
|
+
# loggrepper
|
|
18
|
+
|
|
19
|
+
[](https://github.com/FixZzT/loggrepper/actions/workflows/ci.yml)
|
|
20
|
+
[](https://pypi.org/project/loggrepper/)
|
|
21
|
+
[](https://pypi.org/project/loggrepper/)
|
|
22
|
+
[](https://github.com/FixZzT/loggrepper/blob/master/LICENSE)
|
|
23
|
+
|
|
24
|
+
grep contextual para logs. Extrae ventanas de tiempo alrededor de matches en archivos de log — no por número de líneas, sino por timestamps.
|
|
25
|
+
|
|
26
|
+
## El problema
|
|
27
|
+
|
|
28
|
+
`grep ERROR app.log` te da esto:
|
|
29
|
+
|
|
30
|
+
```
|
|
31
|
+
2026-05-16 14:32:01.123 ERROR PaymentProcessor: timeout
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
Pero no te dice **qué pasó antes** del error (¿llegó el request? ¿qué parámetros tenía?) ni **después** (¿se reintentó? ¿el usuario recibió 500?).
|
|
35
|
+
|
|
36
|
+
`grep -B 20 -A 20` asume que 20 líneas cubren tu ventana de tiempo. Si el request empezó 2 segundos antes y tu log es verboso, 20 líneas puede ser muy poco. Si es poco verboso, 20 líneas es ruido innecesario.
|
|
37
|
+
|
|
38
|
+
**loggrepper** busca por timestamps reales. Le pasas `--window 3` (segundos) y te devuelve todas las líneas cuyo timestamp está dentro de ±3 segundos del match. Líneas sueltas se agrupan en "incidentes". Ventanas solapadas se fusionan.
|
|
39
|
+
|
|
40
|
+
## Instalación
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
pip install git+https://github.com/FixZzT/loggrepper.git
|
|
44
|
+
# o modo desarrollo (editable)
|
|
45
|
+
pip install -e .
|
|
46
|
+
# o global con pipx
|
|
47
|
+
pipx install git+https://github.com/FixZzT/loggrepper.git
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
## Uso
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
# Básico
|
|
54
|
+
loggrepper ERROR app.log
|
|
55
|
+
|
|
56
|
+
# Ventana de 5 segundos
|
|
57
|
+
loggrepper ERROR app.log -w 5
|
|
58
|
+
|
|
59
|
+
# Salida JSON para scripts
|
|
60
|
+
loggrepper ERROR app.log -o json | jq '.[] | {start, end, line_count}'
|
|
61
|
+
|
|
62
|
+
# Formato de timestamp específico
|
|
63
|
+
loggrepper "404" nginx-access.log --ts-format nginx
|
|
64
|
+
|
|
65
|
+
# Pipe desde docker/k8s
|
|
66
|
+
docker logs mi-app 2>&1 | loggrepper FATAL -
|
|
67
|
+
kubectl logs pod-xyz | loggrepper panic -
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
## Ejemplos reales
|
|
71
|
+
|
|
72
|
+
**Depurar un error en producción:**
|
|
73
|
+
|
|
74
|
+
```bash
|
|
75
|
+
$ loggrepper "IntegrityError" app.log -w 5
|
|
76
|
+
|
|
77
|
+
--- Incidente #1 | 14:32:00 — 14:32:04 | 5 líneas ---
|
|
78
|
+
14:32:00.100 INFO POST /api/orders payload={"user":42}
|
|
79
|
+
14:32:00.500 DEBUG INSERT INTO orders VALUES (...)
|
|
80
|
+
>>> 14:32:01.123 ERROR IntegrityError: duplicate key
|
|
81
|
+
14:32:01.200 WARN rolling back transaction
|
|
82
|
+
14:32:02.000 INFO POST /api/orders -> 500
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
Ves el request entero, SQL, error, rollback, y respuesta — en contexto temporal real.
|
|
86
|
+
|
|
87
|
+
**Investigar timeouts entre microservicios:**
|
|
88
|
+
|
|
89
|
+
```bash
|
|
90
|
+
$ loggrepper "pi_abc123" payment-service.log -w 10
|
|
91
|
+
|
|
92
|
+
--- Incidente #1 | 14:32:00 — 14:32:10 | 7 líneas ---
|
|
93
|
+
14:32:00.100 INFO received payment intent pi_abc123
|
|
94
|
+
14:32:00.200 DEBUG calling Stripe /v1/payment_intents
|
|
95
|
+
>>> 14:32:08.500 ERROR timeout calling Stripe (8.3s)
|
|
96
|
+
14:32:08.501 WARN retrying (1/3)
|
|
97
|
+
14:32:10.000 DEBUG Stripe responded 200 OK
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
Stripe tardó 8s, no es tu código. La ventana captura causa y efecto.
|
|
101
|
+
|
|
102
|
+
**Auditar requests sospechosos en nginx:**
|
|
103
|
+
|
|
104
|
+
```bash
|
|
105
|
+
$ loggrepper "POST /admin" access.log --ts-format nginx -w 30 -o json | jq .
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
## Formatos de timestamp soportados
|
|
109
|
+
|
|
110
|
+
| Formato | Ejemplo | Uso típico |
|
|
111
|
+
|-----------|--------------------------------------------|--------------------------|
|
|
112
|
+
| iso8601 | `2026-05-16 14:32:01.123 ERROR` | Python, Java, Node, Go |
|
|
113
|
+
| iso8601-t | `2026-05-16T14:32:01.123Z ERROR` | JSON logs, Docker, k8s |
|
|
114
|
+
| syslog | `May 16 14:32:01 hostname error:` | syslog, journald, /var/log |
|
|
115
|
+
| nginx | `16/May/2026:14:32:01 +0000 GET /` | Nginx, Apache access |
|
|
116
|
+
| epoch-ms | `1715872321123 ERROR` | Splunk, sistemas embedded |
|
|
117
|
+
|
|
118
|
+
Con `--ts-format auto` (default) detecta automáticamente el formato analizando las primeras 50 líneas.
|
|
119
|
+
|
|
120
|
+
## Output
|
|
121
|
+
|
|
122
|
+
**Pretty** (default):
|
|
123
|
+
|
|
124
|
+
```
|
|
125
|
+
--- Incidente #1 | 2026-05-16 14:31:58 — 2026-05-16 14:32:04 | 5 líneas ---
|
|
126
|
+
2026-05-16 14:32:00.100 INFO inicio del proceso
|
|
127
|
+
>>> 2026-05-16 14:32:01.123 ERROR timeout en conexión
|
|
128
|
+
2026-05-16 14:32:02.000 DEBUG conexión exitosa
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
`>>>` marca las líneas que coinciden con el patrón. Cada incidente muestra rango temporal y cantidad de líneas.
|
|
132
|
+
|
|
133
|
+
**JSON**: cada incidente con id, start, end, líneas con número, texto y flag `match`.
|
|
134
|
+
|
|
135
|
+
## Desarrollo
|
|
136
|
+
|
|
137
|
+
```bash
|
|
138
|
+
python -m venv .venv
|
|
139
|
+
source .venv/bin/activate
|
|
140
|
+
pip install -e .
|
|
141
|
+
pytest # 14 tests
|
|
142
|
+
ruff check src/ tests/
|
|
143
|
+
```
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
# loggrepper
|
|
2
|
+
|
|
3
|
+
[](https://github.com/FixZzT/loggrepper/actions/workflows/ci.yml)
|
|
4
|
+
[](https://pypi.org/project/loggrepper/)
|
|
5
|
+
[](https://pypi.org/project/loggrepper/)
|
|
6
|
+
[](https://github.com/FixZzT/loggrepper/blob/master/LICENSE)
|
|
7
|
+
|
|
8
|
+
grep contextual para logs. Extrae ventanas de tiempo alrededor de matches en archivos de log — no por número de líneas, sino por timestamps.
|
|
9
|
+
|
|
10
|
+
## El problema
|
|
11
|
+
|
|
12
|
+
`grep ERROR app.log` te da esto:
|
|
13
|
+
|
|
14
|
+
```
|
|
15
|
+
2026-05-16 14:32:01.123 ERROR PaymentProcessor: timeout
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
Pero no te dice **qué pasó antes** del error (¿llegó el request? ¿qué parámetros tenía?) ni **después** (¿se reintentó? ¿el usuario recibió 500?).
|
|
19
|
+
|
|
20
|
+
`grep -B 20 -A 20` asume que 20 líneas cubren tu ventana de tiempo. Si el request empezó 2 segundos antes y tu log es verboso, 20 líneas puede ser muy poco. Si es poco verboso, 20 líneas es ruido innecesario.
|
|
21
|
+
|
|
22
|
+
**loggrepper** busca por timestamps reales. Le pasas `--window 3` (segundos) y te devuelve todas las líneas cuyo timestamp está dentro de ±3 segundos del match. Líneas sueltas se agrupan en "incidentes". Ventanas solapadas se fusionan.
|
|
23
|
+
|
|
24
|
+
## Instalación
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
pip install git+https://github.com/FixZzT/loggrepper.git
|
|
28
|
+
# o modo desarrollo (editable)
|
|
29
|
+
pip install -e .
|
|
30
|
+
# o global con pipx
|
|
31
|
+
pipx install git+https://github.com/FixZzT/loggrepper.git
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
## Uso
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
# Básico
|
|
38
|
+
loggrepper ERROR app.log
|
|
39
|
+
|
|
40
|
+
# Ventana de 5 segundos
|
|
41
|
+
loggrepper ERROR app.log -w 5
|
|
42
|
+
|
|
43
|
+
# Salida JSON para scripts
|
|
44
|
+
loggrepper ERROR app.log -o json | jq '.[] | {start, end, line_count}'
|
|
45
|
+
|
|
46
|
+
# Formato de timestamp específico
|
|
47
|
+
loggrepper "404" nginx-access.log --ts-format nginx
|
|
48
|
+
|
|
49
|
+
# Pipe desde docker/k8s
|
|
50
|
+
docker logs mi-app 2>&1 | loggrepper FATAL -
|
|
51
|
+
kubectl logs pod-xyz | loggrepper panic -
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
## Ejemplos reales
|
|
55
|
+
|
|
56
|
+
**Depurar un error en producción:**
|
|
57
|
+
|
|
58
|
+
```bash
|
|
59
|
+
$ loggrepper "IntegrityError" app.log -w 5
|
|
60
|
+
|
|
61
|
+
--- Incidente #1 | 14:32:00 — 14:32:04 | 5 líneas ---
|
|
62
|
+
14:32:00.100 INFO POST /api/orders payload={"user":42}
|
|
63
|
+
14:32:00.500 DEBUG INSERT INTO orders VALUES (...)
|
|
64
|
+
>>> 14:32:01.123 ERROR IntegrityError: duplicate key
|
|
65
|
+
14:32:01.200 WARN rolling back transaction
|
|
66
|
+
14:32:02.000 INFO POST /api/orders -> 500
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
Ves el request entero, SQL, error, rollback, y respuesta — en contexto temporal real.
|
|
70
|
+
|
|
71
|
+
**Investigar timeouts entre microservicios:**
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
$ loggrepper "pi_abc123" payment-service.log -w 10
|
|
75
|
+
|
|
76
|
+
--- Incidente #1 | 14:32:00 — 14:32:10 | 7 líneas ---
|
|
77
|
+
14:32:00.100 INFO received payment intent pi_abc123
|
|
78
|
+
14:32:00.200 DEBUG calling Stripe /v1/payment_intents
|
|
79
|
+
>>> 14:32:08.500 ERROR timeout calling Stripe (8.3s)
|
|
80
|
+
14:32:08.501 WARN retrying (1/3)
|
|
81
|
+
14:32:10.000 DEBUG Stripe responded 200 OK
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
Stripe tardó 8s, no es tu código. La ventana captura causa y efecto.
|
|
85
|
+
|
|
86
|
+
**Auditar requests sospechosos en nginx:**
|
|
87
|
+
|
|
88
|
+
```bash
|
|
89
|
+
$ loggrepper "POST /admin" access.log --ts-format nginx -w 30 -o json | jq .
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
## Formatos de timestamp soportados
|
|
93
|
+
|
|
94
|
+
| Formato | Ejemplo | Uso típico |
|
|
95
|
+
|-----------|--------------------------------------------|--------------------------|
|
|
96
|
+
| iso8601 | `2026-05-16 14:32:01.123 ERROR` | Python, Java, Node, Go |
|
|
97
|
+
| iso8601-t | `2026-05-16T14:32:01.123Z ERROR` | JSON logs, Docker, k8s |
|
|
98
|
+
| syslog | `May 16 14:32:01 hostname error:` | syslog, journald, /var/log |
|
|
99
|
+
| nginx | `16/May/2026:14:32:01 +0000 GET /` | Nginx, Apache access |
|
|
100
|
+
| epoch-ms | `1715872321123 ERROR` | Splunk, sistemas embedded |
|
|
101
|
+
|
|
102
|
+
Con `--ts-format auto` (default) detecta automáticamente el formato analizando las primeras 50 líneas.
|
|
103
|
+
|
|
104
|
+
## Output
|
|
105
|
+
|
|
106
|
+
**Pretty** (default):
|
|
107
|
+
|
|
108
|
+
```
|
|
109
|
+
--- Incidente #1 | 2026-05-16 14:31:58 — 2026-05-16 14:32:04 | 5 líneas ---
|
|
110
|
+
2026-05-16 14:32:00.100 INFO inicio del proceso
|
|
111
|
+
>>> 2026-05-16 14:32:01.123 ERROR timeout en conexión
|
|
112
|
+
2026-05-16 14:32:02.000 DEBUG conexión exitosa
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
`>>>` marca las líneas que coinciden con el patrón. Cada incidente muestra rango temporal y cantidad de líneas.
|
|
116
|
+
|
|
117
|
+
**JSON**: cada incidente con id, start, end, líneas con número, texto y flag `match`.
|
|
118
|
+
|
|
119
|
+
## Desarrollo
|
|
120
|
+
|
|
121
|
+
```bash
|
|
122
|
+
python -m venv .venv
|
|
123
|
+
source .venv/bin/activate
|
|
124
|
+
pip install -e .
|
|
125
|
+
pytest # 14 tests
|
|
126
|
+
ruff check src/ tests/
|
|
127
|
+
```
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=75"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "loggrepper"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "grep contextual para logs con ventanas de tiempo"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = {text = "MIT"}
|
|
11
|
+
authors = [
|
|
12
|
+
{name = "Matias Atuan", email = "matiasatuan.2018@gmail.com"},
|
|
13
|
+
]
|
|
14
|
+
requires-python = ">=3.10"
|
|
15
|
+
dependencies = [
|
|
16
|
+
"click>=8",
|
|
17
|
+
"rich>=13",
|
|
18
|
+
]
|
|
19
|
+
|
|
20
|
+
[project.urls]
|
|
21
|
+
Homepage = "https://github.com/FixZzT/loggrepper"
|
|
22
|
+
Repository = "https://github.com/FixZzT/loggrepper"
|
|
23
|
+
Issues = "https://github.com/FixZzT/loggrepper/issues"
|
|
24
|
+
|
|
25
|
+
[project.scripts]
|
|
26
|
+
loggrepper = "loggrepper.cli:main"
|
|
File without changes
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
from datetime import datetime, timedelta
|
|
2
|
+
from re import compile
|
|
3
|
+
|
|
4
|
+
import click
|
|
5
|
+
|
|
6
|
+
from loggrepper.models import LogLine
|
|
7
|
+
from loggrepper.timestamp import BUILTIN_FORMATS, detect_format, extract_timestamp
|
|
8
|
+
from loggrepper.grouper import group_incidents
|
|
9
|
+
from loggrepper.formatter import get_formatter
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@click.command()
@click.argument("pattern")
@click.argument("file", type=click.Path(exists=True, dir_okay=False, allow_dash=True))
@click.option("--window", "-w", default=3, help="Ventana en segundos alrededor del match")
@click.option("--ts-format", default="auto", help="Formato de timestamp (auto, iso8601, iso8601-t, syslog, nginx, epoch-ms)")
@click.option("--output", "-o", default="pretty", type=click.Choice(["pretty", "json"]), help="Formato de salida")
def main(pattern: str, file: str, window: int, ts_format: str, output: str) -> None:
    """Extrae ventanas de contexto alrededor de matches en archivos de log.

    Ejemplo: loggrepper ERROR app.log -w 5 --output json
    """
    # NOTE: click renders the docstring above as the --help text, so it is
    # user-facing output and is intentionally left in the CLI's language.
    #
    # Flow: validate the pattern and the requested timestamp format, read the
    # input exactly once (a regular file or "-" for stdin), resolve the
    # timestamp format (auto-detected from the first 50 lines when requested),
    # tag every timestamped line as match / non-match, group the lines into
    # incidents and print them with the selected formatter.
    #
    # Raises click.BadParameter for an invalid regex or an unknown
    # --ts-format, and click.UsageError when auto-detection finds no format.

    # Function-scope import: this module only imports `compile` from `re`.
    from re import error as regex_error

    # -- search pattern: cheap to validate, so fail before any I/O ---------
    try:
        pat = compile(pattern)
    except (regex_error, OverflowError, RecursionError) as e:
        raise click.BadParameter(f"Patron regex invalido: {e}") from e

    # -- explicitly requested timestamp format -----------------------------
    valid = ", ".join(BUILTIN_FORMATS)
    fmt = None
    if ts_format != "auto":
        fmt = BUILTIN_FORMATS.get(ts_format)
        if fmt is None:
            raise click.BadParameter(f"Formato '{ts_format}' desconocido. Opciones: {valid}")

    # -- read the input once ------------------------------------------------
    # click.open_file maps "-" to stdin, which cannot be reopened for a second
    # pass; errors="replace" keeps stray undecodable bytes from aborting the run.
    with click.open_file(file, errors="replace") as f:
        raw_lines = [
            LogLine(number=i, raw=line.rstrip("\n"))
            for i, line in enumerate(f, 1)
        ]

    # -- auto-detect the format from the first 50 lines (blank ones ignored) -
    if fmt is None:
        head = [entry.raw for entry in raw_lines[:50] if entry.raw]
        fmt = detect_format(head)
        if fmt is None:
            raise click.UsageError(
                "No se pudo detectar el formato de timestamp. "
                f"Usa --ts-format para especificar uno ({valid})."
            )
        click.echo(f"Formato detectado: {fmt.name}", err=True)

    window_td = timedelta(seconds=window)
    formatter = get_formatter(output)

    # -- tag each line with its timestamp and whether it matches -----------
    formats = [fmt]
    timestamped: list[tuple[LogLine, datetime, bool]] = []
    skipped = 0
    for logline in raw_lines:
        ts = extract_timestamp(logline.raw, formats)
        if ts is None:
            # Lines without a parseable timestamp cannot be placed in a window.
            skipped += 1
            continue
        matched = pat.search(logline.raw) is not None
        timestamped.append((logline, ts, matched))

    if skipped:
        click.echo(f"Lineas sin timestamp detectado: {skipped}", err=True)

    # -- group and print (the formatter handles the empty list itself) -----
    incidents = list(group_incidents(iter(timestamped), window_td))
    click.echo(formatter.format(incidents))
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
"""Formateadores de output para incidentes."""
|
|
2
|
+
import json
|
|
3
|
+
from typing import Protocol
|
|
4
|
+
|
|
5
|
+
from loggrepper.models import Incident
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class Formatter(Protocol):
    """Structural interface every output formatter must satisfy."""

    def format(self, incidents: list[Incident]) -> str:
        # Render the given incidents as one string ready to be printed.
        ...
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class PrettyFormatter:
    """Human-readable plain-text output that flags matching lines with ``>>>``."""

    def format(self, incidents: list[Incident]) -> str:
        """Render *incidents* as text.

        Each incident produces a header line (id, start/end timestamps and
        line count), one row per log line prefixed with ``>>>`` when the row
        is a match, and a trailing empty line. An empty list yields a fixed
        "no incidents" message instead.
        """
        if not incidents:
            return "Sin incidentes encontrados."

        lines: list[str] = []
        for inc in incidents:
            # Set membership keeps marker lookup O(1) per row; scanning the
            # `matches` list for every row was quadratic on large incidents.
            match_idx = set(inc.matches)
            lines.append(
                f"--- Incidente #{inc.id} | "
                f"{inc.start} — {inc.end} | "
                f"{len(inc.lines)} lineas ---"
            )
            for i, logline in enumerate(inc.lines):
                marker = ">>>" if i in match_idx else " "
                lines.append(f"{marker} {logline.raw}")
            lines.append("")
        return "\n".join(lines)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class JsonFormatter:
    """JSON output, suited for piping into jq or other tools."""

    def format(self, incidents: list[Incident]) -> str:
        """Serialize *incidents* as an indented JSON array.

        Every incident becomes an object with ``id``, ISO-formatted ``start``
        and ``end``, ``line_count``, ``match_count`` and a ``lines`` array
        whose entries carry ``number``, ``text`` and a boolean ``match``.
        Non-ASCII characters are written as-is rather than escaped.
        """
        data = []
        for inc in incidents:
            # Set membership keeps the per-line `match` flag O(1); scanning
            # the `matches` list for every line was quadratic.
            match_idx = set(inc.matches)
            data.append(
                {
                    "id": inc.id,
                    "start": inc.start.isoformat(),
                    "end": inc.end.isoformat(),
                    "line_count": len(inc.lines),
                    "match_count": len(inc.matches),
                    "lines": [
                        {
                            "number": logline.number,
                            "text": logline.raw,
                            "match": i in match_idx,
                        }
                        for i, logline in enumerate(inc.lines)
                    ],
                }
            )
        return json.dumps(data, indent=2, ensure_ascii=False)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def get_formatter(output: str) -> Formatter:
    """Return a formatter instance for the requested output name.

    Known names are ``"pretty"`` and ``"json"``; any other name raises
    ``KeyError``.
    """
    registry = {
        "pretty": PrettyFormatter,
        "json": JsonFormatter,
    }
    formatter_cls = registry[output]
    return formatter_cls()
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
from datetime import datetime, timedelta
|
|
2
|
+
from collections.abc import Iterator
|
|
3
|
+
|
|
4
|
+
from loggrepper.models import Incident, LogLine
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def group_incidents(
    items: Iterator[tuple[LogLine, datetime, bool]],
    window: timedelta,
) -> Iterator[Incident]:
    """Group timestamped log lines into incidents around matching lines.

    *items* yields ``(line, timestamp, matched)`` triples. A matching line
    opens an incident, pulling in earlier non-matching lines that fall inside
    the window before it. While an incident is open, every line whose
    timestamp is not past the incident's end joins it, and each further match
    pushes the end out to ``timestamp + window``. The first line past the end
    closes the incident, which is then yielded.
    """
    current: Incident | None = None
    incident_id = 1
    # Non-matching lines seen while no incident is open; a later match may
    # still claim them as leading context.
    backlog: list[tuple[LogLine, datetime]] = []

    for entry, stamp, is_match in items:
        if current is not None:
            if stamp <= current.end:
                current.lines.append(entry)
                if is_match:
                    current.matches.append(len(current.lines) - 1)
                    current.end = max(current.end, stamp + window)
                continue

            # This line is past the open incident: close it, then handle the
            # line below as if no incident were open.
            yield current
            backlog = _discard_before(backlog, current.end)
            current = None

        if is_match:
            current = _new_incident(incident_id, entry, stamp, window, backlog)
            incident_id += 1
        else:
            backlog.append((entry, stamp))

    if current is not None:
        yield current
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _new_incident(
    iid: int,
    match_line: LogLine,
    match_ts: datetime,
    window: timedelta,
    pending: list[tuple[LogLine, datetime]],
) -> Incident:
    """Open an incident at *match_line*, claiming pending lines inside the window.

    Pending lines with a timestamp at or after ``match_ts - window`` become
    the leading context of the incident. *pending* is modified in place so
    that it keeps only the lines that were too old to be claimed.
    """
    window_start = match_ts - window

    claimed = [pl for pl, pts in pending if pts >= window_start]
    too_old = [(pl, pts) for pl, pts in pending if not pts >= window_start]

    # Same list object, new contents: the caller keeps using its reference.
    pending[:] = too_old

    # The match sits right after the claimed context lines.
    match_index = len(claimed)
    return Incident(
        id=iid,
        start=window_start,
        end=match_ts + window,
        lines=claimed + [match_line],
        matches=[match_index],
    )
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def _discard_before(
    pending: list[tuple[LogLine, datetime]],
    cutoff: datetime,
) -> list[tuple[LogLine, datetime]]:
    """Return a new list with the pending entries timestamped strictly after *cutoff*."""
    kept: list[tuple[LogLine, datetime]] = []
    for pl, pts in pending:
        if pts > cutoff:
            kept.append((pl, pts))
    return kept
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
from re import Pattern
|
|
2
|
+
from collections.abc import Iterator
|
|
3
|
+
|
|
4
|
+
from loggrepper.models import LogLine
|
|
5
|
+
|
|
6
|
+
def match_lines(lines: Iterator[LogLine], patterns: list[Pattern]) -> Iterator[tuple[LogLine, bool]]:
    """Yield each line together with a flag telling whether any pattern matches it."""
    for entry in lines:
        hit = False
        for rx in patterns:
            if rx.search(entry.raw):
                hit = True
                break
        yield entry, hit
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
from datetime import datetime
|
|
3
|
+
|
|
4
|
+
@dataclass
class LogLine:
    """A single line of log input."""

    # Position of the line in the input.
    number: int
    # Text of the line.
    raw: str
|
|
8
|
+
|
|
9
|
+
@dataclass
class Incident:
    """A group of log lines around one or more matching lines."""

    # Identifier of the incident.
    id: int
    # Start of the time window covered by the incident.
    start: datetime
    # End of the time window covered by the incident.
    end: datetime
    # Log lines that belong to the incident.
    lines: list[LogLine]
    # Integer positions of matching lines — presumably indices into `lines`; confirm against the producer.
    matches: list[int]
|
|
16
|
+
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
from datetime import datetime
|
|
2
|
+
from re import Pattern, compile
|
|
3
|
+
from typing import Literal
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@dataclass
class TimestampFormat:
    """Describes how to locate and parse a timestamp inside a log line."""

    # Identifier of the format.
    name: str
    # Compiled pattern used to find the timestamp text in a line.
    regex: Pattern
    # Format string describing the timestamp text (presumably strptime-style; confirm against the parser).
    format_str: str
    # "start" or "anywhere" — presumably where in the line the regex is applied; confirm against the parser.
    position: Literal["start", "anywhere"]
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def extract_timestamp(line: str, formats: list[TimestampFormat]) -> datetime | None:
    """Return the first timestamp that can be parsed from *line*.

    Formats are tried in order. For each one the regex is applied (anchored
    at the start of the line when ``position == "start"``, anywhere
    otherwise) and the matched text is parsed:

    * ``epoch-ms`` text is read as milliseconds since the epoch and converted
      with ``datetime.fromtimestamp`` (naive, local time).
    * Any other format is parsed with ``strptime``. A format string ending in
      ``.%f`` is also tried without the fraction and with a trailing ``%z``,
      because the ISO regexes accept timestamps with no fractional seconds
      and with a ``Z`` / numeric offset suffix that the bare format string
      would reject.

    Timestamps that carry a zone suffix come back timezone-aware, the rest
    naive; callers that compare results should not mix both kinds.

    Returns ``None`` when no format yields a parseable timestamp.
    """
    for fmt in formats:
        match = fmt.regex.match(line) if fmt.position == "start" else fmt.regex.search(line)
        if not match:
            continue

        ts_str = match.group(0)
        if fmt.name == "epoch-ms":
            try:
                return datetime.fromtimestamp(int(ts_str) / 1000)
            except (ValueError, OSError, OverflowError):
                # Out-of-range value for the platform: try the next format.
                continue

        for candidate in _strptime_candidates(fmt.format_str):
            try:
                return datetime.strptime(ts_str, candidate)
            except ValueError:
                continue
    return None


def _strptime_candidates(format_str: str) -> tuple[str, ...]:
    """Return the strptime formats to try for *format_str*, most specific first."""
    fraction = ".%f"
    if not format_str.endswith(fraction):
        return (format_str,)
    without_fraction = format_str[: -len(fraction)]
    return (
        format_str,
        format_str + "%z",
        without_fraction,
        without_fraction + "%z",
    )
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def detect_format(lines: list[str]) -> TimestampFormat | None:
    """Pick the timestamp format that parses the most of the given lines.

    A format qualifies only when it parses more than half of *lines*; among
    qualifying formats the one with the most hits wins, and on a tie the one
    listed first in ``ALL_FORMATS`` is kept. Returns ``None`` when no format
    qualifies.
    """
    threshold = len(lines) * 0.5
    winner: TimestampFormat | None = None
    winner_hits = 0

    for candidate in ALL_FORMATS:
        count = 0
        for text in lines:
            if extract_timestamp(text, [candidate]) is not None:
                count += 1
        if count > threshold and (winner is None or count > winner_hits):
            winner = candidate
            winner_hits = count

    return winner
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
# ── Formatos predefinidos ────────────────────────────────────────────
|
|
46
|
+
|
|
47
|
+
# Date and time separated by a space or "T". The regex also accepts an
# optional 3-6 digit fraction and an optional "Z" / numeric offset suffix,
# neither of which is optional in format_str itself.
ISO8601 = TimestampFormat(
    name="iso8601",
    regex=compile(r"\d{4}-\d{2}-\d{2}[T ]\d{2}:\d{2}:\d{2}(?:\.\d{3,6})?(?:[+-]\d{2}:?\d{2}|Z)?"),
    format_str="%Y-%m-%d %H:%M:%S.%f",
    position="anywhere",
)

# Same as ISO8601 but the separator must be "T".
ISO8601_T = TimestampFormat(
    name="iso8601-t",
    regex=compile(r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d{3,6})?(?:[+-]\d{2}:?\d{2}|Z)?"),
    format_str="%Y-%m-%dT%H:%M:%S.%f",
    position="anywhere",
)

# Month abbreviation, day and time. The format carries no year.
# NOTE(review): strptime should default the missing year to 1900 — confirm
# this is acceptable for logs that span a year boundary.
SYSLOG = TimestampFormat(
    name="syslog",
    regex=compile(r"[A-Z][a-z]{2} \d{1,2} \d{2}:\d{2}:\d{2}"),
    format_str="%b %d %H:%M:%S",
    position="anywhere",
)

# Access-log style timestamp with a mandatory numeric UTC offset (%z).
NGINX = TimestampFormat(
    name="nginx",
    regex=compile(r"\d{2}/[A-Z][a-z]{2}/\d{4}:\d{2}:\d{2}:\d{2} [+-]\d{4}"),
    format_str="%d/%b/%Y:%H:%M:%S %z",
    position="anywhere",
)

# A standalone 13-digit number. format_str is left empty for this format.
EPOCH_MS = TimestampFormat(
    name="epoch-ms",
    regex=compile(r"\b\d{13}\b"),
    format_str="",
    position="anywhere",
)

# Lookup table from format name to format definition.
BUILTIN_FORMATS: dict[str, TimestampFormat] = {
    "iso8601": ISO8601,
    "iso8601-t": ISO8601_T,
    "syslog": SYSLOG,
    "nginx": NGINX,
    "epoch-ms": EPOCH_MS,
}

# Every built-in format, in the insertion order of BUILTIN_FORMATS.
ALL_FORMATS: list[TimestampFormat] = list(BUILTIN_FORMATS.values())
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: loggrepper
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: grep contextual para logs con ventanas de tiempo
|
|
5
|
+
Author-email: Matias Atuan <matiasatuan.2018@gmail.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/FixZzT/loggrepper
|
|
8
|
+
Project-URL: Repository, https://github.com/FixZzT/loggrepper
|
|
9
|
+
Project-URL: Issues, https://github.com/FixZzT/loggrepper/issues
|
|
10
|
+
Requires-Python: >=3.10
|
|
11
|
+
Description-Content-Type: text/markdown
|
|
12
|
+
License-File: LICENSE
|
|
13
|
+
Requires-Dist: click>=8
|
|
14
|
+
Requires-Dist: rich>=13
|
|
15
|
+
Dynamic: license-file
|
|
16
|
+
|
|
17
|
+
# loggrepper
|
|
18
|
+
|
|
19
|
+
[](https://github.com/FixZzT/loggrepper/actions/workflows/ci.yml)
|
|
20
|
+
[](https://pypi.org/project/loggrepper/)
|
|
21
|
+
[](https://pypi.org/project/loggrepper/)
|
|
22
|
+
[](https://github.com/FixZzT/loggrepper/blob/master/LICENSE)
|
|
23
|
+
|
|
24
|
+
grep contextual para logs. Extrae ventanas de tiempo alrededor de matches en archivos de log — no por número de líneas, sino por timestamps.
|
|
25
|
+
|
|
26
|
+
## El problema
|
|
27
|
+
|
|
28
|
+
`grep ERROR app.log` te da esto:
|
|
29
|
+
|
|
30
|
+
```
|
|
31
|
+
2026-05-16 14:32:01.123 ERROR PaymentProcessor: timeout
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
Pero no te dice **qué pasó antes** del error (¿llegó el request? ¿qué parámetros tenía?) ni **después** (¿se reintentó? ¿el usuario recibió 500?).
|
|
35
|
+
|
|
36
|
+
`grep -B 20 -A 20` asume que 20 líneas cubren tu ventana de tiempo. Si el request empezó 2 segundos antes y tu log es verboso, 20 líneas puede ser muy poco. Si es poco verboso, 20 líneas es ruido innecesario.
|
|
37
|
+
|
|
38
|
+
**loggrepper** busca por timestamps reales. Le pasas `--window 3` (segundos) y te devuelve todas las líneas cuyo timestamp está dentro de ±3 segundos del match. Líneas sueltas se agrupan en "incidentes". Ventanas solapadas se fusionan.
|
|
39
|
+
|
|
40
|
+
## Instalación
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
pip install git+https://github.com/FixZzT/loggrepper.git
|
|
44
|
+
# o modo desarrollo (editable)
|
|
45
|
+
pip install -e .
|
|
46
|
+
# o global con pipx
|
|
47
|
+
pipx install git+https://github.com/FixZzT/loggrepper.git
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
## Uso
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
# Básico
|
|
54
|
+
loggrepper ERROR app.log
|
|
55
|
+
|
|
56
|
+
# Ventana de 5 segundos
|
|
57
|
+
loggrepper ERROR app.log -w 5
|
|
58
|
+
|
|
59
|
+
# Salida JSON para scripts
|
|
60
|
+
loggrepper ERROR app.log -o json | jq '.[] | {start, end, line_count}'
|
|
61
|
+
|
|
62
|
+
# Formato de timestamp específico
|
|
63
|
+
loggrepper "404" nginx-access.log --ts-format nginx
|
|
64
|
+
|
|
65
|
+
# Pipe desde docker/k8s
|
|
66
|
+
docker logs mi-app 2>&1 | loggrepper FATAL -
|
|
67
|
+
kubectl logs pod-xyz | loggrepper panic -
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
## Ejemplos reales
|
|
71
|
+
|
|
72
|
+
**Depurar un error en producción:**
|
|
73
|
+
|
|
74
|
+
```bash
|
|
75
|
+
$ loggrepper "IntegrityError" app.log -w 5
|
|
76
|
+
|
|
77
|
+
--- Incidente #1 | 14:32:00 — 14:32:04 | 5 líneas ---
|
|
78
|
+
14:32:00.100 INFO POST /api/orders payload={"user":42}
|
|
79
|
+
14:32:00.500 DEBUG INSERT INTO orders VALUES (...)
|
|
80
|
+
>>> 14:32:01.123 ERROR IntegrityError: duplicate key
|
|
81
|
+
14:32:01.200 WARN rolling back transaction
|
|
82
|
+
14:32:02.000 INFO POST /api/orders -> 500
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
Ves el request entero, SQL, error, rollback, y respuesta — en contexto temporal real.
|
|
86
|
+
|
|
87
|
+
**Investigar timeouts entre microservicios:**
|
|
88
|
+
|
|
89
|
+
```bash
|
|
90
|
+
$ loggrepper "pi_abc123" payment-service.log -w 10
|
|
91
|
+
|
|
92
|
+
--- Incidente #1 | 14:32:00 — 14:32:10 | 7 líneas ---
|
|
93
|
+
14:32:00.100 INFO received payment intent pi_abc123
|
|
94
|
+
14:32:00.200 DEBUG calling Stripe /v1/payment_intents
|
|
95
|
+
>>> 14:32:08.500 ERROR timeout calling Stripe (8.3s)
|
|
96
|
+
14:32:08.501 WARN retrying (1/3)
|
|
97
|
+
14:32:10.000 DEBUG Stripe responded 200 OK
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
Stripe tardó 8s, no es tu código. La ventana captura causa y efecto.
|
|
101
|
+
|
|
102
|
+
**Auditar requests sospechosos en nginx:**
|
|
103
|
+
|
|
104
|
+
```bash
|
|
105
|
+
$ loggrepper "POST /admin" access.log --ts-format nginx -w 30 -o json | jq .
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
## Formatos de timestamp soportados
|
|
109
|
+
|
|
110
|
+
| Formato | Ejemplo | Uso típico |
|
|
111
|
+
|-----------|--------------------------------------------|--------------------------|
|
|
112
|
+
| iso8601 | `2026-05-16 14:32:01.123 ERROR` | Python, Java, Node, Go |
|
|
113
|
+
| iso8601-t | `2026-05-16T14:32:01.123Z ERROR` | JSON logs, Docker, k8s |
|
|
114
|
+
| syslog | `May 16 14:32:01 hostname error:` | syslog, journald, /var/log |
|
|
115
|
+
| nginx | `16/May/2026:14:32:01 +0000 GET /` | Nginx, Apache access |
|
|
116
|
+
| epoch-ms | `1715872321123 ERROR` | Splunk, sistemas embedded |
|
|
117
|
+
|
|
118
|
+
Con `--ts-format auto` (default) detecta automáticamente el formato analizando las primeras 50 líneas.
|
|
119
|
+
|
|
120
|
+
## Output
|
|
121
|
+
|
|
122
|
+
**Pretty** (default):
|
|
123
|
+
|
|
124
|
+
```
|
|
125
|
+
--- Incidente #1 | 2026-05-16 14:31:58 — 2026-05-16 14:32:04 | 5 líneas ---
|
|
126
|
+
2026-05-16 14:32:00.100 INFO inicio del proceso
|
|
127
|
+
>>> 2026-05-16 14:32:01.123 ERROR timeout en conexión
|
|
128
|
+
2026-05-16 14:32:02.000 DEBUG conexión exitosa
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
`>>>` marca las líneas que coinciden con el patrón. Cada incidente muestra rango temporal y cantidad de líneas.
|
|
132
|
+
|
|
133
|
+
**JSON**: cada incidente con id, start, end, líneas con número, texto y flag `match`.
|
|
134
|
+
|
|
135
|
+
## Desarrollo
|
|
136
|
+
|
|
137
|
+
```bash
|
|
138
|
+
python -m venv .venv
|
|
139
|
+
source .venv/bin/activate
|
|
140
|
+
pip install -e .
|
|
141
|
+
pytest # 14 tests
|
|
142
|
+
ruff check src/ tests/
|
|
143
|
+
```
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
README.md
|
|
3
|
+
pyproject.toml
|
|
4
|
+
src/loggrepper/__init__.py
|
|
5
|
+
src/loggrepper/cli.py
|
|
6
|
+
src/loggrepper/formatter.py
|
|
7
|
+
src/loggrepper/grouper.py
|
|
8
|
+
src/loggrepper/matcher.py
|
|
9
|
+
src/loggrepper/models.py
|
|
10
|
+
src/loggrepper/timestamp.py
|
|
11
|
+
src/loggrepper.egg-info/PKG-INFO
|
|
12
|
+
src/loggrepper.egg-info/SOURCES.txt
|
|
13
|
+
src/loggrepper.egg-info/dependency_links.txt
|
|
14
|
+
src/loggrepper.egg-info/entry_points.txt
|
|
15
|
+
src/loggrepper.egg-info/requires.txt
|
|
16
|
+
src/loggrepper.egg-info/top_level.txt
|
|
17
|
+
tests/test_formatter.py
|
|
18
|
+
tests/test_grouper.py
|
|
19
|
+
tests/test_matcher.py
|
|
20
|
+
tests/test_models.py
|
|
21
|
+
tests/test_timestamp.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
loggrepper
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from datetime import datetime
|
|
3
|
+
|
|
4
|
+
from loggrepper.formatter import PrettyFormatter, JsonFormatter
|
|
5
|
+
from loggrepper.models import Incident, LogLine
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def test_pretty_formatter_empty():
    """Formatting an empty incident list yields the fixed placeholder text."""
    assert PrettyFormatter().format([]) == "Sin incidentes encontrados."
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def test_pretty_formatter_single_incident():
    """The pretty output of one incident contains its header, marker and text."""
    log_lines = [
        LogLine(1, "INFO: antes"),
        LogLine(2, "ERROR: boom"),
        LogLine(3, "DEBUG: despues"),
    ]
    incident = Incident(
        id=1,
        start=datetime(2026, 5, 16, 14, 31, 58),
        end=datetime(2026, 5, 16, 14, 32, 4),
        lines=log_lines,
        matches=[1],
    )

    rendered = PrettyFormatter().format([incident])

    # Checked in order: incident header, match marker, the matching line's text.
    for expected in ("Incidente #1", ">>>", "ERROR: boom"):
        assert expected in rendered
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def test_json_formatter_empty():
    """With no incidents, the JSON output decodes to an empty list."""
    decoded = json.loads(JsonFormatter().format([]))
    assert decoded == []
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def test_json_formatter_incident():
    """The JSON output carries the incident id and a per-line ``match`` flag."""
    log_lines = [
        LogLine(1, "INFO: antes"),
        LogLine(2, "ERROR: boom"),
    ]
    incident = Incident(
        id=1,
        start=datetime(2026, 5, 16, 14, 31, 58),
        end=datetime(2026, 5, 16, 14, 32, 4),
        lines=log_lines,
        matches=[1],
    )

    payload = json.loads(JsonFormatter().format([incident]))

    assert len(payload) == 1
    entry = payload[0]
    assert entry["id"] == 1
    # matches=[1] flags the second entry of ``lines``; the first stays unflagged.
    assert entry["lines"][1]["match"] is True
    assert entry["lines"][0]["match"] is False
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
from datetime import datetime, timedelta
|
|
2
|
+
from loggrepper.grouper import group_incidents
|
|
3
|
+
from loggrepper.models import LogLine
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def test_single_match_creates_window():
    """One match with neighbours inside the window produces a single incident."""
    window = timedelta(seconds=3)
    ts = datetime(2026, 5, 16, 14, 32, 0)
    offset = timedelta(seconds=2)  # both neighbours sit inside the 3 s window
    stream = iter(
        [
            (LogLine(1, "DEBUG: antes"), ts - offset, False),
            (LogLine(2, "ERROR: boom"), ts, True),
            (LogLine(3, "DEBUG: despues"), ts + offset, False),
        ]
    )

    incidents = list(group_incidents(stream, window))

    assert len(incidents) == 1
    only = incidents[0]
    assert only.id == 1
    assert len(only.lines) == 3
    assert only.matches == [1]
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def test_line_outside_window_excluded():
    """A line 5 s after the match is left out of a 2 s window incident."""
    window = timedelta(seconds=2)
    ts = datetime(2026, 5, 16, 14, 32, 0)
    stream = iter(
        [
            (LogLine(1, "ERROR: boom"), ts, True),
            (LogLine(2, "DEBUG: muy lejos"), ts + timedelta(seconds=5), False),
        ]
    )

    incidents = list(group_incidents(stream, window))

    assert len(incidents) == 1
    kept = incidents[0].lines
    assert len(kept) == 1
    assert kept[0].raw == "ERROR: boom"
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
from re import compile
|
|
2
|
+
from loggrepper.matcher import match_lines
|
|
3
|
+
from loggrepper.models import LogLine
|
|
4
|
+
|
|
5
|
+
def test_match_lines_finds_pattern():
    """match_lines pairs every line with whether it matched the pattern."""
    error_re = compile(r"ERROR")
    source = iter([LogLine(1, "INFO: ok"), LogLine(2, "ERROR: timeout")])

    result = list(match_lines(source, [error_re]))

    # Expected values are built fresh so the comparison relies on LogLine
    # equality rather than object identity.
    assert result[0] == (LogLine(1, "INFO: ok"), False)
    assert result[1] == (LogLine(2, "ERROR: timeout"), True)
|
|
11
|
+
|
|
12
|
+
def test_match_lines_no_match():
    """No line is flagged when none of them contains the pattern."""
    error_re = compile(r"ERROR")
    source = iter([LogLine(1, "todo bien"), LogLine(2, "todo ok")])

    result = list(match_lines(source, [error_re]))

    assert not any(matched for _, matched in result)
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
from loggrepper.models import LogLine,Incident
|
|
2
|
+
from datetime import datetime
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def test_create_logline():
    """A LogLine built with keywords exposes ``number`` and ``raw`` unchanged."""
    log_line = LogLine(number=3, raw="ERROR: algo exploto")
    assert log_line.number == 3
    assert log_line.raw == "ERROR: algo exploto"
|
|
9
|
+
|
|
10
|
+
def test_create_incident():
    """Build an Incident from keyword arguments and verify the stored fields.

    Constructing the object alone only guards against constructor errors;
    the assertions below also catch fields that are dropped or stored wrongly.
    """
    linea = LogLine(number=5, raw="ERROR: timeout")
    incident = Incident(
        id=1,
        start=datetime(2026, 5, 16, 14, 32, 0),
        end=datetime(2026, 5, 16, 14, 32, 5),
        lines=[linea],
        matches=[0],
    )

    assert incident.id == 1
    assert incident.lines == [linea]
    assert incident.matches == [0]
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
from re import compile
|
|
2
|
+
from datetime import datetime
|
|
3
|
+
from loggrepper.timestamp import TimestampFormat, extract_timestamp, detect_format
|
|
4
|
+
|
|
5
|
+
# Shared fixture: a format whose regex accepts "YYYY-MM-DD HH:MM:SS.mmm"
# anchored at the beginning of the line, parsed with the matching strptime
# pattern. NOTE(review): position="start" presumably tells the extractor where
# in the line to look -- confirm against loggrepper.timestamp.
ISO_FORMAT = TimestampFormat(
    name="iso8601",
    regex=compile(r"^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d{3}"),
    format_str="%Y-%m-%d %H:%M:%S.%f",
    position="start",
)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def test_extract_timestamp_start():
    """A leading ISO timestamp is parsed, milliseconds included."""
    sample = "2026-05-16 14:32:01.123 ERROR: timeout"
    parsed = extract_timestamp(sample, [ISO_FORMAT])
    # ".123" is read as 123 ms, i.e. 123000 microseconds.
    assert parsed == datetime(2026, 5, 16, 14, 32, 1, 123000)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def test_extract_timestamp_no_match():
    """A line without any timestamp yields ``None``."""
    assert extract_timestamp("sin timestamp aqui", [ISO_FORMAT]) is None
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def test_detect_format_iso8601():
    """detect_format recognises ISO-style lines as one of the ISO formats."""
    sample_lines = [
        "2026-05-16 14:32:00.100 INFO inicio",
        "2026-05-16 14:32:01.123 ERROR timeout",
        "2026-05-16 14:32:02.000 DEBUG fin",
    ]
    accepted_names = ("iso8601", "iso8601-t")

    detected = detect_format(sample_lines)

    assert detected is not None
    assert detected.name in accepted_names
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def test_detect_format_no_detect():
    """Lines without timestamps make detect_format return ``None``."""
    sample_lines = ["sin timestamp aqui", "tampoco esta linea"]
    assert detect_format(sample_lines) is None
|