runspec-logops-core 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- runspec_logops_core-0.1.0/.gitignore +61 -0
- runspec_logops_core-0.1.0/CHANGELOG.md +28 -0
- runspec_logops_core-0.1.0/PKG-INFO +9 -0
- runspec_logops_core-0.1.0/pyproject.toml +30 -0
- runspec_logops_core-0.1.0/runspec_logops_core/__init__.py +35 -0
- runspec_logops_core-0.1.0/runspec_logops_core/bundle.py +48 -0
- runspec_logops_core-0.1.0/runspec_logops_core/codemap.py +158 -0
- runspec_logops_core-0.1.0/runspec_logops_core/digest.py +167 -0
- runspec_logops_core-0.1.0/runspec_logops_core/errors.py +15 -0
- runspec_logops_core-0.1.0/runspec_logops_core/signatures.py +138 -0
- runspec_logops_core-0.1.0/tests/__init__.py +0 -0
- runspec_logops_core-0.1.0/tests/test_bundle.py +28 -0
- runspec_logops_core-0.1.0/tests/test_codemap.py +54 -0
- runspec_logops_core-0.1.0/tests/test_digest.py +81 -0
- runspec_logops_core-0.1.0/tests/test_signatures.py +65 -0
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*.pyo
|
|
5
|
+
*.pyd
|
|
6
|
+
.Python
|
|
7
|
+
*.egg
|
|
8
|
+
*.egg-info/
|
|
9
|
+
dist/
|
|
10
|
+
build/
|
|
11
|
+
.eggs/
|
|
12
|
+
.venv/
|
|
13
|
+
venv/
|
|
14
|
+
env/
|
|
15
|
+
.env
|
|
16
|
+
pip-wheel-metadata/
|
|
17
|
+
.pytest_cache/
|
|
18
|
+
.mypy_cache/
|
|
19
|
+
.ruff_cache/
|
|
20
|
+
htmlcov/
|
|
21
|
+
.coverage
|
|
22
|
+
coverage.xml
|
|
23
|
+
*.cover
|
|
24
|
+
|
|
25
|
+
# Node
|
|
26
|
+
node_modules/
|
|
27
|
+
dist/
|
|
28
|
+
*.js.map
|
|
29
|
+
.npm
|
|
30
|
+
|
|
31
|
+
# Go
|
|
32
|
+
*.exe
|
|
33
|
+
*.test
|
|
34
|
+
*.out
|
|
35
|
+
vendor/
|
|
36
|
+
|
|
37
|
+
# IDE
|
|
38
|
+
.idea/
|
|
39
|
+
.vscode/
|
|
40
|
+
*.iml
|
|
41
|
+
*.iws
|
|
42
|
+
*.ipr
|
|
43
|
+
.DS_Store
|
|
44
|
+
Thumbs.db
|
|
45
|
+
|
|
46
|
+
# Docs
|
|
47
|
+
site/
|
|
48
|
+
|
|
49
|
+
# Misc
|
|
50
|
+
*.log
|
|
51
|
+
*.tmp
|
|
52
|
+
|
|
53
|
+
# External reference repos (cloned locally, not committed)
|
|
54
|
+
chainlit-docs/
|
|
55
|
+
.chainlit/
|
|
56
|
+
|
|
57
|
+
# Claude Code local config (machine-specific)
|
|
58
|
+
.claude/launch.json
|
|
59
|
+
|
|
60
|
+
# Stray committed test venv (removed from tracking)
|
|
61
|
+
.venv-test/
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
# runspec-logops-core Changelog
|
|
2
|
+
|
|
3
|
+
## [0.1.0] — 2026-06-18
|
|
4
|
+
|
|
5
|
+
Initial release.
|
|
6
|
+
|
|
7
|
+
The pure-Python logic core behind `runspec-logops`. Provides log-condensing and
|
|
8
|
+
code-mapping helpers as plain importable functions — **no dependency on
|
|
9
|
+
`runspec`, no `runspec.toml`, no console-script entry points** — so a package can
|
|
10
|
+
`from runspec_logops_core import summarize_log` (and the rest) without surfacing
|
|
11
|
+
any runnables in the venv or in `runspec local` / `runspec serve` discovery. This
|
|
12
|
+
is the corporate-facing deliverable: wrap it in a private package, bake in
|
|
13
|
+
corporate defaults as plain params, and ship your own runnables.
|
|
14
|
+
|
|
15
|
+
Each function returns plain data and *raises* on failure (`SourceNotFoundError`
|
|
16
|
+
when an input path is missing); wrappers catch these and render the JSON/exit
|
|
17
|
+
behaviour.
|
|
18
|
+
|
|
19
|
+
Exports:
|
|
20
|
+
|
|
21
|
+
- **Signatures** — `normalize_line`, `error_signature`, `detect_level` (pure
|
|
22
|
+
line/event shaping for cheap clustering)
|
|
23
|
+
- **Digest** — `summarize_log` (streams a log, clusters by signature, returns a
|
|
24
|
+
bounded top-N digest whose size is independent of input size)
|
|
25
|
+
- **Code map** — `map_trace_to_sources` (resolves a stack trace / signature to the
|
|
26
|
+
few relevant source snippets in a checkout)
|
|
27
|
+
- **Bundle** — `build_bundle` (zips digest + snippets + manifest for transfer)
|
|
28
|
+
- **Errors** — `LogopsCoreError`, `SourceNotFoundError`
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: runspec-logops-core
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Pure-Python log-condensing + code-mapping helpers — the importable core behind runspec-logops (no runspec dependency, no runnables)
|
|
5
|
+
Requires-Python: >=3.10
|
|
6
|
+
Provides-Extra: dev
|
|
7
|
+
Requires-Dist: mypy; extra == 'dev'
|
|
8
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
9
|
+
Requires-Dist: ruff; extra == 'dev'
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "runspec-logops-core"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
requires-python = ">=3.10"
|
|
9
|
+
description = "Pure-Python log-condensing + code-mapping helpers — the importable core behind runspec-logops (no runspec dependency, no runnables)"
|
|
10
|
+
dependencies = []
|
|
11
|
+
|
|
12
|
+
[project.optional-dependencies]
|
|
13
|
+
dev = [
|
|
14
|
+
"ruff",
|
|
15
|
+
"mypy",
|
|
16
|
+
"pytest>=8.0",
|
|
17
|
+
]
|
|
18
|
+
|
|
19
|
+
[tool.pytest.ini_options]
|
|
20
|
+
testpaths = ["tests"]
|
|
21
|
+
|
|
22
|
+
[tool.mypy]
|
|
23
|
+
python_version = "3.10"
|
|
24
|
+
|
|
25
|
+
[tool.ruff]
|
|
26
|
+
line-length = 200
|
|
27
|
+
target-version = "py310"
|
|
28
|
+
|
|
29
|
+
[tool.ruff.lint]
|
|
30
|
+
select = ["E", "F", "I", "UP", "B", "SIM"]
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
"""runspec-logops-core — pure-Python log-condensing + code-mapping helpers.
|
|
2
|
+
|
|
3
|
+
This package has **no dependency on runspec** and ships **no runspec.toml and no
|
|
4
|
+
entry points**, so installing it exposes the helper functions for import without
|
|
5
|
+
surfacing any runnables (it is invisible to ``runspec local`` / ``runspec serve``
|
|
6
|
+
discovery). ``runspec-logops`` depends on it and wraps each helper in a runnable;
|
|
7
|
+
a private (e.g. Nexus-hosted) package can instead import these helpers directly,
|
|
8
|
+
bake in corporate defaults/paths as plain params, and ship its own runnables —
|
|
9
|
+
so only the wrapped runnables ever surface in the venv.
|
|
10
|
+
|
|
11
|
+
Each function returns plain data and *raises* on failure (see
|
|
12
|
+
``runspec_logops_core.errors``).
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from runspec_logops_core.bundle import build_bundle
|
|
16
|
+
from runspec_logops_core.codemap import map_trace_to_sources
|
|
17
|
+
from runspec_logops_core.digest import summarize_log
|
|
18
|
+
from runspec_logops_core.errors import LogopsCoreError, SourceNotFoundError
|
|
19
|
+
from runspec_logops_core.signatures import detect_level, error_signature, normalize_line
|
|
20
|
+
|
|
21
|
+
__all__ = [
|
|
22
|
+
# errors
|
|
23
|
+
"LogopsCoreError",
|
|
24
|
+
"SourceNotFoundError",
|
|
25
|
+
# signatures
|
|
26
|
+
"normalize_line",
|
|
27
|
+
"error_signature",
|
|
28
|
+
"detect_level",
|
|
29
|
+
# digest
|
|
30
|
+
"summarize_log",
|
|
31
|
+
# codemap
|
|
32
|
+
"map_trace_to_sources",
|
|
33
|
+
# bundle
|
|
34
|
+
"build_bundle",
|
|
35
|
+
]
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
"""Bundle a digest + code map + metadata into one small zip for transfer.
|
|
2
|
+
|
|
3
|
+
The console's existing ``download_file`` agent tool then pulls this single small
|
|
4
|
+
artifact to local — the whole log and whole repo never transit the network.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
import os
|
|
11
|
+
import zipfile
|
|
12
|
+
from datetime import datetime
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def build_bundle(digest: dict, code_map: dict | None, *, dest_dir: str, metadata: dict | None = None) -> dict:
    """Write ``digest.json`` + ``manifest.json`` (+ ``snippets.json``) into one zip.

    Args:
        digest: The digest dict to store as ``digest.json``; its ``file`` and
            ``distinct_signatures`` keys (when present) are echoed in the manifest.
        code_map: Optional code map stored as ``snippets.json``; omitted when ``None``.
        dest_dir: Directory that receives the archive; created if needed.
        metadata: Extra manifest entries. They are merged last, so they override
            the generated manifest keys of the same name.

    Returns:
        The ``backup_files``-style ``{destination, size_bytes, size_mb, contents}``.

    Raises:
        OSError: On directory-creation or write failure.
        TypeError: If a payload is not JSON-serialisable (raised before any
            file is created).
    """
    os.makedirs(dest_dir, exist_ok=True)

    # Read the clock once so the file name and ``generated_at`` describe the same instant.
    now = datetime.now()
    stem = f"logops_digest_{now.strftime('%Y%m%dT%H%M%S')}"

    manifest = {
        "generated_at": now.isoformat(),
        "log_file": digest.get("file"),
        "distinct_signatures": digest.get("distinct_signatures"),
        "has_code_map": code_map is not None,
        **(metadata or {}),
    }

    members: dict[str, dict] = {"digest.json": digest, "manifest.json": manifest}
    if code_map is not None:
        members["snippets.json"] = code_map
    # Serialise up front: a non-JSON-able payload must not leave a half-written zip behind.
    payloads = {name: json.dumps(obj, indent=2) for name, obj in members.items()}

    # The timestamp only has one-second resolution, so create the archive
    # exclusively and add a numeric suffix instead of overwriting an earlier bundle.
    archive_path = os.path.join(dest_dir, f"{stem}.zip")
    attempt = 0
    while True:
        try:
            with zipfile.ZipFile(archive_path, "x", compression=zipfile.ZIP_DEFLATED) as zf:
                for name, text in payloads.items():
                    zf.writestr(name, text)
            break
        except FileExistsError:
            attempt += 1
            archive_path = os.path.join(dest_dir, f"{stem}_{attempt}.zip")

    size_bytes = os.path.getsize(archive_path)
    return {
        "destination": archive_path,
        "size_bytes": size_bytes,
        "size_mb": round(size_bytes / 1_048_576, 2),
        "contents": list(payloads),
    }
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
"""Map a stack trace (or a digest signature) to the few relevant source snippets.
|
|
2
|
+
|
|
3
|
+
Given a trace and the local git checkout, ``map_trace_to_sources`` extracts the
|
|
4
|
+
referenced frames, resolves each to a file in the checkout, and returns only the
|
|
5
|
+
±context window around each frame's line — capped at ``max_files`` frames and
|
|
6
|
+
``max_total_lines`` total. The agent gets the handful of lines that matter, never
|
|
7
|
+
the repo. Resolution is plain filesystem (``os.walk``); no ``git`` is required.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import os
|
|
13
|
+
import re
|
|
14
|
+
|
|
15
|
+
from runspec_logops_core.errors import SourceNotFoundError
|
|
16
|
+
|
|
17
|
+
_IGNORE_DIRS = {".git", "node_modules", "__pycache__", ".venv", "venv", ".mypy_cache", ".tox", "dist", "build", ".idea"}
|
|
18
|
+
|
|
19
|
+
# Frame shapes across runtimes.
|
|
20
|
+
_PY = re.compile(r'File\s+"([^"]+)",\s+line\s+(\d+),\s+in\s+(\S+)')
|
|
21
|
+
_JAVA = re.compile(r"at\s+([\w.$]+)\(([^()\s:]+):(\d+)\)")
|
|
22
|
+
_JS = re.compile(r"at\s+(?:([\w.$<>]+)\s+)?\(?((?:/|\./|\w:|[\w.\-]+/)[\w./\-]*\.[a-zA-Z]+):(\d+):\d+\)?")
|
|
23
|
+
_GENERIC = re.compile(r"\b([\w./\-]+\.[a-zA-Z]{1,5}):(\d+)\b")
|
|
24
|
+
_CAPWORD = re.compile(r"^[A-Z]\w+$")
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _frames_from_trace(trace: str) -> list[tuple[str | None, int | None, str | None]]:
    """Collect ``(file_hint, lineno, symbol)`` candidates found in a raw trace.

    The runtime-specific patterns run first (Python, Java, JS) and the generic
    ``file:line`` pattern runs last, so a frame already captured with its
    symbol is not added a second time without one.
    """
    found: list[tuple[str | None, int | None, str | None]] = []
    known: set[tuple] = set()

    def remember(file_hint: str | None, lineno: int | None, symbol: str | None) -> None:
        # Frames with a line number are keyed on (basename, line); frames
        # without one are keyed on (file hint, symbol).
        if lineno is None:
            key = ("S", file_hint, symbol)
        else:
            key = ("L", os.path.basename(file_hint) if file_hint else None, lineno)
        if key in known:
            return
        known.add(key)
        found.append((file_hint, lineno, symbol))

    # (pattern, file group, line group, symbol group or None when the shape has no symbol)
    layouts = (
        (_PY, 1, 2, 3),
        (_JAVA, 2, 3, 1),
        (_JS, 2, 3, 1),
        (_GENERIC, 1, 2, None),
    )
    for pattern, file_group, line_group, symbol_group in layouts:
        for match in pattern.finditer(trace):
            symbol = None if symbol_group is None else match.group(symbol_group)
            remember(match.group(file_group), int(match.group(line_group)), symbol)
    return found
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _frames_from_signature(trace: str) -> list[tuple[str | None, int | None, str | None]]:
    """Turn a digest signature such as ``Exc@com.acme.OrderSvc.price`` into frames.

    Everything after the first ``@`` is split on ``>``. For each dotted frame
    the class hint is the last segment matching ``_CAPWORD`` (or ``None``) and
    the symbol is the final segment. The line number is always ``None``.
    """
    _, marker, tail = trace.partition("@")
    if not marker:
        return []

    frames: list[tuple[str | None, int | None, str | None]] = []
    for piece in tail.split(">"):
        dotted = piece.strip()
        if not dotted:
            continue
        segments = dotted.split(".")
        owner: str | None = None
        for segment in reversed(segments):
            if _CAPWORD.match(segment):
                owner = segment
                break
        frames.append((owner, None, segments[-1]))
    return frames
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _find_file(repo_root: str, file_hint: str) -> str | None:
|
|
69
|
+
"""Resolve ``file_hint`` to a path under ``repo_root`` (exact rel path, then basename)."""
|
|
70
|
+
exact = os.path.join(repo_root, file_hint)
|
|
71
|
+
if os.path.isfile(exact):
|
|
72
|
+
return exact
|
|
73
|
+
target = os.path.basename(file_hint)
|
|
74
|
+
for dirpath, dirs, files in os.walk(repo_root):
|
|
75
|
+
dirs[:] = [d for d in dirs if d not in _IGNORE_DIRS]
|
|
76
|
+
if target in files:
|
|
77
|
+
return os.path.join(dirpath, target)
|
|
78
|
+
return None
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def _find_class_file(repo_root: str, cls: str) -> str | None:
|
|
82
|
+
"""Find a file whose stem equals the class name (e.g. ``OrderSvc`` → OrderSvc.java)."""
|
|
83
|
+
for dirpath, dirs, files in os.walk(repo_root):
|
|
84
|
+
dirs[:] = [d for d in dirs if d not in _IGNORE_DIRS]
|
|
85
|
+
for name in files:
|
|
86
|
+
if os.path.splitext(name)[0] == cls:
|
|
87
|
+
return os.path.join(dirpath, name)
|
|
88
|
+
return None
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def _symbol_line(lines: list[str], symbol: str) -> int | None:
|
|
92
|
+
"""Return the 1-based line number where ``symbol`` is defined/first referenced."""
|
|
93
|
+
needles = (f"def {symbol}", f"{symbol}(", f" {symbol} ", f".{symbol}")
|
|
94
|
+
for i, line in enumerate(lines, start=1):
|
|
95
|
+
if any(n in line for n in needles):
|
|
96
|
+
return i
|
|
97
|
+
return None
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def _snippet(path: str, lineno: int, context: int) -> list[str]:
|
|
101
|
+
"""Return ``lineno`` ±context as ``"<n>: <text>"`` strings (no trailing newline)."""
|
|
102
|
+
with open(path, errors="replace") as fh:
|
|
103
|
+
lines = fh.read().splitlines()
|
|
104
|
+
start = max(0, lineno - context - 1)
|
|
105
|
+
end = min(len(lines), lineno + context)
|
|
106
|
+
return [f"{i + 1}: {lines[i]}" for i in range(start, end)]
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def map_trace_to_sources(
    repo_root: str,
    trace: str,
    *,
    context: int = 8,
    max_files: int = 5,
    max_total_lines: int = 200,
) -> dict:
    """Resolve the frames named in ``trace`` to source snippets under ``repo_root``.

    ``trace`` is parsed as a raw stack trace first; if that yields no frames it
    is parsed as a digest signature. Frames are resolved in order until
    ``max_files`` frames or ``max_total_lines`` snippet lines have been
    collected. Candidates that cannot be resolved to a file or a line are
    skipped.

    Returns:
        ``{repo, frames, files_matched, truncated}`` where each frame is
        ``{file, lineno, symbol, snippet}`` and ``truncated`` is true when
        fewer frames were returned than candidates were found.

    Raises:
        SourceNotFoundError: If ``repo_root`` is not a directory.
    """
    if not os.path.isdir(repo_root):
        raise SourceNotFoundError(f"repo checkout not found: {repo_root}")

    candidates = _frames_from_trace(trace) or _frames_from_signature(trace)

    frames: list[dict] = []
    used_lines = 0
    for file_hint, lineno, symbol in candidates:
        if len(frames) >= max_files or used_lines >= max_total_lines:
            break
        if not file_hint:
            # Nothing to resolve a file from.
            continue

        # Path-like hints (a slash, or a dotted name that came with a line
        # number) are looked up as files first; anything else, or a miss,
        # falls back to a lookup by file stem.
        looks_like_path = "/" in file_hint or ("." in file_hint and lineno is not None)
        path = _find_file(repo_root, file_hint) if looks_like_path else None
        if path is None:
            stem = os.path.splitext(os.path.basename(file_hint))[0]
            path = _find_class_file(repo_root, stem)
        if path is None:
            continue

        target_line = lineno
        if target_line is None and symbol:
            with open(path, errors="replace") as fh:
                source_lines = fh.read().splitlines()
            target_line = _symbol_line(source_lines, symbol)
        if target_line is None:
            continue

        remaining = max(0, max_total_lines - used_lines)
        excerpt = _snippet(path, target_line, context)[:remaining]
        used_lines += len(excerpt)
        frames.append(
            {
                "file": os.path.relpath(path, repo_root),
                "lineno": target_line,
                "symbol": symbol,
                "snippet": excerpt,
            }
        )

    return {
        "repo": repo_root,
        "frames": frames,
        "files_matched": len(frames),
        "truncated": len(frames) < len(candidates),
    }
|
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
"""Condense a noisy log into a small, bounded digest.
|
|
2
|
+
|
|
3
|
+
``summarize_log`` streams a file line-by-line (it never loads the whole file),
|
|
4
|
+
groups continuation/stack lines into events, clusters events by
|
|
5
|
+
:func:`signatures.error_signature`, and returns only the top-N distinct
|
|
6
|
+
signatures with counts, first/last timestamp and one truncated sample each. The
|
|
7
|
+
returned dict's size is bounded by ``top`` × ``max_sample_lines`` × ``max_bytes``
|
|
8
|
+
— independent of how big the input log is. That bound is the whole point: the
|
|
9
|
+
agent reads the digest, never the log.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import os
|
|
15
|
+
import re
|
|
16
|
+
from datetime import datetime, timedelta
|
|
17
|
+
|
|
18
|
+
from runspec_logops_core.errors import SourceNotFoundError
|
|
19
|
+
from runspec_logops_core.signatures import detect_level, error_signature
|
|
20
|
+
|
|
21
|
+
_LEVEL_FLOOR = {"all": 0, "warning": 2, "error": 3, "critical": 4}
|
|
22
|
+
|
|
23
|
+
# Leading ISO timestamp captured for window filtering (syslog has no year, so it
|
|
24
|
+
# is intentionally left for the no-op path).
|
|
25
|
+
_ISO_TS = re.compile(r"^\s*\[?(\d{4}-\d{2}-\d{2})[ T](\d{2}:\d{2}:\d{2})(?:[.,]\d+)?")
|
|
26
|
+
_REL = re.compile(r"^\s*(\d+)\s*([a-z]+?)s?(?:\s+ago)?\s*$", re.IGNORECASE)
|
|
27
|
+
_UNIT = {"s": "seconds", "sec": "seconds", "second": "seconds", "m": "minutes", "min": "minutes", "minute": "minutes", "h": "hours", "hour": "hours", "hr": "hours", "d": "days", "day": "days"}
|
|
28
|
+
|
|
29
|
+
# A line that continues the previous event rather than starting a new one.
|
|
30
|
+
_CONT = re.compile(r"^\s+|^(?:at\s|Caused by:|\.{3}|File\s\")", re.IGNORECASE)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _parse_dt(text: str) -> datetime | None:
|
|
34
|
+
"""Best-effort parse of an ISO-ish ``YYYY-MM-DD[ T]HH:MM:SS`` string (naive)."""
|
|
35
|
+
try:
|
|
36
|
+
return datetime.fromisoformat(text)
|
|
37
|
+
except ValueError:
|
|
38
|
+
return None
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _parse_line_ts(line: str) -> datetime | None:
    """Return the ISO timestamp at the start of ``line``, or ``None`` if there is none."""
    found = _ISO_TS.match(line)
    if found is None:
        return None
    date_part, time_part = found.group(1), found.group(2)
    return _parse_dt(f"{date_part} {time_part}")
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _parse_when(value: str | None) -> datetime | None:
    """Turn a user-supplied window bound into a datetime.

    Accepts ``now``, a relative form such as ``15 minutes ago`` / ``2h``, or an
    ISO date-time (a ``Z`` is removed and ``T`` is treated as a space).
    Returns ``None`` for an empty or unparseable value.
    """
    if not value:
        return None

    text = value.strip()
    if text.lower() == "now":
        return datetime.now()

    relative = _REL.match(text)
    unit = _UNIT.get(relative.group(2).lower()) if relative else None
    if unit:
        amount = int(relative.group(1))
        return datetime.now() - timedelta(**{unit: amount})

    # Not relative (or an unknown unit): fall back to ISO parsing.
    return _parse_dt(text.replace("Z", "").replace("T", " ").strip())
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def _is_continuation(line: str) -> bool:
    """Tell whether ``line`` continues the previous event; an empty line never does."""
    if not line:
        return False
    return _CONT.match(line) is not None
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _iter_events(path: str):
    """Stream ``path`` and yield one ``(block_lines, first_ts)`` pair per event.

    An event starts at a line that is not a continuation and absorbs every
    continuation line that follows it. ``first_ts`` is the timestamp parsed
    from the event's first line, or ``None``. Counting scanned lines is left
    to the caller.
    """
    current: list[str] = []
    current_ts: datetime | None = None
    with open(path, errors="replace") as fh:
        for raw in fh:
            text = raw.rstrip("\n")
            if current and _is_continuation(text):
                current.append(text)
            else:
                # A new leading line: flush the event collected so far, if any.
                if current:
                    yield current, current_ts
                current = [text]
                current_ts = _parse_line_ts(text)
    if current:
        yield current, current_ts
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def summarize_log(
    path: str,
    *,
    level: str | None = None,
    since: str | None = None,
    until: str | None = None,
    top: int = 10,
    max_sample_lines: int = 20,
    max_bytes: int = 200,
) -> dict:
    """Return a bounded digest of ``path`` clustered by error signature.

    Args:
        path: Log file to stream.
        level: Minimum severity to keep (``all``, ``warning``, ``error``,
            ``critical``); unknown or missing values keep everything.
        since: Lower window bound (ISO, ``now`` or ``<n> <unit> ago``). Events
            without a parseable timestamp are never filtered by the window.
        until: Upper window bound, same formats as ``since``.
        top: Number of most frequent signatures to return; values below zero
            are treated as zero.
        max_sample_lines: Maximum lines kept in each signature's sample.
        max_bytes: Maximum characters kept per sample line.

    Returns:
        ``{file, window, total_events, distinct_signatures, truncated, top}``.

    Raises:
        SourceNotFoundError: If ``path`` does not exist.
        OSError: On other read failures.
    """
    if not os.path.exists(path):
        raise SourceNotFoundError(f"log file not found: {path}")

    floor = _LEVEL_FLOOR.get((level or "all").lower(), 0)
    since_dt, until_dt = _parse_when(since), _parse_when(until)
    # A negative ``top`` would slice from the wrong end of the ranking.
    limit = max(top, 0)

    groups: dict[str, dict] = {}
    lines_scanned = 0
    total_events = 0

    for block, ts in _iter_events(path):
        lines_scanned += len(block)

        if (since_dt and ts and ts < since_dt) or (until_dt and ts and ts > until_dt):
            continue

        sig = error_signature(block)
        # Event level = highest severity seen on any line.
        rank = max((_LEVEL_FLOOR.get(detect_level(ln) or "", 0) for ln in block), default=0)
        # An event whose signature is the structured ``<Exc>@<frames>`` form
        # counts as "error" when no higher level was found. Structured
        # signatures never contain whitespace, whereas the plain-line fallback
        # is a space-separated normalised line — so requiring "no space" keeps
        # ordinary lines that merely contain "@" (e-mail addresses,
        # ``user@host``) from being promoted.
        if rank == 0 and "@" in sig and " " not in sig:
            rank = 3
        if rank < floor:
            continue

        total_events += 1
        g = groups.get(sig)
        if g is None:
            sample = [ln[:max_bytes] for ln in block[:max_sample_lines]]
            groups[sig] = {"signature": sig, "level_rank": rank, "count": 1, "first_ts": ts, "last_ts": ts, "sample": sample}
            continue

        g["count"] += 1
        g["level_rank"] = max(g["level_rank"], rank)
        if ts:
            if g["first_ts"] is None or ts < g["first_ts"]:
                g["first_ts"] = ts
            if g["last_ts"] is None or ts > g["last_ts"]:
                g["last_ts"] = ts

    # Most frequent first; the sort is stable, so ties keep first-seen order.
    ranked = sorted(groups.values(), key=lambda g: g["count"], reverse=True)
    rank_name = {0: "info", 2: "warning", 3: "error", 4: "critical"}
    top_list = [
        {
            "signature": g["signature"],
            "level": rank_name.get(g["level_rank"], "info"),
            "count": g["count"],
            "first_ts": g["first_ts"].isoformat() if g["first_ts"] else None,
            "last_ts": g["last_ts"].isoformat() if g["last_ts"] else None,
            "sample": g["sample"],
        }
        for g in ranked[:limit]
    ]

    return {
        "file": path,
        "window": {"since": since, "until": until, "lines_scanned": lines_scanned},
        "total_events": total_events,
        "distinct_signatures": len(groups),
        "truncated": len(groups) > limit,
        "top": top_list,
    }
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
"""Exception types raised by the pure helper functions.
|
|
2
|
+
|
|
3
|
+
The functions in this package do the work and *raise* on failure; the thin
|
|
4
|
+
runnable wrappers in ``runspec-logops`` (and any private wrapper that imports the
|
|
5
|
+
helpers) catch these and turn them into the JSON error payloads + non-zero exits
|
|
6
|
+
that the CLI/agent surface expects.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class LogopsCoreError(Exception):
    """Root of the exception hierarchy raised by runspec-logops-core helpers."""
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class SourceNotFoundError(LogopsCoreError):
    """Raised when a required input path — the log file or the repo checkout — is missing."""
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
"""Pure, I/O-free line normalisation and event-signature extraction.
|
|
2
|
+
|
|
3
|
+
This is what makes log condensing cheap: collapse the *variable* parts of a log
|
|
4
|
+
line (timestamps, ids, addresses, numbers, quoted literals) into a stable
|
|
5
|
+
"shape" so that thousands of near-identical lines fold onto one signature. For a
|
|
6
|
+
multi-line stack trace, the signature is the exception class plus the top normalised
|
|
7
|
+
frames, so the same failure clusters regardless of the surrounding noise.
|
|
8
|
+
|
|
9
|
+
Nothing here reads files or the clock — every function is a deterministic pure
|
|
10
|
+
transform, which is what the unit tests pin.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import re
|
|
16
|
+
|
|
17
|
+
# --- pieces of a line that vary run-to-run and must be masked for clustering ---
|
|
18
|
+
_UUID = re.compile(r"\b[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}\b")
|
|
19
|
+
_HEX = re.compile(r"\b0x[0-9a-fA-F]+\b")
|
|
20
|
+
_LONGHEX = re.compile(r"\b[0-9a-fA-F]{16,}\b")
|
|
21
|
+
# A hex-ish id (request id, short hash): >=4 chars, mixing at least one digit and
|
|
22
|
+
# one a-f letter — masks ``7f3a``/``0a1b`` while leaving plain words alone.
|
|
23
|
+
_HEXID = re.compile(r"\b(?=[0-9a-fA-F]*[0-9])(?=[0-9a-fA-F]*[a-fA-F])[0-9a-fA-F]{4,}\b")
|
|
24
|
+
_QUOTED = re.compile(r"""(['"]).*?\1""")
|
|
25
|
+
_NUM = re.compile(r"\d+") # any digit run, incl. unit-glued (412ms) — masked last
|
|
26
|
+
_WS = re.compile(r"\s+")
|
|
27
|
+
|
|
28
|
+
# A leading timestamp: ISO (``2026-06-18T08:01:11`` / ``... 08:01:11,123``),
|
|
29
|
+
# bracketed (``[2026-06-18 08:01:11]``), or syslog (``Jun 18 08:01:11``).
|
|
30
|
+
_LEADING_TS = re.compile(
|
|
31
|
+
r"""^\s*
|
|
32
|
+
(?:
|
|
33
|
+
\[?\d{4}-\d{2}-\d{2}[ T]\d{2}:\d{2}:\d{2}(?:[.,]\d+)?(?:Z|[+-]\d{2}:?\d{2})?\]?
|
|
34
|
+
| [A-Z][a-z]{2}\s+\d{1,2}\s+\d{2}:\d{2}:\d{2}
|
|
35
|
+
)
|
|
36
|
+
\s*""",
|
|
37
|
+
re.VERBOSE,
|
|
38
|
+
)
|
|
39
|
+
|
|
40
|
+
# Common severity tokens, mapped to an ordered rank.
|
|
41
|
+
_LEVELS = {
|
|
42
|
+
"DEBUG": 0,
|
|
43
|
+
"TRACE": 0,
|
|
44
|
+
"INFO": 1,
|
|
45
|
+
"NOTICE": 1,
|
|
46
|
+
"WARN": 2,
|
|
47
|
+
"WARNING": 2,
|
|
48
|
+
"ERROR": 3,
|
|
49
|
+
"ERR": 3,
|
|
50
|
+
"SEVERE": 3,
|
|
51
|
+
"CRITICAL": 4,
|
|
52
|
+
"CRIT": 4,
|
|
53
|
+
"FATAL": 4,
|
|
54
|
+
}
|
|
55
|
+
_LEVEL_RANK = {"all": 0, "debug": 0, "info": 1, "warning": 2, "error": 3, "critical": 4}
|
|
56
|
+
_LEVEL_TOKEN = re.compile(r"\b(DEBUG|TRACE|INFO|NOTICE|WARN(?:ING)?|ERR(?:OR)?|SEVERE|CRIT(?:ICAL)?|FATAL)\b")
|
|
57
|
+
|
|
58
|
+
# An exception/error class name, e.g. ``NullPointerException`` / ``ValueError``.
|
|
59
|
+
_EXC = re.compile(r"\b([A-Za-z_][\w.]*(?:Error|Exception|Failure|Fault))\b")
|
|
60
|
+
|
|
61
|
+
# Stack-frame patterns across common runtimes.
|
|
62
|
+
_FRAME_JAVA = re.compile(r"\bat\s+([\w.$]+)\s*\(") # at com.acme.Foo.bar(Foo.java:42)
|
|
63
|
+
_FRAME_PY = re.compile(r'File\s+"[^"]+",\s+line\s+\d+,\s+in\s+(\S+)') # File "x.py", line 5, in foo
|
|
64
|
+
_FRAME_JS = re.compile(r"\bat\s+([\w.$<>]+)\s*\(") # at Object.fn (/a/b.js:1:2)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def strip_timestamp(line: str) -> str:
    """Drop a leading timestamp prefix from ``line``; return it unchanged when there is none."""
    prefix = _LEADING_TS.match(line)
    if prefix is None:
        return line
    return line[prefix.end():]
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def detect_level(line: str) -> str | None:
    """Return the canonical name of the first severity token in ``line``, or ``None``.

    The token (e.g. ``WARN``, ``SEVERE``) is mapped to its rank and the rank is
    mapped back to a canonical lower-case name such as ``"warning"`` or
    ``"error"``.
    """
    found = _LEVEL_TOKEN.search(line)
    if found is None:
        return None
    severity = _LEVELS[found.group(1).upper()]
    # First canonical name with this rank; "all" is a filter value, not a level.
    return next((name for name, rank in _LEVEL_RANK.items() if name != "all" and rank == severity), None)
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def normalize_line(line: str) -> str:
    """Reduce one log line to a stable shape by masking its volatile parts.

    After the leading timestamp is removed, the masks are applied in this
    order: UUIDs, ``0x`` hex values, long hex blobs, short hex ids, quoted
    literals, digit runs. Whitespace is then collapsed and trimmed. Lines that
    differ only in those parts normalise to the same string.
    """
    shape = strip_timestamp(line)
    # Order matters: digit runs go last so they cannot break up the longer tokens.
    masks = (
        (_UUID, "<uuid>"),
        (_HEX, "<hex>"),
        (_LONGHEX, "<hex>"),
        (_HEXID, "<hex>"),
        (_QUOTED, "<str>"),
        (_NUM, "<n>"),
    )
    for pattern, placeholder in masks:
        shape = pattern.sub(placeholder, shape)
    return _WS.sub(" ", shape).strip()
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def _frames(block: list[str], max_frames: int = 3) -> list[str]:
    """Extract up to ``max_frames`` normalised stack frames from an event block.

    Each line contributes at most one frame: the first of the Python, Java and
    JS frame patterns that matches it. Returns an empty list when
    ``max_frames`` is zero or negative.
    """
    out: list[str] = []
    # The cap is otherwise only checked after a line has been processed, which
    # would let one frame through even when none were requested.
    if max_frames <= 0:
        return out
    for raw in block:
        for pat in (_FRAME_PY, _FRAME_JAVA, _FRAME_JS):
            m = pat.search(raw)
            if m:
                out.append(m.group(1))
                break
        if len(out) >= max_frames:
            break
    return out
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def error_signature(block: list[str], max_frames: int = 3) -> str:
    """Build a short, stable signature for an event of one or more lines.

    With stack frames the result is ``<ExceptionClass>@<frame1>><frame2>...``,
    using the first exception class found (bare name, package stripped) and up
    to ``max_frames`` frames; ``error`` stands in when frames exist but no
    exception class does. With an exception class but no frames the result is
    the class name alone. Otherwise it is the normalised first line, cut to
    200 characters. An empty block gives ``""``.
    """
    if not block:
        return ""

    first_hit = next((m for m in map(_EXC.search, block) if m), None)
    exc = first_hit.group(1).rsplit(".", 1)[-1] if first_hit else None  # bare class name
    frames = _frames(block, max_frames=max_frames)

    if not (exc or frames):
        # Plain line — its normalised shape is the signature.
        return normalize_line(block[0])[:200]

    head = exc or "error"
    if frames:
        return head + "@" + ">".join(frames)
    return head
|
|
File without changes
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
"""build_bundle writes one small zip with the expected members."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import zipfile
|
|
5
|
+
|
|
6
|
+
from runspec_logops_core import build_bundle
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def test_bundle_contains_digest_and_manifest(tmp_path) -> None:
    """A bundle built without a code map holds exactly the digest and the manifest."""
    payload = {"file": "/var/log/app.log", "distinct_signatures": 3, "top": []}
    result = build_bundle(payload, None, dest_dir=str(tmp_path), metadata={"host": "prod-1"})

    assert result["destination"].endswith(".zip")
    assert result["size_bytes"] > 0
    assert set(result["contents"]) == {"digest.json", "manifest.json"}

    with zipfile.ZipFile(result["destination"]) as archive:
        stored_digest = json.loads(archive.read("digest.json"))
        stored_manifest = json.loads(archive.read("manifest.json"))
    assert stored_digest["distinct_signatures"] == 3
    assert stored_manifest["host"] == "prod-1"
    assert stored_manifest["has_code_map"] is False
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def test_bundle_includes_snippets_when_code_map_present(tmp_path) -> None:
    """Passing a code map lists ``snippets.json`` in the result and in the archive."""
    code_map = {"frames": [], "files_matched": 0}
    result = build_bundle({"file": "x"}, code_map, dest_dir=str(tmp_path))

    assert "snippets.json" in result["contents"]
    with zipfile.ZipFile(result["destination"]) as archive:
        members = archive.namelist()
        assert "snippets.json" in members
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
"""map_trace_to_sources resolves frames to bounded source snippets."""
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
|
|
5
|
+
from runspec_logops_core import map_trace_to_sources
|
|
6
|
+
from runspec_logops_core.errors import SourceNotFoundError
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def test_missing_repo_raises(tmp_path) -> None:
    """A repository path that does not exist raises ``SourceNotFoundError``."""
    absent_repo = str(tmp_path / "nope")
    with pytest.raises(SourceNotFoundError):
        map_trace_to_sources(absent_repo, "trace")
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def test_python_traceback_to_snippet(tmp_path) -> None:
    """A Python ``File "...", line N`` frame resolves to a bounded snippet of that file."""
    numbered = [f"line {i}" for i in range(1, 21)]
    (tmp_path / "app.py").write_text("\n".join(numbered) + "\n")
    trace = 'Traceback:\n File "app.py", line 10, in handler\nValueError: x'

    result = map_trace_to_sources(str(tmp_path), trace, context=2)

    assert result["files_matched"] == 1
    first = result["frames"][0]
    assert first["file"] == "app.py"
    assert first["lineno"] == 10
    snippet = first["snippet"]
    assert any("10: line 10" in entry for entry in snippet)
    # ±2 context → at most 5 lines.
    assert len(snippet) <= 5
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def test_basename_search_when_path_not_exact(tmp_path) -> None:
    """A frame naming only ``OrderSvc.java`` resolves to the copy nested under src/acme."""
    package_dir = tmp_path / "src" / "acme"
    package_dir.mkdir(parents=True)
    java_body = "\n".join(f"row {i}" for i in range(1, 100)) + "\n"
    (package_dir / "OrderSvc.java").write_text(java_body)
    trace = "Exception\n\tat com.acme.OrderSvc.price(OrderSvc.java:42)"

    result = map_trace_to_sources(str(tmp_path), trace, context=1)

    assert result["files_matched"] == 1
    resolved = result["frames"][0]
    assert resolved["file"].endswith("OrderSvc.java")
    assert resolved["lineno"] == 42
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def test_signature_resolves_by_class_and_symbol(tmp_path) -> None:
    """An ``Exception@pkg.Class.method`` signature matches the file and reports the symbol."""
    java_source = "class OrderSvc {\n int price() {\n return 0;\n }\n}\n"
    (tmp_path / "OrderSvc.java").write_text(java_source)
    signature = "NullPointerException@com.acme.OrderSvc.price"

    result = map_trace_to_sources(str(tmp_path), signature, context=1)

    assert result["files_matched"] == 1
    assert result["frames"][0]["symbol"] == "price"
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def test_total_lines_capped(tmp_path) -> None:
    """Snippet lines summed across all frames stay within ``max_total_lines``."""
    file_body = "\n".join(str(i) for i in range(1, 200)) + "\n"
    frame_lines = []
    for n in range(4):
        (tmp_path / f"f{n}.py").write_text(file_body)
        frame_lines.append(f' File "f{n}.py", line 100, in g')
    trace = "\n".join(frame_lines)

    result = map_trace_to_sources(str(tmp_path), trace, context=10, max_total_lines=15)

    snippet_sizes = [len(frame["snippet"]) for frame in result["frames"]]
    assert sum(snippet_sizes) <= 15
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
"""summarize_log clusters, bounds its output, filters by level and time window."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
|
|
5
|
+
import pytest
|
|
6
|
+
|
|
7
|
+
from runspec_logops_core import summarize_log
|
|
8
|
+
from runspec_logops_core.errors import SourceNotFoundError
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def _write(tmp_path, lines):
|
|
12
|
+
p = tmp_path / "app.log"
|
|
13
|
+
p.write_text("\n".join(lines) + "\n")
|
|
14
|
+
return str(p)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def test_missing_file_raises(tmp_path) -> None:
    """Summarising a log path that does not exist raises ``SourceNotFoundError``."""
    absent_log = str(tmp_path / "nope.log")
    with pytest.raises(SourceNotFoundError):
        summarize_log(absent_log)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def test_clusters_repeated_errors(tmp_path) -> None:
    """800 copies of one stack trace with varying ids and minutes form a single group."""
    frame = "\tat com.acme.OrderSvc.price(OrderSvc.java:42)"
    events = []
    for i in range(800):
        header = f"2026-06-18T08:{i % 60:02d}:11Z ERROR NullPointerException: id={i}"
        events.extend((header, frame))
    # One unrelated INFO line provides the second distinct signature.
    events.append("2026-06-18T09:00:00Z INFO started ok")
    log_path = _write(tmp_path, events)

    digest = summarize_log(log_path, top=10)

    assert digest["distinct_signatures"] == 2
    leader = digest["top"][0]
    assert leader["count"] == 800
    assert leader["signature"].startswith("NullPointerException@")
    assert leader["level"] == "error"
    assert leader["first_ts"] is not None
    assert leader["last_ts"] is not None
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def test_digest_size_is_bounded_regardless_of_input(tmp_path) -> None:
    """The digest stays small when ``top`` and the sample caps are set low."""
    # 50k distinct noisy lines, but top=5 and small sample caps keep the digest tiny.
    noisy = []
    for i in range(50_000):
        noisy.append(f"2026-06-18T08:00:00Z ERROR Boom{i % 7}Exception: detail {i}")
    log_path = _write(tmp_path, noisy)

    digest = summarize_log(log_path, top=5, max_sample_lines=3, max_bytes=80)

    groups = digest["top"]
    assert len(groups) <= 5
    assert digest["truncated"] is True
    assert digest["window"]["lines_scanned"] == 50_000
    # The whole serialised digest stays small no matter the 50k-line input.
    serialised = json.dumps(digest)
    assert len(serialised) < 4000
    for group in groups:
        samples = group["sample"]
        assert len(samples) <= 3
        assert all(len(text) <= 80 for text in samples)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def test_level_filter_drops_below_floor(tmp_path) -> None:
    """With ``level="error"`` only the ERROR line of an INFO/WARN/ERROR log is counted."""
    log_lines = [
        "2026-06-18T08:00:00Z INFO served ok",
        "2026-06-18T08:00:01Z WARN slow query",
        "2026-06-18T08:00:02Z ERROR boom ValueError",
    ]
    log_path = _write(tmp_path, log_lines)

    digest = summarize_log(log_path, level="error")

    assert digest["total_events"] == 1
    leader = digest["top"][0]
    assert leader["level"] == "error"
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def test_time_window_filters(tmp_path) -> None:
    """Only the event between ``since`` and ``until`` appears in the digest."""
    log_lines = [
        "2026-06-18T08:00:00Z ERROR early FooException",
        "2026-06-18T12:00:00Z ERROR mid BarException",
        "2026-06-18T20:00:00Z ERROR late BazException",
    ]
    log_path = _write(tmp_path, log_lines)

    digest = summarize_log(
        log_path,
        since="2026-06-18T10:00:00",
        until="2026-06-18T18:00:00",
    )

    signatures = {group["signature"] for group in digest["top"]}
    assert any("BarException" in sig for sig in signatures)
    # Neither the early nor the late exception may show up in any signature.
    assert all(
        "FooException" not in sig and "BazException" not in sig
        for sig in signatures
    )
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
"""normalize_line and error_signature are deterministic and cluster correctly."""
|
|
2
|
+
|
|
3
|
+
from runspec_logops_core import detect_level, error_signature, normalize_line
|
|
4
|
+
from runspec_logops_core.signatures import strip_timestamp
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def test_normalize_masks_volatile_parts() -> None:
    """Two lines differing only in timestamp, numbers and ids normalise identically."""
    raw_lines = (
        "2026-06-18T08:01:11Z worker-12 handled request id=7f3a in 412ms",
        "2026-06-18T09:44:02Z worker-99 handled request id=0a1b in 7ms",
    )
    first_shape, second_shape = (normalize_line(raw) for raw in raw_lines)
    # Different timestamps / numbers / ids collapse to the same shape.
    assert first_shape == second_shape
    assert "<n>" in first_shape
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def test_normalize_masks_uuid_and_hex() -> None:
    """A line with a hex address and a UUID yields both ``<hex>`` and ``<uuid>`` markers."""
    shape = normalize_line("error at 0xdeadbeef for 550e8400-e29b-41d4-a716-446655440000")
    for placeholder in ("<hex>", "<uuid>"):
        assert placeholder in shape
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def test_strip_timestamp_variants() -> None:
    """Bracketed and syslog-style prefixes are removed; other lines pass through unchanged."""
    expectations = {
        "[2026-06-18 08:01:11] hello": "hello",
        "Jun 18 08:01:11 hello": "hello",
        "no timestamp here": "no timestamp here",
    }
    for raw, stripped in expectations.items():
        assert strip_timestamp(raw) == stripped
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def test_detect_level() -> None:
    """ERROR maps to "error", WARN to "warning", and a line with no level to ``None``."""
    labelled = (
        ("2026-06-18 ERROR boom", "error"),
        ("WARN something", "warning"),
    )
    for line, expected in labelled:
        assert detect_level(line) == expected
    assert detect_level("plain line") is None
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def test_error_signature_java_stack_clusters() -> None:
    """Two Java traces that differ only in timestamp produce the same signature."""
    stack = [
        "\tat com.acme.OrderSvc.price(OrderSvc.java:42)",
        "\tat com.acme.OrderSvc.checkout(OrderSvc.java:88)",
    ]
    earlier = ["2026-06-18T08:01:11Z ERROR NullPointerException: discount was null", *stack]
    later = ["2026-06-18T09:00:00Z ERROR NullPointerException: discount was null", *stack]

    signature = error_signature(earlier)

    assert signature == error_signature(later)
    assert signature.startswith("NullPointerException@")
    assert "OrderSvc.price" in signature
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def test_error_signature_python_stack() -> None:
    """A Python traceback's signature starts with the exception class and names the frame."""
    traceback_lines = [
        "Traceback (most recent call last):",
        ' File "app.py", line 10, in handler',
        " do_thing()",
        "ValueError: bad input",
    ]
    signature = error_signature(traceback_lines)
    assert signature.startswith("ValueError@")
    assert "handler" in signature
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def test_error_signature_plain_line_falls_back_to_shape() -> None:
    """A single plain line yields a signature with no ``@`` and a masked number."""
    plain_event = ["2026-06-18 INFO served 200 in 5ms"]
    signature = error_signature(plain_event)
    assert "@" not in signature
    assert "<n>" in signature
|