runspec-logops-core 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- runspec_logops_core/__init__.py +35 -0
- runspec_logops_core/bundle.py +48 -0
- runspec_logops_core/codemap.py +158 -0
- runspec_logops_core/digest.py +167 -0
- runspec_logops_core/errors.py +15 -0
- runspec_logops_core/signatures.py +138 -0
- runspec_logops_core-0.1.0.dist-info/METADATA +9 -0
- runspec_logops_core-0.1.0.dist-info/RECORD +9 -0
- runspec_logops_core-0.1.0.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
"""runspec-logops-core — pure-Python log-condensing + code-mapping helpers.
|
|
2
|
+
|
|
3
|
+
This package has **no dependency on runspec** and ships **no runspec.toml and no
|
|
4
|
+
entry points**, so installing it exposes the helper functions for import without
|
|
5
|
+
surfacing any runnables (it is invisible to ``runspec local`` / ``runspec serve``
|
|
6
|
+
discovery). ``runspec-logops`` depends on it and wraps each helper in a runnable;
|
|
7
|
+
a private (e.g. Nexus-hosted) package can instead import these helpers directly,
|
|
8
|
+
bake in corporate defaults/paths as plain params, and ship its own runnables —
|
|
9
|
+
so only the wrapped runnables ever surface in the venv.
|
|
10
|
+
|
|
11
|
+
Each function returns plain data and *raises* on failure (see
|
|
12
|
+
``runspec_logops_core.errors``).
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from runspec_logops_core.bundle import build_bundle
|
|
16
|
+
from runspec_logops_core.codemap import map_trace_to_sources
|
|
17
|
+
from runspec_logops_core.digest import summarize_log
|
|
18
|
+
from runspec_logops_core.errors import LogopsCoreError, SourceNotFoundError
|
|
19
|
+
from runspec_logops_core.signatures import detect_level, error_signature, normalize_line
|
|
20
|
+
|
|
21
|
+
__all__ = [
|
|
22
|
+
# errors
|
|
23
|
+
"LogopsCoreError",
|
|
24
|
+
"SourceNotFoundError",
|
|
25
|
+
# signatures
|
|
26
|
+
"normalize_line",
|
|
27
|
+
"error_signature",
|
|
28
|
+
"detect_level",
|
|
29
|
+
# digest
|
|
30
|
+
"summarize_log",
|
|
31
|
+
# codemap
|
|
32
|
+
"map_trace_to_sources",
|
|
33
|
+
# bundle
|
|
34
|
+
"build_bundle",
|
|
35
|
+
]
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
"""Bundle a digest + code map + metadata into one small zip for transfer.
|
|
2
|
+
|
|
3
|
+
The console's existing ``download_file`` agent tool then pulls this single small
|
|
4
|
+
artifact to local — the whole log and whole repo never transit the network.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
import os
|
|
11
|
+
import zipfile
|
|
12
|
+
from datetime import datetime
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def build_bundle(digest: dict, code_map: dict | None, *, dest_dir: str, metadata: dict | None = None) -> dict:
    """Write ``digest.json`` (+ ``snippets.json`` + ``manifest.json``) into one zip.

    Args:
        digest: JSON-serialisable digest; its ``"file"`` and
            ``"distinct_signatures"`` entries (if present) are copied into
            the manifest.
        code_map: JSON-serialisable code map, written as ``snippets.json``.
            ``None`` means no ``snippets.json`` member is written.
        dest_dir: Directory that receives the archive; created if missing.
        metadata: Extra manifest entries. They are merged last, so a key
            that collides with a built-in manifest key replaces it.

    Returns:
        ``{"destination", "size_bytes", "size_mb", "contents"}`` where
        ``contents`` lists the archive member names in write order.

    Raises:
        OSError: if the directory or the archive cannot be written.

    Note:
        The archive name has one-second resolution, so a second bundle
        written to the same ``dest_dir`` within the same second replaces the
        first (the zip is opened in ``"w"`` mode).
    """
    os.makedirs(dest_dir, exist_ok=True)

    # Read the clock once: the archive name and the manifest's
    # ``generated_at`` must describe the same instant.
    now = datetime.now()
    archive_path = os.path.join(dest_dir, f"logops_digest_{now.strftime('%Y%m%dT%H%M%S')}.zip")

    manifest = {
        "generated_at": now.isoformat(),
        "log_file": digest.get("file"),
        "distinct_signatures": digest.get("distinct_signatures"),
        "has_code_map": code_map is not None,
        **(metadata or {}),
    }

    # Member name -> payload, in the order the members are written.
    members: dict[str, dict] = {"digest.json": digest, "manifest.json": manifest}
    if code_map is not None:
        members["snippets.json"] = code_map

    with zipfile.ZipFile(archive_path, "w", compression=zipfile.ZIP_DEFLATED) as zf:
        for member_name, payload in members.items():
            zf.writestr(member_name, json.dumps(payload, indent=2))

    size_bytes = os.path.getsize(archive_path)
    return {
        "destination": archive_path,
        "size_bytes": size_bytes,
        "size_mb": round(size_bytes / 1_048_576, 2),
        "contents": list(members),
    }
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
"""Map a stack trace (or a digest signature) to the few relevant source snippets.
|
|
2
|
+
|
|
3
|
+
Given a trace and the local git checkout, ``map_trace_to_sources`` extracts the
|
|
4
|
+
referenced frames, resolves each to a file in the checkout, and returns only the
|
|
5
|
+
±context window around each frame's line — capped at ``max_files`` frames and
|
|
6
|
+
``max_total_lines`` total. The agent gets the handful of lines that matter, never
|
|
7
|
+
the repo. Resolution is plain filesystem (``os.walk``); no ``git`` is required.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import os
|
|
13
|
+
import re
|
|
14
|
+
|
|
15
|
+
from runspec_logops_core.errors import SourceNotFoundError
|
|
16
|
+
|
|
17
|
+
_IGNORE_DIRS = {".git", "node_modules", "__pycache__", ".venv", "venv", ".mypy_cache", ".tox", "dist", "build", ".idea"}
|
|
18
|
+
|
|
19
|
+
# Frame shapes across runtimes.
|
|
20
|
+
_PY = re.compile(r'File\s+"([^"]+)",\s+line\s+(\d+),\s+in\s+(\S+)')
|
|
21
|
+
_JAVA = re.compile(r"at\s+([\w.$]+)\(([^()\s:]+):(\d+)\)")
|
|
22
|
+
_JS = re.compile(r"at\s+(?:([\w.$<>]+)\s+)?\(?((?:/|\./|\w:|[\w.\-]+/)[\w./\-]*\.[a-zA-Z]+):(\d+):\d+\)?")
|
|
23
|
+
_GENERIC = re.compile(r"\b([\w./\-]+\.[a-zA-Z]{1,5}):(\d+)\b")
|
|
24
|
+
_CAPWORD = re.compile(r"^[A-Z]\w+$")
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _frames_from_trace(trace: str) -> list[tuple[str | None, int | None, str | None]]:
    """Collect ``(file_hint, lineno, symbol)`` candidates from a raw trace.

    The patterns run in a fixed order (Python, Java, JS, then the generic
    ``file:line`` catch-all), so the result is grouped by pattern rather than
    by position in ``trace``. A frame is kept only the first time its
    ``(basename, line)`` pair is seen, which stops the catch-all from
    re-adding a frame that a typed pattern already captured with its symbol.
    """
    collected: list[tuple[str | None, int | None, str | None]] = []
    seen_keys: set[tuple] = set()

    # (pattern, file group, line group, symbol group or None)
    layouts = (
        (_PY, 1, 2, 3),
        (_JAVA, 2, 3, 1),
        (_JS, 2, 3, 1),
        (_GENERIC, 1, 2, None),
    )
    for pattern, file_grp, line_grp, symbol_grp in layouts:
        for hit in pattern.finditer(trace):
            file_hint = hit.group(file_grp)
            lineno = int(hit.group(line_grp))
            symbol = hit.group(symbol_grp) if symbol_grp is not None else None
            # Every pattern here yields a line number, so the key is always
            # the ("L", basename, line) form.
            key = ("L", os.path.basename(file_hint) if file_hint else None, lineno)
            if key in seen_keys:
                continue
            seen_keys.add(key)
            collected.append((file_hint, lineno, symbol))
    return collected
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _frames_from_signature(trace: str) -> list[tuple[str | None, int | None, str | None]]:
|
|
53
|
+
"""Extract frames from a digest signature like ``Exc@com.acme.OrderSvc.price``."""
|
|
54
|
+
if "@" not in trace:
|
|
55
|
+
return []
|
|
56
|
+
out: list[tuple[str | None, int | None, str | None]] = []
|
|
57
|
+
for frame in trace.split("@", 1)[1].split(">"):
|
|
58
|
+
frame = frame.strip()
|
|
59
|
+
if not frame:
|
|
60
|
+
continue
|
|
61
|
+
parts = frame.split(".")
|
|
62
|
+
cls = next((p for p in reversed(parts) if _CAPWORD.match(p)), None)
|
|
63
|
+
method = parts[-1] if parts else None
|
|
64
|
+
out.append((cls, None, method))
|
|
65
|
+
return out
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _find_file(repo_root: str, file_hint: str) -> str | None:
|
|
69
|
+
"""Resolve ``file_hint`` to a path under ``repo_root`` (exact rel path, then basename)."""
|
|
70
|
+
exact = os.path.join(repo_root, file_hint)
|
|
71
|
+
if os.path.isfile(exact):
|
|
72
|
+
return exact
|
|
73
|
+
target = os.path.basename(file_hint)
|
|
74
|
+
for dirpath, dirs, files in os.walk(repo_root):
|
|
75
|
+
dirs[:] = [d for d in dirs if d not in _IGNORE_DIRS]
|
|
76
|
+
if target in files:
|
|
77
|
+
return os.path.join(dirpath, target)
|
|
78
|
+
return None
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def _find_class_file(repo_root: str, cls: str) -> str | None:
|
|
82
|
+
"""Find a file whose stem equals the class name (e.g. ``OrderSvc`` → OrderSvc.java)."""
|
|
83
|
+
for dirpath, dirs, files in os.walk(repo_root):
|
|
84
|
+
dirs[:] = [d for d in dirs if d not in _IGNORE_DIRS]
|
|
85
|
+
for name in files:
|
|
86
|
+
if os.path.splitext(name)[0] == cls:
|
|
87
|
+
return os.path.join(dirpath, name)
|
|
88
|
+
return None
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def _symbol_line(lines: list[str], symbol: str) -> int | None:
|
|
92
|
+
"""Return the 1-based line number where ``symbol`` is defined/first referenced."""
|
|
93
|
+
needles = (f"def {symbol}", f"{symbol}(", f" {symbol} ", f".{symbol}")
|
|
94
|
+
for i, line in enumerate(lines, start=1):
|
|
95
|
+
if any(n in line for n in needles):
|
|
96
|
+
return i
|
|
97
|
+
return None
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def _snippet(path: str, lineno: int, context: int) -> list[str]:
|
|
101
|
+
"""Return ``lineno`` ±context as ``"<n>: <text>"`` strings (no trailing newline)."""
|
|
102
|
+
with open(path, errors="replace") as fh:
|
|
103
|
+
lines = fh.read().splitlines()
|
|
104
|
+
start = max(0, lineno - context - 1)
|
|
105
|
+
end = min(len(lines), lineno + context)
|
|
106
|
+
return [f"{i + 1}: {lines[i]}" for i in range(start, end)]
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def map_trace_to_sources(
    repo_root: str,
    trace: str,
    *,
    context: int = 8,
    max_files: int = 5,
    max_total_lines: int = 200,
) -> dict:
    """Resolve the frames in ``trace`` to source snippets in ``repo_root``.

    ``trace`` is first parsed as a raw stack trace; if that yields no frames
    it is parsed as a digest signature (``Exc@pkg.Class.method>...``).

    Args:
        repo_root: Path of the local checkout to search.
        trace: Raw stack-trace text or a digest signature.
        context: Lines of source to include on each side of a frame's line.
        max_files: Maximum number of frames to return.
        max_total_lines: Maximum number of snippet lines across all frames.

    Returns:
        ``{"repo", "frames", "files_matched", "truncated"}``. Each frame is
        ``{"file", "lineno", "symbol", "snippet"}`` with ``file`` relative to
        ``repo_root``. ``files_matched`` is the number of frames returned.
        ``truncated`` is ``True`` only when a cap took effect: candidates
        were left unprocessed because ``max_files``/``max_total_lines`` was
        reached, or a snippet was cut short to fit ``max_total_lines``.
        Candidates that simply could not be resolved do not set it.

    Raises:
        SourceNotFoundError: if ``repo_root`` is not a directory.
    """
    if not os.path.isdir(repo_root):
        raise SourceNotFoundError(f"repo checkout not found: {repo_root}")

    candidates = _frames_from_trace(trace) or _frames_from_signature(trace)

    frames: list[dict] = []
    total_lines = 0
    truncated = False
    for file_hint, lineno, symbol in candidates:
        if len(frames) >= max_files or total_lines >= max_total_lines:
            # A cap was reached with at least this candidate still pending.
            truncated = True
            break

        path: str | None = None
        # Path-like hints (contain "/"), or dotted names that came with a
        # line number, are looked up as files first.
        if file_hint and ("/" in file_hint or ("." in file_hint and lineno is not None)):
            path = _find_file(repo_root, file_hint)
        # Otherwise (or on a miss) look for a file named after the hint's stem.
        if path is None and file_hint:
            path = _find_class_file(repo_root, os.path.splitext(os.path.basename(file_hint))[0])
        if path is None:
            continue

        resolved_line = lineno
        if resolved_line is None and symbol:
            # Signature frames carry no line number: locate the symbol instead.
            with open(path, errors="replace") as fh:
                resolved_line = _symbol_line(fh.read().splitlines(), symbol)
        if resolved_line is None:
            continue

        snippet = _snippet(path, resolved_line, context)
        # The guard at the top of the loop guarantees room > 0 here.
        room = max_total_lines - total_lines
        if len(snippet) > room:
            snippet = snippet[:room]
            truncated = True
        total_lines += len(snippet)
        frames.append(
            {
                "file": os.path.relpath(path, repo_root),
                "lineno": resolved_line,
                "symbol": symbol,
                "snippet": snippet,
            }
        )

    return {
        "repo": repo_root,
        "frames": frames,
        "files_matched": len(frames),
        "truncated": truncated,
    }
|
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
"""Condense a noisy log into a small, bounded digest.
|
|
2
|
+
|
|
3
|
+
``summarize_log`` streams a file line-by-line (it never loads the whole file),
|
|
4
|
+
groups continuation/stack lines into events, clusters events by
|
|
5
|
+
:func:`signatures.error_signature`, and returns only the top-N distinct
|
|
6
|
+
signatures with counts, first/last timestamp and one truncated sample each. The
|
|
7
|
+
returned dict's size is bounded by ``top`` × ``max_sample_lines`` × ``max_bytes``
|
|
8
|
+
— independent of how big the input log is. That bound is the whole point: the
|
|
9
|
+
agent reads the digest, never the log.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import os
|
|
15
|
+
import re
|
|
16
|
+
from datetime import datetime, timedelta
|
|
17
|
+
|
|
18
|
+
from runspec_logops_core.errors import SourceNotFoundError
|
|
19
|
+
from runspec_logops_core.signatures import detect_level, error_signature
|
|
20
|
+
|
|
21
|
+
_LEVEL_FLOOR = {"all": 0, "warning": 2, "error": 3, "critical": 4}
|
|
22
|
+
|
|
23
|
+
# Leading ISO timestamp captured for window filtering (syslog has no year, so it
|
|
24
|
+
# is intentionally left for the no-op path).
|
|
25
|
+
_ISO_TS = re.compile(r"^\s*\[?(\d{4}-\d{2}-\d{2})[ T](\d{2}:\d{2}:\d{2})(?:[.,]\d+)?")
|
|
26
|
+
_REL = re.compile(r"^\s*(\d+)\s*([a-z]+?)s?(?:\s+ago)?\s*$", re.IGNORECASE)
|
|
27
|
+
_UNIT = {"s": "seconds", "sec": "seconds", "second": "seconds", "m": "minutes", "min": "minutes", "minute": "minutes", "h": "hours", "hour": "hours", "hr": "hours", "d": "days", "day": "days"}
|
|
28
|
+
|
|
29
|
+
# A line that continues the previous event rather than starting a new one.
|
|
30
|
+
_CONT = re.compile(r"^\s+|^(?:at\s|Caused by:|\.{3}|File\s\")", re.IGNORECASE)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _parse_dt(text: str) -> datetime | None:
|
|
34
|
+
"""Best-effort parse of an ISO-ish ``YYYY-MM-DD[ T]HH:MM:SS`` string (naive)."""
|
|
35
|
+
try:
|
|
36
|
+
return datetime.fromisoformat(text)
|
|
37
|
+
except ValueError:
|
|
38
|
+
return None
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _parse_line_ts(line: str) -> datetime | None:
    """Return the ISO timestamp that opens ``line``, or ``None`` if it has none."""
    found = _ISO_TS.match(line)
    if found is None:
        return None
    date_part, time_part = found.group(1, 2)
    return _parse_dt(f"{date_part} {time_part}")
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _parse_when(value: str | None) -> datetime | None:
|
|
50
|
+
"""Parse a user-supplied window bound: ISO, ``now``, or ``<n> <unit> ago``."""
|
|
51
|
+
if not value:
|
|
52
|
+
return None
|
|
53
|
+
v = value.strip()
|
|
54
|
+
if v.lower() == "now":
|
|
55
|
+
return datetime.now()
|
|
56
|
+
rel = _REL.match(v)
|
|
57
|
+
if rel:
|
|
58
|
+
unit = _UNIT.get(rel.group(2).lower())
|
|
59
|
+
if unit:
|
|
60
|
+
return datetime.now() - timedelta(**{unit: int(rel.group(1))})
|
|
61
|
+
iso = _parse_dt(v.replace("Z", "").replace("T", " ").strip())
|
|
62
|
+
return iso
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def _is_continuation(line: str) -> bool:
|
|
66
|
+
return bool(line) and bool(_CONT.match(line))
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _iter_events(path: str):
|
|
70
|
+
"""Yield ``(block_lines, first_ts)`` events, streaming the file.
|
|
71
|
+
|
|
72
|
+
An event is a leading (non-indented) line plus the indented / stack-frame
|
|
73
|
+
lines that follow it. ``lines_scanned`` is tracked by the caller.
|
|
74
|
+
"""
|
|
75
|
+
block: list[str] = []
|
|
76
|
+
block_ts: datetime | None = None
|
|
77
|
+
with open(path, errors="replace") as fh:
|
|
78
|
+
for raw in fh:
|
|
79
|
+
line = raw.rstrip("\n")
|
|
80
|
+
if block and _is_continuation(line):
|
|
81
|
+
block.append(line)
|
|
82
|
+
continue
|
|
83
|
+
if block:
|
|
84
|
+
yield block, block_ts
|
|
85
|
+
block = [line]
|
|
86
|
+
block_ts = _parse_line_ts(line)
|
|
87
|
+
if block:
|
|
88
|
+
yield block, block_ts
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def summarize_log(
    path: str,
    *,
    level: str | None = None,
    since: str | None = None,
    until: str | None = None,
    top: int = 10,
    max_sample_lines: int = 20,
    max_bytes: int = 200,
) -> dict:
    """Return a bounded digest of ``path`` clustered by error signature.

    Args:
        path: Log file to stream.
        level: Minimum severity to keep (``all``, ``warning``, ``error``,
            ``critical``; case-insensitive). ``None`` or an unrecognised
            value keeps everything.
        since: Lower window bound, in any form ``_parse_when`` accepts.
        until: Upper window bound, same forms as ``since``.
        top: Number of signatures to return, most frequent first.
        max_sample_lines: Lines kept in each signature's sample.
        max_bytes: Each sample line is cut to its first ``max_bytes``
            characters (a ``str`` slice, so characters rather than encoded
            bytes).

    Returns:
        ``{"file", "window", "total_events", "distinct_signatures",
        "truncated", "top"}``. ``truncated`` is ``True`` when more distinct
        signatures exist than ``top``. ``window["lines_scanned"]`` counts
        every line read, including lines of events that were filtered out.

    Raises:
        SourceNotFoundError: if ``path`` does not exist.
        OSError: on other read failures.

    Notes:
        Events whose lines are all blank are skipped: they are not log
        events, and counting them would create an empty-signature cluster
        that can take a ``top`` slot. Events without a parseable timestamp
        are never excluded by the ``since``/``until`` window.
    """
    if not os.path.exists(path):
        raise SourceNotFoundError(f"log file not found: {path}")

    floor = _LEVEL_FLOOR.get((level or "all").lower(), 0)
    since_dt, until_dt = _parse_when(since), _parse_when(until)

    groups: dict[str, dict] = {}
    lines_scanned = 0
    total_events = 0

    for block, ts in _iter_events(path):
        lines_scanned += len(block)

        # Blank separator lines are not events.
        if not any(ln.strip() for ln in block):
            continue

        # The window only applies to events that carry a timestamp.
        if ts is not None and ((since_dt and ts < since_dt) or (until_dt and ts > until_dt)):
            continue

        sig = error_signature(block)
        # Event level = highest severity seen on any line; an exception-bearing
        # event with no explicit level counts as "error".
        ranks = [_LEVEL_FLOOR.get(detect_level(ln) or "", 0) for ln in block]
        rank = max(ranks) if ranks else 0
        # NOTE(review): "@" is used as the marker for an exception signature,
        # but a plain-line signature that happens to contain "@" is promoted
        # too — confirm whether that is acceptable.
        if rank == 0 and "@" in sig:
            rank = 3
        if rank < floor:
            continue

        total_events += 1
        g = groups.get(sig)
        if g is None:
            # Only the first occurrence contributes the (truncated) sample.
            sample = [ln[:max_bytes] for ln in block[:max_sample_lines]]
            groups[sig] = {
                "signature": sig,
                "level_rank": rank,
                "count": 1,
                "first_ts": ts,
                "last_ts": ts,
                "sample": sample,
            }
            continue

        g["count"] += 1
        g["level_rank"] = max(g["level_rank"], rank)
        if ts:
            if g["first_ts"] is None or ts < g["first_ts"]:
                g["first_ts"] = ts
            if g["last_ts"] is None or ts > g["last_ts"]:
                g["last_ts"] = ts

    # Stable sort: signatures with equal counts keep first-seen order.
    ranked = sorted(groups.values(), key=lambda g: g["count"], reverse=True)
    rank_name = {0: "info", 2: "warning", 3: "error", 4: "critical"}
    top_list = [
        {
            "signature": g["signature"],
            "level": rank_name.get(g["level_rank"], "info"),
            "count": g["count"],
            "first_ts": g["first_ts"].isoformat() if g["first_ts"] else None,
            "last_ts": g["last_ts"].isoformat() if g["last_ts"] else None,
            "sample": g["sample"],
        }
        for g in ranked[:top]
    ]

    return {
        "file": path,
        "window": {"since": since, "until": until, "lines_scanned": lines_scanned},
        "total_events": total_events,
        "distinct_signatures": len(groups),
        "truncated": len(groups) > top,
        "top": top_list,
    }
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
"""Exception types raised by the pure helper functions.
|
|
2
|
+
|
|
3
|
+
The functions in this package do the work and *raise* on failure; the thin
|
|
4
|
+
runnable wrappers in ``runspec-logops`` (and any private wrapper that imports the
|
|
5
|
+
helpers) catch these and turn them into the JSON error payloads + non-zero exits
|
|
6
|
+
that the CLI/agent surface expects.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class LogopsCoreError(Exception):
    """Root of the runspec-logops-core exception hierarchy.

    Catch this to handle every failure the package raises itself.
    """
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class SourceNotFoundError(LogopsCoreError):
    """Raised when a required input path is missing.

    Covers both the log file to digest and the repository checkout to map.
    """
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
"""Pure, I/O-free line normalisation and event-signature extraction.
|
|
2
|
+
|
|
3
|
+
This is what makes log condensing cheap: collapse the *variable* parts of a log
|
|
4
|
+
line (timestamps, ids, addresses, numbers, quoted literals) into a stable
|
|
5
|
+
"shape" so that thousands of near-identical lines fold onto one signature. For a
|
|
6
|
+
multi-line stack trace, the signature is the exception class plus the top normalised
|
|
7
|
+
frames, so the same failure clusters regardless of the surrounding noise.
|
|
8
|
+
|
|
9
|
+
Nothing here reads files or the clock — every function is a deterministic pure
|
|
10
|
+
transform, which is what the unit tests pin.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import re
|
|
16
|
+
|
|
17
|
+
# --- pieces of a line that vary run-to-run and must be masked for clustering ---
|
|
18
|
+
_UUID = re.compile(r"\b[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}\b")
|
|
19
|
+
_HEX = re.compile(r"\b0x[0-9a-fA-F]+\b")
|
|
20
|
+
_LONGHEX = re.compile(r"\b[0-9a-fA-F]{16,}\b")
|
|
21
|
+
# A hex-ish id (request id, short hash): >=4 chars, mixing at least one digit and
|
|
22
|
+
# one a-f letter — masks ``7f3a``/``0a1b`` while leaving plain words alone.
|
|
23
|
+
_HEXID = re.compile(r"\b(?=[0-9a-fA-F]*[0-9])(?=[0-9a-fA-F]*[a-fA-F])[0-9a-fA-F]{4,}\b")
|
|
24
|
+
_QUOTED = re.compile(r"""(['"]).*?\1""")
|
|
25
|
+
_NUM = re.compile(r"\d+") # any digit run, incl. unit-glued (412ms) — masked last
|
|
26
|
+
_WS = re.compile(r"\s+")
|
|
27
|
+
|
|
28
|
+
# A leading timestamp: ISO (``2026-06-18T08:01:11`` / ``... 08:01:11,123``),
|
|
29
|
+
# bracketed (``[2026-06-18 08:01:11]``), or syslog (``Jun 18 08:01:11``).
|
|
30
|
+
_LEADING_TS = re.compile(
|
|
31
|
+
r"""^\s*
|
|
32
|
+
(?:
|
|
33
|
+
\[?\d{4}-\d{2}-\d{2}[ T]\d{2}:\d{2}:\d{2}(?:[.,]\d+)?(?:Z|[+-]\d{2}:?\d{2})?\]?
|
|
34
|
+
| [A-Z][a-z]{2}\s+\d{1,2}\s+\d{2}:\d{2}:\d{2}
|
|
35
|
+
)
|
|
36
|
+
\s*""",
|
|
37
|
+
re.VERBOSE,
|
|
38
|
+
)
|
|
39
|
+
|
|
40
|
+
# Common severity tokens, mapped to an ordered rank.
|
|
41
|
+
_LEVELS = {
|
|
42
|
+
"DEBUG": 0,
|
|
43
|
+
"TRACE": 0,
|
|
44
|
+
"INFO": 1,
|
|
45
|
+
"NOTICE": 1,
|
|
46
|
+
"WARN": 2,
|
|
47
|
+
"WARNING": 2,
|
|
48
|
+
"ERROR": 3,
|
|
49
|
+
"ERR": 3,
|
|
50
|
+
"SEVERE": 3,
|
|
51
|
+
"CRITICAL": 4,
|
|
52
|
+
"CRIT": 4,
|
|
53
|
+
"FATAL": 4,
|
|
54
|
+
}
|
|
55
|
+
_LEVEL_RANK = {"all": 0, "debug": 0, "info": 1, "warning": 2, "error": 3, "critical": 4}
|
|
56
|
+
_LEVEL_TOKEN = re.compile(r"\b(DEBUG|TRACE|INFO|NOTICE|WARN(?:ING)?|ERR(?:OR)?|SEVERE|CRIT(?:ICAL)?|FATAL)\b")
|
|
57
|
+
|
|
58
|
+
# An exception/error class name, e.g. ``NullPointerException`` / ``ValueError``.
|
|
59
|
+
_EXC = re.compile(r"\b([A-Za-z_][\w.]*(?:Error|Exception|Failure|Fault))\b")
|
|
60
|
+
|
|
61
|
+
# Stack-frame patterns across common runtimes.
|
|
62
|
+
_FRAME_JAVA = re.compile(r"\bat\s+([\w.$]+)\s*\(") # at com.acme.Foo.bar(Foo.java:42)
|
|
63
|
+
_FRAME_PY = re.compile(r'File\s+"[^"]+",\s+line\s+\d+,\s+in\s+(\S+)') # File "x.py", line 5, in foo
|
|
64
|
+
_FRAME_JS = re.compile(r"\bat\s+([\w.$<>]+)\s*\(") # at Object.fn (/a/b.js:1:2)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def strip_timestamp(line: str) -> str:
    """Return ``line`` without its leading timestamp prefix.

    The line is returned unchanged when it does not start with a timestamp.
    """
    # The pattern is anchored at the start of the string, so dropping the
    # matched prefix equals a single substitution with "".
    prefix = _LEADING_TS.match(line)
    if prefix is None:
        return line
    return line[prefix.end():]
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def detect_level(line: str) -> str | None:
    """Return the canonical severity name for the first level token in ``line``.

    The token (e.g. ``WARN``, ``SEVERE``) is mapped to its rank and the rank
    back to the first canonical name carrying it (``"debug"``, ``"info"``,
    ``"warning"``, ``"error"`` or ``"critical"``). Returns ``None`` when the
    line holds no level token.
    """
    found = _LEVEL_TOKEN.search(line)
    if found is None:
        return None
    severity = _LEVELS[found.group(1).upper()]
    # "all" shares rank 0 with "debug" but is a filter value, not a level.
    return next(
        (name for name, rank in _LEVEL_RANK.items() if rank == severity and name != "all"),
        None,
    )
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def normalize_line(line: str) -> str:
    """Reduce one log line to a stable shape by masking its volatile parts.

    Applied in order: strip the leading timestamp, then mask UUIDs
    (``<uuid>``), ``0x`` addresses, long hex blobs and short hex-ish ids
    (all ``<hex>``), quoted literals (``<str>``) and digit runs (``<n>``).
    Finally whitespace runs collapse to one space and the ends are trimmed.
    Lines that differ only in those parts produce the same string.
    """
    # Order matters: hex-like tokens must be masked before bare digit runs.
    masks = (
        (_UUID, "<uuid>"),
        (_HEX, "<hex>"),
        (_LONGHEX, "<hex>"),
        (_HEXID, "<hex>"),
        (_QUOTED, "<str>"),
        (_NUM, "<n>"),
    )
    shape = strip_timestamp(line)
    for pattern, placeholder in masks:
        shape = pattern.sub(placeholder, shape)
    return _WS.sub(" ", shape).strip()
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def _frames(block: list[str], max_frames: int = 3) -> list[str]:
|
|
104
|
+
"""Extract up to ``max_frames`` normalised stack frames from an event block."""
|
|
105
|
+
out: list[str] = []
|
|
106
|
+
for raw in block:
|
|
107
|
+
for pat in (_FRAME_PY, _FRAME_JAVA, _FRAME_JS):
|
|
108
|
+
m = pat.search(raw)
|
|
109
|
+
if m:
|
|
110
|
+
out.append(m.group(1))
|
|
111
|
+
break
|
|
112
|
+
if len(out) >= max_frames:
|
|
113
|
+
break
|
|
114
|
+
return out
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def error_signature(block: list[str], max_frames: int = 3) -> str:
    """Build a short, stable signature for an event of one or more lines.

    * With stack frames: ``<ExceptionClass>@<frame1>><frame2>...`` — the bare
      name of the first exception class seen (``error`` if none) followed by
      up to ``max_frames`` frame symbols joined with ``>``.
    * Exception class but no frames: just the bare class name.
    * Neither: the normalised first line, cut to 200 characters.

    An empty ``block`` yields ``""``.
    """
    if not block:
        return ""

    exc: str | None = None
    for raw in block:
        hit = _EXC.search(raw)
        if hit:
            # Keep only the class name, dropping any package prefix.
            exc = hit.group(1).rsplit(".", 1)[-1]
            break

    frames = _frames(block, max_frames=max_frames)
    if not exc and not frames:
        # Plain line — its normalised shape is the signature.
        return normalize_line(block[0])[:200]

    head = exc or "error"
    if frames:
        return f"{head}@{'>'.join(frames)}"
    return head
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: runspec-logops-core
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Pure-Python log-condensing + code-mapping helpers — the importable core behind runspec-logops (no runspec dependency, no runnables)
|
|
5
|
+
Requires-Python: >=3.10
|
|
6
|
+
Provides-Extra: dev
|
|
7
|
+
Requires-Dist: mypy; extra == 'dev'
|
|
8
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
9
|
+
Requires-Dist: ruff; extra == 'dev'
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
runspec_logops_core/__init__.py,sha256=imfGQnqUJY8uUIAv007g6kc6wVknWgXrF8t8bVOOsho,1324
|
|
2
|
+
runspec_logops_core/bundle.py,sha256=tcZSVSaQinqSBE58FiUEvLbQ3OTEORmwOsDOJKAIIMs,1794
|
|
3
|
+
runspec_logops_core/codemap.py,sha256=PN2HPh3i6BtUV0mfRY6vcgl1r74o5gMhwNAt8FOYdmY,6349
|
|
4
|
+
runspec_logops_core/digest.py,sha256=5UdiL0cbv7mL1-flTonQwXa2yeFz1Qwq7nY1Nl7PAvE,5994
|
|
5
|
+
runspec_logops_core/errors.py,sha256=iJnGf8G7B6Ei4b_FOz0-GECQVXJ3h7_70ZVCH3JdAj0,560
|
|
6
|
+
runspec_logops_core/signatures.py,sha256=VX8PYoWrGI05dTBKMmo1j6E0ntAua2XC8UebHAX7XhM,5227
|
|
7
|
+
runspec_logops_core-0.1.0.dist-info/METADATA,sha256=TfW42N3Ue47-gmaPsEuC_2F9Qmuljj5TfT9oFuNjwCw,365
|
|
8
|
+
runspec_logops_core-0.1.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
|
|
9
|
+
runspec_logops_core-0.1.0.dist-info/RECORD,,
|