runspec-logops-core 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,61 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *.pyo
5
+ *.pyd
6
+ .Python
7
+ *.egg
8
+ *.egg-info/
9
+ dist/
10
+ build/
11
+ .eggs/
12
+ .venv/
13
+ venv/
14
+ env/
15
+ .env
16
+ pip-wheel-metadata/
17
+ .pytest_cache/
18
+ .mypy_cache/
19
+ .ruff_cache/
20
+ htmlcov/
21
+ .coverage
22
+ coverage.xml
23
+ *.cover
24
+
25
+ # Node
26
+ node_modules/
27
+ dist/
28
+ *.js.map
29
+ .npm
30
+
31
+ # Go
32
+ *.exe
33
+ *.test
34
+ *.out
35
+ vendor/
36
+
37
+ # IDE
38
+ .idea/
39
+ .vscode/
40
+ *.iml
41
+ *.iws
42
+ *.ipr
43
+ .DS_Store
44
+ Thumbs.db
45
+
46
+ # Docs
47
+ site/
48
+
49
+ # Misc
50
+ *.log
51
+ *.tmp
52
+
53
+ # External reference repos (cloned locally, not committed)
54
+ chainlit-docs/
55
+ .chainlit/
56
+
57
+ # Claude Code local config (machine-specific)
58
+ .claude/launch.json
59
+
60
+ # Stray committed test venv (removed from tracking)
61
+ .venv-test/
@@ -0,0 +1,28 @@
1
+ # runspec-logops-core Changelog
2
+
3
+ ## [0.1.0] — 2026-06-18
4
+
5
+ Initial release.
6
+
7
+ The pure-Python logic core behind `runspec-logops`. Provides log-condensing and
8
+ code-mapping helpers as plain importable functions — **no dependency on
9
+ `runspec`, no `runspec.toml`, no console-script entry points** — so a package can
10
+ `from runspec_logops_core import summarize_log` (and the rest) without surfacing
11
+ any runnables in the venv or in `runspec local` / `runspec serve` discovery. This
12
+ is the corporate-facing deliverable: wrap it in a private package, bake in
13
+ corporate defaults as plain params, and ship your own runnables.
14
+
15
+ Each function returns plain data and *raises* on failure (`SourceNotFoundError`
16
+ when an input path is missing); wrappers catch these and render the JSON/exit
17
+ behaviour.
18
+
19
+ Exports:
20
+
21
+ - **Signatures** — `normalize_line`, `error_signature`, `detect_level` (pure
22
+ line/event shaping for cheap clustering)
23
+ - **Digest** — `summarize_log` (streams a log, clusters by signature, returns a
24
+ bounded top-N digest whose size is independent of input size)
25
+ - **Code map** — `map_trace_to_sources` (resolves a stack trace / signature to the
26
+ few relevant source snippets in a checkout)
27
+ - **Bundle** — `build_bundle` (zips digest + snippets + manifest for transfer)
28
+ - **Errors** — `LogopsCoreError`, `SourceNotFoundError`
@@ -0,0 +1,9 @@
1
+ Metadata-Version: 2.4
2
+ Name: runspec-logops-core
3
+ Version: 0.1.0
4
+ Summary: Pure-Python log-condensing + code-mapping helpers — the importable core behind runspec-logops (no runspec dependency, no runnables)
5
+ Requires-Python: >=3.10
6
+ Provides-Extra: dev
7
+ Requires-Dist: mypy; extra == 'dev'
8
+ Requires-Dist: pytest>=8.0; extra == 'dev'
9
+ Requires-Dist: ruff; extra == 'dev'
@@ -0,0 +1,30 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "runspec-logops-core"
7
+ version = "0.1.0"
8
+ requires-python = ">=3.10"
9
+ description = "Pure-Python log-condensing + code-mapping helpers — the importable core behind runspec-logops (no runspec dependency, no runnables)"
10
+ dependencies = []
11
+
12
+ [project.optional-dependencies]
13
+ dev = [
14
+ "ruff",
15
+ "mypy",
16
+ "pytest>=8.0",
17
+ ]
18
+
19
+ [tool.pytest.ini_options]
20
+ testpaths = ["tests"]
21
+
22
+ [tool.mypy]
23
+ python_version = "3.10"
24
+
25
+ [tool.ruff]
26
+ line-length = 200
27
+ target-version = "py310"
28
+
29
+ [tool.ruff.lint]
30
+ select = ["E", "F", "I", "UP", "B", "SIM"]
@@ -0,0 +1,35 @@
1
+ """runspec-logops-core — pure-Python log-condensing + code-mapping helpers.
2
+
3
+ This package has **no dependency on runspec** and ships **no runspec.toml and no
4
+ entry points**, so installing it exposes the helper functions for import without
5
+ surfacing any runnables (it is invisible to ``runspec local`` / ``runspec serve``
6
+ discovery). ``runspec-logops`` depends on it and wraps each helper in a runnable;
7
+ a private (e.g. Nexus-hosted) package can instead import these helpers directly,
8
+ bake in corporate defaults/paths as plain params, and ship its own runnables —
9
+ so only the wrapped runnables ever surface in the venv.
10
+
11
+ Each function returns plain data and *raises* on failure (see
12
+ ``runspec_logops_core.errors``).
13
+ """
14
+
15
+ from runspec_logops_core.bundle import build_bundle
16
+ from runspec_logops_core.codemap import map_trace_to_sources
17
+ from runspec_logops_core.digest import summarize_log
18
+ from runspec_logops_core.errors import LogopsCoreError, SourceNotFoundError
19
+ from runspec_logops_core.signatures import detect_level, error_signature, normalize_line
20
+
21
+ __all__ = [
22
+ # errors
23
+ "LogopsCoreError",
24
+ "SourceNotFoundError",
25
+ # signatures
26
+ "normalize_line",
27
+ "error_signature",
28
+ "detect_level",
29
+ # digest
30
+ "summarize_log",
31
+ # codemap
32
+ "map_trace_to_sources",
33
+ # bundle
34
+ "build_bundle",
35
+ ]
@@ -0,0 +1,48 @@
1
+ """Bundle a digest + code map + metadata into one small zip for transfer.
2
+
3
+ The console's existing ``download_file`` agent tool then pulls this single small
4
+ artifact to local — the whole log and whole repo never transit the network.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import json
10
+ import os
11
+ import zipfile
12
+ from datetime import datetime
13
+
14
+
15
def build_bundle(digest: dict, code_map: dict | None, *, dest_dir: str, metadata: dict | None = None) -> dict:
    """Write ``digest.json`` (+ ``snippets.json`` + ``manifest.json``) into one zip.

    Args:
        digest: JSON-serialisable digest. Its ``file`` and ``distinct_signatures``
            entries (when present) are echoed into the manifest.
        code_map: Optional JSON-serialisable code map; stored as ``snippets.json``
            when it is not ``None``.
        dest_dir: Directory that receives the archive; created if needed.
        metadata: Extra manifest entries. They are merged last, so a key that
            collides with a generated one replaces it.

    Returns:
        The ``backup_files``-style ``{destination, size_bytes, size_mb, contents}``.

    Raises:
        OSError: the directory or archive cannot be written.
        TypeError, ValueError: a payload is not JSON-serialisable (raised before
            any file is created).
    """
    os.makedirs(dest_dir, exist_ok=True)

    # One clock read, so the archive name and the manifest always agree.
    now = datetime.now()
    stem = "logops_digest_" + now.strftime("%Y%m%dT%H%M%S")

    manifest = {
        "generated_at": now.isoformat(),
        "log_file": digest.get("file"),
        "distinct_signatures": digest.get("distinct_signatures"),
        "has_code_map": code_map is not None,
        **(metadata or {}),
    }

    members: dict[str, dict] = {"digest.json": digest, "manifest.json": manifest}
    if code_map is not None:
        members["snippets.json"] = code_map
    # Serialise up front: a bad payload then fails without leaving a stub archive.
    payloads = {name: json.dumps(obj, indent=2) for name, obj in members.items()}

    # The name only has one-second resolution, so create the file exclusively
    # ("x") and add a numeric suffix instead of overwriting an earlier bundle.
    attempt = 0
    while True:
        suffix = f"_{attempt}" if attempt else ""
        archive_path = os.path.join(dest_dir, f"{stem}{suffix}.zip")
        try:
            zf = zipfile.ZipFile(archive_path, "x", compression=zipfile.ZIP_DEFLATED)
        except FileExistsError:
            attempt += 1
            continue
        break

    with zf:
        for name, payload in payloads.items():
            zf.writestr(name, payload)

    size_bytes = os.path.getsize(archive_path)
    return {
        "destination": archive_path,
        "size_bytes": size_bytes,
        "size_mb": round(size_bytes / 1_048_576, 2),
        "contents": list(payloads),
    }
@@ -0,0 +1,158 @@
1
+ """Map a stack trace (or a digest signature) to the few relevant source snippets.
2
+
3
+ Given a trace and the local git checkout, ``map_trace_to_sources`` extracts the
4
+ referenced frames, resolves each to a file in the checkout, and returns only the
5
+ ±context window around each frame's line — capped at ``max_files`` frames and
6
+ ``max_total_lines`` total. The agent gets the handful of lines that matter, never
7
+ the repo. Resolution is plain filesystem (``os.walk``); no ``git`` is required.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import os
13
+ import re
14
+
15
+ from runspec_logops_core.errors import SourceNotFoundError
16
+
17
# Directory names pruned from every ``os.walk`` in this module (VCS metadata,
# dependency trees, caches, build output, IDE state).
_IGNORE_DIRS = {".git", "node_modules", "__pycache__", ".venv", "venv", ".mypy_cache", ".tox", "dist", "build", ".idea"}

# Frame shapes across runtimes.
# Python traceback frame — groups: (file, line, symbol).
_PY = re.compile(r'File\s+"([^"]+)",\s+line\s+(\d+),\s+in\s+(\S+)')
# Java frame ``at pkg.Class.method(File.java:42)`` — groups: (qualified symbol, file, line).
_JAVA = re.compile(r"at\s+([\w.$]+)\(([^()\s:]+):(\d+)\)")
# JS frame ``at fn (/path/file.js:1:2)`` — groups: (optional symbol, path, line);
# the column is matched but not captured.
_JS = re.compile(r"at\s+(?:([\w.$<>]+)\s+)?\(?((?:/|\./|\w:|[\w.\-]+/)[\w./\-]*\.[a-zA-Z]+):(\d+):\d+\)?")
# Catch-all ``file.ext:line`` with a 1-5 letter extension — groups: (file, line).
_GENERIC = re.compile(r"\b([\w./\-]+\.[a-zA-Z]{1,5}):(\d+)\b")
# A capitalised identifier of at least two characters; used to pick the class
# segment out of a dotted signature frame.
_CAPWORD = re.compile(r"^[A-Z]\w+$")
25
+
26
+
27
def _frames_from_trace(trace: str) -> list[tuple[str | None, int | None, str | None]]:
    """Collect ``(file_hint, lineno, symbol)`` candidates from a raw trace.

    The typed frame patterns run first (Python, Java, JS) and the generic
    ``file:line`` catch-all last. A frame is kept only the first time its
    ``(basename, line)`` pair is seen, so the catch-all cannot re-add a frame
    that a typed pattern already captured together with its symbol.
    """
    # (pattern, file group, line group, symbol group or None), in priority order.
    shapes = (
        (_PY, 1, 2, 3),
        (_JAVA, 2, 3, 1),
        (_JS, 2, 3, 1),
        (_GENERIC, 1, 2, None),
    )

    collected: list[tuple[str | None, int | None, str | None]] = []
    known: set[tuple] = set()
    for pattern, file_group, line_group, symbol_group in shapes:
        for match in pattern.finditer(trace):
            hint = match.group(file_group)
            number = int(match.group(line_group))
            name = match.group(symbol_group) if symbol_group is not None else None
            key = ("L", os.path.basename(hint) if hint else None, number)
            if key in known:
                continue
            known.add(key)
            collected.append((hint, number, name))
    return collected
50
+
51
+
52
def _frames_from_signature(trace: str) -> list[tuple[str | None, int | None, str | None]]:
    """Turn a digest signature such as ``Exc@com.acme.OrderSvc.price`` into frames.

    Everything after the first ``@`` is split on ``>``. Each non-empty piece
    yields ``(class, None, method)``: the class is the last capitalised dotted
    segment (``None`` when there is none) and the method is the final segment.
    A string without ``@`` produces no frames.
    """
    _, marker, tail = trace.partition("@")
    if not marker:
        return []

    result: list[tuple[str | None, int | None, str | None]] = []
    for piece in tail.split(">"):
        piece = piece.strip()
        if not piece:
            continue
        segments = piece.split(".")
        owner = None
        for segment in reversed(segments):
            if _CAPWORD.match(segment):
                owner = segment
                break
        result.append((owner, None, segments[-1]))
    return result
66
+
67
+
68
def _find_file(repo_root: str, file_hint: str) -> str | None:
    """Resolve ``file_hint`` to a path under ``repo_root`` (exact rel path, then basename).

    The exact candidate is accepted only when it really lives inside
    ``repo_root``. Hints come from trace text, and an absolute hint or one
    containing ``..`` would otherwise make ``os.path.join`` point at an
    arbitrary file outside the checkout.

    Returns the matching path, or ``None`` when nothing in the checkout matches.
    """
    exact = os.path.join(repo_root, file_hint)
    if os.path.isfile(exact):
        root_real = os.path.realpath(repo_root)
        try:
            inside = os.path.commonpath([root_real, os.path.realpath(exact)]) == root_real
        except ValueError:
            # commonpath refuses to compare paths on different drives (Windows).
            inside = False
        if inside:
            return exact

    # Fall back to the first file in the checkout with the same basename.
    target = os.path.basename(file_hint)
    for dirpath, dirs, files in os.walk(repo_root):
        dirs[:] = [d for d in dirs if d not in _IGNORE_DIRS]
        if target in files:
            return os.path.join(dirpath, target)
    return None
79
+
80
+
81
def _find_class_file(repo_root: str, cls: str) -> str | None:
    """Locate the first file whose name minus extension is ``cls`` (``OrderSvc`` → OrderSvc.java).

    Walks ``repo_root`` top-down, skipping ignored directories, and returns the
    first hit or ``None``.
    """
    for folder, subdirs, filenames in os.walk(repo_root):
        # Prune in place so os.walk never descends into ignored directories.
        subdirs[:] = [entry for entry in subdirs if entry not in _IGNORE_DIRS]
        hit = next((fname for fname in filenames if os.path.splitext(fname)[0] == cls), None)
        if hit is not None:
            return os.path.join(folder, hit)
    return None
89
+
90
+
91
def _symbol_line(lines: list[str], symbol: str) -> int | None:
    """Find the first line that looks like a definition of, or reference to, ``symbol``.

    A line matches when it contains ``def <symbol>``, ``<symbol>(``,
    ``<symbol>`` surrounded by spaces, or ``.<symbol>``. This is a plain
    substring test. Returns the 1-based line number, or ``None`` when no line
    matches.
    """
    patterns = (f"def {symbol}", f"{symbol}(", f" {symbol} ", f".{symbol}")
    return next(
        (number for number, text in enumerate(lines, start=1) if any(p in text for p in patterns)),
        None,
    )
98
+
99
+
100
def _snippet(path: str, lineno: int, context: int) -> list[str]:
    """Return ``lineno`` ±context as ``"<n>: <text>"`` strings (no trailing newline).

    Lines are counted the way stack traces count them: one per newline.
    ``str.splitlines`` is deliberately avoided because it also breaks on form
    feeds and Unicode separators, which would shift every later line number.
    The file is streamed and reading stops once the window is complete.
    """
    first = max(1, lineno - context)
    last = lineno + context
    window: list[str] = []
    with open(path, errors="replace") as fh:
        for number, raw in enumerate(fh, start=1):
            if number > last:
                break
            if number >= first:
                window.append(f"{number}: " + raw.rstrip("\n"))
    return window


def map_trace_to_sources(
    repo_root: str,
    trace: str,
    *,
    context: int = 8,
    max_files: int = 5,
    max_total_lines: int = 200,
) -> dict:
    """Resolve the frames in ``trace`` to source snippets in ``repo_root``.

    Args:
        repo_root: Directory of the local checkout to search.
        trace: A raw stack trace, or a digest signature (``Exc@a.B.c>d.E.f``),
            which is used only when no trace-style frame is found.
        context: Lines shown before and after each frame's line.
        max_files: Maximum number of frames returned.
        max_total_lines: Maximum number of snippet lines across all frames.

    Returns:
        ``{repo, frames, files_matched, truncated}``. Each frame is
        ``{file, lineno, symbol, snippet}`` with ``file`` relative to
        ``repo_root``. ``truncated`` is true only when a cap cut the output
        short; frames that could not be resolved do not count.

    Raises :class:`SourceNotFoundError` if ``repo_root`` is not a directory.
    """
    if not os.path.isdir(repo_root):
        raise SourceNotFoundError(f"repo checkout not found: {repo_root}")

    candidates = _frames_from_trace(trace) or _frames_from_signature(trace)

    frames: list[dict] = []
    total_lines = 0
    truncated = False
    for file_hint, lineno, symbol in candidates:
        if len(frames) >= max_files or total_lines >= max_total_lines:
            # A cap was reached with candidates still pending.
            truncated = True
            break

        path: str | None = None
        # Path-like hints (has a separator, or has an extension and a known line)
        # go through the path resolver; bare class names go to the stem search.
        if file_hint and ("/" in file_hint or ("." in file_hint and lineno is not None)):
            path = _find_file(repo_root, file_hint)
        if path is None and file_hint:
            path = _find_class_file(repo_root, os.path.splitext(os.path.basename(file_hint))[0])
        if path is None:
            continue

        resolved_line = lineno
        if resolved_line is None and symbol:
            # Split on newlines only, so the number found here addresses the
            # same line that _snippet will show.
            with open(path, errors="replace") as fh:
                resolved_line = _symbol_line([ln.rstrip("\n") for ln in fh], symbol)
        if resolved_line is None:
            continue

        snippet = _snippet(path, resolved_line, context)
        room = max(0, max_total_lines - total_lines)
        if len(snippet) > room:
            snippet = snippet[:room]
            truncated = True
        total_lines += len(snippet)
        frames.append({"file": os.path.relpath(path, repo_root), "lineno": resolved_line, "symbol": symbol, "snippet": snippet})

    return {
        "repo": repo_root,
        "frames": frames,
        "files_matched": len(frames),
        "truncated": truncated,
    }
@@ -0,0 +1,167 @@
1
+ """Condense a noisy log into a small, bounded digest.
2
+
3
+ ``summarize_log`` streams a file line-by-line (it never loads the whole file),
4
+ groups continuation/stack lines into events, clusters events by
5
+ :func:`signatures.error_signature`, and returns only the top-N distinct
6
+ signatures with counts, first/last timestamp and one truncated sample each. The
7
+ returned dict's size is bounded by ``top`` × ``max_sample_lines`` × ``max_bytes``
8
+ (``max_bytes`` is applied as a per-line character cap) — independent of how big the
9
+ input log is. That bound is the whole point: the agent reads the digest, never the log.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import os
15
+ import re
16
+ from datetime import datetime, timedelta
17
+
18
+ from runspec_logops_core.errors import SourceNotFoundError
19
+ from runspec_logops_core.signatures import detect_level, error_signature
20
+
21
# Minimum rank an event needs to pass the ``level`` filter. Names that are not
# listed here (e.g. "info", "debug") resolve to 0 through ``.get(..., 0)``.
_LEVEL_FLOOR = {"all": 0, "warning": 2, "error": 3, "critical": 4}

# Leading ISO timestamp captured for window filtering (syslog has no year, so it
# is intentionally left for the no-op path).
# Groups: (date, time); fractional seconds are matched but not captured.
_ISO_TS = re.compile(r"^\s*\[?(\d{4}-\d{2}-\d{2})[ T](\d{2}:\d{2}:\d{2})(?:[.,]\d+)?")
# Relative bound ``<n> <unit>[s] [ago]`` — groups: (count, unit without plural "s").
# NOTE(review): "5ms" matches as unit "m" plus plural "s", i.e. 5 minutes.
_REL = re.compile(r"^\s*(\d+)\s*([a-z]+?)s?(?:\s+ago)?\s*$", re.IGNORECASE)
# Unit spelling -> ``timedelta`` keyword argument name.
_UNIT = {"s": "seconds", "sec": "seconds", "second": "seconds", "m": "minutes", "min": "minutes", "minute": "minutes", "h": "hours", "hour": "hours", "hr": "hours", "d": "days", "day": "days"}

# A line that continues the previous event rather than starting a new one.
# Matches leading whitespace, or a line starting with ``at ``, ``Caused by:``,
# ``...`` or ``File "`` (case-insensitive).
_CONT = re.compile(r"^\s+|^(?:at\s|Caused by:|\.{3}|File\s\")", re.IGNORECASE)
31
+
32
+
33
def _parse_dt(text: str) -> datetime | None:
    """Best-effort parse of an ISO-ish ``YYYY-MM-DD[ T]HH:MM:SS`` string (naive).

    Returns ``None`` when ``text`` is not valid ISO. The result is always
    naive: a UTC offset in ``text`` is dropped and the wall-clock fields are
    kept. Log-line timestamps are parsed without their offset, and comparing a
    naive value with an aware one raises ``TypeError``.
    """
    try:
        parsed = datetime.fromisoformat(text)
    except ValueError:
        return None
    if parsed.tzinfo is not None:
        parsed = parsed.replace(tzinfo=None)
    return parsed
39
+
40
+
41
def _parse_line_ts(line: str) -> datetime | None:
    """Return the ISO timestamp that ``line`` starts with, or ``None`` if it has none."""
    found = _ISO_TS.match(line)
    if found is None:
        return None
    date_part, time_part = found.group(1), found.group(2)
    return _parse_dt(f"{date_part} {time_part}")
47
+
48
+
49
def _parse_when(value: str | None) -> datetime | None:
    """Turn a user-supplied window bound into a datetime.

    Accepts ``now``, a relative ``<n> <unit> ago`` expression, or an ISO
    timestamp (any ``Z`` is removed and ``T`` becomes a space before parsing).
    Empty input and anything unparseable give ``None``, i.e. no bound.
    """
    if not value:
        return None

    text = value.strip()
    if text.lower() == "now":
        return datetime.now()

    relative = _REL.match(text)
    unit = _UNIT.get(relative.group(2).lower()) if relative else None
    if unit:
        amount = int(relative.group(1))
        return datetime.now() - timedelta(**{unit: amount})

    # Not relative (or an unknown unit): fall back to ISO parsing.
    return _parse_dt(text.replace("Z", "").replace("T", " ").strip())
63
+
64
+
65
def _is_continuation(line: str) -> bool:
    """Tell whether ``line`` belongs to the previous event; an empty line never does."""
    if not line:
        return False
    return _CONT.match(line) is not None
67
+
68
+
69
def _iter_events(path: str):
    """Stream ``path`` and yield one ``(block_lines, first_ts)`` pair per event.

    An event starts at a line that is not a continuation and absorbs every
    continuation line (indented / stack-frame) that follows it. ``first_ts`` is
    the timestamp parsed from the event's first line, or ``None``. Counting the
    scanned lines is left to the caller.
    """
    pending: list[str] = []
    pending_ts: datetime | None = None
    with open(path, errors="replace") as handle:
        for raw in handle:
            text = raw.rstrip("\n")
            if pending:
                if _is_continuation(text):
                    pending.append(text)
                    continue
                # A new leading line closes the event collected so far.
                yield pending, pending_ts
            pending, pending_ts = [text], _parse_line_ts(text)
    # Flush the last event once the file is exhausted.
    if pending:
        yield pending, pending_ts
89
+
90
+
91
def summarize_log(
    path: str,
    *,
    level: str | None = None,
    since: str | None = None,
    until: str | None = None,
    top: int = 10,
    max_sample_lines: int = 20,
    max_bytes: int = 200,
) -> dict:
    """Return a bounded digest of ``path`` clustered by error signature.

    Args:
        path: Log file to stream.
        level: Minimum severity to keep (``all``/``warning``/``error``/``critical``,
            case-insensitive); any other value keeps everything.
        since: Lower window bound (ISO, ``now`` or ``<n> <unit> ago``). A value
            that cannot be parsed is treated as no bound.
        until: Upper window bound, same formats as ``since``.
        top: Number of most frequent signatures to return.
        max_sample_lines: Lines kept in each signature's sample.
        max_bytes: Characters kept per sample line.

    Negative ``top``/``max_sample_lines``/``max_bytes`` are treated as 0.
    Events without a parseable leading timestamp are never dropped by the
    window filter. Blank lines count towards ``lines_scanned`` but are not
    events.

    Returns:
        ``{file, window, total_events, distinct_signatures, truncated, top}``.

    Raises :class:`SourceNotFoundError` if the file does not exist; propagates
    :class:`OSError` on other read failures.
    """
    if not os.path.exists(path):
        raise SourceNotFoundError(f"log file not found: {path}")

    floor = _LEVEL_FLOOR.get((level or "all").lower(), 0)
    since_dt, until_dt = _parse_when(since), _parse_when(until)

    # A negative cap would slice from the wrong end (``xs[:-1]`` drops the last
    # item instead of returning nothing), so clamp all three at zero.
    keep = max(0, top)
    sample_lines = max(0, max_sample_lines)
    line_chars = max(0, max_bytes)

    groups: dict[str, dict] = {}
    lines_scanned = 0
    total_events = 0

    for block, ts in _iter_events(path):
        lines_scanned += len(block)

        # Blank separator lines come through as their own one-line events; they
        # carry no information and would all pile up under an empty signature.
        if not any(ln.strip() for ln in block):
            continue

        if (since_dt and ts and ts < since_dt) or (until_dt and ts and ts > until_dt):
            continue

        sig = error_signature(block)
        # Event level = highest severity seen on any line; an exception-bearing
        # event with no explicit level counts as "error".
        rank = max((_LEVEL_FLOOR.get(detect_level(ln) or "", 0) for ln in block), default=0)
        if rank == 0 and "@" in sig:
            rank = 3
        if rank < floor:
            continue

        total_events += 1
        g = groups.get(sig)
        if g is None:
            # The first occurrence supplies the (truncated) sample for its signature.
            sample = [ln[:line_chars] for ln in block[:sample_lines]]
            groups[sig] = {"signature": sig, "level_rank": rank, "count": 1, "first_ts": ts, "last_ts": ts, "sample": sample}
        else:
            g["count"] += 1
            g["level_rank"] = max(g["level_rank"], rank)
            if ts:
                if g["first_ts"] is None or ts < g["first_ts"]:
                    g["first_ts"] = ts
                if g["last_ts"] is None or ts > g["last_ts"]:
                    g["last_ts"] = ts

    # Stable sort: signatures with equal counts stay in first-seen order.
    ranked = sorted(groups.values(), key=lambda g: g["count"], reverse=True)
    rank_name = {0: "info", 2: "warning", 3: "error", 4: "critical"}
    top_list = [
        {
            "signature": g["signature"],
            "level": rank_name.get(g["level_rank"], "info"),
            "count": g["count"],
            "first_ts": g["first_ts"].isoformat() if g["first_ts"] else None,
            "last_ts": g["last_ts"].isoformat() if g["last_ts"] else None,
            "sample": g["sample"],
        }
        for g in ranked[:keep]
    ]

    return {
        "file": path,
        "window": {"since": since, "until": until, "lines_scanned": lines_scanned},
        "total_events": total_events,
        "distinct_signatures": len(groups),
        "truncated": len(groups) > keep,
        "top": top_list,
    }
@@ -0,0 +1,15 @@
1
+ """Exception types raised by the pure helper functions.
2
+
3
+ The functions in this package do the work and *raise* on failure; the thin
4
+ runnable wrappers in ``runspec-logops`` (and any private wrapper that imports the
5
+ helpers) catch these and turn them into the JSON error payloads + non-zero exits
6
+ that the CLI/agent surface expects.
7
+ """
8
+
9
+
10
class LogopsCoreError(Exception):
    """Root of the package's exception hierarchy; catch this to handle any core failure."""
12
+
13
+
14
class SourceNotFoundError(LogopsCoreError):
    """Raised when a required input path (the log file or the repo checkout) is missing."""
@@ -0,0 +1,138 @@
1
+ """Pure, I/O-free line normalisation and event-signature extraction.
2
+
3
+ This is what makes log condensing cheap: collapse the *variable* parts of a log
4
+ line (timestamps, ids, addresses, numbers, quoted literals) into a stable
5
+ "shape" so that thousands of near-identical lines fold onto one signature. For a
6
+ multi-line stack trace, the signature is the exception class plus the top normalised
7
+ frames, so the same failure clusters regardless of the surrounding noise.
8
+
9
+ Nothing here reads files or the clock — every function is a deterministic pure
10
+ transform, which is what the unit tests pin.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import re
16
+
17
# --- pieces of a line that vary run-to-run and must be masked for clustering ---
# Canonical 8-4-4-4-12 hex UUID.
_UUID = re.compile(r"\b[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}\b")
# ``0x``-prefixed hex literal (addresses, flags).
_HEX = re.compile(r"\b0x[0-9a-fA-F]+\b")
# Unprefixed hex run of 16+ characters (digests, long ids).
_LONGHEX = re.compile(r"\b[0-9a-fA-F]{16,}\b")
# A hex-ish id (request id, short hash): >=4 chars, mixing at least one digit and
# one a-f letter — masks ``7f3a``/``0a1b`` while leaving plain words alone.
_HEXID = re.compile(r"\b(?=[0-9a-fA-F]*[0-9])(?=[0-9a-fA-F]*[a-fA-F])[0-9a-fA-F]{4,}\b")
# Shortest span between a pair of matching single or double quotes.
_QUOTED = re.compile(r"""(['"]).*?\1""")
_NUM = re.compile(r"\d+")  # any digit run, incl. unit-glued (412ms) — masked last
# Any whitespace run; collapsed to a single space at the end of normalisation.
_WS = re.compile(r"\s+")
27
+
28
+ # A leading timestamp: ISO (``2026-06-18T08:01:11`` / ``... 08:01:11,123``),
29
+ # bracketed (``[2026-06-18 08:01:11]``), or syslog (``Jun 18 08:01:11``).
30
+ _LEADING_TS = re.compile(
31
+ r"""^\s*
32
+ (?:
33
+ \[?\d{4}-\d{2}-\d{2}[ T]\d{2}:\d{2}:\d{2}(?:[.,]\d+)?(?:Z|[+-]\d{2}:?\d{2})?\]?
34
+ | [A-Z][a-z]{2}\s+\d{1,2}\s+\d{2}:\d{2}:\d{2}
35
+ )
36
+ \s*""",
37
+ re.VERBOSE,
38
+ )
39
+
40
# Common severity tokens, mapped to an ordered rank.
_LEVELS = {
    "DEBUG": 0,
    "TRACE": 0,
    "INFO": 1,
    "NOTICE": 1,
    "WARN": 2,
    "WARNING": 2,
    "ERROR": 3,
    "ERR": 3,
    "SEVERE": 3,
    "CRITICAL": 4,
    "CRIT": 4,
    "FATAL": 4,
}
# Canonical level name -> rank; ``detect_level`` searches it in order (skipping
# "all") to turn a rank back into a name.
_LEVEL_RANK = {"all": 0, "debug": 0, "info": 1, "warning": 2, "error": 3, "critical": 4}
# Compiled without IGNORECASE, so only upper-case tokens are recognised.
_LEVEL_TOKEN = re.compile(r"\b(DEBUG|TRACE|INFO|NOTICE|WARN(?:ING)?|ERR(?:OR)?|SEVERE|CRIT(?:ICAL)?|FATAL)\b")

# An exception/error class name, e.g. ``NullPointerException`` / ``ValueError``.
# Group 1 may be dotted (``java.lang.Foo...Exception``).
_EXC = re.compile(r"\b([A-Za-z_][\w.]*(?:Error|Exception|Failure|Fault))\b")

# Stack-frame patterns across common runtimes.
# Group 1 of each pattern is the frame's symbol.
_FRAME_JAVA = re.compile(r"\bat\s+([\w.$]+)\s*\(")  # at com.acme.Foo.bar(Foo.java:42)
_FRAME_PY = re.compile(r'File\s+"[^"]+",\s+line\s+\d+,\s+in\s+(\S+)')  # File "x.py", line 5, in foo
_FRAME_JS = re.compile(r"\bat\s+([\w.$<>]+)\s*\(")  # at Object.fn (/a/b.js:1:2)
65
+
66
+
67
def strip_timestamp(line: str) -> str:
    """Drop the timestamp prefix ``line`` starts with; lines without one come back unchanged."""
    prefix = _LEADING_TS.match(line)
    if prefix is None:
        return line
    return line[prefix.end():]
70
+
71
+
72
def detect_level(line: str) -> str | None:
    """Name the severity of the first level token in ``line`` (e.g. ``"error"``).

    Returns ``None`` when the line carries no recognised token. Aliases
    collapse onto the canonical name of their rank (``WARN`` → ``"warning"``,
    ``FATAL`` → ``"critical"``).
    """
    found = _LEVEL_TOKEN.search(line)
    if found is None:
        return None
    severity = _LEVELS[found.group(1).upper()]
    # First canonical name with that rank; "all" is a filter keyword, not a level.
    return next(
        (label for label, value in _LEVEL_RANK.items() if label != "all" and value == severity),
        None,
    )
84
+
85
+
86
def normalize_line(line: str) -> str:
    """Reduce a log line to a stable shape by masking its volatile parts.

    The leading timestamp is removed first. Then UUIDs, ``0x`` hex literals,
    long hex blobs, short hex-ish ids, quoted literals and digit runs are
    replaced (in that order) by ``<uuid>``, ``<hex>``, ``<str>`` and ``<n>``.
    Finally whitespace runs are squeezed to one space and the ends trimmed.
    Lines that differ only in those parts normalise to the same string.
    """
    # Order matters: specific shapes must be masked before the bare digit runs.
    masks = (
        (_UUID, "<uuid>"),
        (_HEX, "<hex>"),
        (_LONGHEX, "<hex>"),
        (_HEXID, "<hex>"),
        (_QUOTED, "<str>"),
        (_NUM, "<n>"),
    )
    shape = strip_timestamp(line)
    for pattern, placeholder in masks:
        shape = pattern.sub(placeholder, shape)
    return _WS.sub(" ", shape).strip()
101
+
102
+
103
def _frames(block: list[str], max_frames: int = 3) -> list[str]:
    """Pull the symbols of the leading stack frames out of an event block.

    Each line contributes at most one symbol, taken from the first frame
    pattern that matches it (Python, then Java, then JS). Scanning stops once
    ``max_frames`` symbols have been collected.
    """
    patterns = (_FRAME_PY, _FRAME_JAVA, _FRAME_JS)
    symbols: list[str] = []
    for entry in block:
        # Lazy: later patterns are only tried when earlier ones miss.
        hit = next((m for m in (p.search(entry) for p in patterns) if m), None)
        if hit:
            symbols.append(hit.group(1))
        if len(symbols) >= max_frames:
            break
    return symbols
115
+
116
+
117
def error_signature(block: list[str], max_frames: int = 3) -> str:
    """Build a short, stable signature for an event of one or more lines.

    * With stack frames: ``<ExceptionClass>@<frame1>><frame2>...`` — the bare
      name of the first exception class seen (``error`` when there is none),
      then the leading frames joined by ``>``.
    * With an exception class but no frames: the bare class name alone.
    * Otherwise: the normalised first line, cut to 200 characters.

    An empty block gives ``""``.
    """
    if not block:
        return ""

    # Bare class name of the first exception mentioned anywhere in the block.
    exc = next((m.group(1).rsplit(".", 1)[-1] for m in map(_EXC.search, block) if m), None)
    frames = _frames(block, max_frames=max_frames)

    if not (exc or frames):
        # Plain line — normalised shape is the signature.
        return normalize_line(block[0])[:200]

    head = exc or "error"
    if frames:
        return head + "@" + ">".join(frames)
    return head
File without changes
@@ -0,0 +1,28 @@
1
+ """build_bundle writes one small zip with the expected members."""
2
+
3
+ import json
4
+ import zipfile
5
+
6
+ from runspec_logops_core import build_bundle
7
+
8
+
9
def test_bundle_contains_digest_and_manifest(tmp_path) -> None:
    """Without a code map the archive holds exactly the digest and the manifest."""
    payload = {"file": "/var/log/app.log", "distinct_signatures": 3, "top": []}
    result = build_bundle(payload, None, dest_dir=str(tmp_path), metadata={"host": "prod-1"})

    assert result["destination"].endswith(".zip")
    assert result["size_bytes"] > 0
    assert set(result["contents"]) == {"digest.json", "manifest.json"}

    with zipfile.ZipFile(result["destination"]) as archive:
        stored_digest = json.loads(archive.read("digest.json"))
        stored_manifest = json.loads(archive.read("manifest.json"))
    assert stored_digest["distinct_signatures"] == 3
    # Caller metadata is merged into the manifest next to the generated keys.
    assert stored_manifest["host"] == "prod-1"
    assert stored_manifest["has_code_map"] is False
22
+
23
+
24
def test_bundle_includes_snippets_when_code_map_present(tmp_path) -> None:
    """Passing a code map adds ``snippets.json`` to the result and to the archive."""
    code_map = {"frames": [], "files_matched": 0}
    result = build_bundle({"file": "x"}, code_map, dest_dir=str(tmp_path))
    assert "snippets.json" in result["contents"]
    with zipfile.ZipFile(result["destination"]) as archive:
        member_names = archive.namelist()
    assert "snippets.json" in member_names
@@ -0,0 +1,54 @@
1
+ """map_trace_to_sources resolves frames to bounded source snippets."""
2
+
3
+ import pytest
4
+
5
+ from runspec_logops_core import map_trace_to_sources
6
+ from runspec_logops_core.errors import SourceNotFoundError
7
+
8
+
9
def test_missing_repo_raises(tmp_path) -> None:
    """A checkout path that does not exist is reported as SourceNotFoundError."""
    absent_repo = str(tmp_path / "nope")
    with pytest.raises(SourceNotFoundError):
        map_trace_to_sources(absent_repo, "trace")
12
+
13
+
14
def test_python_traceback_to_snippet(tmp_path) -> None:
    """A Python traceback frame resolves to its file with a ±context window."""
    body = "\n".join(f"line {i}" for i in range(1, 21)) + "\n"
    (tmp_path / "app.py").write_text(body)
    trace = 'Traceback:\n File "app.py", line 10, in handler\nValueError: x'

    result = map_trace_to_sources(str(tmp_path), trace, context=2)
    assert result["files_matched"] == 1

    only_frame = result["frames"][0]
    assert only_frame["file"] == "app.py"
    assert only_frame["lineno"] == 10
    assert any("10: line 10" in row for row in only_frame["snippet"])
    # ±2 context → at most 5 lines.
    assert len(only_frame["snippet"]) <= 5
27
+
28
+
29
def test_basename_search_when_path_not_exact(tmp_path) -> None:
    """A Java frame names only the file; it is found by basename deeper in the tree."""
    package_dir = tmp_path / "src" / "acme"
    package_dir.mkdir(parents=True)
    source = "\n".join(f"row {i}" for i in range(1, 100)) + "\n"
    (package_dir / "OrderSvc.java").write_text(source)
    trace = "Exception\n\tat com.acme.OrderSvc.price(OrderSvc.java:42)"

    result = map_trace_to_sources(str(tmp_path), trace, context=1)
    assert result["files_matched"] == 1
    hit = result["frames"][0]
    assert hit["file"].endswith("OrderSvc.java")
    assert hit["lineno"] == 42
39
+
40
+
41
def test_signature_resolves_by_class_and_symbol(tmp_path) -> None:
    """A digest signature (no line numbers) resolves via class file and method name."""
    java_source = "class OrderSvc {\n int price() {\n return 0;\n }\n}\n"
    (tmp_path / "OrderSvc.java").write_text(java_source)
    signature = "NullPointerException@com.acme.OrderSvc.price"

    result = map_trace_to_sources(str(tmp_path), signature, context=1)
    assert result["files_matched"] == 1
    assert result["frames"][0]["symbol"] == "price"
46
+
47
+
48
def test_total_lines_capped(tmp_path) -> None:
    """The snippets of all frames together never exceed ``max_total_lines``."""
    names = [f"f{n}.py" for n in range(4)]
    body = "\n".join(str(i) for i in range(1, 200)) + "\n"
    for name in names:
        (tmp_path / name).write_text(body)
    trace = "\n".join(f' File "{name}", line 100, in g' for name in names)

    result = map_trace_to_sources(str(tmp_path), trace, context=10, max_total_lines=15)
    emitted = sum(len(frame["snippet"]) for frame in result["frames"])
    assert emitted <= 15
@@ -0,0 +1,81 @@
1
+ """summarize_log clusters, bounds its output, filters by level and time window."""
2
+
3
+ import json
4
+
5
+ import pytest
6
+
7
+ from runspec_logops_core import summarize_log
8
+ from runspec_logops_core.errors import SourceNotFoundError
9
+
10
+
11
def _write(tmp_path, lines):
    """Write ``lines`` (newline-terminated) to ``app.log`` under ``tmp_path``; return its path as str."""
    log_file = tmp_path / "app.log"
    content = "\n".join(lines) + "\n"
    log_file.write_text(content)
    return str(log_file)
15
+
16
+
17
def test_missing_file_raises(tmp_path) -> None:
    """A log path that does not exist is reported as SourceNotFoundError."""
    absent_log = str(tmp_path / "nope.log")
    with pytest.raises(SourceNotFoundError):
        summarize_log(absent_log)
20
+
21
+
22
def test_clusters_repeated_errors(tmp_path) -> None:
    """800 copies of one crash fold onto a single signature; the INFO line is the second."""
    lines = []
    for i in range(800):
        lines += [
            f"2026-06-18T08:{i % 60:02d}:11Z ERROR NullPointerException: id={i}",
            "\tat com.acme.OrderSvc.price(OrderSvc.java:42)",
        ]
    lines.append("2026-06-18T09:00:00Z INFO started ok")
    path = _write(tmp_path, lines)

    digest = summarize_log(path, top=10)
    assert digest["distinct_signatures"] == 2

    leader = digest["top"][0]
    assert leader["count"] == 800
    assert leader["signature"].startswith("NullPointerException@")
    assert leader["level"] == "error"
    assert leader["first_ts"] is not None and leader["last_ts"] is not None
37
+
38
+
39
def test_digest_size_is_bounded_regardless_of_input(tmp_path) -> None:
    """The digest stays tiny for a 50k-line input because of ``top`` and the sample caps."""
    # 50k noisy lines spread over 7 exception classes; top=5 forces truncation.
    lines = [f"2026-06-18T08:00:00Z ERROR Boom{i % 7}Exception: detail {i}" for i in range(50_000)]
    path = _write(tmp_path, lines)

    digest = summarize_log(path, top=5, max_sample_lines=3, max_bytes=80)
    assert len(digest["top"]) <= 5
    assert digest["truncated"] is True
    assert digest["window"]["lines_scanned"] == 50_000
    # The whole serialised digest stays small no matter the 50k-line input.
    assert len(json.dumps(digest)) < 4000
    for group in digest["top"]:
        sample = group["sample"]
        assert len(sample) <= 3
        assert all(len(row) <= 80 for row in sample)
53
+
54
+
55
def test_level_filter_drops_below_floor(tmp_path) -> None:
    """``level="error"`` keeps the ERROR event and drops the INFO and WARN ones."""
    log_lines = [
        "2026-06-18T08:00:00Z INFO served ok",
        "2026-06-18T08:00:01Z WARN slow query",
        "2026-06-18T08:00:02Z ERROR boom ValueError",
    ]
    path = _write(tmp_path, log_lines)

    digest = summarize_log(path, level="error")
    assert digest["total_events"] == 1
    assert digest["top"][0]["level"] == "error"
67
+
68
+
69
def test_time_window_filters(tmp_path) -> None:
    """Only the event inside the [since, until] window survives."""
    log_lines = [
        "2026-06-18T08:00:00Z ERROR early FooException",
        "2026-06-18T12:00:00Z ERROR mid BarException",
        "2026-06-18T20:00:00Z ERROR late BazException",
    ]
    path = _write(tmp_path, log_lines)

    digest = summarize_log(path, since="2026-06-18T10:00:00", until="2026-06-18T18:00:00")
    signatures = {group["signature"] for group in digest["top"]}
    assert any("BarException" in sig for sig in signatures)
    assert not any("FooException" in sig or "BazException" in sig for sig in signatures)
@@ -0,0 +1,65 @@
1
+ """normalize_line and error_signature are deterministic and cluster correctly."""
2
+
3
+ from runspec_logops_core import detect_level, error_signature, normalize_line
4
+ from runspec_logops_core.signatures import strip_timestamp
5
+
6
+
7
def test_normalize_masks_volatile_parts() -> None:
    """Lines differing only in timestamp, numbers and ids share one shape."""
    first = normalize_line("2026-06-18T08:01:11Z worker-12 handled request id=7f3a in 412ms")
    second = normalize_line("2026-06-18T09:44:02Z worker-99 handled request id=0a1b in 7ms")
    # Different timestamps / numbers / ids collapse to the same shape.
    assert first == second
    assert "<n>" in first
13
+
14
+
15
def test_normalize_masks_uuid_and_hex() -> None:
    """A ``0x`` address and a UUID are replaced by their placeholders."""
    shape = normalize_line("error at 0xdeadbeef for 550e8400-e29b-41d4-a716-446655440000")
    for placeholder in ("<hex>", "<uuid>"):
        assert placeholder in shape
19
+
20
+
21
def test_strip_timestamp_variants() -> None:
    """Bracketed ISO and syslog prefixes are removed; other lines are untouched."""
    expectations = {
        "[2026-06-18 08:01:11] hello": "hello",
        "Jun 18 08:01:11 hello": "hello",
        "no timestamp here": "no timestamp here",
    }
    for raw, stripped in expectations.items():
        assert strip_timestamp(raw) == stripped
25
+
26
+
27
def test_detect_level() -> None:
    """Level tokens map to canonical names; a line without one gives None."""
    error_level = detect_level("2026-06-18 ERROR boom")
    warning_level = detect_level("WARN something")
    no_level = detect_level("plain line")
    assert error_level == "error"
    assert warning_level == "warning"
    assert no_level is None
31
+
32
+
33
def test_error_signature_java_stack_clusters() -> None:
    """The same Java crash at two different times produces one signature."""
    stack = [
        "\tat com.acme.OrderSvc.price(OrderSvc.java:42)",
        "\tat com.acme.OrderSvc.checkout(OrderSvc.java:88)",
    ]
    morning = ["2026-06-18T08:01:11Z ERROR NullPointerException: discount was null", *stack]
    later = ["2026-06-18T09:00:00Z ERROR NullPointerException: discount was null", *stack]

    signature = error_signature(morning)
    assert signature == error_signature(later)
    assert signature.startswith("NullPointerException@")
    assert "OrderSvc.price" in signature
48
+
49
+
50
def test_error_signature_python_stack() -> None:
    """A Python traceback yields ``<Exception>@<function>``."""
    traceback_lines = [
        "Traceback (most recent call last):",
        ' File "app.py", line 10, in handler',
        " do_thing()",
        "ValueError: bad input",
    ]
    signature = error_signature(traceback_lines)
    assert signature.startswith("ValueError@")
    assert "handler" in signature
60
+
61
+
62
def test_error_signature_plain_line_falls_back_to_shape() -> None:
    """With no exception or frames the signature is the normalised line itself."""
    plain_event = ["2026-06-18 INFO served 200 in 5ms"]
    signature = error_signature(plain_event)
    assert "@" not in signature
    assert "<n>" in signature