runspec-logops-core 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,35 @@
1
+ """runspec-logops-core — pure-Python log-condensing + code-mapping helpers.
2
+
3
+ This package has **no dependency on runspec** and ships **no runspec.toml and no
4
+ entry points**, so installing it exposes the helper functions for import without
5
+ surfacing any runnables (it is invisible to ``runspec local`` / ``runspec serve``
6
+ discovery). ``runspec-logops`` depends on it and wraps each helper in a runnable;
7
+ a private (e.g. Nexus-hosted) package can instead import these helpers directly,
8
+ bake in corporate defaults/paths as plain params, and ship its own runnables —
9
+ so only the wrapped runnables ever surface in the venv.
10
+
11
+ Each function returns plain data and *raises* on failure (see
12
+ ``runspec_logops_core.errors``).
13
+ """
14
+
15
+ from runspec_logops_core.bundle import build_bundle
16
+ from runspec_logops_core.codemap import map_trace_to_sources
17
+ from runspec_logops_core.digest import summarize_log
18
+ from runspec_logops_core.errors import LogopsCoreError, SourceNotFoundError
19
+ from runspec_logops_core.signatures import detect_level, error_signature, normalize_line
20
+
21
# Names re-exported from the submodules imported above.
__all__ = [
    # exception types (errors)
    "LogopsCoreError",
    "SourceNotFoundError",
    # line / event helpers (signatures)
    "normalize_line",
    "error_signature",
    "detect_level",
    # log condensing (digest)
    "summarize_log",
    # trace-to-source mapping (codemap)
    "map_trace_to_sources",
    # transfer archive (bundle)
    "build_bundle",
]
@@ -0,0 +1,48 @@
1
+ """Bundle a digest + code map + metadata into one small zip for transfer.
2
+
3
+ The console's existing ``download_file`` agent tool then pulls this single small
4
+ artifact to local — the whole log and whole repo never transit the network.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import json
10
+ import os
11
+ import zipfile
12
+ from datetime import datetime
13
+
14
+
15
+ def build_bundle(digest: dict, code_map: dict | None, *, dest_dir: str, metadata: dict | None = None) -> dict:
16
+ """Write ``digest.json`` (+ ``snippets.json`` + ``manifest.json``) into one zip.
17
+
18
+ Returns the ``backup_files``-style ``{destination, size_bytes, size_mb,
19
+ contents}``. Creates ``dest_dir`` if needed; propagates :class:`OSError` on
20
+ write failure.
21
+ """
22
+ os.makedirs(dest_dir, exist_ok=True)
23
+ timestamp = datetime.now().strftime("%Y%m%dT%H%M%S")
24
+ archive_path = os.path.join(dest_dir, f"logops_digest_{timestamp}.zip")
25
+
26
+ manifest = {
27
+ "generated_at": datetime.now().isoformat(),
28
+ "log_file": digest.get("file"),
29
+ "distinct_signatures": digest.get("distinct_signatures"),
30
+ "has_code_map": code_map is not None,
31
+ **(metadata or {}),
32
+ }
33
+
34
+ contents = ["digest.json", "manifest.json"]
35
+ with zipfile.ZipFile(archive_path, "w", compression=zipfile.ZIP_DEFLATED) as zf:
36
+ zf.writestr("digest.json", json.dumps(digest, indent=2))
37
+ zf.writestr("manifest.json", json.dumps(manifest, indent=2))
38
+ if code_map is not None:
39
+ zf.writestr("snippets.json", json.dumps(code_map, indent=2))
40
+ contents.append("snippets.json")
41
+
42
+ size_bytes = os.path.getsize(archive_path)
43
+ return {
44
+ "destination": archive_path,
45
+ "size_bytes": size_bytes,
46
+ "size_mb": round(size_bytes / 1_048_576, 2),
47
+ "contents": contents,
48
+ }
@@ -0,0 +1,158 @@
1
+ """Map a stack trace (or a digest signature) to the few relevant source snippets.
2
+
3
+ Given a trace and the local git checkout, ``map_trace_to_sources`` extracts the
4
+ referenced frames, resolves each to a file in the checkout, and returns only the
5
+ ±context window around each frame's line — capped at ``max_files`` frames and
6
+ ``max_total_lines`` total. The agent gets the handful of lines that matter, never
7
+ the repo. Resolution is plain filesystem (``os.walk``); no ``git`` is required.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import os
13
+ import re
14
+
15
+ from runspec_logops_core.errors import SourceNotFoundError
16
+
17
+ _IGNORE_DIRS = {".git", "node_modules", "__pycache__", ".venv", "venv", ".mypy_cache", ".tox", "dist", "build", ".idea"}
18
+
19
+ # Frame shapes across runtimes.
20
+ _PY = re.compile(r'File\s+"([^"]+)",\s+line\s+(\d+),\s+in\s+(\S+)')
21
+ _JAVA = re.compile(r"at\s+([\w.$]+)\(([^()\s:]+):(\d+)\)")
22
+ _JS = re.compile(r"at\s+(?:([\w.$<>]+)\s+)?\(?((?:/|\./|\w:|[\w.\-]+/)[\w./\-]*\.[a-zA-Z]+):(\d+):\d+\)?")
23
+ _GENERIC = re.compile(r"\b([\w./\-]+\.[a-zA-Z]{1,5}):(\d+)\b")
24
+ _CAPWORD = re.compile(r"^[A-Z]\w+$")
25
+
26
+
27
def _frames_from_trace(trace: str) -> list[tuple[str | None, int | None, str | None]]:
    """Collect ``(file_hint, lineno, symbol)`` candidates from raw trace text.

    The typed patterns (Python, Java, JS) are scanned first, each over the whole
    trace, and the generic ``file:line`` pattern last; results keep that order.
    """
    collected: list[tuple[str | None, int | None, str | None]] = []
    known_keys: set[tuple] = set()

    def remember(file_hint: str | None, lineno: int | None, symbol: str | None) -> None:
        # With a line number the key is (basename, line), so the generic
        # catch-all cannot re-add a frame a typed pattern already produced
        # (with its symbol). Without one, the key is (file_hint, symbol).
        if lineno is None:
            dedup_key = ("S", file_hint, symbol)
        else:
            base = os.path.basename(file_hint) if file_hint else None
            dedup_key = ("L", base, lineno)
        if dedup_key in known_keys:
            return
        known_keys.add(dedup_key)
        collected.append((file_hint, lineno, symbol))

    # (pattern, file group, line group, symbol group)
    typed_layouts = ((_PY, 1, 2, 3), (_JAVA, 2, 3, 1), (_JS, 2, 3, 1))
    for pattern, file_grp, line_grp, symbol_grp in typed_layouts:
        for hit in pattern.finditer(trace):
            remember(hit.group(file_grp), int(hit.group(line_grp)), hit.group(symbol_grp))

    for hit in _GENERIC.finditer(trace):
        remember(hit.group(1), int(hit.group(2)), None)
    return collected
50
+
51
+
52
def _frames_from_signature(trace: str) -> list[tuple[str | None, int | None, str | None]]:
    """Turn a digest signature such as ``Exc@com.acme.OrderSvc.price`` into frames.

    Everything after the first ``@`` is split on ``>``; each non-blank piece
    yields ``(class_name, None, method)`` where ``class_name`` is the right-most
    capitalised dotted segment (or ``None``) and ``method`` is the last segment.
    Returns ``[]`` when the text contains no ``@``.
    """
    _, separator, frame_part = trace.partition("@")
    if not separator:
        return []

    result: list[tuple[str | None, int | None, str | None]] = []
    for piece in frame_part.split(">"):
        piece = piece.strip()
        if not piece:
            continue
        segments = piece.split(".")
        owner = None
        for segment in reversed(segments):
            if _CAPWORD.match(segment):
                owner = segment
                break
        result.append((owner, None, segments[-1]))
    return result
66
+
67
+
68
+ def _find_file(repo_root: str, file_hint: str) -> str | None:
69
+ """Resolve ``file_hint`` to a path under ``repo_root`` (exact rel path, then basename)."""
70
+ exact = os.path.join(repo_root, file_hint)
71
+ if os.path.isfile(exact):
72
+ return exact
73
+ target = os.path.basename(file_hint)
74
+ for dirpath, dirs, files in os.walk(repo_root):
75
+ dirs[:] = [d for d in dirs if d not in _IGNORE_DIRS]
76
+ if target in files:
77
+ return os.path.join(dirpath, target)
78
+ return None
79
+
80
+
81
+ def _find_class_file(repo_root: str, cls: str) -> str | None:
82
+ """Find a file whose stem equals the class name (e.g. ``OrderSvc`` → OrderSvc.java)."""
83
+ for dirpath, dirs, files in os.walk(repo_root):
84
+ dirs[:] = [d for d in dirs if d not in _IGNORE_DIRS]
85
+ for name in files:
86
+ if os.path.splitext(name)[0] == cls:
87
+ return os.path.join(dirpath, name)
88
+ return None
89
+
90
+
91
+ def _symbol_line(lines: list[str], symbol: str) -> int | None:
92
+ """Return the 1-based line number where ``symbol`` is defined/first referenced."""
93
+ needles = (f"def {symbol}", f"{symbol}(", f" {symbol} ", f".{symbol}")
94
+ for i, line in enumerate(lines, start=1):
95
+ if any(n in line for n in needles):
96
+ return i
97
+ return None
98
+
99
+
100
+ def _snippet(path: str, lineno: int, context: int) -> list[str]:
101
+ """Return ``lineno`` ±context as ``"<n>: <text>"`` strings (no trailing newline)."""
102
+ with open(path, errors="replace") as fh:
103
+ lines = fh.read().splitlines()
104
+ start = max(0, lineno - context - 1)
105
+ end = min(len(lines), lineno + context)
106
+ return [f"{i + 1}: {lines[i]}" for i in range(start, end)]
107
+
108
+
109
def map_trace_to_sources(
    repo_root: str,
    trace: str,
    *,
    context: int = 8,
    max_files: int = 5,
    max_total_lines: int = 200,
) -> dict:
    """Resolve the frames in ``trace`` to source snippets in ``repo_root``.

    Frames are extracted with :func:`_frames_from_trace`; if that finds none the
    text is treated as a digest signature (:func:`_frames_from_signature`). Each
    candidate is resolved to a file, then to a line (the frame's own line
    number, or the first line mentioning its symbol), and a ± ``context`` window
    is collected. Candidates that cannot be resolved are skipped.

    Args:
        repo_root: Path of the local checkout to search.
        trace: Raw stack-trace text or a digest signature.
        context: Lines of context on each side of the frame line.
        max_files: Maximum number of frames returned.
        max_total_lines: Maximum snippet lines summed over all frames.

    Returns:
        ``{"repo", "frames", "files_matched", "truncated"}``. ``truncated`` is
        true only when output was actually cut by a cap: candidates were left
        unexamined because ``max_files`` / ``max_total_lines`` was reached, or a
        snippet was clipped to fit ``max_total_lines``. A candidate that simply
        could not be resolved does not mark the result as truncated.

    Raises:
        SourceNotFoundError: if ``repo_root`` is not a directory.
    """
    if not os.path.isdir(repo_root):
        raise SourceNotFoundError(f"repo checkout not found: {repo_root}")

    candidates = _frames_from_trace(trace) or _frames_from_signature(trace)

    frames: list[dict] = []
    total_lines = 0
    truncated = False
    for file_hint, lineno, symbol in candidates:
        if len(frames) >= max_files or total_lines >= max_total_lines:
            # A cap was hit while candidates remained.
            truncated = True
            break

        path: str | None = None
        # Path-like hints (contain a separator, or an extension plus a line
        # number) are looked up as files; anything else — or a failed file
        # lookup — falls back to matching the file stem as a class name.
        if file_hint and ("/" in file_hint or "." in file_hint and lineno is not None):
            path = _find_file(repo_root, file_hint)
        if path is None and file_hint:
            path = _find_class_file(repo_root, os.path.splitext(os.path.basename(file_hint))[0])
        if path is None:
            continue

        resolved_line = lineno
        if resolved_line is None and symbol:
            with open(path, errors="replace") as fh:
                resolved_line = _symbol_line(fh.read().splitlines(), symbol)
        if resolved_line is None:
            continue

        snippet = _snippet(path, resolved_line, context)
        room = max(0, max_total_lines - total_lines)
        if len(snippet) > room:
            snippet = snippet[:room]
            truncated = True
        total_lines += len(snippet)
        frames.append({"file": os.path.relpath(path, repo_root), "lineno": resolved_line, "symbol": symbol, "snippet": snippet})

    return {
        "repo": repo_root,
        "frames": frames,
        "files_matched": len(frames),
        "truncated": truncated,
    }
@@ -0,0 +1,167 @@
1
+ """Condense a noisy log into a small, bounded digest.
2
+
3
+ ``summarize_log`` streams a file line-by-line (it never loads the whole file),
4
+ groups continuation/stack lines into events, clusters events by
5
+ :func:`signatures.error_signature`, and returns only the top-N distinct
6
+ signatures with counts, first/last timestamp and one truncated sample each. The
7
+ returned dict's size is bounded by ``top`` × ``max_sample_lines`` × ``max_bytes``
8
+ — independent of how big the input log is. That bound is the whole point: the
9
+ agent reads the digest, never the log.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import os
15
+ import re
16
+ from datetime import datetime, timedelta
17
+
18
+ from runspec_logops_core.errors import SourceNotFoundError
19
+ from runspec_logops_core.signatures import detect_level, error_signature
20
+
21
+ _LEVEL_FLOOR = {"all": 0, "warning": 2, "error": 3, "critical": 4}
22
+
23
+ # Leading ISO timestamp captured for window filtering (syslog has no year, so it
24
+ # is intentionally left for the no-op path).
25
+ _ISO_TS = re.compile(r"^\s*\[?(\d{4}-\d{2}-\d{2})[ T](\d{2}:\d{2}:\d{2})(?:[.,]\d+)?")
26
+ _REL = re.compile(r"^\s*(\d+)\s*([a-z]+?)s?(?:\s+ago)?\s*$", re.IGNORECASE)
27
+ _UNIT = {"s": "seconds", "sec": "seconds", "second": "seconds", "m": "minutes", "min": "minutes", "minute": "minutes", "h": "hours", "hour": "hours", "hr": "hours", "d": "days", "day": "days"}
28
+
29
+ # A line that continues the previous event rather than starting a new one.
30
+ _CONT = re.compile(r"^\s+|^(?:at\s|Caused by:|\.{3}|File\s\")", re.IGNORECASE)
31
+
32
+
33
+ def _parse_dt(text: str) -> datetime | None:
34
+ """Best-effort parse of an ISO-ish ``YYYY-MM-DD[ T]HH:MM:SS`` string (naive)."""
35
+ try:
36
+ return datetime.fromisoformat(text)
37
+ except ValueError:
38
+ return None
39
+
40
+
41
def _parse_line_ts(line: str) -> datetime | None:
    """Return the ISO timestamp that starts ``line``, or ``None`` if it has none."""
    found = _ISO_TS.match(line)
    if found is None:
        return None
    date_part, time_part = found.group(1), found.group(2)
    return _parse_dt(f"{date_part} {time_part}")
47
+
48
+
49
def _parse_when(value: str | None) -> datetime | None:
    """Parse a user-supplied window bound: ISO, ``now``, or ``<n> <unit> ago``.

    Returns a *naive* datetime, or ``None`` when ``value`` is empty or cannot
    be parsed. Relative forms are resolved against ``datetime.now()``.

    An ISO value carrying a UTC offset (``...+02:00``) parses to an aware
    datetime. Line timestamps are parsed naively, and comparing naive with
    aware datetimes raises ``TypeError``. The offset is therefore dropped
    (wall-clock fields kept), the same way a trailing ``Z`` is dropped.
    """
    if not value:
        return None
    v = value.strip()
    if v.lower() == "now":
        return datetime.now()
    rel = _REL.match(v)
    if rel:
        unit = _UNIT.get(rel.group(2).lower())
        if unit:
            return datetime.now() - timedelta(**{unit: int(rel.group(1))})
    # Unknown unit or not a relative form: fall through to ISO parsing.
    iso = _parse_dt(v.replace("Z", "").replace("T", " ").strip())
    if iso is not None and iso.tzinfo is not None:
        iso = iso.replace(tzinfo=None)
    return iso
63
+
64
+
65
def _is_continuation(line: str) -> bool:
    """Report whether ``line`` continues the previous event; empty lines never do."""
    if not line:
        return False
    return _CONT.match(line) is not None
67
+
68
+
69
def _iter_events(path: str):
    """Stream ``path`` and yield one ``(block_lines, first_ts)`` pair per event.

    An event starts at a line that is not a continuation and absorbs every
    continuation line (indented or stack-frame) that follows. ``first_ts`` is
    the timestamp parsed from the event's first line, or ``None``. Counting
    scanned lines is left to the caller.
    """
    pending: list[str] = []
    pending_ts: datetime | None = None
    with open(path, errors="replace") as stream:
        for raw_line in stream:
            text = raw_line.rstrip("\n")
            if pending and _is_continuation(text):
                pending.append(text)
            else:
                # A new leading line closes the event collected so far.
                if pending:
                    yield pending, pending_ts
                pending, pending_ts = [text], _parse_line_ts(text)
    # Flush the final event, which has no following line to close it.
    if pending:
        yield pending, pending_ts
89
+
90
+
91
def summarize_log(
    path: str,
    *,
    level: str | None = None,
    since: str | None = None,
    until: str | None = None,
    top: int = 10,
    max_sample_lines: int = 20,
    max_bytes: int = 200,
) -> dict:
    """Build a bounded digest of the log at ``path``, clustered by error signature.

    Args:
        path: Log file to stream.
        level: Minimum severity name; ``None`` or an unrecognised name keeps everything.
        since: Lower window bound (see ``_parse_when``); events with no timestamp
            are never excluded by the window.
        until: Upper window bound, same rules as ``since``.
        top: Number of most frequent signatures returned.
        max_sample_lines: Lines kept in each signature's sample.
        max_bytes: Characters kept per sample line.

    Returns:
        ``{"file", "window", "total_events", "distinct_signatures",
        "truncated", "top"}``; the sample of each signature comes from the
        first event seen with that signature.

    Raises:
        SourceNotFoundError: if ``path`` does not exist.
        OSError: propagated from other read failures.
    """
    if not os.path.exists(path):
        raise SourceNotFoundError(f"log file not found: {path}")

    min_rank = _LEVEL_FLOOR.get((level or "all").lower(), 0)
    window_start = _parse_when(since)
    window_end = _parse_when(until)

    clusters: dict[str, dict] = {}
    scanned = 0
    kept = 0

    for event, stamp in _iter_events(path):
        # Lines count as scanned even when the event is filtered out below.
        scanned += len(event)

        if window_start and stamp and stamp < window_start:
            continue
        if window_end and stamp and stamp > window_end:
            continue

        signature = error_signature(event)
        # Severity of the event = highest rank found on any of its lines.
        severity = max((_LEVEL_FLOOR.get(detect_level(text) or "", 0) for text in event), default=0)
        if severity == 0 and "@" in signature:
            # No explicit level, but the signature has the exception@frames shape.
            severity = 3
        if severity < min_rank:
            continue

        kept += 1
        entry = clusters.get(signature)
        if entry is None:
            clusters[signature] = {
                "signature": signature,
                "level_rank": severity,
                "count": 1,
                "first_ts": stamp,
                "last_ts": stamp,
                "sample": [text[:max_bytes] for text in event[:max_sample_lines]],
            }
            continue

        entry["count"] += 1
        entry["level_rank"] = max(entry["level_rank"], severity)
        if stamp:
            if entry["first_ts"] is None or stamp < entry["first_ts"]:
                entry["first_ts"] = stamp
            if entry["last_ts"] is None or stamp > entry["last_ts"]:
                entry["last_ts"] = stamp

    # Stable sort: equally frequent signatures stay in first-seen order.
    by_frequency = sorted(clusters.values(), key=lambda c: c["count"], reverse=True)
    rank_name = {0: "info", 2: "warning", 3: "error", 4: "critical"}

    top_list = []
    for cluster in by_frequency[:top]:
        first_seen, last_seen = cluster["first_ts"], cluster["last_ts"]
        top_list.append(
            {
                "signature": cluster["signature"],
                "level": rank_name.get(cluster["level_rank"], "info"),
                "count": cluster["count"],
                "first_ts": first_seen.isoformat() if first_seen else None,
                "last_ts": last_seen.isoformat() if last_seen else None,
                "sample": cluster["sample"],
            }
        )

    return {
        "file": path,
        "window": {"since": since, "until": until, "lines_scanned": scanned},
        "total_events": kept,
        "distinct_signatures": len(clusters),
        "truncated": len(clusters) > top,
        "top": top_list,
    }
@@ -0,0 +1,15 @@
1
+ """Exception types raised by the pure helper functions.
2
+
3
+ The functions in this package do the work and *raise* on failure; the thin
4
+ runnable wrappers in ``runspec-logops`` (and any private wrapper that imports the
5
+ helpers) catch these and turn them into the JSON error payloads + non-zero exits
6
+ that the CLI/agent surface expects.
7
+ """
8
+
9
+
10
class LogopsCoreError(Exception):
    """Root of the package's exception hierarchy; catch this to handle any helper failure."""
12
+
13
+
14
class SourceNotFoundError(LogopsCoreError):
    """Raised when a required input path — the log file or the repo checkout — is missing."""
@@ -0,0 +1,138 @@
1
+ """Pure, I/O-free line normalisation and event-signature extraction.
2
+
3
+ This is what makes log condensing cheap: collapse the *variable* parts of a log
4
+ line (timestamps, ids, addresses, numbers, quoted literals) into a stable
5
+ "shape" so that thousands of near-identical lines fold onto one signature. For a
6
+ multi-line stack trace, the signature is the exception class plus the top normalised
7
+ frames, so the same failure clusters regardless of the surrounding noise.
8
+
9
+ Nothing here reads files or the clock — every function is a deterministic pure
10
+ transform, which is what the unit tests pin.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import re
16
+
17
+ # --- pieces of a line that vary run-to-run and must be masked for clustering ---
18
+ _UUID = re.compile(r"\b[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}\b")
19
+ _HEX = re.compile(r"\b0x[0-9a-fA-F]+\b")
20
+ _LONGHEX = re.compile(r"\b[0-9a-fA-F]{16,}\b")
21
+ # A hex-ish id (request id, short hash): >=4 chars, mixing at least one digit and
22
+ # one a-f letter — masks ``7f3a``/``0a1b`` while leaving plain words alone.
23
+ _HEXID = re.compile(r"\b(?=[0-9a-fA-F]*[0-9])(?=[0-9a-fA-F]*[a-fA-F])[0-9a-fA-F]{4,}\b")
24
+ _QUOTED = re.compile(r"""(['"]).*?\1""")
25
+ _NUM = re.compile(r"\d+") # any digit run, incl. unit-glued (412ms) — masked last
26
+ _WS = re.compile(r"\s+")
27
+
28
+ # A leading timestamp: ISO (``2026-06-18T08:01:11`` / ``... 08:01:11,123``),
29
+ # bracketed (``[2026-06-18 08:01:11]``), or syslog (``Jun 18 08:01:11``).
30
+ _LEADING_TS = re.compile(
31
+ r"""^\s*
32
+ (?:
33
+ \[?\d{4}-\d{2}-\d{2}[ T]\d{2}:\d{2}:\d{2}(?:[.,]\d+)?(?:Z|[+-]\d{2}:?\d{2})?\]?
34
+ | [A-Z][a-z]{2}\s+\d{1,2}\s+\d{2}:\d{2}:\d{2}
35
+ )
36
+ \s*""",
37
+ re.VERBOSE,
38
+ )
39
+
40
+ # Common severity tokens, mapped to an ordered rank.
41
+ _LEVELS = {
42
+ "DEBUG": 0,
43
+ "TRACE": 0,
44
+ "INFO": 1,
45
+ "NOTICE": 1,
46
+ "WARN": 2,
47
+ "WARNING": 2,
48
+ "ERROR": 3,
49
+ "ERR": 3,
50
+ "SEVERE": 3,
51
+ "CRITICAL": 4,
52
+ "CRIT": 4,
53
+ "FATAL": 4,
54
+ }
55
+ _LEVEL_RANK = {"all": 0, "debug": 0, "info": 1, "warning": 2, "error": 3, "critical": 4}
56
+ _LEVEL_TOKEN = re.compile(r"\b(DEBUG|TRACE|INFO|NOTICE|WARN(?:ING)?|ERR(?:OR)?|SEVERE|CRIT(?:ICAL)?|FATAL)\b")
57
+
58
+ # An exception/error class name, e.g. ``NullPointerException`` / ``ValueError``.
59
+ _EXC = re.compile(r"\b([A-Za-z_][\w.]*(?:Error|Exception|Failure|Fault))\b")
60
+
61
+ # Stack-frame patterns across common runtimes.
62
+ _FRAME_JAVA = re.compile(r"\bat\s+([\w.$]+)\s*\(") # at com.acme.Foo.bar(Foo.java:42)
63
+ _FRAME_PY = re.compile(r'File\s+"[^"]+",\s+line\s+\d+,\s+in\s+(\S+)') # File "x.py", line 5, in foo
64
+ _FRAME_JS = re.compile(r"\bat\s+([\w.$<>]+)\s*\(") # at Object.fn (/a/b.js:1:2)
65
+
66
+
67
def strip_timestamp(line: str) -> str:
    """Drop the timestamp prefix at the start of ``line``; return it unchanged if there is none."""
    prefix = _LEADING_TS.match(line)
    if prefix is None:
        return line
    return line[prefix.end():]
70
+
71
+
72
def detect_level(line: str) -> str | None:
    """Return the canonical severity name for the first level token in ``line``.

    The token (e.g. ``WARN``, ``SEVERE``) is converted to its rank and the rank
    to the first canonical name carrying it (e.g. ``"warning"``, ``"error"``).
    Returns ``None`` when the line holds no level token.
    """
    hit = _LEVEL_TOKEN.search(line)
    if hit is None:
        return None
    wanted = _LEVELS[hit.group(1).upper()]
    # "all" is a filter value, not a severity, so it is never reported.
    return next(
        (label for label, value in _LEVEL_RANK.items() if label != "all" and value == wanted),
        None,
    )
84
+
85
+
86
def normalize_line(line: str) -> str:
    """Reduce one log line to a stable shape by masking its volatile parts.

    After removing the leading timestamp, the following are replaced in this
    order: UUIDs, ``0x`` hex values, long hex blobs, short hex ids, quoted
    literals, and finally every digit run. Whitespace is then squeezed to
    single spaces and trimmed. Lines that differ only in those parts normalise
    to the same string.
    """
    shape = strip_timestamp(line)
    # Order matters: the digit mask must come last, or it would break up the
    # hex/UUID tokens before their own patterns can see them.
    masks = (
        (_UUID, "<uuid>"),
        (_HEX, "<hex>"),
        (_LONGHEX, "<hex>"),
        (_HEXID, "<hex>"),
        (_QUOTED, "<str>"),
        (_NUM, "<n>"),
    )
    for pattern, placeholder in masks:
        shape = pattern.sub(placeholder, shape)
    return _WS.sub(" ", shape).strip()
101
+
102
+
103
def _frames(block: list[str], max_frames: int = 3) -> list[str]:
    """Collect stack-frame symbols from an event block, at most one per line.

    Each line is tried against the Python, Java and JS frame patterns in that
    order. Scanning stops after the line on which the collected count reaches
    ``max_frames``.
    """
    found: list[str] = []
    matchers = (_FRAME_PY, _FRAME_JAVA, _FRAME_JS)
    for text in block:
        # Lazily evaluated: later patterns are not tried once one has matched.
        hit = next((m for m in (pattern.search(text) for pattern in matchers) if m), None)
        if hit is not None:
            found.append(hit.group(1))
        if len(found) >= max_frames:
            break
    return found
115
+
116
+
117
def error_signature(block: list[str], max_frames: int = 3) -> str:
    """Compute a short, stable signature for an event of one or more lines.

    * Exception and/or frames found: ``<ExceptionClass>@<frame1>><frame2>...``,
      built from the first exception class seen (bare name, package prefix
      dropped) and up to ``max_frames`` frames. ``error`` stands in when frames
      exist but no exception class does; with an exception but no frames the
      signature is the class name alone.
    * Neither found: the normalised first line, cut to 200 characters.
    * Empty block: the empty string.
    """
    if not block:
        return ""

    first_exc = next((m for m in (_EXC.search(text) for text in block) if m), None)
    exc = first_exc.group(1).rsplit(".", 1)[-1] if first_exc else None

    frames = _frames(block, max_frames=max_frames)
    if not exc and not frames:
        # Plain line: its normalised shape is the signature.
        return normalize_line(block[0])[:200]

    head = exc or "error"
    if frames:
        return head + "@" + ">".join(frames)
    return head
@@ -0,0 +1,9 @@
1
+ Metadata-Version: 2.4
2
+ Name: runspec-logops-core
3
+ Version: 0.1.0
4
+ Summary: Pure-Python log-condensing + code-mapping helpers — the importable core behind runspec-logops (no runspec dependency, no runnables)
5
+ Requires-Python: >=3.10
6
+ Provides-Extra: dev
7
+ Requires-Dist: mypy; extra == 'dev'
8
+ Requires-Dist: pytest>=8.0; extra == 'dev'
9
+ Requires-Dist: ruff; extra == 'dev'
@@ -0,0 +1,9 @@
1
+ runspec_logops_core/__init__.py,sha256=imfGQnqUJY8uUIAv007g6kc6wVknWgXrF8t8bVOOsho,1324
2
+ runspec_logops_core/bundle.py,sha256=tcZSVSaQinqSBE58FiUEvLbQ3OTEORmwOsDOJKAIIMs,1794
3
+ runspec_logops_core/codemap.py,sha256=PN2HPh3i6BtUV0mfRY6vcgl1r74o5gMhwNAt8FOYdmY,6349
4
+ runspec_logops_core/digest.py,sha256=5UdiL0cbv7mL1-flTonQwXa2yeFz1Qwq7nY1Nl7PAvE,5994
5
+ runspec_logops_core/errors.py,sha256=iJnGf8G7B6Ei4b_FOz0-GECQVXJ3h7_70ZVCH3JdAj0,560
6
+ runspec_logops_core/signatures.py,sha256=VX8PYoWrGI05dTBKMmo1j6E0ntAua2XC8UebHAX7XhM,5227
7
+ runspec_logops_core-0.1.0.dist-info/METADATA,sha256=TfW42N3Ue47-gmaPsEuC_2F9Qmuljj5TfT9oFuNjwCw,365
8
+ runspec_logops_core-0.1.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
9
+ runspec_logops_core-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.30.1
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any