mneme-code 3.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mneme_code/resolve.py ADDED
@@ -0,0 +1,116 @@
1
+ """Frame-to-graph resolution: match stack frames against mneme-graph nodes.
2
+
3
+ Design constraints
4
+ ------------------
5
+ * No hard import of ``mneme_graph`` types — the dependency is optional at
6
+ runtime (the store may be ``None``) and hard coupling would break mypy
7
+ ``--strict`` when ``mneme_graph`` stubs are absent.
8
+ * A local ``typing.Protocol`` pair (``_NodeLike`` / ``_StoreLike``) defines
9
+ the minimal structural interface required for matching. Any object that
10
+ satisfies the protocol works — including a real ``GraphStore``.
11
+ * Clean fallback: if ``graph_store`` is ``None`` or its ``.nodes`` sequence
12
+ is empty, every frame maps to ``(frame, None)`` with no error.
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ from collections.abc import Sequence
18
+ from typing import Protocol, runtime_checkable
19
+
20
+ from mneme_code.stacktrace import Frame
21
+
22
+ # ---------------------------------------------------------------------------
23
+ # Structural protocols (no mneme_graph import required)
24
+ # ---------------------------------------------------------------------------
25
+
26
+
27
@runtime_checkable
class _NodeLike(Protocol):
    """Structural type for graph nodes consumed by :func:`resolve_frames`.

    Any object exposing these three attributes satisfies the protocol; no
    ``mneme_graph`` class needs to be imported.  Because the protocol is
    ``runtime_checkable``, ``isinstance`` only verifies that the attributes
    exist, not that they hold strings.
    """

    # Symbol name; compared against ``Frame.function``.
    name: str
    # Path of the defining file; compared against the tail of ``Frame.file_path``.
    source_path: str
    # Node category; only nodes whose kind is ``"function"`` are matched.
    kind: str
34
+
35
+
36
@runtime_checkable
class _StoreLike(Protocol):
    """Structural type for graph stores consumed by :func:`resolve_frames`.

    The only requirement is a readable ``nodes`` attribute; a real graph
    store or any stand-in object providing it is accepted.
    """

    @property
    def nodes(self) -> Sequence[_NodeLike]:
        """Return the store's graph nodes as an ordered sequence."""
        ...  # pragma: no cover
44
+
45
+
46
+ # ---------------------------------------------------------------------------
47
+ # Public API
48
+ # ---------------------------------------------------------------------------
49
+
50
+
51
def resolve_frames(
    parsed: object,
    graph_store: object | None,
) -> list[tuple[Frame, object | None]]:
    """Match each frame in *parsed* against nodes in *graph_store*.

    A node matches a frame when ``node.kind == "function"``,
    ``node.name == frame.function``, and the frame's ``file_path`` equals or
    ends with ``/<node.source_path>``.  Both paths are compared with
    backslashes converted to forward slashes, so Windows-style paths on
    either side still line up.  The suffix test bridges absolute traceback
    paths and relative graph ``source_path`` values.  When several nodes
    qualify, the one that appears first in ``graph_store.nodes`` wins.

    Args:
        parsed:      Any object with a ``.frames`` attribute holding
                     :class:`Frame` objects (e.g. a ``ParsedTraceback``).
                     Entries that are not :class:`Frame` instances are
                     dropped; a missing or ``None`` attribute yields no
                     frames.
        graph_store: ``None`` or any object satisfying :class:`_StoreLike`.
                     When it is ``None``, does not satisfy the protocol,
                     raises while ``.nodes`` is read, or has no nodes, every
                     frame maps to ``(frame, None)``.

    Returns:
        A list of ``(frame, node_or_none)`` pairs in frame order.
    """
    frames: list[Frame] = [
        f for f in (getattr(parsed, "frames", ()) or ()) if isinstance(f, Frame)
    ]

    # Fast path: nothing to resolve against.
    if graph_store is None:
        return [(frame, None) for frame in frames]

    # Read .nodes defensively: a store whose property raises is treated as empty.
    nodes_seq: Sequence[object] = ()
    if isinstance(graph_store, _StoreLike):
        try:
            nodes_seq = graph_store.nodes
        except Exception:  # noqa: BLE001
            nodes_seq = ()

    if not nodes_seq:
        return [(frame, None) for frame in frames]

    # Index candidate nodes by function name once so each frame only looks at
    # nodes that can possibly match.  Each entry keeps the node together with
    # its separator-normalised source path; list order follows store order so
    # the first qualifying node still wins.
    by_name: dict[str, list[tuple[str, _NodeLike]]] = {}
    for node in nodes_seq:
        if not isinstance(node, _NodeLike) or node.kind != "function":
            continue
        # The protocol check only proves the attributes exist; skip nodes
        # whose name/path are not usable strings instead of failing later.
        if not isinstance(node.name, str) or not isinstance(node.source_path, str):
            continue
        if not node.source_path:
            continue
        by_name.setdefault(node.name, []).append(
            (node.source_path.replace("\\", "/"), node)
        )

    result: list[tuple[Frame, object | None]] = []
    for frame in frames:
        frame_path = frame.file_path.replace("\\", "/")
        match: object | None = None
        for node_path, node in by_name.get(frame.function, ()):
            # Exact equality covers relative traceback paths; the "/"-anchored
            # suffix test covers absolute ones without matching partial names
            # (e.g. "xmod.py" must not match "mod.py").
            if frame_path == node_path or frame_path.endswith("/" + node_path):
                match = node
                break
        result.append((frame, match))

    return result
mneme_code/stacktrace.py ADDED
@@ -0,0 +1,191 @@
1
+ """Deterministic CPython traceback parser with redact-before-store invariant.
2
+
3
+ Parses the standard CPython traceback format::
4
+
5
+ Traceback (most recent call last):
6
+ File "PATH", line N, in FUNC
7
+ code line
8
+ ExcType: message
9
+
10
+ Redaction invariant (C4): ``mneme_core.privacy.redact`` is applied to
11
+ ``exc_message``, every ``code_context``, and every ``file_path`` *before*
12
+ the dataclasses are constructed. No raw user content ever reaches a field.
13
+
14
+ ``parse_traceback`` returns ``None`` for any input that is not a recognisable
15
+ CPython traceback and never raises.
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import keyword
21
+ import re
22
+ from dataclasses import dataclass
23
+
24
+ from mneme_core.privacy import redact
25
+
26
+ # ---------------------------------------------------------------------------
27
+ # Regex patterns
28
+ # ---------------------------------------------------------------------------
29
+
30
# Opening line of a standard CPython traceback.
_TRACEBACK_HEADER = re.compile(r"^Traceback \(most recent call last\):\s*$", re.MULTILINE)

# Each frame header: a two-character indent followed by
# 'File "PATH", line N, in FUNC'.
# Captures: path (group 1), line number (group 2), function name (group 3).
_FRAME_HEADER = re.compile(
    r'^\s{2}File "([^"]+)", line (\d+), in (.+?)\s*$',
    re.MULTILINE,
)

# Trailing exception line: 'ExcType: message' or just 'ExcType' (no message).
# Dotted types are supported (e.g. 'pkg.mod.Error').
# Captures: exception type (group 1), message (group 2, ``None`` when the
# line has no colon).  The type is a single greedy run of word characters and
# dots; a second overlapping quantifier over the same characters would accept
# exactly the same strings but make a failed match on a long line backtrack
# quadratically, so it is deliberately avoided.
_EXC_LINE = re.compile(
    r"^([\w][\w.]*)(?::\s*(.*))?\s*$",
)
45
+
46
+
47
+ # ---------------------------------------------------------------------------
48
+ # Dataclasses
49
+ # ---------------------------------------------------------------------------
50
+
51
+
52
@dataclass(frozen=True)
class Frame:
    """One stack frame taken from a CPython traceback.

    Instances are immutable.  When produced by the parser in this module,
    ``file_path`` and ``code_context`` have been passed through ``redact()``
    before construction, while ``function`` holds the name exactly as it
    appeared in the frame header.  Code that builds a ``Frame`` by hand is
    responsible for redacting raw user content itself.
    """

    # Path from the ``File "..."`` part of the frame header.
    file_path: str
    # Line number from the frame header.
    line: int
    # Name following ``in`` in the frame header.
    function: str
    # Source line printed under the header, or ``None`` when absent.
    code_context: str | None
65
+
66
+
67
@dataclass(frozen=True)
class ParsedTraceback:
    """Immutable result of parsing a CPython traceback.

    Attributes:
        exc_type     Exception class name; may be dotted (``pkg.Error``).
        exc_message  Redacted exception message; ``""`` when there is none.
        frames       The :class:`Frame` objects in the order they appear in
                     the traceback text (innermost call last).
    """

    exc_type: str
    exc_message: str
    frames: tuple[Frame, ...]
80
+
81
+
82
+ # ---------------------------------------------------------------------------
83
+ # Parser
84
+ # ---------------------------------------------------------------------------
85
+
86
+
87
def parse_traceback(text: str) -> ParsedTraceback | None:
    """Turn CPython traceback text into a :class:`ParsedTraceback`.

    This is the non-raising public wrapper: all parsing is delegated to
    ``_parse_traceback_inner`` and any exception it raises is converted
    into a ``None`` result.

    Args:
        text: Raw text expected to contain a
              ``Traceback (most recent call last):`` header; other log
              lines may surround it.

    Returns:
        The parsed traceback, or ``None`` when *text* is not recognised as
        a CPython traceback or parsing fails for any reason.
    """
    try:
        outcome = _parse_traceback_inner(text)
    except Exception:  # noqa: BLE001
        # Contract: never raise, whatever the input looks like.
        return None
    else:
        return outcome
106
+
107
+
108
def _parse_traceback_inner(text: str) -> ParsedTraceback | None:
    """Inner parser — may raise; wrapped by ``parse_traceback``.

    Steps:
      1. Find the ``Traceback (most recent call last):`` header and discard
         everything before it.
      2. Collect every frame header and the optional code-context line that
         directly follows it.  ``redact()`` is applied to the path and the
         code context before the :class:`Frame` is built.
      3. Take the exception line from the text *after the last frame*: the
         first non-blank, non-indented line that looks like ``ExcType`` or
         ``ExcType: message``.

    Returns ``None`` when there is no header, no frame, or no exception line
    (for example a truncated traceback that ends in a code line).
    """
    header_match = _TRACEBACK_HEADER.search(text)
    if header_match is None:
        return None

    # Work only with text from the header onwards.
    lines = text[header_match.start():].splitlines()
    total = len(lines)

    frames: list[Frame] = []
    tail_start = total  # index of the first line after the last frame
    i = 1  # skip the 'Traceback ...' header line itself
    while i < total:
        fh = _FRAME_HEADER.match(lines[i])
        if fh is None:
            i += 1
            continue
        i += 1

        # Optional code-context line.  It is indented by at least four
        # spaces (deeper than a frame header) and is not itself a header.
        code_ctx: str | None = None
        if i < total:
            candidate = lines[i]
            if candidate.startswith("    ") and not _FRAME_HEADER.match(candidate):
                code_ctx = redact(candidate.strip()) or None
                i += 1

        frames.append(
            Frame(
                file_path=redact(fh.group(1)),
                line=int(fh.group(2)),
                function=fh.group(3).strip(),
                code_context=code_ctx,
            )
        )
        tail_start = i

    if not frames:
        return None

    # In CPython's format the exception line sits at column 0 right after the
    # last frame.  Scanning forward from there, rather than backwards from the
    # end of the text, keeps trailing one-word output (captured stdout, log
    # lines, later lines of a multi-line message) from being mistaken for the
    # exception type.  Indented lines (code context, caret markers) are
    # skipped, and a bare Python keyword is rejected because no exception
    # class is named after a keyword.
    for raw_line_text in lines[tail_start:]:
        if raw_line_text[:1] in (" ", "\t"):
            continue
        stripped = raw_line_text.strip()
        if not stripped:
            continue
        m = _EXC_LINE.match(stripped)
        if m and not keyword.iskeyword(m.group(1)):
            return ParsedTraceback(
                exc_type=m.group(1),
                exc_message=redact(m.group(2) or ""),
                frames=tuple(frames),
            )

    return None
mneme_code/testrun.py ADDED
@@ -0,0 +1,293 @@
1
+ """Test-runner output parser: pytest and unittest console output → FailureMemory.
2
+
3
+ Parses pytest / unittest console output into :class:`TestFailure` records by
4
+ re-using :func:`~mneme_code.stacktrace.parse_traceback` and
5
+ :func:`~mneme_code.failure.failure_from_traceback`. Pure, deterministic,
6
+ never raises.
7
+
8
+ Design invariants (mirror stacktrace.py / failure.py):
9
+ * No clock, no random, no I/O inside parsing functions.
10
+ * Redaction is inherited from ``parse_traceback`` — every ``ParsedTraceback``
11
+ field is already redacted before it reaches a ``TestFailure`` or
12
+ ``FailureMemory``.
13
+ * ``failures_from_test_output`` accepts ``observed_at: datetime`` injected by
14
+ the caller; it never calls ``datetime.now()``.
15
+ * Never raises — every public entry point catches all exceptions and returns
16
+ ``[]``.
17
+ * Frozen dataclasses for all public types.
18
+ """
19
+
20
+ from __future__ import annotations
21
+
22
+ import re
23
+ from dataclasses import dataclass
24
+ from datetime import datetime
25
+
26
+ from mneme_core.privacy import redact
27
+
28
+ from mneme_code.failure import FailureMemory, failure_from_traceback
29
+ from mneme_code.stacktrace import ParsedTraceback, parse_traceback
30
+
31
+ # ---------------------------------------------------------------------------
32
+ # Regex patterns
33
+ # ---------------------------------------------------------------------------
34
+
35
# pytest prints an underscore banner above each failing test's report, e.g.
# "________ TestFoo.test_bar ________".  Group 1 is the text between the
# underscore runs.
_PYTEST_BANNER = re.compile(
    r"^_{4,}\s+(.+?)\s+_{4,}\s*$",
    re.M,
)

# pytest's "=== FAILURES ===" section header, matched case-insensitively.
_PYTEST_FAILURES_HDR = re.compile(
    r"=+\s*FAILURES\s*=+",
    re.I,
)

# pytest short-test-summary lines such as "FAILED tests/x.py::C::m - ...".
# The captured ID must contain "/" or "::", which keeps the unittest footer
# "FAILED (failures=1)" from matching.
_PYTEST_SUMMARY_LINE = re.compile(
    r"^FAILED\s+(\S*(?:/|::)\S*)",
    re.M,
)

# unittest per-test headers: "FAIL: test_x (mod.TestClass)" or
# "ERROR: test_x (mod.TestClass)".  Group 1 is everything after the prefix.
_UNITTEST_HDR = re.compile(
    r"^(?:FAIL|ERROR):\s+(.+?)\s*$",
    re.M,
)

# unittest rule separators: a whole line of at least ten "=" or "-".
_RULE_LINE = re.compile(
    r"^[=\-]{10,}\s*$",
    re.M,
)

# Marker that a CPython traceback starts somewhere in the text.
_TB_START = re.compile(
    r"Traceback \(most recent call last\):",
    re.M,
)
58
+
59
+
60
+ # ---------------------------------------------------------------------------
61
+ # Dataclass
62
+ # ---------------------------------------------------------------------------
63
+
64
+
65
@dataclass(frozen=True)
class TestFailure:
    """A single test failure with optional parsed traceback.

    Attributes:
        test_id    Human-readable test identifier.
                   pytest style:   ``"tests/test_x.py::TestY::test_z"``
                   unittest style: ``"test_z (mod.TestY)"``
        traceback  :class:`~mneme_code.stacktrace.ParsedTraceback` parsed from
                   the failure's traceback block, or ``None`` if no parseable
                   traceback was found.
    """

    # The class name starts with "Test", so pytest would try to collect it as
    # a test class whenever it is imported into a test module and warn that
    # it has an ``__init__``.  Opting out keeps test runs clean.  The
    # attribute carries no annotation, so it is not a dataclass field.
    __test__ = False

    test_id: str
    traceback: ParsedTraceback | None
80
+
81
+
82
+ # ---------------------------------------------------------------------------
83
+ # pytest parser
84
+ # ---------------------------------------------------------------------------
85
+
86
+
87
def parse_pytest_output(text: str) -> list[TestFailure]:
    """Extract :class:`TestFailure` records from pytest console output.

    Non-raising public wrapper around ``_parse_pytest_inner``, which:

    1. Restricts parsing to the text from the ``=== FAILURES ===`` header
       onwards when that header is present (otherwise uses the full text).
    2. Splits that region on ``_______ <test-id> _______`` banners and runs
       :func:`~mneme_code.stacktrace.parse_traceback` on each block.
    3. When no banner is found at all, returns one traceback-less record per
       short-test-summary ``FAILED ...`` line instead.

    Args:
        text: Raw pytest stdout/stderr output.

    Returns:
        The failures in output order, or ``[]`` when nothing is found or
        any exception occurs while parsing.
    """
    try:
        found = _parse_pytest_inner(text)
    except Exception:  # noqa: BLE001
        # Contract: never raise; an unparseable input simply has no failures.
        return []
    else:
        return found
109
+
110
+
111
def _parse_pytest_inner(text: str) -> list[TestFailure]:
    """Inner pytest parser — may raise; wrapped by ``parse_pytest_output``.

    Returns one :class:`TestFailure` per banner found in the FAILURES
    section (or in the whole text when that section header is missing).
    Without any banner, falls back to the short-test-summary lines.
    """
    if not text or not text.strip():
        return []

    # Narrow to the FAILURES section when pytest printed one.
    section_hdr = _PYTEST_FAILURES_HDR.search(text)
    region = text if section_hdr is None else text[section_hdr.start():]

    markers = list(_PYTEST_BANNER.finditer(region))
    if not markers:
        # No per-test blocks: only test IDs are available, no tracebacks.
        return _pytest_summary_fallback(text)

    # A block runs from the end of its banner to the start of the next
    # banner; the final block runs to the end of the region.
    stops = [nxt.start() for nxt in markers[1:]]
    stops.append(len(region))

    # B5: the test ID is redacted before it can become a source_label. A
    # parametrized test name may carry secret-like text, and failure.py
    # assumes the label is already clean.
    return [
        TestFailure(
            test_id=redact(marker.group(1).strip()),
            traceback=parse_traceback(region[marker.end():stop]),
        )
        for marker, stop in zip(markers, stops)
    ]
139
+
140
+
141
def _pytest_summary_fallback(text: str) -> list[TestFailure]:
    """Build traceback-less failures from ``FAILED <test-id>`` summary lines.

    Each captured test ID is redacted; ``traceback`` is always ``None``
    because summary lines carry no traceback.
    """
    collected: list[TestFailure] = []
    for hit in _PYTEST_SUMMARY_LINE.finditer(text):
        collected.append(TestFailure(test_id=redact(hit.group(1)), traceback=None))
    return collected
147
+
148
+
149
+ # ---------------------------------------------------------------------------
150
+ # unittest parser
151
+ # ---------------------------------------------------------------------------
152
+
153
+
154
def parse_unittest_output(text: str) -> list[TestFailure]:
    """Extract :class:`TestFailure` records from unittest console output.

    Non-raising public wrapper around ``_parse_unittest_inner``, which:

    1. Finds every ``FAIL:`` / ``ERROR:`` header line.
    2. Treats the text from the end of a header up to the next header (or
       the end of the output) as that test's block.
    3. Runs :func:`~mneme_code.stacktrace.parse_traceback` on each block.

    Args:
        text: Raw unittest stdout/stderr output.

    Returns:
        The failures in output order, or ``[]`` when nothing is found or
        any exception occurs while parsing.
    """
    try:
        found = _parse_unittest_inner(text)
    except Exception:  # noqa: BLE001
        # Contract: never raise; an unparseable input simply has no failures.
        return []
    else:
        return found
175
+
176
+
177
def _parse_unittest_inner(text: str) -> list[TestFailure]:
    """Inner unittest parser — may raise; wrapped by ``parse_unittest_output``.

    Returns one :class:`TestFailure` per ``FAIL:`` / ``ERROR:`` header, in
    the order the headers appear; an input without headers yields ``[]``.
    """
    if not text or not text.strip():
        return []

    headers = list(_UNITTEST_HDR.finditer(text))

    # A block runs from the end of its header to the start of the next
    # header; the final block runs to the end of the text.  With no headers
    # the zip below is empty and so is the result.
    stops = [nxt.start() for nxt in headers[1:]]
    stops.append(len(text))

    # B5: the test ID is redacted before it can become a source_label.
    return [
        TestFailure(
            test_id=redact(header.group(1).strip()),
            traceback=parse_traceback(text[header.end():stop]),
        )
        for header, stop in zip(headers, stops)
    ]
198
+
199
+
200
+ # ---------------------------------------------------------------------------
201
+ # Unified entry point
202
+ # ---------------------------------------------------------------------------
203
+
204
+
205
def failures_from_test_output(
    text: str,
    *,
    observed_at: datetime,
    runner: str = "auto",
) -> list[FailureMemory]:
    """Convert test-runner console output into :class:`FailureMemory` records.

    Non-raising public wrapper around ``_failures_inner``.

    Runner selection:
        * ``runner="unittest"`` uses the unittest parser.
        * ``runner="auto"`` asks ``_detect_runner``, which reports unittest
          when the text has both a ``FAIL:`` / ``ERROR:`` header line and a
          traceback header, and pytest otherwise.
        * Any other value is handled by the pytest parser.

    Only failures whose ``.traceback`` is not ``None`` are converted;
    failures without a parseable traceback are silently skipped.

    Args:
        text:        Raw test-runner output.
        observed_at: Timestamp handed to every :class:`FailureMemory`.  It is
                     supplied by the caller; this function never reads the
                     clock.
        runner:      ``"pytest"``, ``"unittest"``, or ``"auto"`` (default).

    Returns:
        One :class:`FailureMemory` per test with a parseable traceback, or
        ``[]`` when nothing is parseable or any exception occurs.
    """
    try:
        memories = _failures_inner(text, observed_at=observed_at, runner=runner)
    except Exception:  # noqa: BLE001
        # Contract: never raise; a failed conversion produces no records.
        return []
    else:
        return memories
239
+
240
+
241
def _failures_inner(
    text: str,
    *,
    observed_at: datetime,
    runner: str,
) -> list[FailureMemory]:
    """Inner implementation — may raise; wrapped by ``failures_from_test_output``.

    Picks a parser (auto-detecting when *runner* is ``"auto"``), parses
    *text*, and converts every failure that has a traceback into a
    :class:`FailureMemory` labelled with the failure's test ID.
    """
    if not text or not text.strip():
        return []

    chosen = _detect_runner(text) if runner == "auto" else runner

    # Anything other than "unittest" is handled by the pytest parser.
    parse = parse_unittest_output if chosen == "unittest" else parse_pytest_output

    return [
        failure_from_traceback(
            item.traceback,
            observed_at=observed_at,
            source_label=item.test_id,
        )
        for item in parse(text)
        if item.traceback is not None
    ]
270
+
271
+
272
def _detect_runner(text: str) -> str:
    """Return ``"pytest"`` or ``"unittest"`` based on text heuristics.

    The output is classified as unittest only when it contains both a
    ``FAIL:`` / ``ERROR:`` header line and a
    ``Traceback (most recent call last):`` marker.  Every other input is
    classified as pytest, which is also the default for text that carries
    no recognisable signal.
    """
    if _UNITTEST_HDR.search(text) and _TB_START.search(text):
        return "unittest"
    # pytest is both the positive result and the fallback, so its own signals
    # (FAILURES header, short test summary, FAILED lines, underscore banners)
    # need no separate test here: they could not change the answer.
    return "pytest"