mneme-code 3.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mneme_code/__init__.py +71 -0
- mneme_code/agents.py +240 -0
- mneme_code/cli.py +174 -0
- mneme_code/failure.py +175 -0
- mneme_code/py.typed +0 -0
- mneme_code/resolve.py +116 -0
- mneme_code/stacktrace.py +191 -0
- mneme_code/testrun.py +293 -0
- mneme_code/trajectory.py +140 -0
- mneme_code-3.1.0.dist-info/METADATA +59 -0
- mneme_code-3.1.0.dist-info/RECORD +13 -0
- mneme_code-3.1.0.dist-info/WHEEL +4 -0
- mneme_code-3.1.0.dist-info/entry_points.txt +2 -0
mneme_code/__init__.py
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
"""mneme-code: deterministic Python code-failure memory layer.
|
|
2
|
+
|
|
3
|
+
Design invariants
|
|
4
|
+
-----------------
|
|
5
|
+
* No LLM, no network — pure stdlib + mneme-core + mneme-graph.
|
|
6
|
+
* Redact-before-store — ``mneme_core.privacy.redact`` is applied to
|
|
7
|
+
``exc_message``, every ``code_context``, and every ``file_path``
|
|
8
|
+
*before* any dataclass is constructed or any text is rendered.
|
|
9
|
+
* Provenance on every record — ``content_hash`` (sha256 of canonical
|
|
10
|
+
redacted rendering), ``observed_at`` (injected by caller; never
|
|
11
|
+
produced inside library functions), ``trust`` (default ``"user"``),
|
|
12
|
+
and ``confidence`` (default ``"EXTRACTED"``).
|
|
13
|
+
* Deterministic — same inputs always produce the same ``failure_id``
|
|
14
|
+
and ``content_hash``; no ``datetime.now()`` or ``random`` inside
|
|
15
|
+
library functions.
|
|
16
|
+
* Markdown is the ground-truth artifact — ``failure_to_markdown``
|
|
17
|
+
produces a vault-ready note; the vault is the store.
|
|
18
|
+
|
|
19
|
+
DEFER (not built in this version)
|
|
20
|
+
----------------------------------
|
|
21
|
+
* Live in-process test-runner plugin (a pytest hook). Console-output parsing
|
|
22
|
+
of pytest/unittest IS built — see ``testrun``.
|
|
23
|
+
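* Branch-aware failure identity — the git branch is recorded as note metadata only (see ``failure_to_markdown``); it never participates in ``failure_id`` or ``content_hash``.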
|
|
24
|
+
* Repository runbook generation.
|
|
25
|
+
* Non-Python tracebacks (JavaScript, Rust, Go, etc.).
|
|
26
|
+
|
|
27
|
+
Public API
|
|
28
|
+
----------
|
|
29
|
+
from mneme_code.stacktrace import Frame, ParsedTraceback, parse_traceback
|
|
30
|
+
from mneme_code.failure import FailureMemory, failure_from_traceback, failure_to_markdown
|
|
31
|
+
from mneme_code.resolve import resolve_frames
|
|
32
|
+
from mneme_code.agents import ProceduralMemory, parse_agents_md, procedural_to_markdown
|
|
33
|
+
from mneme_code.testrun import (
|
|
34
|
+
TestFailure, parse_pytest_output, parse_unittest_output, failures_from_test_output,
|
|
35
|
+
)
|
|
36
|
+
from mneme_code.trajectory import FixTrajectory, fix_from_failure, fix_to_markdown
|
|
37
|
+
"""
|
|
38
|
+
|
|
39
|
+
from mneme_code.agents import ProceduralMemory, parse_agents_md, procedural_to_markdown
|
|
40
|
+
from mneme_code.failure import FailureMemory, failure_from_traceback, failure_to_markdown
|
|
41
|
+
from mneme_code.resolve import resolve_frames
|
|
42
|
+
from mneme_code.stacktrace import Frame, ParsedTraceback, parse_traceback
|
|
43
|
+
from mneme_code.testrun import (
|
|
44
|
+
TestFailure,
|
|
45
|
+
failures_from_test_output,
|
|
46
|
+
parse_pytest_output,
|
|
47
|
+
parse_unittest_output,
|
|
48
|
+
)
|
|
49
|
+
from mneme_code.trajectory import FixTrajectory, fix_from_failure, fix_to_markdown
|
|
50
|
+
|
|
51
|
+
# Resolve the version from the installed distribution metadata so that
# ``__version__`` cannot drift from the released wheel. The literal is kept
# only as a fallback for source checkouts that are not installed.
from importlib.metadata import PackageNotFoundError as _PackageNotFoundError
from importlib.metadata import version as _distribution_version

try:
    __version__ = _distribution_version("mneme-code")
except _PackageNotFoundError:
    __version__ = "0.2.0"

# Names re-exported as the package's public API.
__all__ = [
    "__version__",
    "Frame",
    "ParsedTraceback",
    "parse_traceback",
    "FailureMemory",
    "failure_from_traceback",
    "failure_to_markdown",
    "resolve_frames",
    "ProceduralMemory",
    "parse_agents_md",
    "procedural_to_markdown",
    "TestFailure",
    "parse_pytest_output",
    "parse_unittest_output",
    "failures_from_test_output",
    "FixTrajectory",
    "fix_from_failure",
    "fix_to_markdown",
]
|
mneme_code/agents.py
ADDED
|
@@ -0,0 +1,240 @@
|
|
|
1
|
+
"""ProceduralMemory dataclass and vault-ready markdown serialization for AGENTS.md.
|
|
2
|
+
|
|
3
|
+
Parses an AGENTS.md (or any conventions markdown) into structured procedural
|
|
4
|
+
memories, one per level-2 (``##``) heading section.
|
|
5
|
+
|
|
6
|
+
Design invariants (mirror stacktrace.py / failure.py):
|
|
7
|
+
* Pure, deterministic — no clock, no random, no I/O inside parse functions.
|
|
8
|
+
* Redact-before-store — ``mneme_core.privacy.redact`` is applied to heading
|
|
9
|
+
text, body text, and fenced-command text before any dataclass field is set.
|
|
10
|
+
* Never raises — ``parse_agents_md`` catches all exceptions and returns ``[]``.
|
|
11
|
+
* Frozen dataclasses for all public types.
|
|
12
|
+
* ``procedural_to_markdown`` accepts ``created: datetime`` injected by caller.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import hashlib
|
|
18
|
+
import json
|
|
19
|
+
import re
|
|
20
|
+
import uuid
|
|
21
|
+
from dataclasses import dataclass, field
|
|
22
|
+
from datetime import UTC, datetime
|
|
23
|
+
|
|
24
|
+
from mneme_core.privacy import redact
|
|
25
|
+
|
|
26
|
+
# ---------------------------------------------------------------------------
|
|
27
|
+
# Regex helpers
|
|
28
|
+
# ---------------------------------------------------------------------------
|
|
29
|
+
|
|
30
|
+
# Matches a single level-2 markdown heading line ("## Some Title") and
# captures the title; trailing whitespace is left out of the capture.
_H2 = re.compile(
    r"^##\s+(.+?)\s*$",
    flags=re.MULTILINE,
)

# Matches one fenced code block (```[lang] ... ```) and captures everything
# between the opening fence line and the closing fence. DOTALL lets the
# captured body span several lines.
_FENCE = re.compile(
    r"```[^\n]*\n(.*?)```",
    flags=re.DOTALL,
)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
# ---------------------------------------------------------------------------
|
|
38
|
+
# Dataclass
|
|
39
|
+
# ---------------------------------------------------------------------------
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
@dataclass(frozen=True)
class ProceduralMemory:
    """One immutable procedural-memory record, built from a single ``##`` section.

    Every string stored here is expected to have gone through ``redact()``
    already. Code that instantiates ``ProceduralMemory`` by hand must not
    pass raw, unredacted user content.
    """

    # Deterministic UUID5(NAMESPACE_URL, source_path + NUL + heading).
    memory_id: str
    # Redacted heading text, without the leading "## ".
    title: str
    # Redacted prose of the section, with fenced blocks removed.
    content: str
    # Redacted contents of the section's fenced code blocks, in document order.
    commands: tuple[str, ...]
    # Vault-relative posix path supplied by the caller.
    source_path: str
    # SHA-256 hex digest of the redacted full file text.
    content_hash: str
    # Provenance tier.
    trust: str = "user"
    # Confidence label.
    confidence: str = "EXTRACTED"
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
# ---------------------------------------------------------------------------
|
|
73
|
+
# Parser
|
|
74
|
+
# ---------------------------------------------------------------------------
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def parse_agents_md(text: str, *, source_path: str) -> list[ProceduralMemory]:
    """Turn an AGENTS.md document into :class:`ProceduralMemory` records.

    One record is produced for every level-2 (``## ``) heading. Anything
    located ahead of the first ``## `` heading — a level-1 ``# `` title
    included — is dropped.

    Behaviour at the edges:

    * Empty or whitespace-only *text* yields ``[]``.
    * A document without ``##`` headings yields ``[]``.
    * A section without prose or fenced blocks yields ``content=""`` and
      ``commands=()``.
    * This function never raises: any exception from the inner parser is
      swallowed and ``[]`` is returned instead.

    Args:
        text: Raw AGENTS.md file content (may be empty).
        source_path: Vault-relative posix path stored on each record and
            used when deriving ``memory_id``.

    Returns:
        The records in document order, one per ``##`` section.
    """
    try:
        records = _parse_inner(text, source_path=source_path)
    except Exception:  # noqa: BLE001
        # Deliberate best-effort contract: a parse failure means "no records".
        records = []
    return records
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def _parse_inner(text: str, *, source_path: str) -> list[ProceduralMemory]:
    """Split *text* into ``##`` sections and build one record per section.

    Unlike ``parse_agents_md``, this helper lets exceptions propagate.
    """
    # Empty or whitespace-only input has nothing to parse.
    if not (text and text.strip()):
        return []

    # A single hash of the redacted *full* document is shared by every record.
    document_hash = hashlib.sha256(redact(text).encode("utf-8")).hexdigest()

    # Pass 1 — cut the document into (heading, body) pairs. Fence state is
    # tracked line by line so that a "## " line inside a ``` fence stays part
    # of the code (e.g. a "## install deps" shell comment) instead of opening
    # a phantom section. Lines ahead of the first heading are dropped.
    found_sections: list[tuple[str, str]] = []
    open_heading: str | None = None
    open_lines: list[str] = []
    inside_fence = False
    for raw_line in text.splitlines():
        if raw_line.lstrip().startswith("```"):
            # Fence marker: flip the state; the marker itself stays in the body.
            inside_fence = not inside_fence
        elif not inside_fence:
            h2 = _H2.match(raw_line)
            if h2 is not None:
                # A new heading closes the section that was open, if any.
                if open_heading is not None:
                    found_sections.append((open_heading, "\n".join(open_lines)))
                open_heading, open_lines = h2.group(1).strip(), []
                continue
        if open_heading is not None:
            open_lines.append(raw_line)
    if open_heading is not None:
        found_sections.append((open_heading, "\n".join(open_lines)))

    # Pass 2 — turn each section into a redacted record.
    records: list[ProceduralMemory] = []
    for heading_text, body_text in found_sections:
        # Collect fenced block bodies first, then strip the blocks from the prose.
        fenced_bodies = _FENCE.findall(body_text)
        prose_text = _FENCE.sub("", body_text).strip()

        # Redact every stored string before it reaches the dataclass.
        safe_title = redact(heading_text)
        safe_content = redact(prose_text)
        safe_commands = tuple(redact(one) for one in fenced_bodies)

        # The id is derived from the pre-redaction heading so it stays stable
        # when redaction tokens change.
        identity = uuid.uuid5(uuid.NAMESPACE_URL, f"{source_path}\x00{heading_text}")

        records.append(
            ProceduralMemory(
                memory_id=str(identity),
                title=safe_title,
                content=safe_content,
                commands=safe_commands,
                source_path=source_path,
                content_hash=document_hash,
            )
        )

    return records
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
# ---------------------------------------------------------------------------
|
|
176
|
+
# Markdown serializer
|
|
177
|
+
# ---------------------------------------------------------------------------
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def procedural_to_markdown(mem: ProceduralMemory, *, created: datetime) -> str:
|
|
181
|
+
"""Render a :class:`ProceduralMemory` as a vault-ready markdown note.
|
|
182
|
+
|
|
183
|
+
Frontmatter fields (manual construction, no yaml import — mirrors
|
|
184
|
+
``failure_to_markdown`` style):
|
|
185
|
+
id mem.memory_id
|
|
186
|
+
type ``"reference"``
|
|
187
|
+
created *created* normalised to tz-aware UTC ISO 8601 w/ microseconds
|
|
188
|
+
tags ``["procedure", "agents-md"]``
|
|
189
|
+
source_path JSON-encoded (safe for arbitrary vault paths)
|
|
190
|
+
content_hash mem.content_hash
|
|
191
|
+
trust mem.trust
|
|
192
|
+
|
|
193
|
+
Body: ``# {title}``, blank line, prose content, then (if commands are
|
|
194
|
+
present) a ``## Commands`` section with each command as a fenced block.
|
|
195
|
+
|
|
196
|
+
The *created* datetime is injected by the caller; this function never
|
|
197
|
+
calls ``datetime.now()``. All content is already redacted upstream.
|
|
198
|
+
|
|
199
|
+
Args:
|
|
200
|
+
mem: :class:`ProceduralMemory` to render.
|
|
201
|
+
created: Tz-aware (or naive-UTC) datetime for the ``created`` field.
|
|
202
|
+
|
|
203
|
+
Returns:
|
|
204
|
+
Vault-ready markdown string (trailing newline included).
|
|
205
|
+
"""
|
|
206
|
+
# Normalise to tz-aware UTC.
|
|
207
|
+
ts = created.replace(tzinfo=UTC) if created.tzinfo is None else created.astimezone(UTC)
|
|
208
|
+
created_str = ts.isoformat(timespec="microseconds")
|
|
209
|
+
|
|
210
|
+
fm_lines = [
|
|
211
|
+
"---",
|
|
212
|
+
f"id: {mem.memory_id}",
|
|
213
|
+
"type: reference",
|
|
214
|
+
f"created: {created_str}",
|
|
215
|
+
"tags:",
|
|
216
|
+
" - procedure",
|
|
217
|
+
" - agents-md",
|
|
218
|
+
f"source_path: {json.dumps(mem.source_path)}",
|
|
219
|
+
f"content_hash: {mem.content_hash}",
|
|
220
|
+
f"trust: {mem.trust}",
|
|
221
|
+
"---",
|
|
222
|
+
]
|
|
223
|
+
|
|
224
|
+
body_lines: list[str] = [
|
|
225
|
+
f"# {mem.title}",
|
|
226
|
+
"",
|
|
227
|
+
mem.content,
|
|
228
|
+
]
|
|
229
|
+
|
|
230
|
+
if mem.commands:
|
|
231
|
+
body_lines.append("")
|
|
232
|
+
body_lines.append("## Commands")
|
|
233
|
+
body_lines.append("")
|
|
234
|
+
for cmd in mem.commands:
|
|
235
|
+
body_lines.append("```")
|
|
236
|
+
body_lines.append(cmd)
|
|
237
|
+
body_lines.append("```")
|
|
238
|
+
body_lines.append("")
|
|
239
|
+
|
|
240
|
+
return "\n".join(fm_lines) + "\n" + "\n".join(body_lines) + "\n"
|
mneme_code/cli.py
ADDED
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
"""Command-line interface for mneme-code.
|
|
2
|
+
|
|
3
|
+
Subcommands
|
|
4
|
+
-----------
|
|
5
|
+
parse-trace [--input FILE] [--vault VAULT_ROOT] [--write] [--source LABEL]
|
|
6
|
+
[--branch NAME | --no-branch]
|
|
7
|
+
Read a CPython traceback from --input (or stdin), parse it, print a
|
|
8
|
+
redacted structured summary to stdout. With --write, write the
|
|
9
|
+
failure_to_markdown output into
|
|
10
|
+
``<vault>/code-failures/<failure_id>.md`` via atomic_write_text.
|
|
11
|
+
Branch-aware: the note records the vault's current git branch
|
|
12
|
+
(explicit --branch wins; --no-branch disables auto-detection).
|
|
13
|
+
|
|
14
|
+
No new dependencies: argparse only (stdlib).
|
|
15
|
+
|
|
16
|
+
Redaction invariant: all content is redacted by parse_traceback before
|
|
17
|
+
any output or write. The CLI may call ``datetime.now(UTC)`` — the library
|
|
18
|
+
functions never do.
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from __future__ import annotations
|
|
22
|
+
|
|
23
|
+
import argparse
|
|
24
|
+
import sys
|
|
25
|
+
from datetime import UTC, datetime
|
|
26
|
+
from pathlib import Path
|
|
27
|
+
|
|
28
|
+
from mneme_code.failure import failure_from_traceback, failure_to_markdown
|
|
29
|
+
from mneme_code.stacktrace import parse_traceback
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _cmd_parse_trace(args: argparse.Namespace) -> int:
|
|
33
|
+
"""Implementation of the ``parse-trace`` subcommand.
|
|
34
|
+
|
|
35
|
+
Returns an exit code (0 = success, 1 = not a traceback / error).
|
|
36
|
+
"""
|
|
37
|
+
# Read input.
|
|
38
|
+
input_path: str | None = getattr(args, "input", None)
|
|
39
|
+
if input_path is not None:
|
|
40
|
+
try:
|
|
41
|
+
text = Path(input_path).read_text(encoding="utf-8")
|
|
42
|
+
except OSError as exc:
|
|
43
|
+
print(f"mneme-code: cannot read input file: {exc}", file=sys.stderr)
|
|
44
|
+
return 1
|
|
45
|
+
else:
|
|
46
|
+
text = sys.stdin.read()
|
|
47
|
+
|
|
48
|
+
# Parse (redaction happens inside parse_traceback).
|
|
49
|
+
parsed = parse_traceback(text)
|
|
50
|
+
if parsed is None:
|
|
51
|
+
print("not a recognizable traceback", file=sys.stderr)
|
|
52
|
+
return 1
|
|
53
|
+
|
|
54
|
+
# Inject observed_at at the CLI boundary — the library stays pure.
|
|
55
|
+
observed_at = datetime.now(UTC)
|
|
56
|
+
source_label: str | None = getattr(args, "source", None)
|
|
57
|
+
|
|
58
|
+
failure = failure_from_traceback(parsed, observed_at=observed_at, source_label=source_label)
|
|
59
|
+
|
|
60
|
+
# Print redacted summary.
|
|
61
|
+
print(f"failure_id: {failure.failure_id}")
|
|
62
|
+
print(f"exc_type: {failure.exc_type}")
|
|
63
|
+
print(f"exc_message: {failure.exc_message}")
|
|
64
|
+
print(f"frames: {len(failure.frames)}")
|
|
65
|
+
print(f"content_hash: {failure.content_hash}")
|
|
66
|
+
if failure.source_label is not None:
|
|
67
|
+
print(f"source_label: {failure.source_label}")
|
|
68
|
+
for i, frame in enumerate(failure.frames):
|
|
69
|
+
print(f" [{i}] {frame.file_path}:{frame.line} in {frame.function}")
|
|
70
|
+
|
|
71
|
+
# Optional vault write.
|
|
72
|
+
write: bool = getattr(args, "write", False)
|
|
73
|
+
vault_raw: str | None = getattr(args, "vault", None)
|
|
74
|
+
if write:
|
|
75
|
+
if vault_raw is None:
|
|
76
|
+
print("mneme-code: --write requires --vault", file=sys.stderr)
|
|
77
|
+
return 1
|
|
78
|
+
vault_root = Path(vault_raw).resolve()
|
|
79
|
+
out_dir = vault_root / "code-failures"
|
|
80
|
+
out_path = out_dir / f"{failure.failure_id}.md"
|
|
81
|
+
# Branch-aware failure tracking: explicit --branch wins, otherwise
|
|
82
|
+
# auto-detect from the vault's git checkout. Metadata only — the
|
|
83
|
+
# failure_id and content_hash stay branch-independent.
|
|
84
|
+
branch: str | None = getattr(args, "branch", None)
|
|
85
|
+
if branch is None and not getattr(args, "no_branch", False):
|
|
86
|
+
branch = _detect_git_branch(vault_root)
|
|
87
|
+
note = failure_to_markdown(failure, branch=branch)
|
|
88
|
+
try:
|
|
89
|
+
from mneme_core.vault.atomic_write import atomic_write_text
|
|
90
|
+
|
|
91
|
+
out_dir.mkdir(parents=True, exist_ok=True)
|
|
92
|
+
atomic_write_text(out_path, note)
|
|
93
|
+
print(f"written: {out_path}")
|
|
94
|
+
except Exception as exc: # noqa: BLE001
|
|
95
|
+
print(f"mneme-code: write failed: {exc}", file=sys.stderr)
|
|
96
|
+
return 1
|
|
97
|
+
|
|
98
|
+
return 0
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def _detect_git_branch(vault_root: Path) -> str | None:
    """Return the current git branch of *vault_root*, or ``None`` if there is none.

    ``None`` is returned when git cannot be run, times out, exits non-zero,
    prints nothing or undecodable output, or reports a detached HEAD.

    Args:
        vault_root: Directory in which ``git`` is executed.

    Returns:
        The branch name, or ``None`` when no branch can be determined.
    """
    import subprocess

    try:
        proc = subprocess.run(
            ["git", "rev-parse", "--abbrev-ref", "HEAD"],
            cwd=str(vault_root),
            capture_output=True,
            text=True,
            encoding="utf-8",
            timeout=5,
            check=False,
        )
    except (OSError, UnicodeDecodeError, subprocess.TimeoutExpired):
        # git missing, bad working directory, undecodable output, or a hang.
        return None
    if proc.returncode != 0:
        return None
    name = proc.stdout.strip()
    # On a detached HEAD, ``--abbrev-ref HEAD`` prints the literal "HEAD",
    # which is not a branch name and must not be recorded as one.
    if not name or name == "HEAD":
        return None
    return name
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def main(argv: list[str] | None = None) -> None:
    """Entry point for the mneme-code console script.

    Builds the argument parser, parses the command line and dispatches to
    the selected subcommand, exiting the process with its return code.

    Args:
        argv: Argument list to parse. ``None`` (the default, as used by the
            console script) makes argparse read ``sys.argv[1:]``, which is
            the original behaviour. Passing a list lets callers drive the
            CLI programmatically.

    Raises:
        SystemExit: Always, once a subcommand has run; argparse also raises
            it for ``--help`` and for invalid arguments.
    """
    parser = argparse.ArgumentParser(
        prog="mneme-code",
        description="mneme-code: deterministic Python code-failure memory layer.",
    )
    subparsers = parser.add_subparsers(dest="command", required=True)

    # parse-trace subcommand
    pt = subparsers.add_parser(
        "parse-trace",
        help="Parse a CPython traceback and print a redacted summary.",
    )
    pt.add_argument(
        "--input",
        metavar="FILE",
        default=None,
        help="Path to a file containing the traceback text (default: stdin).",
    )
    pt.add_argument(
        "--vault",
        metavar="VAULT_ROOT",
        default=None,
        help="Vault root directory (required with --write).",
    )
    pt.add_argument(
        "--write",
        action="store_true",
        default=False,
        help="Write the failure note into <vault>/code-failures/<failure_id>.md.",
    )
    pt.add_argument(
        "--source",
        metavar="LABEL",
        default=None,
        help="Optional source label (e.g. 'ci-run-42').",
    )
    pt.add_argument(
        "--branch",
        metavar="NAME",
        default=None,
        help="Git branch recorded in the note (default: auto-detect).",
    )
    pt.add_argument(
        "--no-branch",
        action="store_true",
        default=False,
        help="Disable branch auto-detection.",
    )

    args = parser.parse_args(argv)

    if args.command == "parse-trace":
        sys.exit(_cmd_parse_trace(args))
|
mneme_code/failure.py
ADDED
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
"""FailureMemory dataclass and vault-ready markdown serialization.
|
|
2
|
+
|
|
3
|
+
Provenance invariant: every ``FailureMemory`` carries:
|
|
4
|
+
failure_id — deterministic UUID5 derived from exc_type + first frame + source_label.
|
|
5
|
+
content_hash — sha256 hex of a canonical redacted text rendering.
|
|
6
|
+
observed_at — injected by caller; never produced inside this module.
|
|
7
|
+
trust — default ``"user"`` (operator-supplied traceback).
|
|
8
|
+
confidence — default ``"EXTRACTED"`` (directly observed, no inference).
|
|
9
|
+
|
|
10
|
+
All string content on the incoming ``ParsedTraceback`` has already been
|
|
11
|
+
redacted by ``mneme_code.stacktrace.parse_traceback``; this module does
|
|
12
|
+
not re-apply redaction but must not introduce new unredacted content.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import hashlib
|
|
18
|
+
import json
|
|
19
|
+
import uuid
|
|
20
|
+
from dataclasses import dataclass, field
|
|
21
|
+
from datetime import UTC, datetime
|
|
22
|
+
|
|
23
|
+
from mneme_core.privacy import redact
|
|
24
|
+
|
|
25
|
+
from mneme_code.stacktrace import Frame, ParsedTraceback
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@dataclass(frozen=True)
|
|
29
|
+
class FailureMemory:
|
|
30
|
+
"""A provenance-labelled, redacted record of one Python failure.
|
|
31
|
+
|
|
32
|
+
Attributes:
|
|
33
|
+
failure_id Deterministic UUID5 (namespace URL) derived from
|
|
34
|
+
``exc_type``, first frame coordinates, and
|
|
35
|
+
``source_label``.
|
|
36
|
+
exc_type Exception class name (already redacted upstream).
|
|
37
|
+
exc_message Redacted exception message.
|
|
38
|
+
frames Redacted stack frames, innermost last.
|
|
39
|
+
observed_at Tz-aware UTC datetime injected by the caller.
|
|
40
|
+
source_label Optional human label identifying the failure source
|
|
41
|
+
(e.g. ``"ci-run-42"``).
|
|
42
|
+
content_hash SHA-256 hex of a canonical redacted rendering.
|
|
43
|
+
trust Provenance tier; default ``"user"``.
|
|
44
|
+
confidence Confidence label; default ``"EXTRACTED"``.
|
|
45
|
+
"""
|
|
46
|
+
|
|
47
|
+
failure_id: str
|
|
48
|
+
exc_type: str
|
|
49
|
+
exc_message: str
|
|
50
|
+
frames: tuple[Frame, ...]
|
|
51
|
+
observed_at: datetime
|
|
52
|
+
source_label: str | None
|
|
53
|
+
content_hash: str
|
|
54
|
+
trust: str = field(default="user")
|
|
55
|
+
confidence: str = field(default="EXTRACTED")
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def failure_from_traceback(
    parsed: ParsedTraceback,
    *,
    observed_at: datetime,
    source_label: str | None = None,
) -> FailureMemory:
    """Build a :class:`FailureMemory` from an already-redacted traceback.

    The result depends only on the arguments: equal inputs give the same
    ``failure_id`` and ``content_hash``. The clock is never read here.

    Args:
        parsed: Redacted :class:`ParsedTraceback`, as produced by
            ``parse_traceback``.
        observed_at: Tz-aware UTC datetime of the observation, supplied by
            the caller.
        source_label: Optional label for the failure source.

    Returns:
        The populated :class:`FailureMemory`.
    """
    # content_hash covers the canonical rendering of the redacted fields.
    digest = hashlib.sha256(_canonical_text(parsed, source_label).encode("utf-8")).hexdigest()

    # failure_id is a UUID5 over exception type, first-frame location and
    # label. An empty placeholder frame stands in when there are no frames,
    # and a missing label contributes an empty string.
    if parsed.frames:
        anchor = parsed.frames[0]
    else:
        anchor = Frame("", 0, "", None)
    label_text = source_label if source_label else ""
    identity_key = f"{parsed.exc_type}\x00{anchor.file_path}:{anchor.line}\x00{label_text}"
    identity = uuid.uuid5(uuid.NAMESPACE_URL, identity_key)

    return FailureMemory(
        failure_id=str(identity),
        exc_type=parsed.exc_type,
        exc_message=parsed.exc_message,
        frames=parsed.frames,
        observed_at=observed_at,
        source_label=source_label,
        content_hash=digest,
    )
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def failure_to_markdown(failure: FailureMemory, *, branch: str | None = None) -> str:
|
|
101
|
+
"""Render a :class:`FailureMemory` as a vault-ready markdown note.
|
|
102
|
+
|
|
103
|
+
Frontmatter fields:
|
|
104
|
+
id failure.failure_id
|
|
105
|
+
type ``"failure"``
|
|
106
|
+
created failure.observed_at as tz-aware UTC ISO 8601 with microseconds
|
|
107
|
+
tags ``["failure"]``
|
|
108
|
+
exc_type failure.exc_type
|
|
109
|
+
source_label failure.source_label (omitted when ``None``)
|
|
110
|
+
branch caller-supplied git branch (omitted when ``None``);
|
|
111
|
+
metadata only — it never participates in failure_id
|
|
112
|
+
or content_hash, so determinism is preserved
|
|
113
|
+
|
|
114
|
+
Body: redacted summary — exception message followed by frame list
|
|
115
|
+
(``file:line in func`` format).
|
|
116
|
+
|
|
117
|
+
All content is already redacted upstream; this function does not
|
|
118
|
+
re-apply redaction.
|
|
119
|
+
"""
|
|
120
|
+
# Build frontmatter manually to control field order and avoid yaml import.
|
|
121
|
+
# Normalise observed_at to tz-aware UTC so `created` is consistent with the
|
|
122
|
+
# rest of mneme regardless of the caller's tz (mirrors the temporal layer).
|
|
123
|
+
observed = failure.observed_at
|
|
124
|
+
observed = observed.replace(tzinfo=UTC) if observed.tzinfo is None else observed.astimezone(UTC)
|
|
125
|
+
created_str = observed.isoformat(timespec="microseconds")
|
|
126
|
+
fm_lines = [
|
|
127
|
+
"---",
|
|
128
|
+
f"id: {failure.failure_id}",
|
|
129
|
+
"type: failure",
|
|
130
|
+
f"created: {created_str}",
|
|
131
|
+
"tags:",
|
|
132
|
+
" - failure",
|
|
133
|
+
f"exc_type: {failure.exc_type}",
|
|
134
|
+
]
|
|
135
|
+
if failure.source_label is not None:
|
|
136
|
+
# JSON-encode so an arbitrary user label stays valid YAML.
|
|
137
|
+
fm_lines.append(f"source_label: {json.dumps(failure.source_label)}")
|
|
138
|
+
if branch is not None:
|
|
139
|
+
# Branch-aware failure tracking: redact then JSON-encode so an
|
|
140
|
+
# arbitrary branch name stays valid YAML and never leaks a
|
|
141
|
+
# <private> span embedded in an exotic branch name.
|
|
142
|
+
fm_lines.append(f"branch: {json.dumps(redact(branch))}")
|
|
143
|
+
fm_lines.append("---")
|
|
144
|
+
|
|
145
|
+
# Body
|
|
146
|
+
body_lines: list[str] = []
|
|
147
|
+
body_lines.append(f"# {failure.exc_type}")
|
|
148
|
+
body_lines.append("")
|
|
149
|
+
if failure.exc_message:
|
|
150
|
+
body_lines.append(f"> {failure.exc_message}")
|
|
151
|
+
body_lines.append("")
|
|
152
|
+
|
|
153
|
+
body_lines.append("## Stack frames")
|
|
154
|
+
body_lines.append("")
|
|
155
|
+
for frame in failure.frames:
|
|
156
|
+
body_lines.append(f"- `{frame.file_path}:{frame.line}` in `{frame.function}`")
|
|
157
|
+
|
|
158
|
+
return "\n".join(fm_lines) + "\n" + "\n".join(body_lines) + "\n"
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
# ---------------------------------------------------------------------------
|
|
162
|
+
# Internal helpers
|
|
163
|
+
# ---------------------------------------------------------------------------
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def _canonical_text(parsed: ParsedTraceback, source_label: str | None) -> str:
    """Build the newline-joined text that ``content_hash`` is computed over.

    The text is, in order: the exception type, the exception message, then
    for each frame its ``file:line:function`` location followed by its code
    context when present, and finally the source label when it is not
    ``None``.
    """
    pieces = [parsed.exc_type, parsed.exc_message]
    for entry in parsed.frames:
        location = f"{entry.file_path}:{entry.line}:{entry.function}"
        if entry.code_context is None:
            pieces.append(location)
        else:
            pieces.extend((location, entry.code_context))
    if source_label is not None:
        pieces.append(source_label)
    return "\n".join(pieces)
|
mneme_code/py.typed
ADDED
|
File without changes
|