PyPI - memir - Versions diffs - 0.3.0__py3-none-any.whl - Mend

memir 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

memir/__init__.py +22 -0
memir/autocapture.py +398 -0
memir/brain.py +1265 -0
memir/cli.py +297 -0
memir/embeddings.py +163 -0
memir/mcp_server.py +419 -0
memir/optimizer.py +150 -0
memir/py.typed +1 -0
memir/reasoner.py +110 -0
memir/reranker.py +156 -0
memir-0.3.0.dist-info/METADATA +239 -0
memir-0.3.0.dist-info/RECORD +16 -0
memir-0.3.0.dist-info/WHEEL +5 -0
memir-0.3.0.dist-info/entry_points.txt +3 -0
memir-0.3.0.dist-info/licenses/LICENSE.md +116 -0
memir-0.3.0.dist-info/top_level.txt +1 -0

memir/__init__.py ADDED Viewed

@@ -0,0 +1,22 @@
+"""Memir — the memoir your coding agent writes for itself.
+A local-first memory layer for coding agents: deterministic, zero-token writes,
+a first-class *failure* memory (never repeat a mistake), and a token-efficient
+briefing. Semantic recall runs on a local CPU model — no API keys, no cloud.
+"""
+from memir.brain import Memir, Memory
+__version__ = "0.3.0"
+__all__ = ["Memir", "Memory", "LocalEmbedder", "NLIReasoner", "__version__"]
+def __getattr__(name):
+    # Lazy import so `import memir` stays light until embeddings are used.
+    if name == "LocalEmbedder":
+        from memir.embeddings import LocalEmbedder
+        return LocalEmbedder
+    if name == "NLIReasoner":
+        from memir.reasoner import NLIReasoner
+        return NLIReasoner
+    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

memir/autocapture.py ADDED Viewed

@@ -0,0 +1,398 @@
+"""
+Auto-capture: turn raw errors (Python tracebacks, pytest output, failed shell
+commands) into structured FAILURE memories — automatically, with zero tokens.
+The A/B subagent test proved the load-bearing insight here: a strong model
+already avoids *self-evident* and *textbook* mistakes on its own, so storing
+those is pure noise (bloat). The defensible value of the brain is remembering
+*non-obvious, project-specific* failures that leave no trace in the code and
+that no general model could guess (silent drops, env/version quirks, integration
+gotchas, flaky races, data-loss traps).
+So this module does two things:
+  1. PARSE an error into (attempt, reason, lesson, signature).
+  2. SCORE how worth-remembering it is (novelty / non-obviousness), and let the
+     caller skip the boring ones.
+Pure standard library. No network, no tokens.
+"""
+from __future__ import annotations
+import re
+import traceback as _tb
+from dataclasses import dataclass, field
+# ---------------------------------------------------------------------------
+# Obviousness model
+# ---------------------------------------------------------------------------
+# Exception types an agent fixes the instant it sees the traceback. Remembering
+# them adds no value (the next session would catch them just as fast) and only
+# bloats the store. Default novelty for these is LOW.
+_TEXTBOOK_EXC = {
+    "SyntaxError", "IndentationError", "TabError",
+    "NameError", "UnboundLocalError",
+    "ImportError", "ModuleNotFoundError",
+    "AttributeError",
+}
+# Pure-syntax errors can NEVER carry a meaningful runtime signal — their message
+# just echoes source code — so they are always self-evident. Hard-capped.
+_PURE_SYNTAX_EXC = {"SyntaxError", "IndentationError", "TabError"}
+# STRONG cues: each one alone marks a failure as the expensive, non-obvious kind
+# the brain exists to remember (silent drops, data-loss, integration, races).
+_STRONG_CUES = {
+    "silent", "silently", "no error", "empty body", "swallowed",
+    "returned 200", "status 200", "no exception", "ok response",
+    "timeout", "timed out", "connection", "refused", "reset", "unreachable",
+    "dns", "tls", "ssl", "certificate", "deadlock", "race", "concurren",
+    "flaky", "intermittent", "corrupt", "data loss", "overwrite", "reconcile",
+    "inconsistent", "leak", "injection", "overflow", "truncat", "off-by",
+    "off by", "dst", "throttl", "quota", "deprecated", "mismatch", "gateway",
+    "502", "503", "504", "security", "rate limit", "precision", "rounding",
+    "duplicate",
+}
+# WEAK cues: suggestive but noisy. One alone is not enough — needs corroboration
+# (another weak cue, a strong cue, or a runtime exception type).
+_WEAK_CUES = {
+    "env", "environment", "config", "version", "locale", "encoding",
+    "timezone", "utc", "path", "permission", "readonly", "read-only",
+    "limit", "proxy", "lock",
+}
+# Exception types that are inherently runtime/integration in nature => higher base.
+_RUNTIME_EXC = {
+    "TimeoutError", "ConnectionError", "ConnectionResetError",
+    "ConnectionRefusedError", "BrokenPipeError", "OSError", "IOError",
+    "PermissionError", "RuntimeError", "AssertionError",
+    "UnicodeDecodeError", "UnicodeEncodeError", "MemoryError",
+    "RecursionError",
+}
+_BUILTIN_TYPES = {
+    "str", "bytes", "bytearray", "int", "float", "bool", "complex", "list",
+    "dict", "tuple", "set", "frozenset", "nonetype", "range", "object", "type",
+    "function", "module", "generator",
+}
+_NUMERIC_MISMATCH = re.compile(r"\b\d[\d,_.]*\b.*\b(!=|==|expected|actual|got|vs)\b",
+                               re.IGNORECASE)
+# canonical textbook message forms — the fingerprints of a self-evident mistake
+_RE_NAMEERR = re.compile(r"name '[^']+' is not defined", re.IGNORECASE)
+_RE_UNBOUND = re.compile(r"referenced before assignment", re.IGNORECASE)
+_RE_NOMODULE = re.compile(r"no module named '[^']+'", re.IGNORECASE)
+_RE_SYNTAX = re.compile(r"invalid syntax|unexpected (eof|indent)|expected ':'",
+                        re.IGNORECASE)
+_RE_ATTR = re.compile(r"'(?P<obj>[A-Za-z_][\w.]*)' object has no attribute '[\w.]+'")
+# integration / release-skew signals that should rescue a textbook-typed error
+_RE_IMPORT_NAME = re.compile(r"cannot import name '[\w.]+' from '[\w.]+'", re.IGNORECASE)
+def _count_cues(blob: str, cues: set[str]) -> int:
+    n = 0
+    for c in cues:
+        if c.replace(" ", "").isalnum():  # plain word(s) -> require boundaries
+            if re.search(rf"\b{re.escape(c)}\b", blob):
+                n += 1
+        elif c in blob:                    # phrase with punctuation -> substring
+            n += 1
+    return n
+def _domain_lift(exc_type: str, message: str) -> float | None:
+    """A textbook-typed exception that is actually a project integration /
+    contract-drift bug (worth remembering). Returns a target score, else None.
+    Narrow on purpose: only AttributeError on a *domain* object (CamelCase, not
+    a builtin) and ImportError of a specific name from an internal module — the
+    cases the A/B red-team showed get silently dropped at 0.15.
+    """
+    m = _RE_ATTR.search(message)
+    if m:
+        obj = m.group("obj")
+        if obj.lower() not in _BUILTIN_TYPES and obj[:1].isupper():
+            return 0.55  # schema / contract drift between collaborating objects
+    if _RE_IMPORT_NAME.search(message):
+        return 0.55      # release/version skew between internal modules
+    return None
+@dataclass
+class ParsedError:
+    """One failure, parsed into the fields a FAILURE memory is built from.
+    Instances are produced by the parsers in this module; ``signature`` holds
+    the normalised text used to recognise the same failure across runs and
+    ``novelty`` the score from ``score_novelty`` (parsers may raise it).
+    Only ``attempt`` and ``reason`` are required.
+    """
+    attempt: str                 # what was being done / the failing thing
+    reason: str                  # the error message / why it failed
+    lesson: str = ""             # actionable takeaway (may be empty -> caller fills)
+    signature: str = ""          # stable id for dedup across runs
+    exc_type: str = ""           # bare exception class name, or a parser-chosen label
+    novelty: float = 0.5         # 0..1, how worth-remembering this is
+    source: str = "error"        # error | traceback | pytest | command
+    tags: list[str] = field(default_factory=list)  # fresh list per instance
+# ---------------------------------------------------------------------------
+# Normalisation — strip volatile bits so the same failure dedups across runs
+# ---------------------------------------------------------------------------
+_HEX_ADDR = re.compile(r"0x[0-9a-fA-F]+")
+_TMP_PATH = re.compile(r"[A-Za-z]:\\[^\s'\"]*|/(?:tmp|var)/[^\s'\"]*")
+_LONG_NUM = re.compile(r"\b\d{4,}\b")
+_LINE_NO = re.compile(r"\bline \d+\b", re.IGNORECASE)
+_WS = re.compile(r"\s+")
+def _normalize(text: str) -> str:
+    t = text or ""
+    t = _HEX_ADDR.sub("0xADDR", t)
+    t = _TMP_PATH.sub("<path>", t)
+    t = _LINE_NO.sub("line N", t)
+    t = _LONG_NUM.sub("N", t)
+    return _WS.sub(" ", t).strip()
+def score_novelty(exc_type: str, message: str) -> float:
+    """How worth-remembering is this failure? 0 = textbook/self-evident,
+    1 = expensive, non-obvious, project-specific. The A/B test showed the brain
+    only earns its keep on the high end of this scale.
+    Design (hardened after an adversarial red-team of the heuristic):
+      * pure syntax errors are always self-evident -> hard floor;
+      * cue words match on WORD BOUNDARIES (so "joinpath" no longer trips
+        "path", and "timeout=30" in a SyntaxError no longer counts);
+      * STRONG cues are decisive; a single WEAK cue only nudges;
+      * canonical textbook message forms are capped low UNLESS a domain/
+        integration-drift signal rescues them (AttributeError on a domain
+        object, ImportError of a name from an internal module).
+    """
+    message = message or ""
+    if exc_type in _PURE_SYNTAX_EXC:
+        return 0.1
+    blob = _normalize(f"{exc_type} {message}").lower()
+    strong = _count_cues(blob, _STRONG_CUES)
+    weak = _count_cues(blob, _WEAK_CUES)
+    numeric = bool(_NUMERIC_MISMATCH.search(message))
+    if exc_type in _RUNTIME_EXC:
+        base = 0.6
+    elif exc_type in _TEXTBOOK_EXC:
+        base = 0.15
+    else:
+        base = 0.5  # unknown / custom / domain-specific exception -> borderline keep
+    score = base
+    if strong >= 1:
+        score = max(score, 0.7 + min(0.25, 0.08 * (strong - 1)))
+    elif weak >= 2:
+        score = max(score, 0.6)
+    elif weak == 1:
+        score = max(score, base + 0.08)
+    if numeric:
+        score = max(score, 0.8)
+    # integration / contract-drift can rescue a textbook-typed exception
+    lift = _domain_lift(exc_type, message)
+    if lift is not None:
+        score = max(score, lift)
+    # otherwise, a canonical textbook message with no strong evidence is capped
+    canonical = bool(
+        _RE_NAMEERR.search(message) or _RE_UNBOUND.search(message)
+        or _RE_NOMODULE.search(message) or _RE_SYNTAX.search(message)
+        or _RE_ATTR.search(message)
+    )
+    if canonical and lift is None and strong == 0 and not numeric:
+        score = min(score, 0.3)
+    return max(0.0, min(1.0, score))
+# ---------------------------------------------------------------------------
+# Parsers
+# ---------------------------------------------------------------------------
+# Cap how much raw text the regex parsers ever scan — a defensive bound against
+# pathological inputs (huge logs / adversarial strings). Error signal lives in
+# the last lines of a traceback anyway.
+_MAX_PARSE_CHARS = 20_000
+def _clip(text: str | None) -> str:
+    text = text or ""
+    if len(text) > _MAX_PARSE_CHARS:
+        # keep the tail — the exception line and last frame are at the end
+        return text[-_MAX_PARSE_CHARS:]
+    return text
+def parse_exception(exc: BaseException, attempt: str | None = None) -> ParsedError:
+    """Turn a live exception object into a ParsedError."""
+    exc_type = type(exc).__name__
+    message = str(exc).strip() or exc_type
+    # last in-app frame for context
+    frame_desc = ""
+    tb = exc.__traceback__
+    last = None
+    for fr in _tb.extract_tb(tb):
+        last = fr
+    if last is not None:
+        fname = last.filename.replace("\\", "/").split("/")[-1]
+        frame_desc = f"{fname}:{last.name}()"
+    att = attempt or (f"Ran {frame_desc} → {exc_type}" if frame_desc
+                      else f"Operation raised {exc_type}")
+    sig = _normalize(f"{exc_type}: {message}")
+    return ParsedError(
+        attempt=att,
+        reason=f"{exc_type}: {message}",
+        signature=sig,
+        exc_type=exc_type,
+        novelty=score_novelty(exc_type, message),
+        source="traceback",
+        tags=["auto", "exception", exc_type],
+    )
+# Final "ExcType: message" line of a traceback. The type must end in Error or
+# Exception (dotted module prefixes allowed), end in Warning (no dots), or be
+# one of the three listed names; the colon is optional and the rest of the line
+# is captured as the message (possibly empty).
+_PY_EXC_LINE = re.compile(
+    r"^(?P<type>[A-Za-z_][\w.]*Error|[A-Za-z_][\w.]*Exception|[A-Za-z_]\w*Warning"
+    r"|KeyboardInterrupt|StopIteration|SystemExit):?\s*(?P<msg>.*)$"
+)
+# A traceback frame header: File "<file>", line <n>, in <function>.
+_PY_FRAME = re.compile(r'^\s*File "(?P<file>[^"]+)", line (?P<line>\d+), in (?P<fn>.+)$')
+def parse_traceback_text(text: str, attempt: str | None = None) -> ParsedError | None:
+    """Parse a Python traceback captured as text (e.g. from a subprocess)."""
+    text = _clip(text)
+    if not text or "Traceback (most recent call last)" not in text:
+        # still try: maybe it's a bare 'XxxError: msg' line
+        return _parse_bare_error_line(text, attempt)
+    lines = [ln.rstrip("\n") for ln in text.splitlines() if ln.strip()]
+    # last frame + final exception line
+    last_frame = None
+    for ln in lines:
+        m = _PY_FRAME.match(ln)
+        if m:
+            last_frame = m
+    exc_type, message = "", ""
+    for ln in reversed(lines):
+        m = _PY_EXC_LINE.match(ln.strip())
+        if m:
+            exc_type = m.group("type").split(".")[-1]
+            message = m.group("msg").strip()
+            break
+    if not exc_type:
+        return _parse_bare_error_line(text, attempt)
+    frame_desc = ""
+    if last_frame:
+        fname = last_frame.group("file").replace("\\", "/").split("/")[-1]
+        frame_desc = f"{fname}:{last_frame.group('fn')}()"
+    att = attempt or (f"Ran {frame_desc} → {exc_type}" if frame_desc
+                      else f"Code raised {exc_type}")
+    sig = _normalize(f"{exc_type}: {message}")
+    return ParsedError(
+        attempt=att,
+        reason=f"{exc_type}: {message}".strip(),
+        signature=sig,
+        exc_type=exc_type,
+        novelty=score_novelty(exc_type, message),
+        source="traceback",
+        tags=["auto", "traceback", exc_type],
+    )
+def _parse_bare_error_line(text: str, attempt: str | None) -> ParsedError | None:
+    if not text:
+        return None
+    for ln in text.splitlines():
+        m = _PY_EXC_LINE.match(ln.strip())
+        if m:
+            exc_type = m.group("type").split(".")[-1]
+            message = m.group("msg").strip()
+            sig = _normalize(f"{exc_type}: {message}")
+            return ParsedError(
+                attempt=attempt or f"Code raised {exc_type}",
+                reason=f"{exc_type}: {message}".strip(),
+                signature=sig,
+                exc_type=exc_type,
+                novelty=score_novelty(exc_type, message),
+                source="error",
+                tags=["auto", "error", exc_type],
+            )
+    return None
+_PYTEST_FAIL = re.compile(r"^(?:FAILED|ERROR)\s+(?P<test>[\w./:\[\]-]+)(?:\s+-\s+(?P<msg>.*))?$")
+_PYTEST_ASSERT = re.compile(r"^E\s+(?P<msg>.*)$")
+def parse_pytest(text: str) -> list[ParsedError]:
+    """Extract one ParsedError per failing test from pytest output."""
+    text = _clip(text)
+    if not text:
+        return []
+    out: list[ParsedError] = []
+    lines = text.splitlines()
+    # 1) the short test summary info lines: "FAILED path::test - AssertionError: ..."
+    for ln in lines:
+        m = _PYTEST_FAIL.match(ln.strip())
+        if not m:
+            continue
+        test = m.group("test")
+        msg = (m.group("msg") or "").strip()
+        exc_type = ""
+        em = re.match(r"([A-Za-z_][\w.]*(?:Error|Exception)):?\s*(.*)", msg)
+        if em:
+            exc_type = em.group(1).split(".")[-1]
+            msg = em.group(2).strip() or msg
+        sig = _normalize(f"pytest {test} {exc_type}: {msg}")
+        nov = score_novelty(exc_type, msg)
+        # a failing *test* is itself a signal of something non-trivial; floor it up
+        nov = max(nov, 0.55)
+        out.append(ParsedError(
+            attempt=f"Test failed: {test}",
+            reason=msg or "test failed",
+            signature=sig,
+            exc_type=exc_type or "TestFailure",
+            novelty=nov,
+            source="pytest",
+            tags=["auto", "pytest"],
+        ))
+    return out
+def parse_command(stderr: str, returncode: int = 1,
+                  command: str | None = None) -> ParsedError | None:
+    """Parse a failed shell/subprocess command into a ParsedError.
+    Prefers a Python traceback if present; otherwise uses the last meaningful
+    stderr line as the reason.
+    """
+    if returncode == 0:
+        return None
+    stderr = _clip(stderr)
+    tb = parse_traceback_text(stderr or "")
+    if tb is not None:
+        if command:
+            tb.attempt = f"Ran `{command}` (exit {returncode})"
+            tb.tags = list(dict.fromkeys([*tb.tags, "command"]))
+        return tb
+    # fall back to last non-empty stderr line
+    last = ""
+    for ln in (stderr or "").splitlines():
+        if ln.strip():
+            last = ln.strip()
+    if not last and returncode != 0:
+        last = f"command exited with code {returncode}"
+    sig = _normalize(f"cmd {command or ''} :: {last}")
+    return ParsedError(
+        attempt=f"Ran `{command}` (exit {returncode})" if command
+                else f"A command failed (exit {returncode})",
+        reason=last,
+        signature=sig,
+        exc_type="CommandError",
+        novelty=score_novelty("CommandError", last),
+        source="command",
+        tags=["auto", "command"],
+    )