PyPI - mem-context - Versions diffs - 0.1.0__py3-none-any.whl - Mend

mem-context 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (35) hide show

mem_context/__init__.py +3 -0
mem_context/capture/__init__.py +196 -0
mem_context/capture/formats.py +319 -0
mem_context/capture/wrapper.py +111 -0
mem_context/cli.py +774 -0
mem_context/config.py +283 -0
mem_context/consolidation/__init__.py +0 -0
mem_context/consolidation/ollama.py +427 -0
mem_context/consolidation/pipeline.py +229 -0
mem_context/consolidation/templates.py +78 -0
mem_context/data/__init__.py +1 -0
mem_context/data/agents/memory-manager.md +82 -0
mem_context/data/skills/mem-delete/SKILL.md +39 -0
mem_context/data/skills/mem-forget/SKILL.md +38 -0
mem_context/data/skills/mem-purge/SKILL.md +55 -0
mem_context/data/skills/mem-recall/SKILL.md +41 -0
mem_context/data/skills/mem-remember/SKILL.md +41 -0
mem_context/data/skills/mem-status/SKILL.md +40 -0
mem_context/install.py +356 -0
mem_context/mcp/__init__.py +0 -0
mem_context/mcp/server.py +473 -0
mem_context/ollama_provision.py +139 -0
mem_context/provision.py +575 -0
mem_context/retrieval/__init__.py +0 -0
mem_context/retrieval/embedder.py +166 -0
mem_context/retrieval/pipeline.py +142 -0
mem_context/retrieval/scoring.py +144 -0
mem_context/scope.py +135 -0
mem_context/storage/__init__.py +0 -0
mem_context/storage/lance.py +485 -0
mem_context/storage/schemas.py +127 -0
mem_context-0.1.0.dist-info/METADATA +21 -0
mem_context-0.1.0.dist-info/RECORD +35 -0
mem_context-0.1.0.dist-info/WHEEL +4 -0
mem_context-0.1.0.dist-info/entry_points.txt +4 -0

mem_context/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+"""mem-context — Temporal Memory MCP Server."""
+# Package version string.
+__version__ = "0.1.0"

mem_context/capture/__init__.py ADDED Viewed

@@ -0,0 +1,196 @@
+"""Capture importer — converts conversation transcripts into memories."""
+from __future__ import annotations
+import logging
+import sys
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any
+from ..retrieval.embedder import Embedder
+from ..scope import detect_scope
+from ..storage.lance import LanceMemoryStore
+from .formats import parse_transcript
+# Module-level logger, named after this module via __name__.
+logger = logging.getLogger(__name__)
+class CaptureImporter:
+    """Import conversations from various sources into the memory store.
+    Every entry point normalizes its input to the universal JSON shape and
+    delegates to ``from_json``, which archives the whole conversation and
+    stores one episodic memory per message chunk.
+    """
+    def __init__(self, store: LanceMemoryStore, embedder: Embedder):
+        self._store = store
+        self._embedder = embedder
+    async def from_json(self, data: dict[str, Any], scope: str | None = None) -> list[str]:
+        """Import from structured JSON (universal format).
+        Expected format:
+        {
+            "messages": [
+                {"role": "user", "content": "..."},
+                {"role": "assistant", "content": "..."}
+            ],
+            "metadata": {"client": "claude-code", "session_id": "..."}
+        }
+        Args:
+            data: Conversation in the format above. A missing or null
+                "messages" / "metadata" entry is treated as empty.
+            scope: Scope override. Auto-detected if None.
+        Returns:
+            List of created memory IDs (empty when there are no messages).
+        """
+        # ``or`` also covers an explicit JSON null, which .get(key, default)
+        # would pass through and crash on below.
+        messages = data.get("messages") or []
+        metadata = data.get("metadata") or {}
+        session_id = metadata.get("session_id", "")
+        if not messages:
+            logger.warning("Capture: no messages to import")
+            return []
+        # Detect the scope once per import instead of once per chunk: the
+        # result is loop-invariant and is needed for scope_name even when the
+        # caller overrides ``scope``.
+        detected = detect_scope()
+        if scope is None:
+            scope = detected.scope
+        scope_name = detected.scope_name
+        created = []
+        # 1. Store full conversation in archive
+        full_text = _messages_to_text(messages)
+        conv_id = await self._store.put_conversation({
+            "scope": scope,
+            "date": datetime.now(timezone.utc).strftime("%Y-%m-%d"),
+            "full_text": full_text,
+            "summary": _auto_summary(messages),
+            "messages": messages,
+        })
+        logger.debug("Conversation archived: %s", conv_id)
+        # 2. Chunking: one episodic memory per group of messages
+        for chunk in _chunk_by_message_group(messages):
+            chunk_text = _messages_to_text(chunk)
+            chunk_summary = _auto_summary(chunk)
+            # Embed the summary plus the first 1000 characters of the chunk
+            embedding = await self._embedder.embed_query(chunk_summary + " " + chunk_text[:1000])
+            mid = await self._store.put({
+                "type": "episodic",
+                "scope": scope,
+                "scope_name": scope_name,
+                "summary": chunk_summary,
+                "content": chunk_text,
+                "weight": 0.5,
+                "tags": _detect_tags(chunk_text),
+                "embedding": embedding,
+                "source_session": session_id,
+            })
+            created.append(mid)
+        logger.info("Capture: %d messages → %d chunks stored (conv=%s)", len(messages), len(created), conv_id)
+        return created
+    async def from_transcript(self, path: str, client: str = "generic", scope: str | None = None) -> list[str]:
+        """Import from a transcript file according to client format.
+        Args:
+            path: Path to transcript file (read as UTF-8).
+            client: Client identifier (claude-code, opencode, generic, json).
+            scope: Scope override. Auto-detected if None.
+        Returns:
+            List of created memory IDs.
+        """
+        text = Path(path).read_text(encoding="utf-8")
+        data = parse_transcript(text, client)
+        # Enrich metadata with where the transcript came from
+        data.setdefault("metadata", {})
+        data["metadata"]["client"] = client
+        data["metadata"]["source_file"] = str(path)
+        return await self.from_json(data, scope=scope)
+    async def from_pipe(self, scope: str | None = None, *, client: str = "generic") -> list[str]:
+        """Import from stdin — universal fallback.
+        Args:
+            scope: Scope override. Auto-detected if None.
+            client: Client identifier used to pick the transcript parser.
+        Returns:
+            List of created memory IDs.
+        """
+        text = sys.stdin.read()
+        data = parse_transcript(text, client)
+        data.setdefault("metadata", {})
+        data["metadata"]["client"] = client
+        data["metadata"]["source"] = "pipe"
+        return await self.from_json(data, scope=scope)
+# ── Helpers ──
+def _messages_to_text(messages: list[dict[str, str]]) -> str:
+    """Render messages as "Role: content" paragraphs separated by blank lines.
+    A message without a role is labelled "Unknown"; missing content is empty.
+    """
+    return "\n\n".join(
+        f"{entry.get('role', 'unknown').capitalize()}: {entry.get('content', '')}"
+        for entry in messages
+    )
+def _auto_summary(messages: list[dict[str, str]], max_len: int = 160) -> str:
+    """Generate an auto-summary from the first non-blank user message.
+    Newlines are flattened to spaces and the result is clipped to ``max_len``
+    characters (ending in "..." when clipped).  When no user message has any
+    text, the first two messages are combined (80 characters each, joined
+    with " | ", blank ones skipped).  Returns "Untitled session" when nothing
+    usable is found.
+    """
+    def _clip(text: str) -> str:
+        # Reserve three characters for the ellipsis.
+        return text[:max_len - 3] + "..." if len(text) > max_len else text
+    def _flat(msg: dict[str, str]) -> str:
+        # ``or ""`` tolerates an explicit null content value.
+        return (msg.get("content") or "").replace("\n", " ")
+    for msg in messages:
+        if msg.get("role") != "user":
+            continue
+        text = _flat(msg)
+        # A blank user message would otherwise yield an empty summary and
+        # bypass the fallbacks below.
+        if text.strip():
+            return _clip(text)
+    # Fallback: combined first exchange
+    snippets = [_flat(msg)[:80] for msg in messages[:2]]
+    combined = " | ".join(s for s in snippets if s.strip())
+    return _clip(combined) or "Untitled session"
+def _chunk_by_message_group(messages: list[dict[str, str]], max_per_chunk: int = 15) -> list[list[dict[str, str]]]:
+    """Split messages into consecutive chunks of at most ``max_per_chunk``.
+    Simple heuristic: fixed-size slices in original order; a conversation
+    that already fits is returned as a single chunk (the same list object).
+    For better chunking, use host model or Ollama via ConsolidationPipeline.
+    """
+    total = len(messages)
+    if total <= max_per_chunk:
+        return [messages]
+    return [
+        messages[start:start + max_per_chunk]
+        for start in range(0, total, max_per_chunk)
+    ]
+def _detect_tags(text: str) -> list[str]:
+    """Auto-detect up to five tags from conversation content.
+    A tag is assigned when any of its keywords occurs as a case-insensitive
+    substring of ``text``; tags come back in table order.
+    """
+    # NOTE: plain substring matching, so e.g. "test" also matches "latest".
+    keyword_table = {
+        "architecture": ["architecture", "design pattern", "microservice", "monolith"],
+        "debugging": ["debug", "memory leak", "segfault", "stack trace", "error"],
+        "testing": ["test", "pytest", "unit test", "integration test", "mock"],
+        "git": ["git", "commit", "branch", "merge", "rebase"],
+        "database": ["database", "sql", "sqlite", "postgres", "mysql"],
+        "api": ["api", "rest", "graphql", "endpoint", "http"],
+        "python": ["python", "pip", "venv", "asyncio", "pydantic"],
+        "rust": ["rust", "cargo", "borrow", "lifetime", "trait"],
+        "c++": ["c++", "cpp", "cmake", "template", "pointer"],
+        "refactoring": ["refactor", "clean up", "simplify", "extract method"],
+    }
+    haystack = text.lower()
+    matched = [
+        label
+        for label, needles in keyword_table.items()
+        if any(needle in haystack for needle in needles)
+    ]
+    return matched[:5]  # Max 5 auto-tags

mem_context/capture/formats.py ADDED Viewed

@@ -0,0 +1,319 @@
+"""Per-client transcript format parsers.
+Each parser takes raw transcript text and returns a standardized dict:
+{
+    "messages": [{"role": "user"|"assistant", "content": "..."}, ...],
+    "metadata": {"client": "claude-code"|"opencode"|"generic", "session_id": "..."},
+}
+"""
+from __future__ import annotations
+import json
+import re
+from typing import Any
+def parse_transcript(text: str, client: str) -> dict[str, Any]:
+    """Dispatch a raw transcript to the parser registered for ``client``.
+    Args:
+        text: Raw transcript file content.
+        client: Client identifier (claude-code, opencode, generic, json).
+            Unregistered identifiers are handled by the generic parser.
+    Returns:
+        Standardized dict with messages and metadata.
+    """
+    handler = PARSERS.get(client)
+    if handler is None:
+        handler = parse_generic
+    return handler(text)
+def parse_json(text: str) -> dict[str, Any]:
+    """Parse JSON transcript (already standardized).
+    Args:
+        text: JSON document whose top level is an object with a "messages" key.
+    Returns:
+        The parsed object, with "metadata" present as a dict and
+        metadata["client"] defaulting to "json".
+    Raises:
+        json.JSONDecodeError: ``text`` is not valid JSON.
+        ValueError: the document is not an object or lacks "messages".
+    """
+    data = json.loads(text)
+    # Valid JSON can also be a scalar, string, or array; reject those with
+    # the ValueError callers already handle instead of a TypeError or
+    # AttributeError from the checks below.
+    if not isinstance(data, dict):
+        raise ValueError("JSON transcript must be an object with a 'messages' key")
+    if "messages" not in data:
+        raise ValueError("JSON transcript must have 'messages' key")
+    # Replace a null / non-object metadata value so the default can be set.
+    if not isinstance(data.get("metadata"), dict):
+        data["metadata"] = {}
+    data["metadata"].setdefault("client", "json")
+    return data
+def parse_generic(text: str) -> dict[str, Any]:
+    """Generic parser — tries to detect format from content.
+    Heuristics:
+    1. Try JSON first
+    2. Look for "User:" and "Assistant:" style prefixes (needs >= 2 messages)
+    3. Fallback: single assistant message with full text
+    """
+    # Try JSON.  TypeError / AttributeError cover valid JSON whose top level
+    # is not an object (e.g. "42", "null", a bare array), so such input falls
+    # through to the text heuristics instead of crashing.
+    try:
+        return parse_json(text)
+    except (json.JSONDecodeError, ValueError, TypeError, AttributeError):
+        pass
+    # Try "Role: content" pattern
+    messages = _parse_role_prefix(text, r"(?m)^(User|Assistant|Human|AI|System):\s*(.*)")
+    if len(messages) >= 2:
+        return {
+            "messages": messages,
+            "metadata": {"client": "generic", "pattern": "role_prefix"},
+        }
+    # Fallback: entire text as one assistant message
+    return {
+        "messages": [{"role": "assistant", "content": text.strip()}],
+        "metadata": {"client": "generic", "pattern": "fallback"},
+    }
+def parse_claude_code(text: str) -> dict[str, Any]:
+    """Parse Claude Code transcript format.
+    Claude Code transcripts are JSONL (one JSON object per line) with
+    ``type`` discriminator fields.  We extract ``user`` and ``assistant``
+    entries and convert them to the standardized message format.
+    Falls back to plain JSON, role-prefix, and generic parsing.
+    """
+    # 1 ── JSONL (native Claude Code transcript) ──
+    messages = _try_jsonl(text)
+    if messages:
+        return {
+            "messages": messages,
+            "metadata": {"client": "claude-code", "pattern": "jsonl"},
+        }
+    # 2 ── Plain JSON ──
+    try:
+        data = json.loads(text)
+    except (json.JSONDecodeError, ValueError):
+        data = None
+    # Check the top-level type first: valid JSON may be a scalar or string,
+    # for which a bare ``"messages" in data`` raises or misfires.
+    if isinstance(data, dict) and "messages" in data:
+        if not isinstance(data.get("metadata"), dict):
+            data["metadata"] = {}
+        data["metadata"]["client"] = "claude-code"
+        return data
+    # Might be a different JSON structure — try extracting from an array
+    if isinstance(data, list):
+        messages = []
+        for entry in data:
+            if isinstance(entry, dict):
+                role = entry.get("role", entry.get("type", "assistant"))
+                content = entry.get("content", entry.get("text", str(entry)))
+            else:
+                # Non-object entries carry no role; keep their text.
+                role, content = "assistant", entry
+            messages.append({"role": _normalize_role(str(role)), "content": str(content)})
+        if messages:
+            return {"messages": messages, "metadata": {"client": "claude-code"}}
+    # 3 ── Role-prefix text ──
+    # The role must be a *capturing* group: _parse_role_prefix reads the role
+    # from group 1 and the first content line from group 2.
+    messages = _parse_role_prefix(text, r"(?m)^(User|Assistant|Human|Claude):\s*(.*)")
+    if len(messages) >= 2:
+        return {
+            "messages": messages,
+            "metadata": {"client": "claude-code", "pattern": "role_prefix"},
+        }
+    # 4 ── Fallback to generic ──
+    return parse_generic(text)
+def parse_opencode(text: str) -> dict[str, Any]:
+    """Parse OpenCode transcript format.
+    Tries, in order: a JSON object with a "messages" key, "Role: content"
+    prefixed text (needs at least two messages), then the generic parser.
+    """
+    # Try JSON first
+    try:
+        data = json.loads(text)
+    except (json.JSONDecodeError, ValueError):
+        data = None
+    # Only a JSON object can be a standardized transcript; scalars, strings
+    # and arrays fall through to the text heuristics.
+    if isinstance(data, dict) and "messages" in data:
+        if not isinstance(data.get("metadata"), dict):
+            data["metadata"] = {}
+        data["metadata"]["client"] = "opencode"
+        return data
+    # OpenCode text format.  The role must be a *capturing* group:
+    # _parse_role_prefix reads the role from group 1 and the first content
+    # line from group 2.
+    messages = _parse_role_prefix(text, r"(?m)^(User|Assistant|System):\s*(.*)")
+    if len(messages) >= 2:
+        return {
+            "messages": messages,
+            "metadata": {"client": "opencode", "pattern": "role_prefix"},
+        }
+    return parse_generic(text)
+# ── Parser registry ──
+# Maps the client identifier accepted by parse_transcript() to its parser;
+# identifiers missing from this table fall back to parse_generic there.
+PARSERS: dict[str, Any] = {
+    "claude-code": parse_claude_code,
+    "opencode": parse_opencode,
+    "generic": parse_generic,
+    "json": parse_json,
+}
+# ── Helpers ──
+def _try_jsonl(text: str) -> list[dict[str, str]]:
+    """Try to parse text as JSONL (one JSON object per line).
+    Detects Claude Code's native transcript format where each line is a
+    JSON object with a ``type`` discriminator (``user``, ``assistant``,
+    ``system``).  Returns standardized message list, or an empty list when
+    the text is not JSONL or contains no message with text content.
+    """
+    lines = [line for line in text.split("\n") if line.strip()]
+    # Heuristic: at least 2 non-blank lines are required for JSONL detection
+    if len(lines) < 2:
+        return []
+    parsed = []
+    for line in lines:
+        try:
+            obj = json.loads(line)
+        except (json.JSONDecodeError, ValueError):
+            # Each line must be valid JSON for JSONL format
+            return []
+        # ...and must be an object: lines such as "1" or "[]" parse as JSON
+        # but are not transcript records.
+        if not isinstance(obj, dict):
+            return []
+        parsed.append(obj)
+    msg_types = {"user", "assistant", "system"}
+    messages: list[dict[str, str]] = []
+    for obj in parsed:
+        typ = obj.get("type")
+        # Records without a recognized message type are skipped.
+        if not isinstance(typ, str) or typ not in msg_types:
+            continue
+        inner = obj.get("message")
+        if not isinstance(inner, dict):
+            inner = {}
+        # Fall back to the record type when the inner role is missing/null.
+        role = _normalize_role(str(inner.get("role") or typ))
+        content = _extract_jsonl_content(inner.get("content", ""))
+        if not content:
+            continue
+        messages.append({"role": role, "content": content})
+    return messages
+def _extract_jsonl_content(content: Any) -> str:
+    """Extract text from Claude Code JSONL content (string or content-block array).
+    Claude Code assistant messages use a content-block array:
+        [{"type": "text", "text": "..."}, {"type": "tool_use", ...}, ...]
+    User messages use a plain string.
+    Returns the stripped text, one line per rendered block; "" when there is
+    nothing to extract (including a null content value).
+    """
+    if content is None:
+        # A JSON null must not be stringified into the literal text "None".
+        return ""
+    if isinstance(content, str):
+        return content.strip()
+    if isinstance(content, list):
+        parts: list[str] = []
+        for block in content:
+            if not isinstance(block, dict):
+                continue
+            btype = block.get("type", "")
+            if btype == "text":
+                t = str(block.get("text") or "").strip()
+                # Skip whitespace-only blocks so no empty lines are emitted.
+                if t:
+                    parts.append(t)
+            elif btype == "thinking":
+                # Thinking blocks are internal reasoning — include but de-emphasize
+                t = str(block.get("thinking") or "").strip()
+                if t:
+                    parts.append(f"[thinking] {t}")
+            elif btype == "tool_use":
+                # Summarize tool calls: name + brief input
+                name = block.get("name", "tool")
+                inp = block.get("input", {})
+                if isinstance(inp, dict):
+                    brief = _summarize_tool_input(name, inp)
+                    parts.append(f"[tool_call: {brief}]")
+                else:
+                    parts.append(f"[tool_call: {name}]")
+            elif btype == "tool_result":
+                # Skip large tool results, just note them
+                parts.append("[tool_result]")
+            # Any other block type is ignored.
+        return "\n".join(parts)
+    return str(content).strip()
+def _summarize_tool_input(name: str, inp: dict) -> str:
+    """Create a brief summary of a tool call for embedding.
+    The summary is the tool name followed by at most the first two input
+    entries: string values are cut to 80 characters, others shown as "...".
+    """
+    pieces = [name]
+    # Only the first two key-value pairs are kept for context.
+    for key, value in list(inp.items())[:2]:
+        shown = value[:80] if isinstance(value, str) else "..."
+        pieces.append(f"{key}={shown}")
+    return " ".join(pieces)
+def _parse_role_prefix(text: str, pattern: str) -> list[dict[str, str]]:
+    """Parse text where each turn starts with a role prefix.
+    The text is scanned line by line; a line matching ``pattern`` at its
+    start opens a new message, and every following non-matching line is
+    appended to that message.
+    Args:
+        text: Raw transcript.
+        pattern: Regex whose group 1 captures the role and group 2 the rest
+            of the line.  With a single capturing group, that group is used
+            as the role and the message starts with no content.
+    Returns:
+        List of message dicts with normalized "role" and stripped "content".
+    """
+    messages = []
+    current_role = None
+    current_content: list[str] = []
+    for line in text.split("\n"):
+        match = re.match(pattern, line)
+        if match:
+            # Save previous message
+            if current_role and current_content:
+                messages.append({
+                    "role": _normalize_role(current_role),
+                    "content": "\n".join(current_content).strip(),
+                })
+            # Start new message
+            if len(match.groups()) >= 2:
+                current_role = match.group(1)
+                current_content = [match.group(2)]
+            elif len(match.groups()) == 1:
+                # Pattern matched role only, content in next lines?
+                # NOTE: the lone group is taken as the role, so a pattern that
+                # captures only the content would be misread here.
+                current_role = match.group(1)
+                current_content = []
+            else:
+                # Pattern has no groups: keep the whole line as assistant text
+                current_role = "assistant"
+                current_content = [line]
+        else:
+            if current_role:
+                # Continuation line of the message currently being collected
+                current_content.append(line)
+            elif line.strip():
+                # No role detected yet — treat as assistant
+                current_role = "assistant"
+                current_content.append(line)
+    # Save last message
+    if current_role and current_content:
+        messages.append({
+            "role": _normalize_role(current_role),
+            "content": "\n".join(current_content).strip(),
+        })
+    return messages
+def _normalize_role(role: str) -> str:
+    """Map a role label onto "user", "system" or "assistant".
+    Matching ignores case and surrounding whitespace.  Every label that is
+    not a user or system alias — including the assistant aliases and any
+    unknown label — normalizes to "assistant".
+    """
+    key = role.strip().lower()
+    if key in ("user", "human", "u"):
+        return "user"
+    if key in ("system", "sys", "s"):
+        return "system"
+    return "assistant"

mem_context/capture/wrapper.py ADDED Viewed

@@ -0,0 +1,111 @@
+"""mem-context-capture-cc — capture Claude Code transcript into mem-context.
+Entry point called by Claude Code hooks (Stop, SessionEnd).  Locates the
+current session's JSONL transcript and feeds it to ``mem-context capture``.
+Tries ``$CLAUDE_TRANSCRIPT_FILE`` first.  If unset or missing, looks under
+``~/.claude/projects/<project-slug>/`` for the most recent JSONL transcript.
+"""
+from __future__ import annotations
+import os
+import shutil
+import subprocess
+import sys
+from pathlib import Path
+def _find_mem_context_bin() -> str | None:
+    """Return the absolute path to the ``mem-context`` CLI binary.
+    Resolution order:
+    1. ``~/.mem-context/.venv/bin/mem-context`` (standalone install)
+    2. Same directory as this script (pip-installed sibling)
+    3. ``mem-context`` in PATH
+    Returns None when none of these exist.
+    """
+    candidates = (
+        # 1) Standalone install (primary location)
+        Path.home() / ".mem-context" / ".venv" / "bin" / "mem-context",
+        # 2) Sibling of this script.  __file__ is used rather than
+        #    sys.argv[0], which is a bare name when the script is invoked
+        #    from PATH and would then resolve relative to the CWD.
+        Path(__file__).resolve().parent / "mem-context",
+    )
+    for candidate in candidates:
+        if candidate.is_file():
+            return str(candidate)
+    # 3) PATH
+    return shutil.which("mem-context") or None
+def _find_transcript() -> str | None:
+    """Locate the Claude Code transcript JSONL file.
+    Lookup order: ``$CLAUDE_TRANSCRIPT_FILE`` if it names an existing file;
+    then, inside ``~/.claude/projects/<slug>/``, ``$CLAUDE_SESSION_ID.jsonl``;
+    then the most recently modified ``*.jsonl`` file there.
+    Returns the path to the transcript, or None if not found.
+    """
+    # 1) Env-var provided by Claude Code hooks (Stop / SessionEnd)
+    from_env = os.environ.get("CLAUDE_TRANSCRIPT_FILE", "")
+    if from_env and os.path.isfile(from_env):
+        return from_env
+    # 2) Fallback — find latest transcript for current project
+    project_root = os.environ.get("CLAUDE_PROJECT_DIR", os.getcwd())
+    # Project slug = absolute path with '/' → '-' (bash: ${cwd//\//-})
+    proj_dir = Path.home() / ".claude" / "projects" / project_root.replace("/", "-")
+    if not proj_dir.is_dir():
+        return None
+    # SessionEnd: use $CLAUDE_SESSION_ID if available
+    session_id = os.environ.get("CLAUDE_SESSION_ID", "")
+    if session_id:
+        session_file = proj_dir / f"{session_id}.jsonl"
+        if session_file.is_file():
+            return str(session_file)
+    # Last resort: most recent .jsonl (exclude directories)
+    newest = max(
+        (p for p in proj_dir.glob("*.jsonl") if p.is_file()),
+        key=lambda p: p.stat().st_mtime,
+        default=None,
+    )
+    return str(newest) if newest is not None else None
+def main() -> None:
+    """Run capture — locate transcript and hand it to ``mem-context capture``.
+    Every outcome is reported on stderr only and the process always exits
+    with status 0, so a failed capture never breaks the hook chain.
+    """
+    mem_bin = _find_mem_context_bin()
+    if not mem_bin:
+        print("mem-context-capture-cc: mem-context binary not found", file=sys.stderr)
+        print("  Install with: pip install --user mem-context", file=sys.stderr)
+        print("  Or ensure ~/.mem-context/.venv/bin/ is in PATH", file=sys.stderr)
+        sys.exit(0)  # Not a hard error — don't break the hook chain
+    transcript = _find_transcript()
+    if not transcript:
+        print("mem-context-capture-cc: no transcript found", file=sys.stderr)
+        sys.exit(0)  # Not a hard error
+    try:
+        # Output is not captured: the child inherits this process's stdio.
+        result = subprocess.run(
+            [mem_bin, "capture", "transcript", transcript, "--client", "claude-code"],
+        )
+    except OSError as exc:
+        # E.g. the binary vanished or is not executable.  Without this the
+        # traceback would end the hook with a non-zero status.
+        print(f"mem-context-capture-cc: failed to run {mem_bin}: {exc}", file=sys.stderr)
+        sys.exit(0)
+    if result.returncode != 0:
+        print(f"mem-context-capture-cc: capture exited with code {result.returncode}", file=sys.stderr)
+    sys.exit(0)  # Always exit 0 — don't break the hook chain
+if __name__ == "__main__":
+    main()