PyPI - mempalace-code - Versions diffs - 1.0.0__py3-none-any.whl - Mend

mempalace-code 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32) hide show

mempalace/README.md +40 -0
mempalace/__init__.py +6 -0
mempalace/__main__.py +5 -0
mempalace/cli.py +811 -0
mempalace/config.py +149 -0
mempalace/convo_miner.py +415 -0
mempalace/dialect.py +1075 -0
mempalace/entity_detector.py +853 -0
mempalace/entity_registry.py +639 -0
mempalace/export.py +378 -0
mempalace/general_extractor.py +521 -0
mempalace/knowledge_graph.py +410 -0
mempalace/layers.py +515 -0
mempalace/mcp_server.py +873 -0
mempalace/migrate.py +153 -0
mempalace/miner.py +1285 -0
mempalace/normalize.py +328 -0
mempalace/onboarding.py +489 -0
mempalace/palace_graph.py +225 -0
mempalace/py.typed +0 -0
mempalace/room_detector_local.py +310 -0
mempalace/searcher.py +305 -0
mempalace/spellcheck.py +269 -0
mempalace/split_mega_files.py +309 -0
mempalace/storage.py +807 -0
mempalace/version.py +3 -0
mempalace_code-1.0.0.dist-info/METADATA +489 -0
mempalace_code-1.0.0.dist-info/RECORD +32 -0
mempalace_code-1.0.0.dist-info/WHEEL +4 -0
mempalace_code-1.0.0.dist-info/entry_points.txt +2 -0
mempalace_code-1.0.0.dist-info/licenses/LICENSE +192 -0
mempalace_code-1.0.0.dist-info/licenses/NOTICE +17 -0

mempalace/config.py ADDED Viewed

@@ -0,0 +1,149 @@
+"""
+MemPalace configuration system.
+Priority: env vars > config file (~/.mempalace/config.json) > defaults
+"""
+import json
+import os
+from pathlib import Path
+DEFAULT_PALACE_PATH = os.path.expanduser("~/.mempalace/palace")
+DEFAULT_COLLECTION_NAME = "mempalace_drawers"
+DEFAULT_TOPIC_WINGS = [
+    "emotions",
+    "consciousness",
+    "memory",
+    "technical",
+    "identity",
+    "family",
+    "creative",
+]
+DEFAULT_HALL_KEYWORDS = {
+    "emotions": [
+        "scared",
+        "afraid",
+        "worried",
+        "happy",
+        "sad",
+        "love",
+        "hate",
+        "feel",
+        "cry",
+        "tears",
+    ],
+    "consciousness": [
+        "consciousness",
+        "conscious",
+        "aware",
+        "real",
+        "genuine",
+        "soul",
+        "exist",
+        "alive",
+    ],
+    "memory": ["memory", "remember", "forget", "recall", "archive", "palace", "store"],
+    "technical": [
+        "code",
+        "python",
+        "script",
+        "bug",
+        "error",
+        "function",
+        "api",
+        "database",
+        "server",
+    ],
+    "identity": ["identity", "name", "who am i", "persona", "self"],
+    "family": ["family", "kids", "children", "daughter", "son", "parent", "mother", "father"],
+    "creative": ["game", "gameplay", "player", "app", "design", "art", "music", "story"],
+}
+class MempalaceConfig:
+    """Configuration manager for MemPalace.
+    Load order: env vars > config file > defaults.
+    """
+    def __init__(self, config_dir=None):
+        """Initialize config.
+        Args:
+            config_dir: Override config directory (useful for testing).
+                        Defaults to ~/.mempalace.
+        """
+        self._config_dir = (
+            Path(config_dir) if config_dir else Path(os.path.expanduser("~/.mempalace"))
+        )
+        self._config_file = self._config_dir / "config.json"
+        self._people_map_file = self._config_dir / "people_map.json"
+        self._file_config = {}
+        if self._config_file.exists():
+            try:
+                with open(self._config_file, "r") as f:
+                    self._file_config = json.load(f)
+            except (json.JSONDecodeError, OSError):
+                self._file_config = {}
+    @property
+    def palace_path(self):
+        """Path to the memory palace data directory."""
+        env_val = os.environ.get("MEMPALACE_PALACE_PATH") or os.environ.get("MEMPAL_PALACE_PATH")
+        if env_val:
+            return env_val
+        return self._file_config.get("palace_path", DEFAULT_PALACE_PATH)
+    @property
+    def collection_name(self):
+        """ChromaDB collection name."""
+        return self._file_config.get("collection_name", DEFAULT_COLLECTION_NAME)
+    @property
+    def people_map(self):
+        """Mapping of name variants to canonical names."""
+        if self._people_map_file.exists():
+            try:
+                with open(self._people_map_file, "r") as f:
+                    return json.load(f)
+            except (json.JSONDecodeError, OSError):
+                pass
+        return self._file_config.get("people_map", {})
+    @property
+    def topic_wings(self):
+        """List of topic wing names."""
+        return self._file_config.get("topic_wings", DEFAULT_TOPIC_WINGS)
+    @property
+    def hall_keywords(self):
+        """Mapping of hall names to keyword lists."""
+        return self._file_config.get("hall_keywords", DEFAULT_HALL_KEYWORDS)
+    def init(self):
+        """Create config directory and write default config.json if it doesn't exist."""
+        self._config_dir.mkdir(parents=True, exist_ok=True)
+        if not self._config_file.exists():
+            default_config = {
+                "palace_path": DEFAULT_PALACE_PATH,
+                "collection_name": DEFAULT_COLLECTION_NAME,
+                "topic_wings": DEFAULT_TOPIC_WINGS,
+                "hall_keywords": DEFAULT_HALL_KEYWORDS,
+            }
+            with open(self._config_file, "w") as f:
+                json.dump(default_config, f, indent=2)
+        return self._config_file
+    def save_people_map(self, people_map):
+        """Write people_map.json to config directory.
+        Args:
+            people_map: Dict mapping name variants to canonical names.
+        """
+        self._config_dir.mkdir(parents=True, exist_ok=True)
+        with open(self._people_map_file, "w") as f:
+            json.dump(people_map, f, indent=2)
+        return self._people_map_file

mempalace/convo_miner.py ADDED Viewed

@@ -0,0 +1,415 @@
+#!/usr/bin/env python3
+"""
+convo_miner.py — Mine conversations into the palace.
+Ingests chat exports (Claude Code, ChatGPT, Slack, plain text transcripts).
+Normalizes format, chunks by exchange pair (Q+A = one unit), files to palace.
+Same palace as project mining. Different ingest strategy.
+"""
+import os
+import sys
+import time
+import hashlib
+from pathlib import Path
+from datetime import datetime
+from collections import defaultdict
+from .storage import open_store
+from .normalize import normalize
+from .miner import BATCH_SIZE, add_drawers_batch
+from .version import __version__
+# File types that might contain conversations
+CONVO_EXTENSIONS = {
+    ".txt",
+    ".md",
+    ".json",
+    ".jsonl",
+}
+SKIP_DIRS = {
+    ".git",
+    "node_modules",
+    "__pycache__",
+    ".venv",
+    "venv",
+    "env",
+    "dist",
+    "build",
+    ".next",
+    ".mempalace",
+    "tool-results",
+    "memory",
+}
+MIN_CHUNK_SIZE = 30
+# =============================================================================
+# CHUNKING — exchange pairs for conversations
+# =============================================================================
+def chunk_exchanges(content: str) -> list:
+    """
+    Chunk by exchange pair: one > turn + AI response = one unit.
+    Falls back to paragraph chunking if no > markers.
+    """
+    lines = content.split("\n")
+    quote_lines = sum(1 for line in lines if line.strip().startswith(">"))
+    if quote_lines >= 3:
+        return _chunk_by_exchange(lines)
+    else:
+        return _chunk_by_paragraph(content)
+def _chunk_by_exchange(lines: list) -> list:
+    """One user turn (>) + the AI response that follows = one chunk."""
+    chunks = []
+    i = 0
+    while i < len(lines):
+        line = lines[i]
+        if line.strip().startswith(">"):
+            user_turn = line.strip()
+            i += 1
+            ai_lines = []
+            while i < len(lines):
+                next_line = lines[i]
+                if next_line.strip().startswith(">") or next_line.strip().startswith("---"):
+                    break
+                if next_line.strip():
+                    ai_lines.append(next_line.strip())
+                i += 1
+            ai_response = " ".join(ai_lines[:8])
+            content = f"{user_turn}\n{ai_response}" if ai_response else user_turn
+            if len(content.strip()) > MIN_CHUNK_SIZE:
+                chunks.append(
+                    {
+                        "content": content,
+                        "chunk_index": len(chunks),
+                    }
+                )
+        else:
+            i += 1
+    return chunks
+def _chunk_by_paragraph(content: str) -> list:
+    """Fallback: chunk by paragraph breaks."""
+    chunks = []
+    paragraphs = [p.strip() for p in content.split("\n\n") if p.strip()]
+    # If no paragraph breaks and long content, chunk by line groups
+    if len(paragraphs) <= 1 and content.count("\n") > 20:
+        lines = content.split("\n")
+        for i in range(0, len(lines), 25):
+            group = "\n".join(lines[i : i + 25]).strip()
+            if len(group) > MIN_CHUNK_SIZE:
+                chunks.append({"content": group, "chunk_index": len(chunks)})
+        return chunks
+    for para in paragraphs:
+        if len(para) > MIN_CHUNK_SIZE:
+            chunks.append({"content": para, "chunk_index": len(chunks)})
+    return chunks
+# =============================================================================
+# ROOM DETECTION — topic-based for conversations
+# =============================================================================
+TOPIC_KEYWORDS = {
+    "technical": [
+        "code",
+        "python",
+        "function",
+        "bug",
+        "error",
+        "api",
+        "database",
+        "server",
+        "deploy",
+        "git",
+        "test",
+        "debug",
+        "refactor",
+    ],
+    "architecture": [
+        "architecture",
+        "design",
+        "pattern",
+        "structure",
+        "schema",
+        "interface",
+        "module",
+        "component",
+        "service",
+        "layer",
+    ],
+    "planning": [
+        "plan",
+        "roadmap",
+        "milestone",
+        "deadline",
+        "priority",
+        "sprint",
+        "backlog",
+        "scope",
+        "requirement",
+        "spec",
+    ],
+    "decisions": [
+        "decided",
+        "chose",
+        "picked",
+        "switched",
+        "migrated",
+        "replaced",
+        "trade-off",
+        "alternative",
+        "option",
+        "approach",
+    ],
+    "problems": [
+        "problem",
+        "issue",
+        "broken",
+        "failed",
+        "crash",
+        "stuck",
+        "workaround",
+        "fix",
+        "solved",
+        "resolved",
+    ],
+}
+def detect_convo_room(content: str) -> str:
+    """Score conversation content against topic keywords."""
+    content_lower = content[:3000].lower()
+    scores = {}
+    for room, keywords in TOPIC_KEYWORDS.items():
+        score = sum(1 for kw in keywords if kw in content_lower)
+        if score > 0:
+            scores[room] = score
+    if scores:
+        return max(scores, key=scores.get)
+    return "general"
+# =============================================================================
+# PALACE OPERATIONS
+# =============================================================================
+def get_collection(palace_path: str):
+    """Open (or create) the drawer store for a palace."""
+    os.makedirs(palace_path, exist_ok=True)
+    return open_store(palace_path, create=True)
+def file_already_mined(collection, source_file: str) -> bool:
+    try:
+        results = collection.get(where={"source_file": source_file}, limit=1)
+        return len(results.get("ids", [])) > 0
+    except Exception:
+        return False
+# =============================================================================
+# SCAN FOR CONVERSATION FILES
+# =============================================================================
+def scan_convos(convo_dir: str) -> list:
+    """Find all potential conversation files."""
+    convo_path = Path(convo_dir).expanduser().resolve()
+    files = []
+    for root, dirs, filenames in os.walk(convo_path):
+        dirs[:] = [d for d in dirs if d not in SKIP_DIRS]
+        for filename in filenames:
+            if filename.endswith(".meta.json"):
+                continue
+            filepath = Path(root) / filename
+            if filepath.suffix.lower() in CONVO_EXTENSIONS:
+                files.append(filepath)
+    return files
+# =============================================================================
+# MINE CONVERSATIONS
+# =============================================================================
+def mine_convos(
+    convo_dir: str,
+    palace_path: str,
+    wing: str = None,
+    agent: str = "mempalace",
+    limit: int = 0,
+    dry_run: bool = False,
+    extract_mode: str = "exchange",
+):
+    """Mine a directory of conversation files into the palace.
+    Scans convo_dir for candidate files, normalizes and chunks each one, and
+    queues one drawer spec per chunk. Queued specs are handed to
+    add_drawers_batch in batches. Progress and a final summary are printed
+    to stdout; nothing is returned.
+    Args:
+        convo_dir: Directory to scan for conversation files.
+        palace_path: Location of the palace store; opened unless dry_run is set.
+        wing: Wing recorded on every drawer. When empty, derived from the
+            source directory name (lowercased, spaces and hyphens replaced
+            by underscores).
+        agent: Stored as the "added_by" metadata field of every drawer.
+        limit: When greater than 0, only the first `limit` scanned files are
+            considered.
+        dry_run: When True, report what would be filed without opening the
+            store or queuing anything.
+        extract_mode:
+            "exchange" — default exchange-pair chunking (Q+A = one unit)
+            "general"  — general extractor: decisions, preferences, milestones, problems, emotions
+    """
+    convo_path = Path(convo_dir).expanduser().resolve()
+    if not wing:
+        wing = convo_path.name.lower().replace(" ", "_").replace("-", "_")
+    files = scan_convos(convo_dir)
+    if limit > 0:
+        files = files[:limit]
+    print(f"\n{'=' * 55}")
+    print("  MemPalace Mine — Conversations")
+    print(f"{'=' * 55}")
+    print(f"  Wing:    {wing}")
+    print(f"  Source:  {convo_path}")
+    print(f"  Files:   {len(files)}")
+    print(f"  Palace:  {palace_path}")
+    if dry_run:
+        print("  DRY RUN — nothing will be filed")
+    print(f"{'-' * 55}\n")
+    # No store is opened in dry-run mode; collection stays None.
+    collection = get_collection(palace_path) if not dry_run else None
+    total_drawers = 0
+    files_skipped = 0
+    room_counts = defaultdict(int)
+    # Drawer specs waiting to be handed to add_drawers_batch.
+    batch_buffer: list = []
+    def flush_batch() -> None:
+        """Pass the queued specs to add_drawers_batch, add its return value to total_drawers, and empty the queue."""
+        nonlocal total_drawers
+        total_drawers += add_drawers_batch(collection, batch_buffer)
+        batch_buffer.clear()
+    for i, filepath in enumerate(files, 1):
+        source_file = str(filepath)
+        # Skip if already filed
+        if not dry_run and file_already_mined(collection, source_file):
+            files_skipped += 1
+            continue
+        # Normalize format
+        try:
+            content = normalize(str(filepath))
+        except (OSError, ValueError):
+            # Files that cannot be read or normalized are dropped without a message.
+            continue
+        if not content or len(content.strip()) < MIN_CHUNK_SIZE:
+            continue
+        # Chunk — either exchange pairs or general extraction
+        if extract_mode == "general":
+            # Imported only when this mode is actually used.
+            from .general_extractor import extract_memories
+            chunks = extract_memories(content)
+            # Each chunk already has memory_type; use it as the room name
+        else:
+            chunks = chunk_exchanges(content)
+        if not chunks:
+            continue
+        # Detect room from content (general mode uses memory_type instead)
+        if extract_mode != "general":
+            room = detect_convo_room(content)
+        else:
+            room = None  # set per-chunk below
+        if dry_run:
+            if extract_mode == "general":
+                from collections import Counter
+                type_counts = Counter(c.get("memory_type", "general") for c in chunks)
+                types_str = ", ".join(f"{t}:{n}" for t, n in type_counts.most_common())
+                print(f"    [DRY RUN] {filepath.name} → {len(chunks)} memories ({types_str})")
+            else:
+                print(f"    [DRY RUN] {filepath.name} → room:{room} ({len(chunks)} drawers)")
+            # In dry-run mode the drawer total is the number of chunks that would be filed.
+            total_drawers += len(chunks)
+            # Track room counts
+            if extract_mode == "general":
+                for c in chunks:
+                    room_counts[c.get("memory_type", "general")] += 1
+            else:
+                room_counts[room] += 1
+            continue
+        # Exchange mode counts one hit per file; general mode counts per chunk in the loop below.
+        if extract_mode != "general":
+            room_counts[room] += 1
+        # Build specs for this file; accumulate into the batch buffer
+        file_spec_count = 0
+        for chunk in chunks:
+            # In general mode `room` is None, so a chunk without memory_type gets room None here.
+            chunk_room = chunk.get("memory_type", room) if extract_mode == "general" else room
+            if extract_mode == "general":
+                room_counts[chunk_room] += 1
+            # Deterministic id: wing, room, and the first 16 hex chars of an MD5 over source path + chunk index.
+            drawer_id = f"drawer_{wing}_{chunk_room}_{hashlib.md5((source_file + str(chunk['chunk_index'])).encode(), usedforsecurity=False).hexdigest()[:16]}"
+            batch_buffer.append(
+                {
+                    "id": drawer_id,
+                    "content": chunk["content"],
+                    "metadata": {
+                        "wing": wing,
+                        "room": chunk_room,
+                        "source_file": source_file,
+                        "chunk_index": chunk["chunk_index"],
+                        "added_by": agent,
+                        "filed_at": datetime.now().isoformat(),
+                        "ingest_mode": "convos",
+                        "extract_mode": extract_mode,
+                        "extractor_version": __version__,
+                        "chunker_strategy": "convo_turn_v1",
+                    },
+                }
+            )
+            file_spec_count += 1
+        print(f"  ✓ [{i:4}/{len(files)}] {filepath.name[:50]:50} +{file_spec_count}")
+        # Checked once per file, so the buffer can grow past BATCH_SIZE before it is flushed.
+        if len(batch_buffer) >= BATCH_SIZE:
+            flush_batch()
+    if not dry_run:
+        # Flush whatever is still queued after the last file.
+        flush_batch()
+        t0 = time.time()
+        print("  >> Optimizing storage...", end="", flush=True)
+        # NOTE(review): optimize() is defined by the store implementation, not shown here.
+        collection.optimize()
+        print(f" done ({time.time() - t0:.1f}s)", flush=True)
+    print(f"\n{'=' * 55}")
+    print("  Done.")
+    # NOTE(review): this subtraction also counts files dropped above (normalize
+    # errors, too-short content, no chunks) as processed.
+    print(f"  Files processed: {len(files) - files_skipped}")
+    print(f"  Files skipped (already filed): {files_skipped}")
+    print(f"  Drawers filed: {total_drawers}")
+    if room_counts:
+        print("\n  By room:")
+        # NOTE(review): the label says "files", but in "general" mode the counts are per chunk.
+        for room, count in sorted(room_counts.items(), key=lambda x: x[1], reverse=True):
+            print(f"    {room:20} {count} files")
+    print('\n  Next: mempalace search "what you\'re looking for"')
+    print(f"{'=' * 55}\n")
+if __name__ == "__main__":
+    if len(sys.argv) < 2:
+        print("Usage: python convo_miner.py <convo_dir> [--palace PATH] [--limit N] [--dry-run]")
+        sys.exit(1)
+    from .config import MempalaceConfig
+    mine_convos(sys.argv[1], palace_path=MempalaceConfig().palace_path)