npm - get-claudia - Versions diffs - 1.6.0 → 1.7.0 - Mend

get-claudia 1.6.0 → 1.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6)

package/memory-daemon/claudia_memory/database.py +19 -0
package/memory-daemon/claudia_memory/mcp/server.py +108 -1
package/memory-daemon/claudia_memory/schema.sql +5 -0
package/memory-daemon/claudia_memory/services/recall.py +176 -24
package/memory-daemon/claudia_memory/services/remember.py +97 -20
package/package.json +1 -1

package/memory-daemon/claudia_memory/database.py CHANGED Viewed

@@ -240,6 +240,25 @@ class Database:
             conn.commit()
             logger.info("Applied migration 2: turn buffer and session narratives")
+        if current_version < 3:
+            # Migration 3: Add source_context to memories, is_archived to turn_buffer
+            migration_stmts = [
+                "ALTER TABLE memories ADD COLUMN source_context TEXT",
+                "ALTER TABLE turn_buffer ADD COLUMN is_archived INTEGER DEFAULT 0",
+            ]
+            for stmt in migration_stmts:
+                try:
+                    conn.execute(stmt)
+                except sqlite3.OperationalError as e:
+                    if "duplicate column" not in str(e).lower():
+                        logger.warning(f"Migration 3 statement failed: {e}")
+            conn.execute(
+                "INSERT OR IGNORE INTO schema_migrations (version, description) VALUES (3, 'Add source_context to memories, is_archived to turn_buffer for episodic provenance')"
+            )
+            conn.commit()
+            logger.info("Applied migration 3: episodic memory provenance")
     def execute(
         self, sql: str, params: Tuple = (), fetch: bool = False
     ) -> Optional[List[sqlite3.Row]]:

package/memory-daemon/claudia_memory/mcp/server.py CHANGED Viewed

@@ -31,6 +31,7 @@ from ..services.recall import (
     recall_about,
     recall_episodes,
     search_entities,
+    trace_memory,
 )
 from ..services.remember import (
     buffer_turn,
@@ -79,6 +80,18 @@ async def list_tools() -> ListToolsResult:
                         "description": "Importance score from 0.0 to 1.0",
                         "default": 1.0,
                     },
+                    "source": {
+                        "type": "string",
+                        "description": "Source type: email, transcript, document, conversation, user_input",
+                    },
+                    "source_context": {
+                        "type": "string",
+                        "description": "One-line breadcrumb describing origin (e.g., 'Email from Jim Ferry re: Forum V+, 2025-01-28')",
+                    },
+                    "source_material": {
+                        "type": "string",
+                        "description": "Full raw text of the source (email body, transcript, etc.). Saved to disk, not stored in DB.",
+                    },
                 },
                 "required": ["content"],
             },
@@ -303,6 +316,18 @@ async def list_tools() -> ListToolsResult:
                                     "description": "Entity names this fact relates to",
                                 },
                                 "importance": {"type": "number", "default": 1.0},
+                                "source": {
+                                    "type": "string",
+                                    "description": "Override source type (default: session_summary)",
+                                },
+                                "source_context": {
+                                    "type": "string",
+                                    "description": "One-line breadcrumb describing origin",
+                                },
+                                "source_material": {
+                                    "type": "string",
+                                    "description": "Full raw source text, saved to disk",
+                                },
                             },
                             "required": ["content"],
                         },
@@ -319,6 +344,18 @@ async def list_tools() -> ListToolsResult:
                                     "items": {"type": "string"},
                                 },
                                 "importance": {"type": "number", "default": 1.0},
+                                "source": {
+                                    "type": "string",
+                                    "description": "Override source type (default: session_summary)",
+                                },
+                                "source_context": {
+                                    "type": "string",
+                                    "description": "One-line breadcrumb describing origin",
+                                },
+                                "source_material": {
+                                    "type": "string",
+                                    "description": "Full raw source text, saved to disk",
+                                },
                             },
                             "required": ["content"],
                         },
@@ -436,7 +473,15 @@ async def list_tools() -> ListToolsResult:
                                 },
                                 "source": {
                                     "type": "string",
-                                    "description": "Source entity (for 'relate' op)",
+                                    "description": "Source entity (for 'relate' op) or source type (for 'remember' op)",
+                                },
+                                "source_context": {
+                                    "type": "string",
+                                    "description": "One-line breadcrumb (for 'remember' op)",
+                                },
+                                "source_material": {
+                                    "type": "string",
+                                    "description": "Full raw source text, saved to disk (for 'remember' op)",
                                 },
                                 "target": {
                                     "type": "string",
@@ -458,6 +503,25 @@ async def list_tools() -> ListToolsResult:
                 "required": ["operations"],
             },
         ),
+        Tool(
+            name="memory.trace",
+            description=(
+                "Reconstruct full provenance for a memory. Returns the memory with all fields, "
+                "the source episode narrative and archived conversation turns (if applicable), "
+                "related entities, and a preview of any source material file saved on disk. "
+                "Zero cost until invoked -- use when asked 'where did that come from?'"
+            ),
+            inputSchema={
+                "type": "object",
+                "properties": {
+                    "memory_id": {
+                        "type": "integer",
+                        "description": "The memory ID to trace provenance for",
+                    },
+                },
+                "required": ["memory_id"],
+            },
+        ),
     ]
     return ListToolsResult(tools=tools)
@@ -472,7 +536,20 @@ async def call_tool(name: str, arguments: Dict[str, Any]) -> CallToolResult:
                 memory_type=arguments.get("type", "fact"),
                 about_entities=arguments.get("about"),
                 importance=arguments.get("importance", 1.0),
+                source=arguments.get("source"),
+                source_context=arguments.get("source_context"),
             )
+            # Save source material to disk if provided
+            if memory_id and arguments.get("source_material"):
+                svc = get_remember_service()
+                svc.save_source_material(
+                    memory_id,
+                    arguments["source_material"],
+                    metadata={
+                        "source": arguments.get("source"),
+                        "source_context": arguments.get("source_context"),
+                    },
+                )
             return CallToolResult(
                 content=[
                     TextContent(
@@ -504,6 +581,9 @@ async def call_tool(name: str, arguments: Dict[str, Any]) -> CallToolResult:
                                         "importance": r.importance,
                                         "entities": r.entities,
                                         "created_at": r.created_at,
+                                        "source": r.source,
+                                        "source_id": r.source_id,
+                                        "source_context": r.source_context,
                                     }
                                     for r in results
                                 ]
@@ -528,6 +608,9 @@ async def call_tool(name: str, arguments: Dict[str, Any]) -> CallToolResult:
                         "type": m.type,
                         "importance": m.importance,
                         "created_at": m.created_at,
+                        "source": m.source,
+                        "source_id": m.source_id,
+                        "source_context": m.source_context,
                     }
                     for m in result["memories"]
                 ]
@@ -698,9 +781,22 @@ async def call_tool(name: str, arguments: Dict[str, Any]) -> CallToolResult:
                             memory_type=op.get("type", "fact"),
                             about_entities=op.get("about"),
                             importance=op.get("importance", 1.0),
+                            source=op.get("source"),
+                            source_context=op.get("source_context"),
                         )
                         op_result["success"] = True
                         op_result["memory_id"] = memory_id
+                        # Save source material to disk if provided
+                        if memory_id and op.get("source_material"):
+                            svc = get_remember_service()
+                            svc.save_source_material(
+                                memory_id,
+                                op["source_material"],
+                                metadata={
+                                    "source": op.get("source"),
+                                    "source_context": op.get("source_context"),
+                                },
+                            )
                     elif op_type == "relate":
                         relationship_id = relate_entities(
                             source=op["source"],
@@ -736,6 +832,17 @@ async def call_tool(name: str, arguments: Dict[str, Any]) -> CallToolResult:
                 ]
             )
+        elif name == "memory.trace":
+            result = trace_memory(memory_id=arguments["memory_id"])
+            return CallToolResult(
+                content=[
+                    TextContent(
+                        type="text",
+                        text=json.dumps(result),
+                    )
+                ]
+            )
         else:
             return CallToolResult(
                 content=[

package/memory-daemon/claudia_memory/schema.sql CHANGED Viewed

@@ -53,6 +53,7 @@ CREATE TABLE IF NOT EXISTS memories (
     confidence REAL DEFAULT 1.0,  -- How sure we are about this
     source TEXT,  -- Where this came from (conversation, document, etc.)
     source_id TEXT,  -- Reference to source (episode_id, etc.)
+    source_context TEXT,  -- One-line breadcrumb (e.g., "Email from Jim re: Forum V+, 2025-01-28")
     created_at TEXT DEFAULT (datetime('now')),
     updated_at TEXT DEFAULT (datetime('now')),
     last_accessed_at TEXT,  -- For rehearsal-based importance boost
@@ -220,6 +221,7 @@ CREATE TABLE IF NOT EXISTS turn_buffer (
     turn_number INTEGER NOT NULL,
     user_content TEXT,
     assistant_content TEXT,
+    is_archived INTEGER DEFAULT 0,
     created_at TEXT DEFAULT (datetime('now'))
 );
@@ -247,3 +249,6 @@ VALUES (1, 'Initial schema with entities, memories, relationships, episodes, pat
 INSERT OR IGNORE INTO schema_migrations (version, description)
 VALUES (2, 'Add turn_buffer table, episode narrative/summary columns, episode_embeddings');
+INSERT OR IGNORE INTO schema_migrations (version, description)
+VALUES (3, 'Add source_context to memories, is_archived to turn_buffer for episodic provenance');

package/memory-daemon/claudia_memory/services/recall.py CHANGED Viewed

@@ -10,6 +10,7 @@ import logging
 import math
 from dataclasses import dataclass
 from datetime import datetime, timedelta
+from pathlib import Path
 from typing import Any, Dict, List, Optional, Tuple
 from ..config import get_config
@@ -32,6 +33,9 @@ class RecallResult:
     created_at: str
     entities: List[str]  # Related entity names
     metadata: Optional[Dict] = None
+    source: Optional[str] = None
+    source_id: Optional[str] = None
+    source_context: Optional[str] = None
 @dataclass
@@ -196,6 +200,12 @@ class RecallService:
             # Parse metadata
             metadata_val = row["metadata"] if "metadata" in row.keys() else None
+            # Extract source fields (may not exist in older DBs)
+            row_keys = row.keys()
+            source_val = row["source"] if "source" in row_keys else None
+            source_id_val = row["source_id"] if "source_id" in row_keys else None
+            source_context_val = row["source_context"] if "source_context" in row_keys else None
             results.append(
                 RecallResult(
                     id=row["id"],
@@ -206,6 +216,9 @@ class RecallService:
                     created_at=row["created_at"],
                     entities=entity_names,
                     metadata=json.loads(metadata_val) if metadata_val else None,
+                    source=source_val,
+                    source_id=source_id_val,
+                    source_context=source_context_val,
                 )
             )
@@ -288,19 +301,24 @@ class RecallService:
         memory_rows = self.db.execute(sql, tuple(params), fetch=True) or []
-        memories = [
-            RecallResult(
-                id=row["id"],
-                content=row["content"],
-                type=row["type"],
-                score=row["importance"],
-                importance=row["importance"],
-                created_at=row["created_at"],
-                entities=[entity["name"]],
-                metadata=json.loads(row["metadata"]) if row["metadata"] else None,
+        memories = []
+        for row in memory_rows:
+            row_keys = row.keys()
+            memories.append(
+                RecallResult(
+                    id=row["id"],
+                    content=row["content"],
+                    type=row["type"],
+                    score=row["importance"],
+                    importance=row["importance"],
+                    created_at=row["created_at"],
+                    entities=[entity["name"]],
+                    metadata=json.loads(row["metadata"]) if row["metadata"] else None,
+                    source=row["source"] if "source" in row_keys else None,
+                    source_id=row["source_id"] if "source_id" in row_keys else None,
+                    source_context=row["source_context"] if "source_context" in row_keys else None,
+                )
             )
-            for row in memory_rows
-        ]
         # Get relationships
         rel_sql = """
@@ -456,19 +474,25 @@ class RecallService:
         rows = self.db.execute(sql, tuple(params), fetch=True) or []
-        return [
-            RecallResult(
-                id=row["id"],
-                content=row["content"],
-                type=row["type"],
-                score=row["importance"],
-                importance=row["importance"],
-                created_at=row["created_at"],
-                entities=row["entity_names"].split(",") if row["entity_names"] else [],
-                metadata=json.loads(row["metadata"]) if row["metadata"] else None,
+        results = []
+        for row in rows:
+            row_keys = row.keys()
+            results.append(
+                RecallResult(
+                    id=row["id"],
+                    content=row["content"],
+                    type=row["type"],
+                    score=row["importance"],
+                    importance=row["importance"],
+                    created_at=row["created_at"],
+                    entities=row["entity_names"].split(",") if row["entity_names"] else [],
+                    metadata=json.loads(row["metadata"]) if row["metadata"] else None,
+                    source=row["source"] if "source" in row_keys else None,
+                    source_id=row["source_id"] if "source_id" in row_keys else None,
+                    source_context=row["source_context"] if "source_context" in row_keys else None,
+                )
             )
-            for row in rows
-        ]
+        return results
     def recall_episodes(
         self,
@@ -547,6 +571,129 @@ class RecallService:
             for row in rows
         ]
+    def trace_memory(self, memory_id: int) -> Dict[str, Any]:
+        """
+        Reconstruct full provenance for a memory.
+        Looks up the memory row, the entities linked to it, the source
+        episode with its archived turns (only when ``source_id`` parses as a
+        numeric episode ID), and a preview of ``sources/{memory_id}.md``
+        located next to the database file.
+        Args:
+            memory_id: The memory ID to trace
+        Returns:
+            Dict with keys ``memory``, ``episode``, ``archived_turns``,
+            ``source_file``, ``source_file_preview`` and ``entities``. When the
+            memory does not exist every value is None and ``entities`` is [].
+        """
+        result: Dict[str, Any] = {
+            "memory": None,
+            "episode": None,
+            "archived_turns": None,
+            "source_file": None,
+            "source_file_preview": None,
+            "entities": [],
+        }
+        # 1. Fetch the memory row
+        memory_row = self.db.get_one(
+            "memories", where="id = ?", where_params=(memory_id,)
+        )
+        if not memory_row:
+            return result
+        # Provenance columns may be missing in databases created before migration 3.
+        row_keys = memory_row.keys()
+        result["memory"] = {
+            "id": memory_row["id"],
+            "content": memory_row["content"],
+            "type": memory_row["type"],
+            "importance": memory_row["importance"],
+            "confidence": memory_row["confidence"],
+            "source": memory_row["source"] if "source" in row_keys else None,
+            "source_id": memory_row["source_id"] if "source_id" in row_keys else None,
+            "source_context": memory_row["source_context"] if "source_context" in row_keys else None,
+            "created_at": memory_row["created_at"],
+            "updated_at": memory_row["updated_at"],
+            "access_count": memory_row["access_count"],
+        }
+        # 2. Fetch related entities
+        entity_rows = self.db.execute(
+            """
+            SELECT e.name, e.type FROM entities e
+            JOIN memory_entities me ON e.id = me.entity_id
+            WHERE me.memory_id = ?
+            """,
+            (memory_id,),
+            fetch=True,
+        ) or []
+        result["entities"] = [
+            {"name": row["name"], "type": row["type"]} for row in entity_rows
+        ]
+        # 3. If source_id points to an episode, fetch it with archived turns
+        source_id = result["memory"].get("source_id")
+        if source_id:
+            try:
+                episode_id = int(source_id)
+                episode_row = self.db.get_one(
+                    "episodes", where="id = ?", where_params=(episode_id,)
+                )
+                if episode_row:
+                    ep_keys = episode_row.keys()
+                    # Rows are read via keys()/indexing throughout this method;
+                    # sqlite3.Row offers no .get(), so calling it would raise an
+                    # AttributeError that the handler below does not catch.
+                    raw_topics = episode_row["key_topics"] if "key_topics" in ep_keys else None
+                    result["episode"] = {
+                        "id": episode_row["id"],
+                        "narrative": episode_row["narrative"] if "narrative" in ep_keys else None,
+                        "started_at": episode_row["started_at"],
+                        "ended_at": episode_row["ended_at"] if "ended_at" in ep_keys else None,
+                        "key_topics": json.loads(raw_topics) if raw_topics else [],
+                    }
+                    # Fetch archived turns
+                    turn_rows = self.db.execute(
+                        """
+                        SELECT turn_number, user_content, assistant_content, created_at
+                        FROM turn_buffer
+                        WHERE episode_id = ? AND is_archived = 1
+                        ORDER BY turn_number ASC
+                        """,
+                        (episode_id,),
+                        fetch=True,
+                    ) or []
+                    if turn_rows:
+                        result["archived_turns"] = [
+                            {
+                                "turn": row["turn_number"],
+                                "user": row["user_content"],
+                                "assistant": row["assistant_content"],
+                                "timestamp": row["created_at"],
+                            }
+                            for row in turn_rows
+                        ]
+            except (ValueError, TypeError):
+                pass  # source_id wasn't a numeric episode ID
+        # 4. Check for source material file on disk
+        sources_dir = self.db.db_path.parent / "sources"
+        source_file = sources_dir / f"{memory_id}.md"
+        if source_file.exists():
+            result["source_file"] = str(source_file)
+            try:
+                file_text = source_file.read_text(encoding="utf-8")
+            except (OSError, UnicodeError):
+                result["source_file_preview"] = "(could not read file)"
+            else:
+                body = file_text
+                # Skip frontmatter for preview. The closing fence must start a
+                # line, so a "---" inside a metadata value is not mistaken for it.
+                if file_text.startswith("---"):
+                    end_idx = file_text.find("\n---", 3)
+                    if end_idx != -1:
+                        body = file_text[end_idx + 4:].strip()
+                result["source_file_preview"] = body[:200]
+        return result
     def _keyword_search(
         self,
         query: str,
@@ -610,3 +757,8 @@ def search_entities(query: str, **kwargs) -> List[EntityResult]:
 def recall_episodes(query: str, **kwargs) -> List[Dict[str, Any]]:
     """Search episode narratives"""
     return get_recall_service().recall_episodes(query, **kwargs)
+def trace_memory(memory_id: int) -> Dict[str, Any]:
+    """Trace a memory's provenance using the service from get_recall_service()"""
+    service = get_recall_service()
+    return service.trace_memory(memory_id)

package/memory-daemon/claudia_memory/services/remember.py CHANGED Viewed

@@ -9,6 +9,7 @@ import json
 import logging
 import uuid
 from datetime import datetime
+from pathlib import Path
 from typing import Any, Dict, List, Optional
 from ..database import content_hash, get_db
@@ -127,6 +128,7 @@ class RememberService:
         confidence: float = 1.0,
         source: Optional[str] = None,
         source_id: Optional[str] = None,
+        source_context: Optional[str] = None,
         metadata: Optional[Dict] = None,
     ) -> Optional[int]:
         """
@@ -140,6 +142,7 @@ class RememberService:
             confidence: How confident we are (0.0-1.0)
             source: Where this came from
             source_id: Reference to source
+            source_context: One-line breadcrumb describing the source material
             metadata: Additional metadata
         Returns:
@@ -164,21 +167,22 @@ class RememberService:
             return existing["id"]
         # Insert new memory
-        memory_id = self.db.insert(
-            "memories",
-            {
-                "content": content,
-                "content_hash": mem_hash,
-                "type": memory_type,
-                "importance": importance,
-                "confidence": confidence,
-                "source": source,
-                "source_id": source_id,
-                "created_at": datetime.utcnow().isoformat(),
-                "updated_at": datetime.utcnow().isoformat(),
-                "metadata": json.dumps(metadata) if metadata else None,
-            },
-        )
+        insert_data = {
+            "content": content,
+            "content_hash": mem_hash,
+            "type": memory_type,
+            "importance": importance,
+            "confidence": confidence,
+            "source": source,
+            "source_id": source_id,
+            "created_at": datetime.utcnow().isoformat(),
+            "updated_at": datetime.utcnow().isoformat(),
+            "metadata": json.dumps(metadata) if metadata else None,
+        }
+        if source_context:
+            insert_data["source_context"] = source_context
+        memory_id = self.db.insert("memories", insert_data)
         # Generate and store embedding
         embedding = embed_sync(content)
@@ -485,11 +489,22 @@ class RememberService:
                     memory_type=fact.get("type", "fact"),
                     about_entities=fact.get("about"),
                     importance=fact.get("importance", 1.0),
-                    source="session_summary",
+                    source=fact.get("source", "session_summary"),
                     source_id=str(episode_id),
+                    source_context=fact.get("source_context"),
                 )
                 if memory_id:
                     result["facts_stored"] += 1
+                    # Save source material to disk if provided
+                    if fact.get("source_material"):
+                        self.save_source_material(
+                            memory_id,
+                            fact["source_material"],
+                            metadata={
+                                "source": fact.get("source", "session_summary"),
+                                "source_context": fact.get("source_context"),
+                            },
+                        )
         # 4. Store commitments
         if commitments:
@@ -499,11 +514,21 @@ class RememberService:
                     memory_type="commitment",
                     about_entities=commitment.get("about"),
                     importance=commitment.get("importance", 1.0),
-                    source="session_summary",
+                    source=commitment.get("source", "session_summary"),
                     source_id=str(episode_id),
+                    source_context=commitment.get("source_context"),
                 )
                 if memory_id:
                     result["commitments_stored"] += 1
+                    if commitment.get("source_material"):
+                        self.save_source_material(
+                            memory_id,
+                            commitment["source_material"],
+                            metadata={
+                                "source": commitment.get("source", "session_summary"),
+                                "source_context": commitment.get("source_context"),
+                            },
+                        )
         # 5. Store entities
         if entities:
@@ -529,8 +554,11 @@ class RememberService:
                 if rel_id:
                     result["relationships_stored"] += 1
-        # 7. Clear turn buffer for this episode
-        self.db.delete("turn_buffer", "episode_id = ?", (episode_id,))
+        # 7. Archive turn buffer for this episode (preserve for provenance tracing)
+        self.db.execute(
+            "UPDATE turn_buffer SET is_archived = 1 WHERE episode_id = ?",
+            (episode_id,),
+        )
         logger.info(
             f"Session {episode_id} summarized: {result['facts_stored']} facts, "
@@ -568,7 +596,7 @@ class RememberService:
                 """
                 SELECT turn_number, user_content, assistant_content, created_at
                 FROM turn_buffer
-                WHERE episode_id = ?
+                WHERE episode_id = ? AND (is_archived = 0 OR is_archived IS NULL)
                 ORDER BY turn_number ASC
                 """,
                 (ep["id"],),
@@ -594,6 +622,55 @@ class RememberService:
         return results
+    def save_source_material(
+        self,
+        memory_id: int,
+        content: str,
+        metadata: Optional[Dict] = None,
+    ) -> Optional[Path]:
+        """
+        Save raw source material (email, transcript, document) to disk.
+        The file is plain markdown with a YAML frontmatter header, written to
+        ``sources/{memory_id}.md`` beside the database file. The directory is
+        created lazily on first write; an existing file for the same memory
+        is overwritten.
+        Args:
+            memory_id: The memory this source material belongs to
+            content: Full raw text of the source material
+            metadata: Optional dict with source, source_context, etc.
+                Entries whose value is None are left out of the header.
+        Returns:
+            Path to the saved file, or None on failure (the error is logged
+            as a warning, never raised)
+        """
+        try:
+            sources_dir = self.db.db_path.parent / "sources"
+            sources_dir.mkdir(parents=True, exist_ok=True)
+            file_path = sources_dir / f"{memory_id}.md"
+            # Build frontmatter
+            header_lines = ["---", f"memory_id: {memory_id}"]
+            for key, value in (metadata or {}).items():
+                if value is None:
+                    continue
+                # A JSON string literal is also a valid YAML double-quoted
+                # scalar, so embedded quotes, backslashes and newlines are
+                # escaped instead of breaking the header. Plain values still
+                # come out as key: "value".
+                quoted = json.dumps(str(value), ensure_ascii=False)
+                header_lines.append(f"{key}: {quoted}")
+            header_lines.append(f"saved_at: {datetime.utcnow().isoformat()}")
+            header_lines.append("---")
+            # Trailing empty item makes the join end with a newline after the fence.
+            header_lines.append("")
+            file_content = "\n".join(header_lines) + content
+            file_path.write_text(file_content, encoding="utf-8")
+            logger.debug(f"Saved source material for memory {memory_id} to {file_path}")
+            return file_path
+        except Exception as e:
+            logger.warning(f"Could not save source material for memory {memory_id}: {e}")
+            return None
     def _ensure_entity(self, extracted: ExtractedEntity) -> Optional[int]:
         """Ensure an extracted entity exists in the database"""
         existing = self.db.get_one(

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "get-claudia",
-  "version": "1.6.0",
+  "version": "1.7.0",
   "description": "An AI assistant who learns how you work.",
   "keywords": [
     "claudia",