npm - get-claudia - Versions diffs - 1.34.2 → 1.35.1 - Mend

get-claudia 1.34.2 → 1.35.1

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (29)

package/memory-daemon/claudia_memory/__main__.py +401 -3
package/memory-daemon/claudia_memory/config.py +42 -0
package/memory-daemon/claudia_memory/daemon/health.py +19 -0
package/memory-daemon/claudia_memory/database.py +97 -26
package/memory-daemon/claudia_memory/embeddings.py +114 -1
package/memory-daemon/claudia_memory/mcp/server.py +122 -0
package/memory-daemon/claudia_memory/schema.sql +5 -28
package/memory-daemon/claudia_memory/services/consolidate.py +146 -24
package/memory-daemon/claudia_memory/services/recall.py +6 -0
package/memory-daemon/scripts/install.sh +73 -8
package/memory-daemon/tests/test_backup.py +72 -0
package/memory-daemon/tests/test_embedding_cache.py +210 -0
package/memory-daemon/tests/test_embedding_migration.py +415 -0
package/memory-daemon/tests/test_invalidated_filter.py +180 -0
package/memory-daemon/tests/test_retention.py +312 -0
package/package.json +1 -1
package/template-v2/.claude/hooks/hooks.json +20 -0
package/template-v2/.claude/hooks/session-health-check.py +43 -1
package/template-v2/.claude/hooks/session-health-check.sh +40 -3
package/template-v2/.claude/skills/README.md +50 -0
package/template-v2/.claude/skills/capability-suggester.md +18 -0
package/template-v2/.claude/skills/commitment-detector.md +18 -0
package/template-v2/.claude/skills/ingest-sources/SKILL.md +1 -0
package/template-v2/.claude/skills/morning-brief/SKILL.md +1 -0
package/template-v2/.claude/skills/pattern-recognizer.md +18 -0
package/template-v2/.claude/skills/relationship-tracker.md +18 -0
package/template-v2/.claude/skills/risk-surfacer.md +18 -0
package/template-v2/.claude/skills/skill-index.json +350 -0
package/template-v2/.claude/skills/structure-evolution.md +18 -0

package/memory-daemon/claudia_memory/__main__.py CHANGED Viewed

@@ -13,6 +13,7 @@ import hashlib
 import logging
 import os
 import signal
+import sqlite3
 import sys
 from pathlib import Path
@@ -182,6 +183,16 @@ def main():
         action="store_true",
         help="Generate embeddings for all memories that don't have them yet, then exit",
     )
+    parser.add_argument(
+        "--migrate-embeddings",
+        action="store_true",
+        help="Migrate embeddings to a new model/dimensions (drop and recreate vec0 tables, re-embed all data)",
+    )
+    parser.add_argument(
+        "--backup",
+        action="store_true",
+        help="Create a database backup and exit",
+    )
     args = parser.parse_args()
@@ -232,6 +243,21 @@ def main():
         db = get_db()
         db.initialize()
+        config = get_config()
+        # Fail fast if dimensions mismatch (user needs --migrate-embeddings instead)
+        stored_dims = db.execute(
+            "SELECT value FROM _meta WHERE key = 'embedding_dimensions'",
+            fetch=True,
+        )
+        if stored_dims and int(stored_dims[0]["value"]) != config.embedding_dimensions:
+            print(
+                f"Error: Dimension mismatch detected. "
+                f"Database has {stored_dims[0]['value']}D embeddings, "
+                f"config specifies {config.embedding_dimensions}D. "
+                f"Run --migrate-embeddings first."
+            )
+            sys.exit(1)
         # Find memories not in the memory_embeddings table
         missing = db.execute(
@@ -256,11 +282,10 @@ def main():
         for i, row in enumerate(missing, 1):
             embedding = svc.embed_sync(row["content"])
             if embedding:
-                import struct
-                blob = struct.pack(f"{len(embedding)}f", *embedding)
+                import json as _json
                 db.execute(
                     "INSERT OR REPLACE INTO memory_embeddings (memory_id, embedding) VALUES (?, ?)",
-                    (row["id"], blob),
+                    (row["id"], _json.dumps(embedding)),
                 )
                 success += 1
             else:
@@ -268,9 +293,382 @@ def main():
             if i % 10 == 0 or i == len(missing):
                 print(f"  Progress: {i}/{len(missing)} (success={success}, failed={failed})")
+        # Update stored embedding model to match current config (clears mismatch warning)
+        db.execute(
+            "INSERT OR REPLACE INTO _meta (key, value) VALUES ('embedding_model', ?)",
+            (svc.model,),
+        )
         print(f"Backfill complete: {success} embedded, {failed} failed, {len(missing)} total.")
         return
+    if args.migrate_embeddings:
+        # Full embedding migration: change model and/or dimensions
+        setup_logging(debug=args.debug)
+        import json as _json
+        from .database import Database
+        from .embeddings import get_embedding_service
+        db = get_db()
+        db.initialize()
+        config = get_config()
+        svc = get_embedding_service()
+        new_model = config.embedding_model
+        new_dim = config.embedding_dimensions
+        # Read current state from _meta
+        old_model_row = db.execute(
+            "SELECT value FROM _meta WHERE key = 'embedding_model'",
+            fetch=True,
+        )
+        old_dims_row = db.execute(
+            "SELECT value FROM _meta WHERE key = 'embedding_dimensions'",
+            fetch=True,
+        )
+        old_model = old_model_row[0]["value"] if old_model_row else "unknown"
+        old_dim = int(old_dims_row[0]["value"]) if old_dims_row else 384
+        if old_model == new_model and old_dim == new_dim:
+            # No mismatch -- offer interactive model selection
+            print(f"\nCurrent embedding model: {old_model} ({old_dim}D)")
+            print()
+            print("Available models:")
+            models_info = [
+                ("1", "all-minilm:l6-v2", 384, "  23MB", "Fast, good baseline"),
+                ("2", "nomic-embed-text", 768, " 274MB", "Better retrieval (+6%)"),
+                ("3", "mxbai-embed-large", 1024, " 669MB", "Best accuracy, larger"),
+            ]
+            for num, name, dim, size, desc in models_info:
+                current = " (current)" if name == old_model else ""
+                print(f"  {num}) {name:<20s} {dim}D  {size}   {desc}{current}")
+            print("  4) Cancel")
+            print()
+            choice = input("Switch to [1-4, default=4]: ").strip()
+            model_map = {
+                "1": ("all-minilm:l6-v2", 384),
+                "2": ("nomic-embed-text", 768),
+                "3": ("mxbai-embed-large", 1024),
+            }
+            if choice not in model_map:
+                print("No changes made.")
+                return
+            new_model, new_dim = model_map[choice]
+            if new_model == old_model and new_dim == old_dim:
+                print(f"Already using {new_model}. No changes needed.")
+                return
+            # Update config.json with the user's choice
+            config_path = Path.home() / ".claudia" / "config.json"
+            try:
+                if config_path.exists():
+                    with open(config_path) as f:
+                        cfg_data = _json.load(f)
+                else:
+                    cfg_data = {}
+                cfg_data["embedding_model"] = new_model
+                cfg_data["embedding_dimensions"] = new_dim
+                with open(config_path, "w") as f:
+                    _json.dump(cfg_data, f, indent=2)
+                print(f"\nConfig updated: {new_model} ({new_dim}D)")
+            except Exception as e:
+                print(f"Warning: Could not update config.json: {e}")
+            # Reinitialize embedding service with new model
+            svc.model = new_model
+            svc.dimensions = new_dim
+            svc._available = None  # Force re-check
+        # Pre-flight: verify Ollama is running and model is available
+        if not svc.is_available_sync():
+            # Distinguish: Ollama not running vs model not pulled
+            import subprocess
+            import httpx
+            ollama_running = False
+            try:
+                resp = httpx.get(f"{svc.host}/api/tags", timeout=5)
+                ollama_running = resp.status_code == 200
+            except Exception:
+                pass
+            if not ollama_running:
+                print(f"Error: Ollama is not running.")
+                print(f"Please start Ollama and try again.")
+                sys.exit(1)
+            # Ollama is running but model is missing -- offer to pull it
+            print(f"\nThe model '{new_model}' is not installed in Ollama.")
+            pull_choice = input(f"Download it now? (Y/n): ").strip().lower()
+            if pull_choice in ("", "y", "yes"):
+                print(f"Downloading {new_model}... (this may take a minute)")
+                try:
+                    result = subprocess.run(
+                        ["ollama", "pull", new_model],
+                        capture_output=False,
+                        text=True,
+                    )
+                    if result.returncode != 0:
+                        print(f"Error: Failed to pull {new_model}.")
+                        sys.exit(1)
+                except FileNotFoundError:
+                    print("Error: 'ollama' command not found. Please install Ollama.")
+                    sys.exit(1)
+                # Re-check availability after pull
+                svc._available = None
+                if not svc.is_available_sync():
+                    print(f"Error: Model still not available after pull.")
+                    sys.exit(1)
+                print(f"Model '{new_model}' ready.")
+            else:
+                print("Migration cancelled.")
+                return
+        # Count embeddings across all tables
+        embedding_counts = {}
+        for table, pk in Database.VEC0_TABLES:
+            try:
+                rows = db.execute(f"SELECT COUNT(*) as cnt FROM {table}", fetch=True)
+                embedding_counts[table] = rows[0]["cnt"] if rows else 0
+            except Exception:
+                embedding_counts[table] = 0
+        total_embeddings = sum(embedding_counts.values())
+        # Show migration summary
+        print(f"\nEmbedding Migration")
+        print(f"  Current: {old_model} ({old_dim}D)")
+        print(f"  Target:  {new_model} ({new_dim}D)")
+        print(f"  Embeddings to regenerate: {total_embeddings}")
+        print()
+        # Count source data to re-embed
+        mem_count_rows = db.execute(
+            "SELECT COUNT(*) as cnt FROM memories WHERE deleted_at IS NULL",
+            fetch=True,
+        )
+        ent_count_rows = db.execute(
+            "SELECT COUNT(*) as cnt FROM entities WHERE deleted_at IS NULL",
+            fetch=True,
+        )
+        ep_count_rows = db.execute(
+            "SELECT COUNT(*) as cnt FROM episodes WHERE summary IS NOT NULL AND summary != ''",
+            fetch=True,
+        )
+        msg_count_rows = db.execute(
+            "SELECT COUNT(*) as cnt FROM messages",
+            fetch=True,
+        )
+        ref_count_rows = db.execute(
+            "SELECT COUNT(*) as cnt FROM reflections",
+            fetch=True,
+        )
+        mem_count = mem_count_rows[0]["cnt"] if mem_count_rows else 0
+        ent_count = ent_count_rows[0]["cnt"] if ent_count_rows else 0
+        ep_count = ep_count_rows[0]["cnt"] if ep_count_rows else 0
+        msg_count = msg_count_rows[0]["cnt"] if msg_count_rows else 0
+        ref_count = ref_count_rows[0]["cnt"] if ref_count_rows else 0
+        total_to_embed = mem_count + ent_count + ep_count + msg_count + ref_count
+        print(f"  Source data to re-embed:")
+        print(f"    Memories:    {mem_count}")
+        print(f"    Entities:    {ent_count}")
+        print(f"    Episodes:    {ep_count}")
+        print(f"    Messages:    {msg_count}")
+        print(f"    Reflections: {ref_count}")
+        print(f"    Total:       {total_to_embed}")
+        print()
+        # Pre-flight: verify sqlite-vec is available
+        try:
+            db.execute("SELECT vec_version()", fetch=True)
+        except Exception:
+            print("Error: sqlite-vec extension not available. Cannot migrate embeddings.")
+            print("Install with: pip install sqlite-vec")
+            sys.exit(1)
+        # Confirmation
+        confirm = input("Proceed with migration? (y/N): ").strip().lower()
+        if confirm != "y":
+            print("Migration cancelled.")
+            return
+        # Step 1: Backup
+        print("\nStep 1/4: Creating backup...")
+        backup_path = db.backup()
+        print(f"  Backup at: {backup_path}")
+        # Step 2: Drop and recreate vec0 tables with new dimensions
+        print("\nStep 2/4: Recreating vector tables...")
+        with db.transaction() as conn:
+            for table, pk in Database.VEC0_TABLES:
+                try:
+                    conn.execute(f"DROP TABLE IF EXISTS {table}")
+                    conn.execute(f"""
+                        CREATE VIRTUAL TABLE {table} USING vec0(
+                            {pk} INTEGER PRIMARY KEY,
+                            embedding FLOAT[{new_dim}]
+                        )
+                    """)
+                    print(f"  Recreated {table} ({new_dim}D)")
+                except sqlite3.OperationalError as e:
+                    if "no such module: vec0" in str(e):
+                        print(f"  Warning: sqlite-vec not available, skipping {table}")
+                    else:
+                        print(f"  Error recreating {table}: {e}")
+                        print("Aborting. Restore from backup to recover.")
+                        sys.exit(1)
+        # Step 3: Re-embed everything
+        print("\nStep 3/4: Re-embedding all data...")
+        results = {}
+        # 3a. Memory embeddings (largest, most important)
+        if mem_count > 0:
+            memories = db.execute(
+                "SELECT id, content FROM memories WHERE deleted_at IS NULL",
+                fetch=True,
+            )
+            success = 0
+            for i, row in enumerate(memories or [], 1):
+                embedding = svc.embed_sync(row["content"])
+                if embedding:
+                    db.execute(
+                        "INSERT INTO memory_embeddings (memory_id, embedding) VALUES (?, ?)",
+                        (row["id"], _json.dumps(embedding)),
+                    )
+                    success += 1
+                if i % 25 == 0 or i == mem_count:
+                    print(f"  Memories:    {i}/{mem_count}")
+            results["memories"] = success
+        else:
+            results["memories"] = 0
+        # 3b. Entity embeddings
+        if ent_count > 0:
+            entities = db.execute(
+                "SELECT id, name, description FROM entities WHERE deleted_at IS NULL",
+                fetch=True,
+            )
+            success = 0
+            for i, row in enumerate(entities or [], 1):
+                text = f"{row['name']}: {row['description'] or ''}"
+                embedding = svc.embed_sync(text)
+                if embedding:
+                    db.execute(
+                        "INSERT INTO entity_embeddings (entity_id, embedding) VALUES (?, ?)",
+                        (row["id"], _json.dumps(embedding)),
+                    )
+                    success += 1
+                if i % 25 == 0 or i == ent_count:
+                    print(f"  Entities:    {i}/{ent_count}")
+            results["entities"] = success
+        else:
+            results["entities"] = 0
+        # 3c. Episode embeddings (from summaries)
+        if ep_count > 0:
+            episodes = db.execute(
+                "SELECT id, summary FROM episodes WHERE summary IS NOT NULL AND summary != ''",
+                fetch=True,
+            )
+            success = 0
+            for i, row in enumerate(episodes or [], 1):
+                embedding = svc.embed_sync(row["summary"])
+                if embedding:
+                    db.execute(
+                        "INSERT INTO episode_embeddings (episode_id, embedding) VALUES (?, ?)",
+                        (row["id"], _json.dumps(embedding)),
+                    )
+                    success += 1
+                if i % 25 == 0 or i == ep_count:
+                    print(f"  Episodes:    {i}/{ep_count}")
+            results["episodes"] = success
+        else:
+            results["episodes"] = 0
+        # 3d. Message embeddings
+        if msg_count > 0:
+            messages = db.execute(
+                "SELECT id, content FROM messages",
+                fetch=True,
+            )
+            success = 0
+            for i, row in enumerate(messages or [], 1):
+                embedding = svc.embed_sync(row["content"])
+                if embedding:
+                    db.execute(
+                        "INSERT INTO message_embeddings (message_id, embedding) VALUES (?, ?)",
+                        (row["id"], _json.dumps(embedding)),
+                    )
+                    success += 1
+                if i % 25 == 0 or i == msg_count:
+                    print(f"  Messages:    {i}/{msg_count}")
+            results["messages"] = success
+        else:
+            results["messages"] = 0
+        # 3e. Reflection embeddings
+        if ref_count > 0:
+            reflections = db.execute(
+                "SELECT id, content FROM reflections",
+                fetch=True,
+            )
+            success = 0
+            for i, row in enumerate(reflections or [], 1):
+                embedding = svc.embed_sync(row["content"])
+                if embedding:
+                    db.execute(
+                        "INSERT INTO reflection_embeddings (reflection_id, embedding) VALUES (?, ?)",
+                        (row["id"], _json.dumps(embedding)),
+                    )
+                    success += 1
+                if i % 25 == 0 or i == ref_count:
+                    print(f"  Reflections: {i}/{ref_count}")
+            results["reflections"] = success
+        else:
+            results["reflections"] = 0
+        # Step 4: Update _meta
+        print("\nStep 4/4: Updating metadata...")
+        db.execute(
+            "INSERT OR REPLACE INTO _meta (key, value) VALUES ('embedding_model', ?)",
+            (new_model,),
+        )
+        db.execute(
+            "INSERT OR REPLACE INTO _meta (key, value) VALUES ('embedding_dimensions', ?)",
+            (str(new_dim),),
+        )
+        # Clear embedding cache (old-dimension entries)
+        svc._cache.clear()
+        svc._model_mismatch = False
+        # Summary
+        print(f"\nMigration complete:")
+        print(f"  Model: {new_model} ({new_dim}D)")
+        print(f"  Memories re-embedded:    {results['memories']}/{mem_count}")
+        print(f"  Entities re-embedded:    {results['entities']}/{ent_count}")
+        print(f"  Episodes re-embedded:    {results['episodes']}/{ep_count}")
+        print(f"  Messages re-embedded:    {results['messages']}/{msg_count}")
+        print(f"  Reflections re-embedded: {results['reflections']}/{ref_count}")
+        print(f"  Backup at: {backup_path}")
+        print(f"\n  To rollback: restore the backup file.")
+        return
+    if args.backup:
+        setup_logging(debug=args.debug)
+        db = get_db()
+        db.initialize()
+        backup_path = db.backup()
+        print(f"Backup created: {backup_path}")
+        return
     # Run the daemon
     run_daemon(mcp_mode=not args.standalone, debug=args.debug, project_id=project_id)

package/memory-daemon/claudia_memory/config.py CHANGED Viewed

@@ -68,6 +68,16 @@ class MemoryConfig:
     # Health check
     health_port: int = 3848
+    # Backup settings
+    backup_retention_count: int = 3  # Number of rolling backups to keep
+    enable_pre_consolidation_backup: bool = True  # Auto-backup before consolidation
+    # Retention settings (data cleanup during consolidation)
+    audit_log_retention_days: int = 90
+    prediction_retention_days: int = 30
+    turn_buffer_retention_days: int = 60
+    metrics_retention_days: int = 90
     # Daemon settings
     log_path: Path = field(default_factory=lambda: Path.home() / ".claudia" / "daemon.log")
@@ -119,6 +129,18 @@ class MemoryConfig:
                     config.fts_weight = data["fts_weight"]
                 if "health_port" in data:
                     config.health_port = data["health_port"]
+                if "backup_retention_count" in data:
+                    config.backup_retention_count = data["backup_retention_count"]
+                if "enable_pre_consolidation_backup" in data:
+                    config.enable_pre_consolidation_backup = data["enable_pre_consolidation_backup"]
+                if "audit_log_retention_days" in data:
+                    config.audit_log_retention_days = data["audit_log_retention_days"]
+                if "prediction_retention_days" in data:
+                    config.prediction_retention_days = data["prediction_retention_days"]
+                if "turn_buffer_retention_days" in data:
+                    config.turn_buffer_retention_days = data["turn_buffer_retention_days"]
+                if "metrics_retention_days" in data:
+                    config.metrics_retention_days = data["metrics_retention_days"]
                 if "log_path" in data:
                     config.log_path = Path(data["log_path"])
@@ -171,6 +193,20 @@ class MemoryConfig:
         weights = self.vector_weight + self.importance_weight + self.recency_weight + self.fts_weight
         if abs(weights - 1.0) > 0.01:
             logger.warning(f"Ranking weights sum to {weights:.3f}, not 1.0. Results may be skewed.")
+        if self.backup_retention_count < 1:
+            logger.warning(f"backup_retention_count={self.backup_retention_count} below minimum, using 1")
+            self.backup_retention_count = 1
+        for attr in ("audit_log_retention_days", "prediction_retention_days", "turn_buffer_retention_days", "metrics_retention_days"):
+            val = getattr(self, attr)
+            if val < 1:
+                logger.warning(f"{attr}={val} below minimum, using 1")
+                setattr(self, attr, 1)
+        common_dims = {384, 512, 768, 1024, 1536}
+        if self.embedding_dimensions not in common_dims:
+            logger.warning(
+                f"embedding_dimensions={self.embedding_dimensions} is not a common value "
+                f"({sorted(common_dims)}). Verify this matches your embedding model's output."
+            )
     def save(self) -> None:
         """Save current configuration to ~/.claudia/config.json"""
@@ -193,6 +229,12 @@ class MemoryConfig:
             "recency_weight": self.recency_weight,
             "fts_weight": self.fts_weight,
             "health_port": self.health_port,
+            "backup_retention_count": self.backup_retention_count,
+            "enable_pre_consolidation_backup": self.enable_pre_consolidation_backup,
+            "audit_log_retention_days": self.audit_log_retention_days,
+            "prediction_retention_days": self.prediction_retention_days,
+            "turn_buffer_retention_days": self.turn_buffer_retention_days,
+            "metrics_retention_days": self.metrics_retention_days,
             "log_path": str(self.log_path),
         }

package/memory-daemon/claudia_memory/daemon/health.py CHANGED Viewed

@@ -10,6 +10,7 @@ import logging
 import threading
 from datetime import datetime
 from http.server import BaseHTTPRequestHandler, HTTPServer
+from pathlib import Path
 from typing import Any, Callable, Dict, Optional
 from ..config import get_config
@@ -68,6 +69,24 @@ def build_status_report(*, db=None) -> dict:
             except Exception:
                 report["counts"][table] = -1
+        # Backup status
+        try:
+            import glob
+            db_path = str(get_config().db_path)
+            pattern = f"{db_path}.backup-*.db"
+            backups = sorted(glob.glob(pattern))
+            if backups:
+                latest = Path(backups[-1])
+                report["backup"] = {
+                    "count": len(backups),
+                    "latest_path": str(latest),
+                    "latest_size_bytes": latest.stat().st_size if latest.exists() else 0,
+                }
+            else:
+                report["backup"] = {"count": 0}
+        except Exception:
+            report["backup"] = {"count": -1, "error": "unable to check"}
     except Exception:
         report["components"]["database"] = "error"
         report["status"] = "degraded"