superlocalmemory 2.7.5 → 2.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (174)
  1. package/CHANGELOG.md +120 -155
  2. package/README.md +115 -89
  3. package/api_server.py +2 -12
  4. package/docs/PATTERN-LEARNING.md +64 -199
  5. package/docs/example_graph_usage.py +4 -6
  6. package/install.ps1 +226 -0
  7. package/install.sh +59 -0
  8. package/mcp_server.py +83 -7
  9. package/package.json +3 -10
  10. package/scripts/generate-thumbnails.py +3 -5
  11. package/skills/slm-build-graph/SKILL.md +1 -1
  12. package/skills/slm-list-recent/SKILL.md +1 -1
  13. package/skills/slm-recall/SKILL.md +1 -1
  14. package/skills/slm-remember/SKILL.md +1 -1
  15. package/skills/slm-show-patterns/SKILL.md +1 -1
  16. package/skills/slm-status/SKILL.md +1 -1
  17. package/skills/slm-switch-profile/SKILL.md +1 -1
  18. package/src/agent_registry.py +7 -18
  19. package/src/auth_middleware.py +3 -5
  20. package/src/auto_backup.py +3 -7
  21. package/src/behavioral/__init__.py +49 -0
  22. package/src/behavioral/behavioral_listener.py +203 -0
  23. package/src/behavioral/behavioral_patterns.py +275 -0
  24. package/src/behavioral/cross_project_transfer.py +206 -0
  25. package/src/behavioral/outcome_inference.py +194 -0
  26. package/src/behavioral/outcome_tracker.py +193 -0
  27. package/src/behavioral/tests/__init__.py +4 -0
  28. package/src/behavioral/tests/test_behavioral_integration.py +108 -0
  29. package/src/behavioral/tests/test_behavioral_patterns.py +150 -0
  30. package/src/behavioral/tests/test_cross_project_transfer.py +142 -0
  31. package/src/behavioral/tests/test_mcp_behavioral.py +139 -0
  32. package/src/behavioral/tests/test_mcp_report_outcome.py +117 -0
  33. package/src/behavioral/tests/test_outcome_inference.py +107 -0
  34. package/src/behavioral/tests/test_outcome_tracker.py +96 -0
  35. package/src/cache_manager.py +4 -6
  36. package/src/compliance/__init__.py +48 -0
  37. package/src/compliance/abac_engine.py +149 -0
  38. package/src/compliance/abac_middleware.py +116 -0
  39. package/src/compliance/audit_db.py +215 -0
  40. package/src/compliance/audit_logger.py +148 -0
  41. package/src/compliance/retention_manager.py +289 -0
  42. package/src/compliance/retention_scheduler.py +186 -0
  43. package/src/compliance/tests/__init__.py +4 -0
  44. package/src/compliance/tests/test_abac_enforcement.py +95 -0
  45. package/src/compliance/tests/test_abac_engine.py +124 -0
  46. package/src/compliance/tests/test_abac_mcp_integration.py +118 -0
  47. package/src/compliance/tests/test_audit_db.py +123 -0
  48. package/src/compliance/tests/test_audit_logger.py +98 -0
  49. package/src/compliance/tests/test_mcp_audit.py +128 -0
  50. package/src/compliance/tests/test_mcp_retention_policy.py +125 -0
  51. package/src/compliance/tests/test_retention_manager.py +131 -0
  52. package/src/compliance/tests/test_retention_scheduler.py +99 -0
  53. package/src/db_connection_manager.py +2 -12
  54. package/src/embedding_engine.py +61 -669
  55. package/src/embeddings/__init__.py +47 -0
  56. package/src/embeddings/cache.py +70 -0
  57. package/src/embeddings/cli.py +113 -0
  58. package/src/embeddings/constants.py +47 -0
  59. package/src/embeddings/database.py +91 -0
  60. package/src/embeddings/engine.py +247 -0
  61. package/src/embeddings/model_loader.py +145 -0
  62. package/src/event_bus.py +3 -13
  63. package/src/graph/__init__.py +36 -0
  64. package/src/graph/build_helpers.py +74 -0
  65. package/src/graph/cli.py +87 -0
  66. package/src/graph/cluster_builder.py +188 -0
  67. package/src/graph/cluster_summary.py +148 -0
  68. package/src/graph/constants.py +47 -0
  69. package/src/graph/edge_builder.py +162 -0
  70. package/src/graph/entity_extractor.py +95 -0
  71. package/src/graph/graph_core.py +226 -0
  72. package/src/graph/graph_search.py +231 -0
  73. package/src/graph/hierarchical.py +207 -0
  74. package/src/graph/schema.py +99 -0
  75. package/src/graph_engine.py +45 -1451
  76. package/src/hnsw_index.py +3 -7
  77. package/src/hybrid_search.py +36 -683
  78. package/src/learning/__init__.py +27 -12
  79. package/src/learning/adaptive_ranker.py +50 -12
  80. package/src/learning/cross_project_aggregator.py +2 -12
  81. package/src/learning/engagement_tracker.py +2 -12
  82. package/src/learning/feature_extractor.py +175 -43
  83. package/src/learning/feedback_collector.py +7 -12
  84. package/src/learning/learning_db.py +180 -12
  85. package/src/learning/project_context_manager.py +2 -12
  86. package/src/learning/source_quality_scorer.py +2 -12
  87. package/src/learning/synthetic_bootstrap.py +2 -12
  88. package/src/learning/tests/__init__.py +2 -0
  89. package/src/learning/tests/test_adaptive_ranker.py +2 -6
  90. package/src/learning/tests/test_adaptive_ranker_v28.py +60 -0
  91. package/src/learning/tests/test_aggregator.py +2 -6
  92. package/src/learning/tests/test_auto_retrain_v28.py +35 -0
  93. package/src/learning/tests/test_e2e_ranking_v28.py +82 -0
  94. package/src/learning/tests/test_feature_extractor_v28.py +93 -0
  95. package/src/learning/tests/test_feedback_collector.py +2 -6
  96. package/src/learning/tests/test_learning_db.py +2 -6
  97. package/src/learning/tests/test_learning_db_v28.py +110 -0
  98. package/src/learning/tests/test_learning_init_v28.py +48 -0
  99. package/src/learning/tests/test_outcome_signals.py +48 -0
  100. package/src/learning/tests/test_project_context.py +2 -6
  101. package/src/learning/tests/test_schema_migration.py +319 -0
  102. package/src/learning/tests/test_signal_inference.py +11 -13
  103. package/src/learning/tests/test_source_quality.py +2 -6
  104. package/src/learning/tests/test_synthetic_bootstrap.py +3 -7
  105. package/src/learning/tests/test_workflow_miner.py +2 -6
  106. package/src/learning/workflow_pattern_miner.py +2 -12
  107. package/src/lifecycle/__init__.py +54 -0
  108. package/src/lifecycle/bounded_growth.py +239 -0
  109. package/src/lifecycle/compaction_engine.py +226 -0
  110. package/src/lifecycle/lifecycle_engine.py +302 -0
  111. package/src/lifecycle/lifecycle_evaluator.py +225 -0
  112. package/src/lifecycle/lifecycle_scheduler.py +130 -0
  113. package/src/lifecycle/retention_policy.py +285 -0
  114. package/src/lifecycle/tests/__init__.py +4 -0
  115. package/src/lifecycle/tests/test_bounded_growth.py +193 -0
  116. package/src/lifecycle/tests/test_compaction.py +179 -0
  117. package/src/lifecycle/tests/test_lifecycle_engine.py +137 -0
  118. package/src/lifecycle/tests/test_lifecycle_evaluation.py +177 -0
  119. package/src/lifecycle/tests/test_lifecycle_scheduler.py +127 -0
  120. package/src/lifecycle/tests/test_lifecycle_search.py +109 -0
  121. package/src/lifecycle/tests/test_mcp_compact.py +149 -0
  122. package/src/lifecycle/tests/test_mcp_lifecycle_status.py +114 -0
  123. package/src/lifecycle/tests/test_retention_policy.py +162 -0
  124. package/src/mcp_tools_v28.py +280 -0
  125. package/src/memory-profiles.py +2 -12
  126. package/src/memory-reset.py +2 -12
  127. package/src/memory_compression.py +2 -12
  128. package/src/memory_store_v2.py +76 -20
  129. package/src/migrate_v1_to_v2.py +2 -12
  130. package/src/pattern_learner.py +29 -975
  131. package/src/patterns/__init__.py +24 -0
  132. package/src/patterns/analyzers.py +247 -0
  133. package/src/patterns/learner.py +267 -0
  134. package/src/patterns/scoring.py +167 -0
  135. package/src/patterns/store.py +223 -0
  136. package/src/patterns/terminology.py +138 -0
  137. package/src/provenance_tracker.py +4 -14
  138. package/src/query_optimizer.py +4 -6
  139. package/src/rate_limiter.py +2 -6
  140. package/src/search/__init__.py +20 -0
  141. package/src/search/cli.py +77 -0
  142. package/src/search/constants.py +26 -0
  143. package/src/search/engine.py +239 -0
  144. package/src/search/fusion.py +122 -0
  145. package/src/search/index_loader.py +112 -0
  146. package/src/search/methods.py +162 -0
  147. package/src/search_engine_v2.py +4 -6
  148. package/src/setup_validator.py +7 -13
  149. package/src/subscription_manager.py +2 -12
  150. package/src/tree/__init__.py +59 -0
  151. package/src/tree/builder.py +183 -0
  152. package/src/tree/nodes.py +196 -0
  153. package/src/tree/queries.py +252 -0
  154. package/src/tree/schema.py +76 -0
  155. package/src/tree_manager.py +10 -711
  156. package/src/trust/__init__.py +45 -0
  157. package/src/trust/constants.py +66 -0
  158. package/src/trust/queries.py +157 -0
  159. package/src/trust/schema.py +95 -0
  160. package/src/trust/scorer.py +299 -0
  161. package/src/trust/signals.py +95 -0
  162. package/src/trust_scorer.py +39 -697
  163. package/src/webhook_dispatcher.py +2 -12
  164. package/ui/app.js +1 -1
  165. package/ui/index.html +3 -0
  166. package/ui/js/agents.js +1 -1
  167. package/ui/js/core.js +21 -5
  168. package/ui/js/profiles.js +29 -7
  169. package/ui_server.py +2 -14
  170. package/ATTRIBUTION.md +0 -140
  171. package/docs/ARCHITECTURE-V2.5.md +0 -190
  172. package/docs/GRAPH-ENGINE.md +0 -503
  173. package/docs/architecture-diagram.drawio +0 -405
  174. package/docs/plans/2026-02-13-benchmark-suite.md +0 -1349
@@ -0,0 +1,231 @@
1
+ #!/usr/bin/env python3
2
+ # SPDX-License-Identifier: MIT
3
+ # Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
4
+ """Graph traversal and query operations.
5
+
6
+ Provides graph traversal (get_related), cluster membership queries,
7
+ and graph statistics collection for the active profile.
8
+ """
9
+ import sqlite3
10
+ import json
11
+ from pathlib import Path
12
+ from typing import List, Dict
13
+
14
+ from graph.constants import logger, MEMORY_DIR
15
+
16
+
17
def _get_active_profile() -> str:
    """Return the name of the currently active profile.

    Reads ``profiles.json`` under MEMORY_DIR; falls back to ``'default'``
    when the file is absent or unreadable.
    """
    profile_config = MEMORY_DIR / "profiles.json"
    if not profile_config.exists():
        return 'default'
    try:
        with open(profile_config, 'r') as fh:
            data = json.load(fh)
    except (json.JSONDecodeError, IOError):
        return 'default'
    return data.get('active_profile', 'default')
28
+
29
+
30
def get_related(db_path: Path, memory_id: int, max_hops: int = 2) -> List[Dict]:
    """
    Get memories connected to this memory via graph edges (active profile only).

    Args:
        db_path: Path to SQLite database
        memory_id: Source memory ID
        max_hops: Maximum traversal depth (1 or 2)

    Returns:
        List of related memory dictionaries, ordered nearest-hop first and,
        within the same hop distance, by descending edge weight.
    """
    conn = sqlite3.connect(db_path)
    cursor = conn.cursor()
    active_profile = _get_active_profile()

    try:
        # 1-hop neighbours in either edge direction, restricted to memories
        # belonging to the active profile.
        edges = cursor.execute('''
            SELECT ge.target_memory_id, ge.relationship_type, ge.weight, ge.shared_entities
            FROM graph_edges ge
            JOIN memories m ON ge.target_memory_id = m.id
            WHERE ge.source_memory_id = ? AND m.profile = ?
            UNION
            SELECT ge.source_memory_id, ge.relationship_type, ge.weight, ge.shared_entities
            FROM graph_edges ge
            JOIN memories m ON ge.source_memory_id = m.id
            WHERE ge.target_memory_id = ? AND m.profile = ?
        ''', (memory_id, active_profile, memory_id, active_profile)).fetchall()

        results = []
        seen_ids = {memory_id}

        for target_id, rel_type, weight, shared_entities in edges:
            if target_id in seen_ids:
                continue

            seen_ids.add(target_id)

            # Fetch display details for the related memory.
            memory = cursor.execute('''
                SELECT id, summary, importance, tags
                FROM memories WHERE id = ?
            ''', (target_id,)).fetchone()

            if memory:
                results.append({
                    'id': memory[0],
                    'summary': memory[1],
                    'importance': memory[2],
                    'tags': json.loads(memory[3]) if memory[3] else [],
                    'relationship': rel_type,
                    'weight': weight,
                    'shared_entities': json.loads(shared_entities) if shared_entities else [],
                    'hops': 1
                })

        # Expand to 2-hop neighbours when requested.
        if max_hops >= 2:
            for result in results[:]:  # Copy to avoid modification during iteration
                # BUGFIX: the 2-hop expansion now applies the same
                # active-profile filter as the 1-hop query, so results never
                # leak memories from other profiles (the docstring promises
                # "active profile only").
                second_hop = cursor.execute('''
                    SELECT ge.target_memory_id, ge.relationship_type, ge.weight
                    FROM graph_edges ge
                    JOIN memories m ON ge.target_memory_id = m.id
                    WHERE ge.source_memory_id = ? AND m.profile = ?
                    UNION
                    SELECT ge.source_memory_id, ge.relationship_type, ge.weight
                    FROM graph_edges ge
                    JOIN memories m ON ge.source_memory_id = m.id
                    WHERE ge.target_memory_id = ? AND m.profile = ?
                ''', (result['id'], active_profile,
                      result['id'], active_profile)).fetchall()

                for target_id, rel_type, weight in second_hop:
                    if target_id in seen_ids:
                        continue

                    seen_ids.add(target_id)

                    memory = cursor.execute('''
                        SELECT id, summary, importance, tags
                        FROM memories WHERE id = ?
                    ''', (target_id,)).fetchone()

                    if memory:
                        results.append({
                            'id': memory[0],
                            'summary': memory[1],
                            'importance': memory[2],
                            'tags': json.loads(memory[3]) if memory[3] else [],
                            'relationship': rel_type,
                            'weight': weight,
                            'shared_entities': [],
                            'hops': 2
                        })

        # BUGFIX: sort nearest hops first (ascending), then strongest edges.
        # The previous key used -x['hops'], which placed 2-hop results ahead
        # of direct neighbours, contradicting the stated intent.
        results.sort(key=lambda x: (x['hops'], -x['weight']))

        return results

    finally:
        conn.close()
130
+
131
+
132
def get_cluster_members(db_path: Path, cluster_id: int) -> List[Dict]:
    """
    Get all memories in a cluster (filtered by active profile).

    Args:
        db_path: Path to SQLite database
        cluster_id: Cluster ID

    Returns:
        List of memory dictionaries, highest importance first.
    """
    active_profile = _get_active_profile()
    conn = sqlite3.connect(db_path)

    try:
        rows = conn.execute('''
            SELECT id, summary, importance, tags, created_at
            FROM memories
            WHERE cluster_id = ? AND profile = ?
            ORDER BY importance DESC
        ''', (cluster_id, active_profile)).fetchall()

        members = []
        for mem_id, summary, importance, tags, created_at in rows:
            members.append({
                'id': mem_id,
                'summary': summary,
                'importance': importance,
                'tags': json.loads(tags) if tags else [],
                'created_at': created_at,
            })
        return members

    finally:
        conn.close()
168
+
169
+
170
def get_stats(db_path: Path) -> Dict[str, object]:
    """Get graph statistics for the active profile.

    Args:
        db_path: Path to SQLite database

    Returns:
        Dict with node/edge/cluster counts, maximum hierarchy depth observed
        among the sampled clusters, and up to 20 top clusters ordered by
        depth then member count.
    """
    conn = sqlite3.connect(db_path)
    cursor = conn.cursor()
    active_profile = _get_active_profile()

    try:
        # Count nodes for active profile's memories
        nodes = cursor.execute('''
            SELECT COUNT(*) FROM graph_nodes
            WHERE memory_id IN (SELECT id FROM memories WHERE profile = ?)
        ''', (active_profile,)).fetchone()[0]

        # Count edges whose *source* memory belongs to the active profile.
        # (The previous comment claimed "at least one end", but the query
        # only checks the source side; the comment now matches the code.)
        edges = cursor.execute('''
            SELECT COUNT(*) FROM graph_edges
            WHERE source_memory_id IN (SELECT id FROM memories WHERE profile = ?)
        ''', (active_profile,)).fetchone()[0]

        # Clusters that have members in active profile
        clusters = cursor.execute('''
            SELECT COUNT(DISTINCT cluster_id) FROM memories
            WHERE cluster_id IS NOT NULL AND profile = ?
        ''', (active_profile,)).fetchone()[0]

        # Cluster breakdown for active profile (including hierarchy)
        cluster_info = cursor.execute('''
            SELECT gc.name, gc.member_count, gc.avg_importance,
                   gc.summary, gc.parent_cluster_id, gc.depth
            FROM graph_clusters gc
            WHERE gc.id IN (
                SELECT DISTINCT cluster_id FROM memories
                WHERE cluster_id IS NOT NULL AND profile = ?
            )
            ORDER BY gc.depth ASC, gc.member_count DESC
            LIMIT 20
        ''', (active_profile,)).fetchall()

        # Deepest hierarchy level seen among the sampled clusters
        # (NULL depth treated as 0).
        max_depth = max((c[5] or 0 for c in cluster_info), default=0) if cluster_info else 0

        return {
            'profile': active_profile,
            'nodes': nodes,
            'edges': edges,
            'clusters': clusters,
            'max_depth': max_depth,
            'top_clusters': [
                {
                    'name': c[0],
                    'members': c[1],
                    # NULL (or 0) avg_importance reported as the 5.0 midpoint
                    'avg_importance': round(c[2], 1) if c[2] else 5.0,
                    'summary': c[3],
                    'parent_cluster_id': c[4],
                    'depth': c[5] or 0
                }
                for c in cluster_info
            ]
        }

    finally:
        conn.close()
@@ -0,0 +1,207 @@
1
+ #!/usr/bin/env python3
2
+ # SPDX-License-Identifier: MIT
3
+ # Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
4
+ """Hierarchical sub-clustering for the graph engine.
5
+
6
+ Implements recursive Leiden-based hierarchical clustering that decomposes
7
+ large communities into finer-grained thematic sub-clusters.
8
+ """
9
+ import sqlite3
10
+ from typing import List, Dict, Tuple
11
+
12
+ from graph.constants import logger, IGRAPH_AVAILABLE, MEMORY_DIR
13
+
14
+
15
def _get_active_profile() -> str:
    """Return the active profile name from config, or 'default'."""
    import json
    cfg_path = MEMORY_DIR / "profiles.json"
    try:
        with open(cfg_path, 'r') as fh:
            cfg = json.load(fh)
    except (json.JSONDecodeError, IOError):
        # Missing or unreadable config falls back to the default profile.
        return 'default'
    return cfg.get('active_profile', 'default')
+ return 'default'
27
+
28
+
29
def hierarchical_cluster(db_path, get_avg_importance_fn, generate_cluster_name_fn,
                         min_subcluster_size: int = 5, max_depth: int = 3) -> Dict[str, object]:
    """
    Run recursive Leiden clustering -- cluster the clusters.

    Large communities (>= min_subcluster_size * 2) are recursively sub-clustered
    to reveal finer-grained thematic structure.

    Args:
        db_path: Path to SQLite database
        get_avg_importance_fn: Callback to compute avg importance for memory IDs
        generate_cluster_name_fn: Callback to generate cluster name from memory IDs
        min_subcluster_size: Minimum members to attempt sub-clustering (default 5)
        max_depth: Maximum recursion depth (default 3)

    Returns:
        Dictionary with hierarchical clustering statistics; on failure the
        dict carries an 'error' key instead of raising.
    """
    if not IGRAPH_AVAILABLE:
        logger.warning("igraph/leidenalg not installed. Hierarchical clustering disabled. Install with: pip3 install python-igraph leidenalg")
        return {'subclusters_created': 0, 'depth_reached': 0}
    # NOTE: igraph/leidenalg were previously imported here as well, but all
    # Leiden work happens in _recursive_subcluster (which imports them
    # itself), so the redundant imports have been removed as dead code.

    conn = sqlite3.connect(db_path)
    cursor = conn.cursor()
    active_profile = _get_active_profile()

    try:
        # Top-level clusters for this profile that are large enough to
        # attempt decomposition (at least 2x the minimum sub-cluster size).
        cursor.execute('''
            SELECT cluster_id, COUNT(*) as cnt
            FROM memories
            WHERE cluster_id IS NOT NULL AND profile = ?
            GROUP BY cluster_id
            HAVING cnt >= ?
        ''', (active_profile, min_subcluster_size * 2))
        large_clusters = cursor.fetchall()

        if not large_clusters:
            logger.info("No clusters large enough for hierarchical decomposition")
            return {'subclusters_created': 0, 'depth_reached': 0}

        total_subclusters = 0
        max_depth_reached = 0

        for parent_cid, member_count in large_clusters:
            subs, depth = _recursive_subcluster(
                conn, cursor, parent_cid, active_profile,
                min_subcluster_size, max_depth, current_depth=1,
                get_avg_importance_fn=get_avg_importance_fn,
                generate_cluster_name_fn=generate_cluster_name_fn,
            )
            total_subclusters += subs
            max_depth_reached = max(max_depth_reached, depth)

        conn.commit()
        logger.info(f"Hierarchical clustering: {total_subclusters} sub-clusters, depth {max_depth_reached}")
        return {
            'subclusters_created': total_subclusters,
            'depth_reached': max_depth_reached,
            'parent_clusters_processed': len(large_clusters)
        }

    except Exception as e:
        # Boundary handler: log, undo partial writes, and report the failure
        # in the stats dict rather than propagating.
        logger.error(f"Hierarchical clustering failed: {e}")
        conn.rollback()
        return {'subclusters_created': 0, 'error': str(e)}
    finally:
        conn.close()
99
+
100
+
101
def _recursive_subcluster(conn, cursor, parent_cluster_id: int,
                          profile: str, min_size: int, max_depth: int,
                          current_depth: int,
                          get_avg_importance_fn, generate_cluster_name_fn) -> Tuple[int, int]:
    """Recursively sub-cluster a community using Leiden.

    Builds an igraph sub-graph from the edges between the members of
    ``parent_cluster_id`` (for ``profile``), runs Leiden community detection,
    inserts each multi-member community as a child row in ``graph_clusters``,
    repoints the member memories' ``cluster_id`` to the new sub-cluster, and
    recurses into each new sub-cluster until ``max_depth``.

    Args:
        conn: Open SQLite connection (commit is handled by the caller).
        cursor: Cursor on ``conn`` used for all reads and writes.
        parent_cluster_id: Cluster whose members are being decomposed.
        profile: Profile name used to filter member memories.
        min_size: Minimum sub-cluster size; clusters with fewer than
            ``min_size * 2`` members are not decomposed.
        max_depth: Maximum recursion depth.
        current_depth: Depth of this invocation (1 for the first level).
        get_avg_importance_fn: Callback ``(cursor, memory_ids) -> float``.
        generate_cluster_name_fn: Callback ``(cursor, memory_ids) -> str``.

    Returns:
        Tuple of (sub-clusters created in this subtree, deepest level at
        which a split actually happened; ``current_depth - 1`` when no
        split occurred here).
    """
    if not IGRAPH_AVAILABLE:
        return 0, current_depth - 1
    # Lazy imports: only pay the cost when igraph/leidenalg are installed.
    import igraph as ig
    import leidenalg

    if current_depth > max_depth:
        return 0, current_depth - 1

    # Get memory IDs in this cluster
    cursor.execute('''
        SELECT id FROM memories
        WHERE cluster_id = ? AND profile = ?
    ''', (parent_cluster_id, profile))
    member_ids = [row[0] for row in cursor.fetchall()]

    # Too small to split into at least two viable communities.
    if len(member_ids) < min_size * 2:
        return 0, current_depth - 1

    # Get edges between members of this cluster.
    # The IN-list placeholders are generated from len(member_ids); values are
    # still bound as parameters, so this is not an injection risk.
    placeholders = ','.join('?' * len(member_ids))
    edges = cursor.execute(f'''
        SELECT source_memory_id, target_memory_id, weight
        FROM graph_edges
        WHERE source_memory_id IN ({placeholders})
        AND target_memory_id IN ({placeholders})
    ''', member_ids + member_ids).fetchall()

    # Fewer than 2 internal edges cannot yield a meaningful partition.
    if len(edges) < 2:
        return 0, current_depth - 1

    # Build sub-graph: map memory IDs to contiguous vertex indices and back.
    id_to_vertex = {mid: idx for idx, mid in enumerate(member_ids)}
    vertex_to_id = {idx: mid for mid, idx in id_to_vertex.items()}

    g = ig.Graph()
    g.add_vertices(len(member_ids))
    edge_list, edge_weights = [], []
    for src, tgt, w in edges:
        if src in id_to_vertex and tgt in id_to_vertex:
            edge_list.append((id_to_vertex[src], id_to_vertex[tgt]))
            edge_weights.append(w)

    if not edge_list:
        return 0, current_depth - 1

    g.add_edges(edge_list)

    # Run weighted Leiden community detection on the sub-graph.
    # seed=42 keeps the partition deterministic across runs.
    partition = leidenalg.find_partition(
        g, leidenalg.ModularityVertexPartition,
        weights=edge_weights, n_iterations=100, seed=42
    )

    # Only proceed if Leiden found > 1 community (actual split)
    non_singleton = [c for c in partition if len(c) >= 2]
    if len(non_singleton) <= 1:
        return 0, current_depth - 1

    subclusters_created = 0
    deepest = current_depth

    # Parent depth determines the depth stored on the new child rows.
    cursor.execute('SELECT depth FROM graph_clusters WHERE id = ?', (parent_cluster_id,))
    parent_row = cursor.fetchone()
    parent_depth = parent_row[0] if parent_row else 0

    for community in non_singleton:
        # Translate vertex indices back to memory IDs.
        sub_member_ids = [vertex_to_id[v] for v in community]

        if len(sub_member_ids) < 2:
            continue

        avg_imp = get_avg_importance_fn(cursor, sub_member_ids)
        cluster_name = generate_cluster_name_fn(cursor, sub_member_ids)

        result = cursor.execute('''
            INSERT INTO graph_clusters (name, member_count, avg_importance, parent_cluster_id, depth)
            VALUES (?, ?, ?, ?, ?)
        ''', (cluster_name, len(sub_member_ids), avg_imp, parent_cluster_id, parent_depth + 1))

        sub_cluster_id = result.lastrowid

        # Update memories to point to sub-cluster
        cursor.executemany('''
            UPDATE memories SET cluster_id = ? WHERE id = ?
        ''', [(sub_cluster_id, mid) for mid in sub_member_ids])

        subclusters_created += 1
        logger.info(f"Sub-cluster {sub_cluster_id} under {parent_cluster_id}: "
                    f"'{cluster_name}' ({len(sub_member_ids)} members, depth {parent_depth + 1})")

        # Recurse into this sub-cluster if large enough
        child_subs, child_depth = _recursive_subcluster(
            conn, cursor, sub_cluster_id, profile,
            min_size, max_depth, current_depth + 1,
            get_avg_importance_fn=get_avg_importance_fn,
            generate_cluster_name_fn=generate_cluster_name_fn,
        )
        subclusters_created += child_subs
        deepest = max(deepest, child_depth)

    return subclusters_created, deepest
@@ -0,0 +1,99 @@
1
+ #!/usr/bin/env python3
2
+ # SPDX-License-Identifier: MIT
3
+ # Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
4
+ """Database schema management for the graph engine.
5
+
6
+ Creates and maintains the graph_nodes, graph_edges, and graph_clusters
7
+ tables, including safe schema migrations for existing databases.
8
+ """
9
+ import sqlite3
10
+ from pathlib import Path
11
+
12
+ from graph.constants import logger
13
+
14
+
15
def ensure_graph_tables(db_path: Path):
    """Create graph tables if they don't exist, or recreate if schema is incomplete.

    Ensures graph_nodes, graph_edges, and graph_clusters exist with the
    expected columns, performs additive column migrations for older
    databases, and creates the supporting indexes.

    Args:
        db_path: Path to the SQLite database file.
    """
    conn = sqlite3.connect(db_path)
    # BUGFIX: the connection was previously leaked if any statement raised;
    # try/finally guarantees it is closed on every path.
    try:
        cursor = conn.cursor()

        # Check if existing tables have correct schema (not just id column)
        for table_name, required_cols in [
            ('graph_nodes', {'memory_id', 'entities'}),
            ('graph_edges', {'source_memory_id', 'target_memory_id', 'weight'}),
            ('graph_clusters', {'name', 'member_count'}),
        ]:
            cursor.execute(f"PRAGMA table_info({table_name})")
            existing_cols = {row[1] for row in cursor.fetchall()}
            if existing_cols and not required_cols.issubset(existing_cols):
                # Table exists but has incomplete schema -- drop and recreate
                logger.warning(f"Dropping incomplete {table_name} table (missing: {required_cols - existing_cols})")
                cursor.execute(f'DROP TABLE IF EXISTS {table_name}')

        # Graph nodes table
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS graph_nodes (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                memory_id INTEGER UNIQUE NOT NULL,
                entities TEXT,
                embedding_vector TEXT,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                FOREIGN KEY (memory_id) REFERENCES memories(id) ON DELETE CASCADE
            )
        ''')

        # Graph edges table
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS graph_edges (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                source_memory_id INTEGER NOT NULL,
                target_memory_id INTEGER NOT NULL,
                relationship_type TEXT,
                weight REAL DEFAULT 1.0,
                shared_entities TEXT,
                similarity_score REAL,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                FOREIGN KEY (source_memory_id) REFERENCES memories(id) ON DELETE CASCADE,
                FOREIGN KEY (target_memory_id) REFERENCES memories(id) ON DELETE CASCADE,
                UNIQUE(source_memory_id, target_memory_id)
            )
        ''')

        # Graph clusters table
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS graph_clusters (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT NOT NULL,
                description TEXT,
                summary TEXT,
                member_count INTEGER DEFAULT 0,
                avg_importance REAL,
                parent_cluster_id INTEGER,
                depth INTEGER DEFAULT 0,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                FOREIGN KEY (parent_cluster_id) REFERENCES graph_clusters(id) ON DELETE SET NULL
            )
        ''')

        # Safe column additions for existing databases; ALTER TABLE raises
        # OperationalError when the column already exists, which we ignore.
        for col, col_type in [('summary', 'TEXT'), ('parent_cluster_id', 'INTEGER'), ('depth', 'INTEGER DEFAULT 0')]:
            try:
                cursor.execute(f'ALTER TABLE graph_clusters ADD COLUMN {col} {col_type}')
            except sqlite3.OperationalError:
                pass

        # Add cluster_id to memories if not exists
        try:
            cursor.execute('ALTER TABLE memories ADD COLUMN cluster_id INTEGER')
        except sqlite3.OperationalError:
            pass  # Column already exists

        # Create indexes
        cursor.execute('CREATE INDEX IF NOT EXISTS idx_graph_source ON graph_edges(source_memory_id)')
        cursor.execute('CREATE INDEX IF NOT EXISTS idx_graph_target ON graph_edges(target_memory_id)')
        cursor.execute('CREATE INDEX IF NOT EXISTS idx_cluster_members ON memories(cluster_id)')

        conn.commit()
    finally:
        conn.close()
    logger.info("Graph tables initialized")