superlocalmemory 2.7.5 → 2.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (174)
  1. package/CHANGELOG.md +120 -155
  2. package/README.md +115 -89
  3. package/api_server.py +2 -12
  4. package/docs/PATTERN-LEARNING.md +64 -199
  5. package/docs/example_graph_usage.py +4 -6
  6. package/install.ps1 +226 -0
  7. package/install.sh +59 -0
  8. package/mcp_server.py +83 -7
  9. package/package.json +3 -10
  10. package/scripts/generate-thumbnails.py +3 -5
  11. package/skills/slm-build-graph/SKILL.md +1 -1
  12. package/skills/slm-list-recent/SKILL.md +1 -1
  13. package/skills/slm-recall/SKILL.md +1 -1
  14. package/skills/slm-remember/SKILL.md +1 -1
  15. package/skills/slm-show-patterns/SKILL.md +1 -1
  16. package/skills/slm-status/SKILL.md +1 -1
  17. package/skills/slm-switch-profile/SKILL.md +1 -1
  18. package/src/agent_registry.py +7 -18
  19. package/src/auth_middleware.py +3 -5
  20. package/src/auto_backup.py +3 -7
  21. package/src/behavioral/__init__.py +49 -0
  22. package/src/behavioral/behavioral_listener.py +203 -0
  23. package/src/behavioral/behavioral_patterns.py +275 -0
  24. package/src/behavioral/cross_project_transfer.py +206 -0
  25. package/src/behavioral/outcome_inference.py +194 -0
  26. package/src/behavioral/outcome_tracker.py +193 -0
  27. package/src/behavioral/tests/__init__.py +4 -0
  28. package/src/behavioral/tests/test_behavioral_integration.py +108 -0
  29. package/src/behavioral/tests/test_behavioral_patterns.py +150 -0
  30. package/src/behavioral/tests/test_cross_project_transfer.py +142 -0
  31. package/src/behavioral/tests/test_mcp_behavioral.py +139 -0
  32. package/src/behavioral/tests/test_mcp_report_outcome.py +117 -0
  33. package/src/behavioral/tests/test_outcome_inference.py +107 -0
  34. package/src/behavioral/tests/test_outcome_tracker.py +96 -0
  35. package/src/cache_manager.py +4 -6
  36. package/src/compliance/__init__.py +48 -0
  37. package/src/compliance/abac_engine.py +149 -0
  38. package/src/compliance/abac_middleware.py +116 -0
  39. package/src/compliance/audit_db.py +215 -0
  40. package/src/compliance/audit_logger.py +148 -0
  41. package/src/compliance/retention_manager.py +289 -0
  42. package/src/compliance/retention_scheduler.py +186 -0
  43. package/src/compliance/tests/__init__.py +4 -0
  44. package/src/compliance/tests/test_abac_enforcement.py +95 -0
  45. package/src/compliance/tests/test_abac_engine.py +124 -0
  46. package/src/compliance/tests/test_abac_mcp_integration.py +118 -0
  47. package/src/compliance/tests/test_audit_db.py +123 -0
  48. package/src/compliance/tests/test_audit_logger.py +98 -0
  49. package/src/compliance/tests/test_mcp_audit.py +128 -0
  50. package/src/compliance/tests/test_mcp_retention_policy.py +125 -0
  51. package/src/compliance/tests/test_retention_manager.py +131 -0
  52. package/src/compliance/tests/test_retention_scheduler.py +99 -0
  53. package/src/db_connection_manager.py +2 -12
  54. package/src/embedding_engine.py +61 -669
  55. package/src/embeddings/__init__.py +47 -0
  56. package/src/embeddings/cache.py +70 -0
  57. package/src/embeddings/cli.py +113 -0
  58. package/src/embeddings/constants.py +47 -0
  59. package/src/embeddings/database.py +91 -0
  60. package/src/embeddings/engine.py +247 -0
  61. package/src/embeddings/model_loader.py +145 -0
  62. package/src/event_bus.py +3 -13
  63. package/src/graph/__init__.py +36 -0
  64. package/src/graph/build_helpers.py +74 -0
  65. package/src/graph/cli.py +87 -0
  66. package/src/graph/cluster_builder.py +188 -0
  67. package/src/graph/cluster_summary.py +148 -0
  68. package/src/graph/constants.py +47 -0
  69. package/src/graph/edge_builder.py +162 -0
  70. package/src/graph/entity_extractor.py +95 -0
  71. package/src/graph/graph_core.py +226 -0
  72. package/src/graph/graph_search.py +231 -0
  73. package/src/graph/hierarchical.py +207 -0
  74. package/src/graph/schema.py +99 -0
  75. package/src/graph_engine.py +45 -1451
  76. package/src/hnsw_index.py +3 -7
  77. package/src/hybrid_search.py +36 -683
  78. package/src/learning/__init__.py +27 -12
  79. package/src/learning/adaptive_ranker.py +50 -12
  80. package/src/learning/cross_project_aggregator.py +2 -12
  81. package/src/learning/engagement_tracker.py +2 -12
  82. package/src/learning/feature_extractor.py +175 -43
  83. package/src/learning/feedback_collector.py +7 -12
  84. package/src/learning/learning_db.py +180 -12
  85. package/src/learning/project_context_manager.py +2 -12
  86. package/src/learning/source_quality_scorer.py +2 -12
  87. package/src/learning/synthetic_bootstrap.py +2 -12
  88. package/src/learning/tests/__init__.py +2 -0
  89. package/src/learning/tests/test_adaptive_ranker.py +2 -6
  90. package/src/learning/tests/test_adaptive_ranker_v28.py +60 -0
  91. package/src/learning/tests/test_aggregator.py +2 -6
  92. package/src/learning/tests/test_auto_retrain_v28.py +35 -0
  93. package/src/learning/tests/test_e2e_ranking_v28.py +82 -0
  94. package/src/learning/tests/test_feature_extractor_v28.py +93 -0
  95. package/src/learning/tests/test_feedback_collector.py +2 -6
  96. package/src/learning/tests/test_learning_db.py +2 -6
  97. package/src/learning/tests/test_learning_db_v28.py +110 -0
  98. package/src/learning/tests/test_learning_init_v28.py +48 -0
  99. package/src/learning/tests/test_outcome_signals.py +48 -0
  100. package/src/learning/tests/test_project_context.py +2 -6
  101. package/src/learning/tests/test_schema_migration.py +319 -0
  102. package/src/learning/tests/test_signal_inference.py +11 -13
  103. package/src/learning/tests/test_source_quality.py +2 -6
  104. package/src/learning/tests/test_synthetic_bootstrap.py +3 -7
  105. package/src/learning/tests/test_workflow_miner.py +2 -6
  106. package/src/learning/workflow_pattern_miner.py +2 -12
  107. package/src/lifecycle/__init__.py +54 -0
  108. package/src/lifecycle/bounded_growth.py +239 -0
  109. package/src/lifecycle/compaction_engine.py +226 -0
  110. package/src/lifecycle/lifecycle_engine.py +302 -0
  111. package/src/lifecycle/lifecycle_evaluator.py +225 -0
  112. package/src/lifecycle/lifecycle_scheduler.py +130 -0
  113. package/src/lifecycle/retention_policy.py +285 -0
  114. package/src/lifecycle/tests/__init__.py +4 -0
  115. package/src/lifecycle/tests/test_bounded_growth.py +193 -0
  116. package/src/lifecycle/tests/test_compaction.py +179 -0
  117. package/src/lifecycle/tests/test_lifecycle_engine.py +137 -0
  118. package/src/lifecycle/tests/test_lifecycle_evaluation.py +177 -0
  119. package/src/lifecycle/tests/test_lifecycle_scheduler.py +127 -0
  120. package/src/lifecycle/tests/test_lifecycle_search.py +109 -0
  121. package/src/lifecycle/tests/test_mcp_compact.py +149 -0
  122. package/src/lifecycle/tests/test_mcp_lifecycle_status.py +114 -0
  123. package/src/lifecycle/tests/test_retention_policy.py +162 -0
  124. package/src/mcp_tools_v28.py +280 -0
  125. package/src/memory-profiles.py +2 -12
  126. package/src/memory-reset.py +2 -12
  127. package/src/memory_compression.py +2 -12
  128. package/src/memory_store_v2.py +76 -20
  129. package/src/migrate_v1_to_v2.py +2 -12
  130. package/src/pattern_learner.py +29 -975
  131. package/src/patterns/__init__.py +24 -0
  132. package/src/patterns/analyzers.py +247 -0
  133. package/src/patterns/learner.py +267 -0
  134. package/src/patterns/scoring.py +167 -0
  135. package/src/patterns/store.py +223 -0
  136. package/src/patterns/terminology.py +138 -0
  137. package/src/provenance_tracker.py +4 -14
  138. package/src/query_optimizer.py +4 -6
  139. package/src/rate_limiter.py +2 -6
  140. package/src/search/__init__.py +20 -0
  141. package/src/search/cli.py +77 -0
  142. package/src/search/constants.py +26 -0
  143. package/src/search/engine.py +239 -0
  144. package/src/search/fusion.py +122 -0
  145. package/src/search/index_loader.py +112 -0
  146. package/src/search/methods.py +162 -0
  147. package/src/search_engine_v2.py +4 -6
  148. package/src/setup_validator.py +7 -13
  149. package/src/subscription_manager.py +2 -12
  150. package/src/tree/__init__.py +59 -0
  151. package/src/tree/builder.py +183 -0
  152. package/src/tree/nodes.py +196 -0
  153. package/src/tree/queries.py +252 -0
  154. package/src/tree/schema.py +76 -0
  155. package/src/tree_manager.py +10 -711
  156. package/src/trust/__init__.py +45 -0
  157. package/src/trust/constants.py +66 -0
  158. package/src/trust/queries.py +157 -0
  159. package/src/trust/schema.py +95 -0
  160. package/src/trust/scorer.py +299 -0
  161. package/src/trust/signals.py +95 -0
  162. package/src/trust_scorer.py +39 -697
  163. package/src/webhook_dispatcher.py +2 -12
  164. package/ui/app.js +1 -1
  165. package/ui/index.html +3 -0
  166. package/ui/js/agents.js +1 -1
  167. package/ui/js/core.js +21 -5
  168. package/ui/js/profiles.js +29 -7
  169. package/ui_server.py +2 -14
  170. package/ATTRIBUTION.md +0 -140
  171. package/docs/ARCHITECTURE-V2.5.md +0 -190
  172. package/docs/GRAPH-ENGINE.md +0 -503
  173. package/docs/architecture-diagram.drawio +0 -405
  174. package/docs/plans/2026-02-13-benchmark-suite.md +0 -1349
@@ -0,0 +1,231 @@
1
+ #!/usr/bin/env python3
2
+ # SPDX-License-Identifier: MIT
3
+ # Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
4
+ """Graph traversal and query operations.
5
+
6
+ Provides graph traversal (get_related), cluster membership queries,
7
+ and graph statistics collection for the active profile.
8
+ """
9
+ import sqlite3
10
+ import json
11
+ from pathlib import Path
12
+ from typing import List, Dict
13
+
14
+ from graph.constants import logger, MEMORY_DIR
15
+
16
+
17
def _get_active_profile() -> str:
    """Return the name of the currently active profile.

    Reads ``profiles.json`` under MEMORY_DIR; falls back to ``'default'``
    when the file is absent or unreadable.
    """
    profile_config = MEMORY_DIR / "profiles.json"
    if not profile_config.exists():
        return 'default'
    try:
        with open(profile_config, 'r') as fh:
            data = json.load(fh)
    except (json.JSONDecodeError, IOError):
        return 'default'
    return data.get('active_profile', 'default')
28
+
29
+
30
def get_related(db_path: Path, memory_id: int, max_hops: int = 2) -> List[Dict]:
    """
    Get memories connected to this memory via graph edges (active profile only).

    Args:
        db_path: Path to SQLite database
        memory_id: Source memory ID
        max_hops: Maximum traversal depth (1 or 2)

    Returns:
        List of related memory dictionaries, ordered nearest-hop first and,
        within the same hop distance, by descending edge weight.
    """
    conn = sqlite3.connect(db_path)
    cursor = conn.cursor()
    active_profile = _get_active_profile()

    try:
        # 1-hop neighbours in either edge direction, restricted to memories
        # belonging to the active profile.
        edges = cursor.execute('''
            SELECT ge.target_memory_id, ge.relationship_type, ge.weight, ge.shared_entities
            FROM graph_edges ge
            JOIN memories m ON ge.target_memory_id = m.id
            WHERE ge.source_memory_id = ? AND m.profile = ?
            UNION
            SELECT ge.source_memory_id, ge.relationship_type, ge.weight, ge.shared_entities
            FROM graph_edges ge
            JOIN memories m ON ge.source_memory_id = m.id
            WHERE ge.target_memory_id = ? AND m.profile = ?
        ''', (memory_id, active_profile, memory_id, active_profile)).fetchall()

        results = []
        seen_ids = {memory_id}

        for target_id, rel_type, weight, shared_entities in edges:
            if target_id in seen_ids:
                continue

            seen_ids.add(target_id)

            # Fetch display details for the related memory.
            memory = cursor.execute('''
                SELECT id, summary, importance, tags
                FROM memories WHERE id = ?
            ''', (target_id,)).fetchone()

            if memory:
                results.append({
                    'id': memory[0],
                    'summary': memory[1],
                    'importance': memory[2],
                    'tags': json.loads(memory[3]) if memory[3] else [],
                    'relationship': rel_type,
                    'weight': weight,
                    'shared_entities': json.loads(shared_entities) if shared_entities else [],
                    'hops': 1
                })

        # Expand to 2-hop neighbours when requested.
        if max_hops >= 2:
            for result in results[:]:  # Copy to avoid modification during iteration
                # BUGFIX: the 2-hop expansion now applies the same
                # active-profile filter as the 1-hop query, so results never
                # leak memories from other profiles (the docstring promises
                # "active profile only").
                second_hop = cursor.execute('''
                    SELECT ge.target_memory_id, ge.relationship_type, ge.weight
                    FROM graph_edges ge
                    JOIN memories m ON ge.target_memory_id = m.id
                    WHERE ge.source_memory_id = ? AND m.profile = ?
                    UNION
                    SELECT ge.source_memory_id, ge.relationship_type, ge.weight
                    FROM graph_edges ge
                    JOIN memories m ON ge.source_memory_id = m.id
                    WHERE ge.target_memory_id = ? AND m.profile = ?
                ''', (result['id'], active_profile,
                      result['id'], active_profile)).fetchall()

                for target_id, rel_type, weight in second_hop:
                    if target_id in seen_ids:
                        continue

                    seen_ids.add(target_id)

                    memory = cursor.execute('''
                        SELECT id, summary, importance, tags
                        FROM memories WHERE id = ?
                    ''', (target_id,)).fetchone()

                    if memory:
                        results.append({
                            'id': memory[0],
                            'summary': memory[1],
                            'importance': memory[2],
                            'tags': json.loads(memory[3]) if memory[3] else [],
                            'relationship': rel_type,
                            'weight': weight,
                            'shared_entities': [],
                            'hops': 2
                        })

        # BUGFIX: sort nearest hops first (ascending), then strongest edges.
        # The previous key used -x['hops'], which placed 2-hop results ahead
        # of direct neighbours, contradicting the stated intent.
        results.sort(key=lambda x: (x['hops'], -x['weight']))

        return results

    finally:
        conn.close()
130
+
131
+
132
def get_cluster_members(db_path: Path, cluster_id: int) -> List[Dict]:
    """
    Get all memories in a cluster (filtered by active profile).

    Args:
        db_path: Path to SQLite database
        cluster_id: Cluster ID

    Returns:
        List of memory dictionaries, highest importance first.
    """
    active_profile = _get_active_profile()
    conn = sqlite3.connect(db_path)

    try:
        rows = conn.execute('''
            SELECT id, summary, importance, tags, created_at
            FROM memories
            WHERE cluster_id = ? AND profile = ?
            ORDER BY importance DESC
        ''', (cluster_id, active_profile)).fetchall()

        members = []
        for mem_id, summary, importance, tags, created_at in rows:
            members.append({
                'id': mem_id,
                'summary': summary,
                'importance': importance,
                'tags': json.loads(tags) if tags else [],
                'created_at': created_at,
            })
        return members

    finally:
        conn.close()
168
+
169
+
170
def get_stats(db_path: Path) -> Dict[str, object]:
    """Get graph statistics for the active profile.

    Args:
        db_path: Path to SQLite database

    Returns:
        Dict with node/edge/cluster counts, maximum hierarchy depth observed
        among the sampled clusters, and up to 20 top clusters ordered by
        depth then member count.
    """
    conn = sqlite3.connect(db_path)
    cursor = conn.cursor()
    active_profile = _get_active_profile()

    try:
        # Count nodes for active profile's memories
        nodes = cursor.execute('''
            SELECT COUNT(*) FROM graph_nodes
            WHERE memory_id IN (SELECT id FROM memories WHERE profile = ?)
        ''', (active_profile,)).fetchone()[0]

        # Count edges whose *source* memory belongs to the active profile.
        # (The previous comment claimed "at least one end", but the query
        # only checks the source side; the comment now matches the code.)
        edges = cursor.execute('''
            SELECT COUNT(*) FROM graph_edges
            WHERE source_memory_id IN (SELECT id FROM memories WHERE profile = ?)
        ''', (active_profile,)).fetchone()[0]

        # Clusters that have members in active profile
        clusters = cursor.execute('''
            SELECT COUNT(DISTINCT cluster_id) FROM memories
            WHERE cluster_id IS NOT NULL AND profile = ?
        ''', (active_profile,)).fetchone()[0]

        # Cluster breakdown for active profile (including hierarchy)
        cluster_info = cursor.execute('''
            SELECT gc.name, gc.member_count, gc.avg_importance,
                   gc.summary, gc.parent_cluster_id, gc.depth
            FROM graph_clusters gc
            WHERE gc.id IN (
                SELECT DISTINCT cluster_id FROM memories
                WHERE cluster_id IS NOT NULL AND profile = ?
            )
            ORDER BY gc.depth ASC, gc.member_count DESC
            LIMIT 20
        ''', (active_profile,)).fetchall()

        # Deepest hierarchy level seen among the sampled clusters
        # (NULL depth treated as 0).
        max_depth = max((c[5] or 0 for c in cluster_info), default=0) if cluster_info else 0

        return {
            'profile': active_profile,
            'nodes': nodes,
            'edges': edges,
            'clusters': clusters,
            'max_depth': max_depth,
            'top_clusters': [
                {
                    'name': c[0],
                    'members': c[1],
                    # NULL (or 0) avg_importance reported as the 5.0 midpoint
                    'avg_importance': round(c[2], 1) if c[2] else 5.0,
                    'summary': c[3],
                    'parent_cluster_id': c[4],
                    'depth': c[5] or 0
                }
                for c in cluster_info
            ]
        }

    finally:
        conn.close()
@@ -0,0 +1,207 @@
1
+ #!/usr/bin/env python3
2
+ # SPDX-License-Identifier: MIT
3
+ # Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
4
+ """Hierarchical sub-clustering for the graph engine.
5
+
6
+ Implements recursive Leiden-based hierarchical clustering that decomposes
7
+ large communities into finer-grained thematic sub-clusters.
8
+ """
9
+ import sqlite3
10
+ from typing import List, Dict, Tuple
11
+
12
+ from graph.constants import logger, IGRAPH_AVAILABLE, MEMORY_DIR
13
+
14
+
15
def _get_active_profile() -> str:
    """Return the active profile name from config, or 'default'."""
    import json
    cfg_path = MEMORY_DIR / "profiles.json"
    try:
        with open(cfg_path, 'r') as fh:
            cfg = json.load(fh)
    except (json.JSONDecodeError, IOError):
        # Missing or unreadable config falls back to the default profile.
        return 'default'
    return cfg.get('active_profile', 'default')
+ return 'default'
27
+
28
+
29
def hierarchical_cluster(db_path, get_avg_importance_fn, generate_cluster_name_fn,
                         min_subcluster_size: int = 5, max_depth: int = 3) -> Dict[str, object]:
    """
    Run recursive Leiden clustering -- cluster the clusters.

    Large communities (>= min_subcluster_size * 2) are recursively sub-clustered
    to reveal finer-grained thematic structure.

    Args:
        db_path: Path to SQLite database
        get_avg_importance_fn: Callback to compute avg importance for memory IDs
        generate_cluster_name_fn: Callback to generate cluster name from memory IDs
        min_subcluster_size: Minimum members to attempt sub-clustering (default 5)
        max_depth: Maximum recursion depth (default 3)

    Returns:
        Dictionary with hierarchical clustering statistics; on failure the
        dict carries an 'error' key instead of raising.
    """
    if not IGRAPH_AVAILABLE:
        logger.warning("igraph/leidenalg not installed. Hierarchical clustering disabled. Install with: pip3 install python-igraph leidenalg")
        return {'subclusters_created': 0, 'depth_reached': 0}
    # NOTE: igraph/leidenalg were previously imported here as well, but all
    # Leiden work happens in _recursive_subcluster (which imports them
    # itself), so the redundant imports have been removed as dead code.

    conn = sqlite3.connect(db_path)
    cursor = conn.cursor()
    active_profile = _get_active_profile()

    try:
        # Top-level clusters for this profile that are large enough to
        # attempt decomposition (at least 2x the minimum sub-cluster size).
        cursor.execute('''
            SELECT cluster_id, COUNT(*) as cnt
            FROM memories
            WHERE cluster_id IS NOT NULL AND profile = ?
            GROUP BY cluster_id
            HAVING cnt >= ?
        ''', (active_profile, min_subcluster_size * 2))
        large_clusters = cursor.fetchall()

        if not large_clusters:
            logger.info("No clusters large enough for hierarchical decomposition")
            return {'subclusters_created': 0, 'depth_reached': 0}

        total_subclusters = 0
        max_depth_reached = 0

        for parent_cid, member_count in large_clusters:
            subs, depth = _recursive_subcluster(
                conn, cursor, parent_cid, active_profile,
                min_subcluster_size, max_depth, current_depth=1,
                get_avg_importance_fn=get_avg_importance_fn,
                generate_cluster_name_fn=generate_cluster_name_fn,
            )
            total_subclusters += subs
            max_depth_reached = max(max_depth_reached, depth)

        conn.commit()
        logger.info(f"Hierarchical clustering: {total_subclusters} sub-clusters, depth {max_depth_reached}")
        return {
            'subclusters_created': total_subclusters,
            'depth_reached': max_depth_reached,
            'parent_clusters_processed': len(large_clusters)
        }

    except Exception as e:
        # Boundary handler: log, undo partial writes, and report the failure
        # in the stats dict rather than propagating.
        logger.error(f"Hierarchical clustering failed: {e}")
        conn.rollback()
        return {'subclusters_created': 0, 'error': str(e)}
    finally:
        conn.close()
99
+
100
+
101
def _recursive_subcluster(conn, cursor, parent_cluster_id: int,
                          profile: str, min_size: int, max_depth: int,
                          current_depth: int,
                          get_avg_importance_fn, generate_cluster_name_fn) -> Tuple[int, int]:
    """Recursively sub-cluster a community using Leiden.

    Builds an igraph sub-graph from the edges between the members of
    ``parent_cluster_id`` (for ``profile``), runs Leiden community detection,
    inserts each multi-member community as a child row in ``graph_clusters``,
    repoints the member memories' ``cluster_id`` to the new sub-cluster, and
    recurses into each new sub-cluster until ``max_depth``.

    Args:
        conn: Open SQLite connection (commit is handled by the caller).
        cursor: Cursor on ``conn`` used for all reads and writes.
        parent_cluster_id: Cluster whose members are being decomposed.
        profile: Profile name used to filter member memories.
        min_size: Minimum sub-cluster size; clusters with fewer than
            ``min_size * 2`` members are not decomposed.
        max_depth: Maximum recursion depth.
        current_depth: Depth of this invocation (1 for the first level).
        get_avg_importance_fn: Callback ``(cursor, memory_ids) -> float``.
        generate_cluster_name_fn: Callback ``(cursor, memory_ids) -> str``.

    Returns:
        Tuple of (sub-clusters created in this subtree, deepest level at
        which a split actually happened; ``current_depth - 1`` when no
        split occurred here).
    """
    if not IGRAPH_AVAILABLE:
        return 0, current_depth - 1
    # Lazy imports: only pay the cost when igraph/leidenalg are installed.
    import igraph as ig
    import leidenalg

    if current_depth > max_depth:
        return 0, current_depth - 1

    # Get memory IDs in this cluster
    cursor.execute('''
        SELECT id FROM memories
        WHERE cluster_id = ? AND profile = ?
    ''', (parent_cluster_id, profile))
    member_ids = [row[0] for row in cursor.fetchall()]

    # Too small to split into at least two viable communities.
    if len(member_ids) < min_size * 2:
        return 0, current_depth - 1

    # Get edges between members of this cluster.
    # The IN-list placeholders are generated from len(member_ids); values are
    # still bound as parameters, so this is not an injection risk.
    placeholders = ','.join('?' * len(member_ids))
    edges = cursor.execute(f'''
        SELECT source_memory_id, target_memory_id, weight
        FROM graph_edges
        WHERE source_memory_id IN ({placeholders})
        AND target_memory_id IN ({placeholders})
    ''', member_ids + member_ids).fetchall()

    # Fewer than 2 internal edges cannot yield a meaningful partition.
    if len(edges) < 2:
        return 0, current_depth - 1

    # Build sub-graph: map memory IDs to contiguous vertex indices and back.
    id_to_vertex = {mid: idx for idx, mid in enumerate(member_ids)}
    vertex_to_id = {idx: mid for mid, idx in id_to_vertex.items()}

    g = ig.Graph()
    g.add_vertices(len(member_ids))
    edge_list, edge_weights = [], []
    for src, tgt, w in edges:
        if src in id_to_vertex and tgt in id_to_vertex:
            edge_list.append((id_to_vertex[src], id_to_vertex[tgt]))
            edge_weights.append(w)

    if not edge_list:
        return 0, current_depth - 1

    g.add_edges(edge_list)

    # Run weighted Leiden community detection on the sub-graph.
    # seed=42 keeps the partition deterministic across runs.
    partition = leidenalg.find_partition(
        g, leidenalg.ModularityVertexPartition,
        weights=edge_weights, n_iterations=100, seed=42
    )

    # Only proceed if Leiden found > 1 community (actual split)
    non_singleton = [c for c in partition if len(c) >= 2]
    if len(non_singleton) <= 1:
        return 0, current_depth - 1

    subclusters_created = 0
    deepest = current_depth

    # Parent depth determines the depth stored on the new child rows.
    cursor.execute('SELECT depth FROM graph_clusters WHERE id = ?', (parent_cluster_id,))
    parent_row = cursor.fetchone()
    parent_depth = parent_row[0] if parent_row else 0

    for community in non_singleton:
        # Translate vertex indices back to memory IDs.
        sub_member_ids = [vertex_to_id[v] for v in community]

        if len(sub_member_ids) < 2:
            continue

        avg_imp = get_avg_importance_fn(cursor, sub_member_ids)
        cluster_name = generate_cluster_name_fn(cursor, sub_member_ids)

        result = cursor.execute('''
            INSERT INTO graph_clusters (name, member_count, avg_importance, parent_cluster_id, depth)
            VALUES (?, ?, ?, ?, ?)
        ''', (cluster_name, len(sub_member_ids), avg_imp, parent_cluster_id, parent_depth + 1))

        sub_cluster_id = result.lastrowid

        # Update memories to point to sub-cluster
        cursor.executemany('''
            UPDATE memories SET cluster_id = ? WHERE id = ?
        ''', [(sub_cluster_id, mid) for mid in sub_member_ids])

        subclusters_created += 1
        logger.info(f"Sub-cluster {sub_cluster_id} under {parent_cluster_id}: "
                    f"'{cluster_name}' ({len(sub_member_ids)} members, depth {parent_depth + 1})")

        # Recurse into this sub-cluster if large enough
        child_subs, child_depth = _recursive_subcluster(
            conn, cursor, sub_cluster_id, profile,
            min_size, max_depth, current_depth + 1,
            get_avg_importance_fn=get_avg_importance_fn,
            generate_cluster_name_fn=generate_cluster_name_fn,
        )
        subclusters_created += child_subs
        deepest = max(deepest, child_depth)

    return subclusters_created, deepest
@@ -0,0 +1,99 @@
1
+ #!/usr/bin/env python3
2
+ # SPDX-License-Identifier: MIT
3
+ # Copyright (c) 2026 SuperLocalMemory (superlocalmemory.com)
4
+ """Database schema management for the graph engine.
5
+
6
+ Creates and maintains the graph_nodes, graph_edges, and graph_clusters
7
+ tables, including safe schema migrations for existing databases.
8
+ """
9
+ import sqlite3
10
+ from pathlib import Path
11
+
12
+ from graph.constants import logger
13
+
14
+
15
def ensure_graph_tables(db_path: Path):
    """Create graph tables if they don't exist, or recreate if schema is incomplete.

    Ensures graph_nodes, graph_edges, and graph_clusters exist with the
    expected columns, performs additive column migrations for older
    databases, and creates the supporting indexes.

    Args:
        db_path: Path to the SQLite database file.
    """
    conn = sqlite3.connect(db_path)
    # BUGFIX: the connection was previously leaked if any statement raised;
    # try/finally guarantees it is closed on every path.
    try:
        cursor = conn.cursor()

        # Check if existing tables have correct schema (not just id column)
        for table_name, required_cols in [
            ('graph_nodes', {'memory_id', 'entities'}),
            ('graph_edges', {'source_memory_id', 'target_memory_id', 'weight'}),
            ('graph_clusters', {'name', 'member_count'}),
        ]:
            cursor.execute(f"PRAGMA table_info({table_name})")
            existing_cols = {row[1] for row in cursor.fetchall()}
            if existing_cols and not required_cols.issubset(existing_cols):
                # Table exists but has incomplete schema -- drop and recreate
                logger.warning(f"Dropping incomplete {table_name} table (missing: {required_cols - existing_cols})")
                cursor.execute(f'DROP TABLE IF EXISTS {table_name}')

        # Graph nodes table
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS graph_nodes (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                memory_id INTEGER UNIQUE NOT NULL,
                entities TEXT,
                embedding_vector TEXT,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                FOREIGN KEY (memory_id) REFERENCES memories(id) ON DELETE CASCADE
            )
        ''')

        # Graph edges table
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS graph_edges (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                source_memory_id INTEGER NOT NULL,
                target_memory_id INTEGER NOT NULL,
                relationship_type TEXT,
                weight REAL DEFAULT 1.0,
                shared_entities TEXT,
                similarity_score REAL,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                FOREIGN KEY (source_memory_id) REFERENCES memories(id) ON DELETE CASCADE,
                FOREIGN KEY (target_memory_id) REFERENCES memories(id) ON DELETE CASCADE,
                UNIQUE(source_memory_id, target_memory_id)
            )
        ''')

        # Graph clusters table
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS graph_clusters (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT NOT NULL,
                description TEXT,
                summary TEXT,
                member_count INTEGER DEFAULT 0,
                avg_importance REAL,
                parent_cluster_id INTEGER,
                depth INTEGER DEFAULT 0,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                FOREIGN KEY (parent_cluster_id) REFERENCES graph_clusters(id) ON DELETE SET NULL
            )
        ''')

        # Safe column additions for existing databases; ALTER TABLE raises
        # OperationalError when the column already exists, which we ignore.
        for col, col_type in [('summary', 'TEXT'), ('parent_cluster_id', 'INTEGER'), ('depth', 'INTEGER DEFAULT 0')]:
            try:
                cursor.execute(f'ALTER TABLE graph_clusters ADD COLUMN {col} {col_type}')
            except sqlite3.OperationalError:
                pass

        # Add cluster_id to memories if not exists
        try:
            cursor.execute('ALTER TABLE memories ADD COLUMN cluster_id INTEGER')
        except sqlite3.OperationalError:
            pass  # Column already exists

        # Create indexes
        cursor.execute('CREATE INDEX IF NOT EXISTS idx_graph_source ON graph_edges(source_memory_id)')
        cursor.execute('CREATE INDEX IF NOT EXISTS idx_graph_target ON graph_edges(target_memory_id)')
        cursor.execute('CREATE INDEX IF NOT EXISTS idx_cluster_members ON memories(cluster_id)')

        conn.commit()
    finally:
        conn.close()
    logger.info("Graph tables initialized")